A specialized low-cardinality text field. More...
#include <category.h>
Public Member Functions | |
virtual long | append (const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf) |
Append the content in df to the directory dt. | |
virtual long | append (const void *, const ibis::bitvector &) |
Append the records in vals to the current working dataset. | |
category (const part *tbl, FILE *file) | |
category (const part *tbl, const char *name) | |
Construct a category object from a name. | |
category (const ibis::column &col) | |
Copy constructor. Copy from a column object with CATEGORY type. | |
category (const part *tbl, const char *name, const char *value, const char *dir=0, uint32_t nevt=0) | |
Construct a categorical column that has only one possible value. | |
virtual double | estimateCost (const ibis::qLike &cmp) const |
Estimate the cost of evaluating a Like expression. | |
virtual double | estimateCost (const ibis::qString &cmp) const |
Estimate the cost of evaluating a string lookup. | |
virtual double | estimateCost (const ibis::qMultiString &cmp) const |
Estimate the cost of looking up a group of strings. | |
ibis::direkte * | fillIndex (const char *dir=0) const |
Build an ibis::direkte index using the existing primary data. | |
const ibis::dictionary * | getDictionary () const |
Return a pointer to the dictionary used for the categorical values. | |
virtual const char * | getKey (uint32_t i) const |
Return the ith value in the dictionary. | |
virtual uint32_t | getNumKeys () const |
Return the number of key values. | |
virtual void | getString (uint32_t i, std::string &val) const |
Return the string at the ith row. | |
virtual const char * | isKey (const char *str) const |
Is the given string one of the keys in the dictionary? Return a null pointer if not. | |
virtual void | loadIndex (const char *=0, int=0) const throw () |
This function makes sure the index is ready. | |
virtual long | patternSearch (const char *pat) const |
Estimate the number of hits for a string pattern. | |
virtual long | patternSearch (const char *pat, ibis::bitvector &hits) const |
Find the records with string values that match the given pattern. | |
virtual void | print (std::ostream &out) const |
Print header info. | |
virtual std::vector < std::string > * | selectStrings (const bitvector &mask) const |
Retrieve the string values from the rows marked 1 in mask. | |
virtual array_t< uint32_t > * | selectUInts (const bitvector &mask) const |
Return the integers corresponding to the selected strings. | |
int | setDictionary (const dictionary &) |
Replace the dictionary with the incoming one. | |
virtual long | stringSearch (const char *str, ibis::bitvector &hits) const |
Match a particular string. | |
virtual long | stringSearch (const std::vector< std::string > &vals, ibis::bitvector &hits) const |
Match a list of strings. | |
virtual long | stringSearch (const char *str) const |
Estimate the number of matches. | |
virtual long | stringSearch (const std::vector< std::string > &vals) const |
Estimate the total number of matches for a list of strings. | |
virtual void | write (FILE *file) const |
Write the current content to the metadata file for the data partition. | |
virtual | ~category () |
Destructor. |
A specialized low-cardinality text field.
It is also known as control values or categorical values. This implementation directly converts string values into bitvectors (as ibis::direkte), and does not store an integer version of the string.
ibis::category::~category | ( | ) | [virtual] |
Destructor.
It also writes the dictionary to a file if the dictionary file is currently empty but the in-memory dictionary is not.
References ibis::util::getFileSize().
ibis::category::category | ( | const part * | tbl, |
const char * | name, | ||
const char * | value, | ||
const char * | dir = 0 , |
||
uint32_t | nevt = 0 |
||
) |
Construct a categorical column that has only one possible value.
Also builds the corresponding index.
References ibis::part::currentDataDir(), FASTBIT_DIRSEP, ibis::dictionary::insert(), ibis::column::lower, ibis::column::name(), ibis::part::nRows(), ibis::column::upper, ibis::dictionary::write(), and ibis::direkte::write().
virtual long ibis::category::append | ( | const void * | vals, |
const ibis::bitvector & | msk | ||
) | [inline, virtual] |
Append the records in vals to the current working dataset.
The 'void*' in this function follows the convention of the function getValuesArray (not writeData), i.e., for the ten fixed-size elementary data types, it is array_t<type>* and for string-valued columns it is std::vector<std::string>*.
Return the number of entries actually written to disk or a negative number to indicate error conditions.
Reimplemented from ibis::text.
ibis::direkte * ibis::category::fillIndex | ( | const char * | dir = 0 | ) | const |
Build an ibis::direkte index using the existing primary data.
If the dictionary exists and the size is one, it builds a dummy index. Otherwise, it reads the primary data file to update the dictionary and complete a new ibis::direkte index.
References ibis::fileManager::buffer< T >::address(), ibis::direkte::append(), FASTBIT_DIRSEP, ibis::gVerbose, ibis::array_t< T >::resize(), ibis::array_t< T >::size(), ibis::fileManager::buffer< T >::size(), ibis::array_t< T >::sort(), ibis::array_t< T >::swap(), UnixOpen, ibis::direkte::write(), and ibis::array_t< T >::write().
void ibis::category::getString | ( | uint32_t | i, |
std::string & | str | ||
) | const [virtual] |
Return the string at the ith row.
If the .int file is present, it will be used; otherwise, this function uses the raw data file.
Reimplemented from ibis::text.
References ibis::fileManager::getFile(), ibis::fileManager::instance(), ibis::util::readString(), and ibis::array_t< T >::size().
Referenced by ibis::bord::column::restoreCategoriesAsStrings().
const char * ibis::category::isKey | ( | const char * | str | ) | const [virtual] |
Is the given string one of the keys in the dictionary? Return a null pointer if not.
void ibis::category::loadIndex | ( | const char * | = 0 , |
int | = 0 |
||
) | const throw () [virtual] |
This function makes sure the index is ready.
It can also be called to initialize all the internal data members because of the lazy initialization in the constructor of this class.
Reimplemented from ibis::text.
Referenced by ibis::mensa::combineCategories().
std::vector< std::string > * ibis::category::selectStrings | ( | const bitvector & | mask | ) | const [virtual] |
Retrieve the string values from the rows marked 1 in mask.
Reimplemented from ibis::text.
References ibis::bitvector::cnt(), ibis::util::getFileSize(), ibis::direkte::keys(), ibis::relic::keys(), ibis::util::log2(), and ibis::text::selectStrings().
int ibis::category::setDictionary | ( | const dictionary & | sup | ) |
Replace the dictionary with the incoming one.
The incoming dictionary is expected to contain more words than the existing one. If a larger dictionary is provided, this function will replace the internally kept dictionary and update the index associated with the column.
References ibis::fileManager::buffer< T >::address(), ibis::array_t< T >::clear(), FASTBIT_DIRSEP, ibis::gVerbose, ibis::dictionary::morph(), ibis::array_t< T >::read(), ibis::direkte::remapKeys(), ibis::array_t< T >::reserve(), ibis::dictionary::size(), ibis::array_t< T >::size(), ibis::fileManager::buffer< T >::size(), ibis::array_t< T >::swap(), UnixOpen, and ibis::array_t< T >::write().
Referenced by ibis::mensa::combineCategories().
![]() |