A specialized low-cardinality text field. More...
#include <category.h>
Public Member Functions | |
virtual long | append (const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf) |
Append the content in df to the directory dt. | |
virtual long | append (const void *, const ibis::bitvector &) |
Append the records in vals to the current working dataset. | |
category (const ibis::column &col) | |
Copy constructor. Copy from a column object with CATEGORY type. | |
category (const part *tbl, const char *name, const char *value, const char *dir=0, uint32_t nevt=0) | |
Construct a column that has only one possible value. | |
category (const part *tbl, FILE *file) | |
category (const part *tbl, const char *name) | |
Construct a category object from a name. | |
virtual double | estimateCost (const ibis::qLike &cmp) const |
Estimate the cost of evaluating a Like expression. | |
virtual double | estimateCost (const ibis::qString &cmp) const |
Estimate the cost of evaluating a string lookup. | |
virtual double | estimateCost (const ibis::qMultiString &cmp) const |
Estimate the cost of looking up a group of strings. | |
ibis::relic * | fillIndex (const char *dir=0) const |
Build an ibis::relic index using the existing primary data. | |
virtual const char * | getKey (uint32_t i) const |
Return the ith value in the dictionary. | |
virtual uint32_t | getNumKeys () const |
Return the number of key values. | |
virtual void | getString (uint32_t i, std::string &val) const |
Retrieve the string value represented by the integer i. | |
virtual const char * | isKey (const char *str) const |
Is the given string one of the keys in the dictionary? Return a null pointer if not. | |
virtual long | patternSearch (const char *pat) const |
Estimate the number of hits for a string pattern. | |
virtual long | patternSearch (const char *pat, ibis::bitvector &hits) const |
Find the records with string values that match the given pattern. | |
virtual void | print (std::ostream &out) const |
Print header info. | |
virtual std::vector < std::string > * | selectStrings (const bitvector &mask) const |
Retrieve the string values from the rows marked 1 in mask. | |
virtual array_t< uint32_t > * | selectUInts (const bitvector &mask) const |
Return the integers corresponding to the selected strings. | |
virtual long | stringSearch (const std::vector< std::string > &vals, ibis::bitvector &hits) const |
Match a list of strings. | |
virtual long | stringSearch (const std::vector< std::string > &vals) const |
Estimate the total number of matches for a list of strings. | |
virtual long | stringSearch (const char *str, ibis::bitvector &hits) const |
Match a particular string. | |
virtual long | stringSearch (const char *str) const |
Estimate the number of matches. | |
virtual void | write (FILE *file) const |
Write the current content to the metadata file for the data partition. | |
virtual | ~category () |
Destructor. |
A specialized low-cardinality text field.
Such values are also known as control values or categorical values. This implementation directly converts string values into bitvectors (as ibis::relic), and does not store an integer version of the strings.
ibis::category::~category | ( | ) | [virtual] |
Destructor.
It also writes the dictionary to a file if the dictionary is not empty.
References ibis::util::getFileSize().
ibis::category::category (const part *tbl, const char *name, const char *value, const char *dir = 0, uint32_t nevt = 0)
Construct a column that has only one possible value.
Also build the corresponding index.
References ibis::part::currentDataDir(), ibis::dictionary::insert(), ibis::column::lower, ibis::column::name(), ibis::part::nRows(), ibis::column::upper, ibis::relic::write(), and ibis::dictionary::write().
virtual long ibis::category::append (const void *vals, const ibis::bitvector &msk) [inline, virtual]
Append the records in vals to the current working dataset.
The 'void*' in this function follows the convention of the function getValuesArray (not writeData), i.e., for the ten fixed-size elementary data types, it is array_t<type>* and for string-valued columns it is std::vector<std::string>*.
Return the number of entries actually written to disk or a negative number to indicate error conditions.
Reimplemented from ibis::text.
ibis::relic * ibis::category::fillIndex | ( | const char * | dir = 0 | ) | const |
Build an ibis::relic index using the existing primary data.
If the dictionary exists and the size is one, it builds a dummy index. Otherwise, it reads the primary data file to update the dictionary and complete a new ibis::relic index.
References ibis::fileManager::buffer< T >::address(), ibis::relic::append(), ibis::gVerbose, ibis::array_t< T >::resize(), ibis::array_t< T >::size(), ibis::fileManager::buffer< T >::size(), ibis::array_t< T >::swap(), UnixOpen, ibis::relic::write(), and ibis::array_t< T >::write().
const char * ibis::category::isKey | ( | const char * | str | ) | const [virtual] |
Is the given string one of the keys in the dictionary? Return a null pointer if not.
std::vector< std::string > * ibis::category::selectStrings | ( | const bitvector & | mask | ) | const [virtual] |
Retrieve the string values from the rows marked 1 in mask.
Reimplemented from ibis::text.
References ibis::bitvector::cnt(), and ibis::text::selectStrings().
![]() |