A class to provide minimal support for byte arrays. More...
#include <category.h>
Public Member Functions | |
virtual long | append (const void *, const ibis::bitvector &) |
Append the records in vals to the current working dataset. | |
virtual long | append (const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf) |
Append the content in df to the end of files in dt . | |
blob (const part *, FILE *) | |
Construct a blob by reading from a metadata file. | |
blob (const part *, const char *) | |
Construct a blob from a name. | |
blob (const ibis::column &) | |
Copy an existing column object of type ibis::BLOB. | |
virtual void | computeMinMax () |
Compute the actual min/max values. | |
virtual void | computeMinMax (const char *) |
Compute the actual min/max values. | |
virtual void | computeMinMax (const char *, double &, double &) const |
Compute the actual min/max of the data in directory dir . | |
long | countRawBytes (const bitvector &) const |
Count the number of bytes in the blobs selected by the mask. | |
virtual double | getActualMax () const |
Compute the actual maximum value by reading the data or examining the index. | |
virtual double | getActualMin () const |
A group of functions to compute some basic statistics for the attribute values. | |
int | getBlob (uint32_t ind, unsigned char *&buf, uint32_t &size) const |
Extract a single binary object. | |
virtual double | getSum () const |
Compute the sum of all values by reading the data. | |
virtual int | getValuesArray (void *) const |
Copy all rows of the column into an array_t object. | |
virtual long | indexSize () const |
Compute the index size (in bytes). | |
virtual void | loadIndex (const char *, int) const throw () |
Load the index associated with the column. | |
virtual void | print (std::ostream &) const |
Print information about this column. | |
virtual array_t< signed char > * | selectBytes (const bitvector &) const |
Retrieve selected 1-byte integer values. | |
virtual array_t< double > * | selectDoubles (const bitvector &) const |
Put the selected values into an array as doubles. | |
virtual array_t< float > * | selectFloats (const bitvector &) const |
Put selected values of a float column into an array. | |
virtual array_t< int32_t > * | selectInts (const bitvector &) const |
Return selected rows of the column in an array_t object. | |
virtual array_t< int64_t > * | selectLongs (const bitvector &) const |
Return selected rows of the column in an array_t object. | |
int | selectRawBytes (const bitvector &, array_t< unsigned char > &, array_t< uint32_t > &) const |
Extract the blobs from the rows marked 1 in the mask. | |
virtual array_t< int16_t > * | selectShorts (const bitvector &) const |
Return selected rows of the column in an array_t object. | |
virtual std::vector < std::string > * | selectStrings (const bitvector &) const |
Return the selected rows as strings. | |
virtual array_t< unsigned char > * | selectUBytes (const bitvector &) const |
Return selected rows of the column in an array_t object. | |
virtual array_t< uint32_t > * | selectUInts (const bitvector &) const |
Return selected rows of the column in an array_t object. | |
virtual array_t< uint64_t > * | selectULongs (const bitvector &) const |
Return selected rows of the column in an array_t object. | |
virtual array_t< uint16_t > * | selectUShorts (const bitvector &) const |
Return selected rows of the column in an array_t object. | |
virtual long | stringSearch (const char *, ibis::bitvector &) const |
virtual long | stringSearch (const std::vector< std::string > &, ibis::bitvector &) const |
virtual long | stringSearch (const char *) const |
virtual long | stringSearch (const std::vector< std::string > &) const |
virtual void | write (FILE *) const |
Write metadata about the column. | |
virtual long | writeData (const char *dir, uint32_t nold, uint32_t nnew, ibis::bitvector &mask, const void *va1, void *va2) |
Write the content of BLOBs packed into two arrays va1 and va2. | |
Protected Member Functions | |
int | extractAll (const bitvector &, array_t< unsigned char > &, array_t< uint32_t > &, const array_t< unsigned char > &, const array_t< int64_t > &) const |
Extract entries marked 1 in mask from raw to buffer. | |
int | extractAll (const bitvector &, array_t< unsigned char > &, array_t< uint32_t > &, const char *, const array_t< int64_t > &) const |
Retrieve all binary objects marked 1 in the mask. | |
int | extractSome (const bitvector &, array_t< unsigned char > &, array_t< uint32_t > &, const array_t< unsigned char > &, const array_t< int64_t > &, const uint32_t) const |
Extract entries marked 1 in mask from raw to buffer subject to a limit on the buffer size. | |
int | extractSome (const bitvector &, array_t< unsigned char > &, array_t< uint32_t > &, const char *, const array_t< int64_t > &, const uint32_t) const |
Retrieve binary objects marked 1 in the mask subject to the specified limit on buffer size. | |
int | extractSome (const bitvector &, array_t< unsigned char > &, array_t< uint32_t > &, const char *, const char *, const uint32_t) const |
Retrieve binary objects marked 1 in the mask subject to the specified limit on buffer size. | |
int | readBlob (uint32_t ind, unsigned char *&buf, uint32_t &size, const array_t< int64_t > &starts, const char *datafile) const |
Read a single binary object. | |
int | readBlob (uint32_t ind, unsigned char *&buf, uint32_t &size, const char *spfile, const char *datafile) const |
Read a single binary object. |
A class to provide minimal support for byte arrays.
Since a byte array may contain any arbitrary byte values, we can not rely on the null terminator any more, nor use std::string as the container for each array. It is intended to store opaque data that can not be searched.
virtual long ibis::blob::append | ( | const void * | vals, |
const ibis::bitvector & | msk | ||
) | [inline, virtual] |
Append the records in vals to the current working dataset.
The 'void*' in this function follows the convention of the function getValuesArray (not writeData), i.e., for the ten fixed-size elementary data types, it is array_t<type>* and for string-valued columns it is std::vector<std::string>*.
Return the number of entries actually written to disk or a negative number to indicate error conditions.
Reimplemented from ibis::column.
long ibis::blob::append | ( | const char * | dt, |
const char * | df, | ||
const uint32_t | nold, | ||
const uint32_t | nnew, | ||
uint32_t | nbuf, | ||
char * | buf | ||
) | [virtual] |
Append the content in df to the end of files in dt.
It returns the number of rows appended or a negative number to indicate error conditions.
Reimplemented from ibis::column.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::cnt(), ibis::util::guardBase::dismiss(), FASTBIT_DIRSEP, ibis::gVerbose, ibis::bitvector::read(), ibis::fileManager::buffer< T >::size(), ibis::bitvector::size(), UnixOpen, and ibis::bitvector::write().
virtual void ibis::blob::computeMinMax | ( | ) | [inline, virtual] |
Compute the actual min/max values.
It actually goes through all the values. This function reads the data in the active data directory and modifies the member variables to record the actual min/max.
Reimplemented from ibis::column.
virtual void ibis::blob::computeMinMax | ( | const char * | dir | ) | [inline, virtual] |
Compute the actual min/max values.
It actually goes through all the values. This function reads the data in the given directory and modifies the member variables to record the actual min/max.
Reimplemented from ibis::column.
virtual void ibis::blob::computeMinMax | ( | const char * | dir, |
double & | min, | ||
double & | max | ||
) | const [inline, virtual] |
Compute the actual min/max of the data in directory dir.
Report the actual min/max found back through output arguments min and max. This version does not modify the min/max recorded in this column object.
Reimplemented from ibis::column.
long ibis::blob::countRawBytes | ( | const bitvector & | mask | ) | const |
Count the number of bytes in the blobs selected by the mask.
This function can be used to compute the memory requirement before actually retrieving the blobs.
It returns a negative number in case of error.
References ibis::array_t< T >::clear(), ibis::bitvector::cnt(), FASTBIT_DIRSEP, ibis::fileManager::getFile(), ibis::gVerbose, ibis::fileManager::instance(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), ibis::bitvector::size(), and UnixOpen.
int ibis::blob::extractAll | ( | const bitvector & | mask, |
ibis::array_t< unsigned char > & | buffer, | ||
ibis::array_t< uint32_t > & | positions, | ||
const array_t< unsigned char > & | raw, | ||
const array_t< int64_t > & | starts | ||
) | const [protected] |
Extract entries marked 1 in mask from raw to buffer.
Fill positions to indicate the start and end positions of each raw binary object. The caller has determined that there is a sufficient amount of space to perform this operation and has reserved enough space for buffer. Even though that may not be a guarantee, we proceed as if it is.
References ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
int ibis::blob::extractAll | ( | const bitvector & | mask, |
ibis::array_t< unsigned char > & | buffer, | ||
ibis::array_t< uint32_t > & | positions, | ||
const char * | rawfile, | ||
const array_t< int64_t > & | starts | ||
) | const [protected] |
Retrieve all binary objects marked 1 in the mask.
The caller has reserved enough space for buffer and positions. This function simply needs to open rawfile and read the content into buffer. It also assigns values in positions to mark the boundaries of the binary objects.
References ibis::gVerbose, ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::array_t< T >::resize(), ibis::array_t< T >::size(), and UnixOpen.
int ibis::blob::extractSome | ( | const bitvector & | mask, |
ibis::array_t< unsigned char > & | buffer, | ||
ibis::array_t< uint32_t > & | positions, | ||
const array_t< unsigned char > & | raw, | ||
const array_t< int64_t > & | starts, | ||
const uint32_t | limit | ||
) | const [protected] |
Extract entries marked 1 in mask from raw to buffer subject to a limit on the buffer size.
Fill positions to indicate the start and end positions of each raw binary object. The caller has determined that there is a sufficient amount of space to perform this operation and has reserved enough space for buffer. Even though that may not be a guarantee, we proceed as if it is.
References ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
int ibis::blob::extractSome | ( | const bitvector & | mask, |
ibis::array_t< unsigned char > & | buffer, | ||
ibis::array_t< uint32_t > & | positions, | ||
const char * | rawfile, | ||
const array_t< int64_t > & | starts, | ||
const uint32_t | limit | ||
) | const [protected] |
Retrieve binary objects marked 1 in the mask subject to the specified limit on buffer size.
The caller has reserved enough space for buffer and positions. This function simply needs to open rawfile and read the content into buffer. It also assigns values in positions to mark the boundaries of the binary objects.
References ibis::gVerbose, ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::array_t< T >::resize(), ibis::array_t< T >::size(), and UnixOpen.
int ibis::blob::extractSome | ( | const bitvector & | mask, |
ibis::array_t< unsigned char > & | buffer, | ||
ibis::array_t< uint32_t > & | positions, | ||
const char * | rawfile, | ||
const char * | spfile, | ||
const uint32_t | limit | ||
) | const [protected] |
Retrieve binary objects marked 1 in the mask subject to the specified limit on buffer size.
The caller has reserved enough space for buffer and positions. This function needs to open both rawfile and spfile. It reads starting positions in spfile to determine where to read the content from rawfile into buffer. It also assigns values in positions to mark the boundaries of the binary objects in buffer.
References ibis::gVerbose, ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::array_t< T >::resize(), ibis::array_t< T >::size(), and UnixOpen.
virtual double ibis::blob::getActualMax | ( | ) | const [inline, virtual] |
Compute the actual maximum value by reading the data or examining the index.
It returns -DBL_MAX in case of error.
Reimplemented from ibis::column.
virtual double ibis::blob::getActualMin | ( | ) | const [inline, virtual] |
A group of functions to compute some basic statistics for the attribute values.
Compute the actual minimum value by reading the data or examining the index. It returns DBL_MAX in case of error.
Reimplemented from ibis::column.
int ibis::blob::getBlob | ( | uint32_t | ind, |
unsigned char *& | buf, | ||
uint32_t & | size | ||
) | const |
Extract a single binary object.
This function is only defined for ibis::blob, therefore the caller must explicitly cast a column* to blob*. It needs to access two files, a file for start positions and another for raw binary data. Thus it has a large startup cost associated with opening the files and seeking to the right places on disk. If there is enough memory available, it will attempt to make these files available for later invocations of this function by making their content available through array_t objects. If it fails to create the desired array_t objects, it will fall back to using explicit I/O calls.
References ibis::array_t< T >::clear(), ibis::util::copy(), FASTBIT_DIRSEP, ibis::fileManager::getFile(), ibis::fileManager::instance(), and ibis::array_t< T >::size().
Referenced by ibis::mensa::cursor::dumpIJ().
virtual int ibis::blob::getValuesArray | ( | void * | vals | ) | const [inline, virtual] |
Copy all rows of the column into an array_t object.
The incoming argument must be array_t<Type>*. This function will explicitly cast vals into one of the ten supported numerical data types.
It returns 0 to indicate success, and a negative number to indicate error.
Reimplemented from ibis::column.
virtual long ibis::blob::indexSize | ( | ) | const [inline, virtual] |
Compute the index size (in bytes).
Return a negative value if the index file does not exist.
Reimplemented from ibis::column.
virtual void ibis::blob::loadIndex | ( | const char * | iopt, |
int | ropt | ||
) | const throw () [inline, virtual] |
Load the index associated with the column.
iopt | This option is passed to ibis::index::create to be used if a new index is to be created. |
ropt | This option is passed to ibis::index::create to control the reading operations for reconstituting the index object from an index file. |
Reimplemented from ibis::column.
int ibis::blob::readBlob | ( | uint32_t | ind, |
unsigned char *& | buf, | ||
uint32_t & | size, | ||
const array_t< int64_t > & | starts, | ||
const char * | datafile | ||
) | const [protected] |
Read a single binary object.
The starting position is available in an array_t object. It only needs to explicitly open the data file to read.
References ibis::gVerbose, and UnixOpen.
int ibis::blob::readBlob | ( | uint32_t | ind, |
unsigned char *& | buf, | ||
uint32_t & | size, | ||
const char * | spfile, | ||
const char * | datafile | ||
) | const [protected] |
Read a single binary object.
This function opens both starting position file and data file explicitly.
References ibis::gVerbose, and UnixOpen.
virtual array_t<signed char>* ibis::blob::selectBytes | ( | const bitvector & | mask | ) | const [inline, virtual] |
Retrieve selected 1-byte integer values.
Note that unsigned integers are simply treated as signed integers.
Reimplemented from ibis::column.
virtual array_t<double>* ibis::blob::selectDoubles | ( | const bitvector & | mask | ) | const [inline, virtual] |
Put the selected values into an array as doubles.
Reimplemented from ibis::column.
virtual array_t<float>* ibis::blob::selectFloats | ( | const bitvector & | mask | ) | const [inline, virtual] |
Put selected values of a float column into an array.
Reimplemented from ibis::column.
virtual array_t<int32_t>* ibis::blob::selectInts | ( | const bitvector & | mask | ) | const [inline, virtual] |
Return selected rows of the column in an array_t object.
Reimplemented from ibis::column.
virtual array_t<int64_t>* ibis::blob::selectLongs | ( | const bitvector & | mask | ) | const [inline, virtual] |
Return selected rows of the column in an array_t object.
Can be called on all integral types. Note that 64-bit unsigned integers are simply treated as signed integers. This may cause the values to be interpreted incorrectly. Shorter versions of unsigned integers are treated correctly as positive values.
Reimplemented from ibis::column.
int ibis::blob::selectRawBytes | ( | const bitvector & | mask, |
ibis::array_t< unsigned char > & | buffer, | ||
ibis::array_t< uint32_t > & | positions | ||
) | const |
Extract the blobs from the rows marked 1 in the mask.
Upon successful completion, buffer will contain all the raw bytes packed together, positions will contain the starting positions of all blobs, and the return value will be the number of blobs retrieved. The positions are intentionally chosen to be 32-bit integers, so that it would not be possible to retrieve very large objects this way. The number of blobs retrieved may be less than the number of rows marked 1 in mask if doing so would cause buffer to be more than 4GB in size. On a typical machine, this function will attempt to use no more than half of the free memory available to ibis::fileManager upon entering this function, which usually would be much less than 4GB. To determine how much memory would be needed by the buffer to fully retrieve all blobs marked 1, use function ibis::blob::countRawBytes.
A negative value will be returned in case of error.
References ibis::fileManager::bytesFree(), ibis::array_t< T >::capacity(), ibis::array_t< T >::clear(), ibis::bitvector::cnt(), FASTBIT_DIRSEP, ibis::fileManager::getFile(), ibis::gVerbose, ibis::fileManager::instance(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::reserve(), ibis::array_t< T >::size(), and ibis::bitvector::size().
virtual array_t<int16_t>* ibis::blob::selectShorts | ( | const bitvector & | mask | ) | const [inline, virtual] |
Return selected rows of the column in an array_t object.
Can convert all integers 2-byte or less in length. Note that unsigned integers are simply treated as signed integers. Shorter types of unsigned integers are treated correctly as positive values.
Reimplemented from ibis::column.
virtual std::vector<std::string>* ibis::blob::selectStrings | ( | const bitvector & | mask | ) | const [inline, virtual] |
Return the selected rows as strings.
This version returns a std::vector<std::string>, which provides wholly self-contained string values. It may take more memory than necessary, and the memory usage of std::string is not tracked by FastBit. The advantage is that it should work regardless of the actual data type of the column.
Reimplemented from ibis::column.
virtual array_t<unsigned char>* ibis::blob::selectUBytes | ( | const bitvector & | mask | ) | const [inline, virtual] |
Return selected rows of the column in an array_t object.
Reimplemented from ibis::column.
virtual array_t<uint32_t>* ibis::blob::selectUInts | ( | const bitvector & | mask | ) | const [inline, virtual] |
Return selected rows of the column in an array_t object.
Can be called on columns of unsigned integral types, UINT, CATEGORY, USHORT, and UBYTE.
Reimplemented from ibis::column.
virtual array_t<uint64_t>* ibis::blob::selectULongs | ( | const bitvector & | mask | ) | const [inline, virtual] |
Return selected rows of the column in an array_t object.
Can be called on all unsigned integral types.
Reimplemented from ibis::column.
virtual array_t<uint16_t>* ibis::blob::selectUShorts | ( | const bitvector & | mask | ) | const [inline, virtual] |
Return selected rows of the column in an array_t object.
Reimplemented from ibis::column.
long ibis::blob::writeData | ( | const char * | dir, |
uint32_t | nold, | ||
uint32_t | nnew, | ||
ibis::bitvector & | mask, | ||
const void * | va1, | ||
void * | va2 | ||
) | [virtual] |
Write the content of BLOBs packed into two arrays va1 and va2.
All BLOBs are packed together one after another in va1 and their starting positions are stored in va2. The last element of va2 is the total number of bytes in va1. The array va2 is expected to hold (nnew+1) 64-bit integers.
Reimplemented from ibis::column.
References ibis::bitvector::adjustSize(), ibis::util::guardBase::dismiss(), FASTBIT_DIRSEP, ibis::gVerbose, and UnixOpen.
![]() |