A roster is a list of values in ascending order plus their original positions. More...
#include <iroster.h>
Public Member Functions | |
const array_t< uint32_t > & | array () const |
const ibis::column * | getColumn () const |
template<typename T > | |
int | locate (const std::vector< T > &vals, ibis::bitvector &positions) const |
Locate the values and set their positions in the bitvector. | |
template<typename T > | |
int | locate (const std::vector< T > &vals, std::vector< uint32_t > &positions) const |
Locate the values and set their positions in the bitvector. | |
template<> | |
int | locate (const ibis::array_t< double > &vals, ibis::bitvector &positions) const |
This explicit specialization of the locate function does not require column type to match the incoming data type. | |
template<typename T > | |
int | locate (const ibis::array_t< T > &vals, std::vector< uint32_t > &positions) const |
Locate the values and set their positions in the bitvector. | |
template<> | |
int | locate (const std::vector< double > &vals, ibis::bitvector &positions) const |
This explicit specialization of the locate function does not require column type to match the incoming data type. | |
template<typename T > | |
int | locate (const ibis::array_t< T > &vals, ibis::bitvector &positions) const |
Locate the values and set their positions in the bitvector. | |
const char * | name () const |
uint32_t | operator[] (uint32_t i) const |
Return the row number of the ith smallest value. | |
void | print (std::ostream &out) const |
Output minimal information about the roster list. | |
int | read (ibis::fileManager::storage *st) |
int | read (const char *idxfile) |
roster (const ibis::column *c, const char *dir=0) | |
Construct a roster list. | |
roster (const ibis::column *c, ibis::fileManager::storage *st, uint32_t offset=8) | |
Reconstruct from content of a fileManager::storage . | |
uint32_t | size () const |
int | write (const char *dt) const |
Write two files: .ind for the indices and .srt for the sorted values. | |
int | writeSorted (const char *dt) const |
Write the sorted version of the attribute values to a .srt file. | |
Static Public Member Functions | |
template<class T > | |
static long | mergeBlock2 (const char *dsrc, const char *dout, const uint32_t segment, array_t< T > &buf1, array_t< T > &buf2, array_t< T > &buf3) |
A two-way merge algorithm. | |
Protected Member Functions | |
template<typename T > | |
int | icSearch (const std::vector< T > &vals, std::vector< uint32_t > &pos) const |
In-core searching function. | |
template<typename T > | |
int | icSearch (const ibis::array_t< T > &vals, std::vector< uint32_t > &pos) const |
In-core searching function. | |
uint32_t | locate (const double &val) const |
template<typename inT , typename myT > | |
int | locate2 (const std::vector< inT > &, std::vector< uint32_t > &) const |
Cast the incoming values into the type of the column (myT) and then locate the positions of the records that match one of the values. | |
template<typename inT , typename myT > | |
int | locate2 (const ibis::array_t< inT > &, std::vector< uint32_t > &) const |
Cast the incoming values into the type of the column (myT) and then locate the positions of the records that match one of the values. | |
template<typename T > | |
int | oocSearch (const ibis::array_t< T > &vals, std::vector< uint32_t > &pos) const |
Out-of-core search function. | |
template<typename T > | |
int | oocSearch (const std::vector< T > &vals, std::vector< uint32_t > &pos) const |
Out-of-core search function. |
A roster is a list of values in ascending order plus their original positions.
It can use an external sort if the data and indices cannot fit into memory. The indices will be written to a file with extension .ind and the sorted values to a file with extension .srt. If the indices cannot be loaded into memory as a whole, the .ind file will be opened for future read operations.
ibis::roster::roster | ( | const ibis::column * | c, |
const char * | dir = 0 |
||
) |
Construct a roster list.
It attempts to read a roster list from the specified directory. If a roster list cannot be read and dir is not nil, this function will attempt to sort the existing data records to build a roster list.
References ibis::fileManager::bytesFree(), ibis::part::currentDataDir(), ibis::column::elementSize(), ibis::gVerbose, ibis::part::nRows(), print(), roster(), and ibis::array_t< T >::size().
Referenced by roster().
ibis::roster::roster | ( | const ibis::column * | c, |
ibis::fileManager::storage * | st, | ||
uint32_t | offset = 8 |
||
) |
Reconstruct from content of a fileManager::storage.
The content of the file (following the 8-byte header) is the index array ind.
References ibis::gVerbose, print(), and roster().
int ibis::roster::icSearch | ( | const std::vector< T > & | vals, |
std::vector< uint32_t > & | pos | ||
) | const [protected] |
In-core searching function.
Attempts to read .ind and .srt into memory. Returns a negative value if it fails to read the necessary data files into memory.
References ibis::fileManager::getFile(), ibis::gVerbose, and ibis::fileManager::instance().
int ibis::roster::icSearch | ( | const ibis::array_t< T > & | vals, |
std::vector< uint32_t > & | pos | ||
) | const [protected] |
In-core searching function.
Attempts to read .ind and .srt into memory. Returns a negative value if it fails to read the necessary data files into memory.
References ibis::fileManager::getFile(), ibis::gVerbose, ibis::fileManager::instance(), and ibis::array_t< T >::size().
int ibis::roster::locate | ( | const ibis::array_t< T > & | vals, |
ibis::bitvector & | positions | ||
) | const |
Locate the values and set their positions in the bitvector.
Return the positions of the matching entries as a bitvector. Return a negative value for error, zero or a positive value in case of success. The input values are assumed to be sorted in ascending order.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::decompress(), ibis::gVerbose, ibis::bitvector::set(), ibis::bitvector::setBit(), and ibis::array_t< T >::size().
Referenced by ibis::column::evaluateRange(), and ibis::keywords::readTermDocFile().
int ibis::roster::locate | ( | const ibis::array_t< double > & | vals, |
ibis::bitvector & | positions | ||
) | const |
This explicit specialization of the locate function does not require column type to match the incoming data type.
Instead, it casts the incoming data type explicitly before performing any comparisons.
References ibis::bitvector::adjustSize(), ibis::BYTE, ibis::bitvector::decompress(), ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::LONG, ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::SHORT, ibis::array_t< T >::size(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
int ibis::roster::locate | ( | const ibis::array_t< T > & | vals, |
std::vector< uint32_t > & | positions | ||
) | const |
Locate the values and set their positions in the bitvector.
Error code:
Return the positions as a list of 32-bit integers.
vals
.roster
object.
References ibis::gVerbose, and ibis::array_t< T >::size().
int ibis::roster::locate | ( | const std::vector< T > & | vals, |
ibis::bitvector & | positions | ||
) | const |
Locate the values and set their positions in the bitvector.
Return the positions of the matching entries as a bitvector. Return a negative value for error, zero or a positive value for success. The input values are assumed to be sorted in ascending order.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::decompress(), ibis::gVerbose, ibis::bitvector::set(), and ibis::bitvector::setBit().
int ibis::roster::locate | ( | const std::vector< double > & | vals, |
ibis::bitvector & | positions | ||
) | const |
This explicit specialization of the locate function does not require column type to match the incoming data type.
Instead, it casts the incoming data type explicitly before performing any comparisons.
References ibis::bitvector::adjustSize(), ibis::BYTE, ibis::bitvector::decompress(), ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::LONG, ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::SHORT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
int ibis::roster::locate | ( | const std::vector< T > & | vals, |
std::vector< uint32_t > & | positions | ||
) | const |
Locate the values and set their positions in the bitvector.
Error code:
Return the positions as a list of 32-bit integers.
vals
.roster
object.
References ibis::gVerbose.
int ibis::roster::locate2 | ( | const std::vector< inT > & | vals, |
std::vector< uint32_t > & | positions | ||
) | const [protected] |
Cast the incoming values into the type of the column (myT) and then locate the positions of the records that match one of the values.
int ibis::roster::locate2 | ( | const ibis::array_t< inT > & | vals, |
std::vector< uint32_t > & | positions | ||
) | const [protected] |
Cast the incoming values into the type of the column (myT) and then locate the positions of the records that match one of the values.
References ibis::array_t< T >::size().
template<class T > long ibis::roster::mergeBlock2 | ( | const char * | dsrc, |
const char * | dout, | ||
const uint32_t | segment, | ||
array_t< T > & | buf1, | ||
array_t< T > & | buf2, | ||
array_t< T > & | buf3 | ||
) | [static] |
A two-way merge algorithm.
Uses std::less<T> for comparisons. Assumes the sorted segment size is segment elements of type T.
References ibis::array_t< T >::clear(), ibis::gVerbose, ibis::array_t< T >::push_back(), ibis::array_t< T >::read(), ibis::horometer::realTime(), ibis::array_t< T >::resize(), ibis::array_t< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), and UnixOpen.
int ibis::roster::oocSearch | ( | const std::vector< T > & | vals, |
std::vector< uint32_t > & | pos | ||
) | const [protected] |
Out-of-core search function.
It requires at least the .ind file to be in memory. Need to implement a version that can read both .ind and .srt files during search.
References ibis::fileManager::buffer< T >::address(), ibis::fileManager::buffer< T >::size(), and UnixOpen.
int ibis::roster::oocSearch | ( | const ibis::array_t< T > & | vals, |
std::vector< uint32_t > & | pos | ||
) | const [protected] |
Out-of-core search function.
It requires at least the .ind file to be in memory. Need to implement a version that can read both .ind and .srt files during search.
References ibis::fileManager::buffer< T >::address(), ibis::fileManager::buffer< T >::size(), ibis::array_t< T >::size(), and UnixOpen.
void ibis::roster::print | ( | std::ostream & | out | ) | const |
Output minimal information about the roster list.
Print a terse message about the roster.
If the roster list is not initialized correctly, it prints a warning message.
Referenced by roster().
int ibis::roster::write | ( | const char * | df | ) | const |
Write two files: .ind for the indices and .srt for the sorted values.
Write both the .ind and .srt files.
The argument can be the name of the output directory, in which case the column name will be added. If the last segment of the name (before the last directory separator) matches the file name of the column, it is assumed to be the data file name and only the extensions .ind and .srt will be added.
References ibis::fileManager::flushFile(), and ibis::fileManager::instance().
int ibis::roster::writeSorted | ( | const char * | df | ) | const |
Write the sorted version of the attribute values to a .srt file.
Write the sorted values into a .srt file.
Attempt to read the whole column into memory first. If it fails to do so, it will read one value at a time from the original data file.
References ibis::BYTE, ibis::DOUBLE, ibis::FLOAT, ibis::fileManager::getFile(), ibis::util::getFileSize(), ibis::gVerbose, ibis::fileManager::instance(), ibis::INT, ibis::LONG, ibis::SHORT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
![]() |