The basic bitmap index. More...
#include <irelic.h>
Public Member Functions | |
virtual long | append (const char *dt, const char *df, uint32_t nnew) |
Create an index based on data in df and append the result to the index in dt. | |
long | append (const ibis::relic &tail) |
Append tail to this index. | |
long | append (const array_t< uint32_t > &ind) |
Append a list of integers. | |
virtual void | binBoundaries (std::vector< double > &b) const |
Return all distinct values as the bin boundaries. | |
virtual void | binWeights (std::vector< uint32_t > &b) const |
Return the exact count for each distinct value. | |
virtual uint32_t | estimate (const ibis::qDiscreteRange &) const |
Compute the number of hits satisfying the discrete range expression. | |
virtual void | estimate (const ibis::qDiscreteRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const |
Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, ..)'. | |
virtual void | estimate (const ibis::relic &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Estimate the pairs for the range join operator. | |
virtual void | estimate (const ibis::relic &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
virtual int64_t | estimate (const ibis::relic &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask) const |
Estimate an upper bound for the number of pairs produced from marked records. | |
virtual void | estimate (const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const |
Computes an approximation of hits as a pair of lower and upper bounds. | |
virtual int64_t | estimate (const ibis::relic &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
virtual uint32_t | estimate (const ibis::qContinuousRange &expr) const |
Return the number of hits satisfying the given continuous range expression. | |
virtual double | estimateCost (const ibis::qContinuousRange &expr) const |
Estimate the cost of resolving the continuous range expression. | |
virtual double | estimateCost (const ibis::qDiscreteRange &expr) const |
Estimate the cost of resolving the discrete range expression. | |
virtual long | evaluate (const ibis::qContinuousRange &expr, ibis::bitvector &hits) const |
Compute the hits as a bitvector. | |
virtual long | evaluate (const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const |
Resolve a discrete range condition. | |
virtual long | getCumulativeDistribution (std::vector< double > &bds, std::vector< uint32_t > &cts) const |
Compute a cumulative distribution. | |
virtual long | getDistribution (std::vector< double > &bds, std::vector< uint32_t > &cts) const |
Compute a histogram. | |
virtual double | getMax () const |
The maximum value recorded in the index. | |
virtual double | getMin () const |
The minimum value recorded in the index. | |
virtual double | getSum () const |
Compute the sum of all values of the column indexed. | |
array_t< uint32_t > * | keys (const ibis::bitvector &mask) const |
Convert the bitvector mask into bin numbers. | |
virtual const char * | name () const |
Returns the name of the index, similar to the function type, but returns a string instead. | |
virtual void | print (std::ostream &out) const |
The printing function. | |
virtual int | read (ibis::fileManager::storage *st) |
Reconstruct an index from a piece of consecutive memory. | |
virtual int | read (const char *idxfile) |
Read the index from the specified location. | |
relic (const ibis::column *c, uint32_t card, array_t< uint32_t > &ints) | |
Construct an index from an integer array. | |
relic (const ibis::column *c, const char *f=0) | |
Construct a basic bitmap index. | |
relic (const ibis::column *c, ibis::fileManager::storage *st, size_t start=8) | |
Reconstruct from content of fileManager::storage. | |
relic (const ibis::column *c, uint32_t popu, uint32_t ntpl=0) | |
Construct a dummy index. | |
virtual void | speedTest (std::ostream &out) const |
Time some logical operations and print out their speed. | |
virtual INDEX_TYPE | type () const |
Returns an index type identifier. | |
virtual float | undecidable (const ibis::qDiscreteRange &, ibis::bitvector &iffy) const |
virtual float | undecidable (const ibis::qContinuousRange &, ibis::bitvector &iffy) const |
This class and its derived classes should produce exact answers, therefore no undecidable rows. | |
virtual int | write (const char *dt) const |
Write the content of the index to the specified location. | |
Protected Member Functions | |
virtual void | clear () |
Clear the existing content. | |
virtual double | computeSum () const |
Compute the sum of all values of the column indexed. | |
template<typename E > | |
void | construct (const array_t< E > &arr) |
Construct an index from in-memory values. | |
void | construct (const char *f=0) |
Construct a new index in memory. | |
virtual size_t | getSerialSize () const throw () |
Estimate the size of the index in a file. | |
void | locate (const ibis::qContinuousRange &expr, uint32_t &hit0, uint32_t &hit1) const |
Locate the bitmaps covered by the range expression. | |
uint32_t | locate (const double &val) const |
Find the smallest i such that vals[i] > val. | |
int | write32 (int fdes) const |
Write the content to a file already opened. | |
int | write64 (int fdes) const |
Write the content to a file already opened. | |
Protected Attributes | |
array_t< double > | vals |
The basic bitmap index.
It generates one bitmap for each distinct value.
ibis::relic::relic | ( | const ibis::column * | c, |
const char * | f = 0 |
||
) |
Construct a basic bitmap index.
It attempts to read an index from the specified location. If that fails it creates one from current data.
References ibis::index::bits, ibis::CATEGORY, clear(), ibis::index::col, construct(), ibis::gVerbose, ibis::column::name(), ibis::part::name(), ibis::index::nrows, ibis::part::nRows(), print(), read(), relic(), ibis::TEXT, and ibis::column::type().
Referenced by relic().
ibis::relic::relic | ( | const ibis::column * | c, |
uint32_t | popu, | ||
uint32_t | ntpl = 0 |
||
) |
Construct a dummy index.
All entries have the same value popu. This is used to generate an index for meta tags from STAR data.
References ibis::index::bits, clear(), ibis::index::col, ibis::gVerbose, ibis::column::name(), ibis::part::name(), ibis::index::nrows, ibis::part::nRows(), print(), relic(), and ibis::array_t< T >::resize().
ibis::relic::relic | ( | const ibis::column * | c, |
ibis::fileManager::storage * | st, | ||
size_t | start = 8 |
||
) |
Reconstruct from content of fileManager::storage.
The content of the file (following the 8-byte header) is
- nrows (uint32_t) -- number of bits in each bit sequence
- nobs (uint32_t) -- number of bit sequences
- card (uint32_t) -- the number of distinct values, i.e., cardinality
- (padding to ensure the next data element is on 8-byte boundary)
- values (double[card]) -- the values as doubles
- offset ([nobs+1]) -- the starting positions of the bit sequences (as bit vectors)
- bitvectors -- the bitvectors one after another
References ibis::fileManager::storage::begin(), ibis::index::bits, clear(), ibis::index::col, ibis::gVerbose, ibis::index::initBitmaps(), ibis::index::initOffsets(), ibis::column::name(), ibis::part::name(), ibis::index::nrows, print(), and relic().
long ibis::relic::append | ( | const char * | dt, |
const char * | df, | ||
uint32_t | nnew | ||
) | [virtual] |
Create an index based on data in df and append the result to the index in dt.
Reimplemented from ibis::index.
Reimplemented in ibis::slice, ibis::fade, ibis::sbiad, ibis::sapid, ibis::fuzz, ibis::bylt, and ibis::zona.
References ibis::fileManager::storage::begin(), ibis::fileManager::flushFile(), ibis::fileManager::getFile(), ibis::util::getFileSize(), ibis::gVerbose, ibis::fileManager::instance(), ibis::index::RELIC, and ibis::TEXT.
Referenced by ibis::zona::append(), ibis::fuzz::append(), ibis::bylt::append(), ibis::category::append(), and ibis::category::fillIndex().
long ibis::relic::append | ( | const ibis::relic & | tail | ) |
Append tail to this index.
This function first converts *this into a map and then back into the linear data structure.
References ibis::index::activate(), ibis::index::bits, ibis::index::col, ibis::bitvector::copy(), ibis::gVerbose, ibis::index::nrows, ibis::bitvector::set(), and ibis::array_t< T >::size().
long ibis::relic::append | ( | const array_t< uint32_t > & | ind | ) |
Append a list of integers.
The integers are treated as bin numbers. This function is primarily used by ibis::category::append().
References ibis::array_t< T >::push_back(), and ibis::array_t< T >::size().
void ibis::relic::clear | ( | ) | [protected, virtual] |
Clear the existing content.
Free the objects pointed to by the pointers.
Reimplemented from ibis::index.
Reimplemented in ibis::slice, ibis::fade, ibis::fuzz, ibis::bylt, and ibis::zona.
References ibis::index::clear().
Referenced by ibis::zona::clear(), ibis::fuzz::clear(), ibis::bylt::clear(), ibis::slice::clear(), ibis::fade::clear(), and relic().
void ibis::relic::construct | ( | const array_t< E > & | arr | ) | [protected] |
Construct an index from in-memory values.
The type E is intended to be one of the element types supported in column.h.
References ibis::gVerbose, ibis::fileManager::instance(), ibis::fileManager::signalMemoryAvailable(), and ibis::array_t< T >::size().
Referenced by relic().
void ibis::relic::construct | ( | const char * | f = 0 | ) | [protected] |
Construct a new index in memory.
Generate a new index.
The basic bitmap index contains one bitmap per distinct value. The string f can be the name of the index file (the corresponding data file is assumed to be without the '.idx' suffix), the name of the data file, or the directory containing the data file.
References ibis::gVerbose, ibis::fileManager::instance(), and ibis::fileManager::signalMemoryAvailable().
void ibis::relic::estimate | ( | const ibis::relic & | idx2, |
const ibis::deprecatedJoin & | expr, | ||
const ibis::bitvector & | mask, | ||
ibis::bitvector64 & | lower, | ||
ibis::bitvector64 & | upper | ||
) | const [virtual] |
Estimate the pairs for the range join operator.
Only records that are masked are evaluated.
References ibis::bitvector64::bytes(), ibis::bitvector64::clear(), ibis::bitvector::cnt(), ibis::index::col, ibis::horometer::CPUTime(), ibis::math::term::eval(), ibis::gVerbose, ibis::horometer::realTime(), ibis::horometer::start(), and ibis::horometer::stop().
void ibis::relic::estimate | ( | const ibis::relic & | idx2, |
const ibis::deprecatedJoin & | expr, | ||
const ibis::bitvector & | mask, | ||
const ibis::qRange *const | range1, | ||
const ibis::qRange *const | range2, | ||
ibis::bitvector64 & | lower, | ||
ibis::bitvector64 & | upper | ||
) | const [virtual] |
range1 is for column 1 in the join expression and range2 is for column 2 in the join expression. No name matching is performed.
References ibis::bitvector64::bytes(), ibis::bitvector64::clear(), ibis::bitvector::cnt(), ibis::index::col, ibis::horometer::CPUTime(), ibis::math::term::eval(), ibis::gVerbose, ibis::horometer::realTime(), ibis::horometer::start(), and ibis::horometer::stop().
int64_t ibis::relic::estimate | ( | const ibis::relic & | idx2, |
const ibis::deprecatedJoin & | expr, | ||
const ibis::bitvector & | mask | ||
) | const [virtual] |
Estimate an upper bound for the number of pairs produced from marked records.
References ibis::bitvector::cnt(), ibis::index::col, ibis::horometer::CPUTime(), ibis::math::term::eval(), ibis::gVerbose, ibis::horometer::realTime(), ibis::horometer::start(), and ibis::horometer::stop().
uint32_t ibis::relic::estimate | ( | const ibis::qContinuousRange & | expr | ) | const [virtual] |
Return the number of hits satisfying the given continuous range expression.
Reimplemented from ibis::index.
Reimplemented in ibis::slice, ibis::fade, ibis::fuzz, ibis::bylt, and ibis::zona.
virtual void ibis::relic::estimate | ( | const ibis::qContinuousRange & | , |
ibis::bitvector & | lower, | ||
ibis::bitvector & | upper | ||
) | const [inline, virtual] |
Computes an approximation of hits as a pair of lower and upper bounds.
expr | the query expression to be evaluated. |
lower | a bitvector marking a subset of the hits. All rows marked with one (1) are definitely hits. |
upper | a bitvector marking a superset of the hits. All hits are marked with one, but some of the rows marked one may not be hits. If the variable upper is empty, the variable lower is assumed to contain the exact answer. |
Reimplemented from ibis::index.
Reimplemented in ibis::slice.
References ibis::bitvector::clear(), and evaluate().
virtual void ibis::relic::estimate | ( | const ibis::qDiscreteRange & | expr, |
ibis::bitvector & | lower, | ||
ibis::bitvector & | upper | ||
) | const [inline, virtual] |
Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, ..)'.
A trivial implementation to indicate the index can not determine any row.
Reimplemented from ibis::index.
References ibis::bitvector::clear(), and evaluate().
double ibis::relic::estimateCost | ( | const ibis::qDiscreteRange & | expr | ) | const [virtual] |
Estimate the cost of resolving the discrete range expression.
The answer is in the number of bytes needed from this index.
Reimplemented from ibis::index.
Reimplemented in ibis::slice.
References ibis::qDiscreteRange::getValues(), and ibis::array_t< T >::size().
double ibis::relic::estimateCost | ( | const ibis::qContinuousRange & | expr | ) | const [virtual] |
Estimate the cost of resolving the continuous range expression.
The answer is in the number of bytes needed from this index.
Reimplemented from ibis::index.
Reimplemented in ibis::slice, ibis::fade, ibis::fuzz, ibis::bylt, and ibis::zona.
long ibis::relic::evaluate | ( | const ibis::qDiscreteRange & | expr, |
ibis::bitvector & | answer | ||
) | const [virtual] |
Resolve a discrete range condition.
The answer is a bitvector marking the rows satisfying the range conditions.
Reimplemented from ibis::index.
Reimplemented in ibis::slice, ibis::fade, ibis::sbiad, and ibis::sapid.
References ibis::bitvector::cnt(), ibis::qDiscreteRange::getValues(), ibis::bitvector::set(), and ibis::array_t< T >::size().
void ibis::relic::locate | ( | const ibis::qContinuousRange & | expr, |
uint32_t & | hit0, | ||
uint32_t & | hit1 | ||
) | const [protected] |
Locate the bitmaps covered by the range expression.
Bitmaps hit0 (inclusive) through hit1 (exclusive) correspond to values satisfying the range expression expr.
References ibis::gVerbose, ibis::qContinuousRange::leftBound(), and ibis::qContinuousRange::rightBound().
virtual const char* ibis::relic::name | ( | ) | const [inline, virtual] |
Returns the name of the index, similar to the function type, but returns a string instead.
Implements ibis::index.
Reimplemented in ibis::slice, ibis::fade, ibis::sbiad, ibis::sapid, ibis::fuzz, ibis::bylt, and ibis::zona.
virtual float ibis::relic::undecidable | ( | const ibis::qContinuousRange & | , |
ibis::bitvector & | iffy | ||
) | const [inline, virtual] |
This class and its derived classes should produce exact answers, therefore no undecidable rows.
Reimplemented from ibis::index.
References ibis::bitvector::clear().
int ibis::relic::write | ( | const char * | dt | ) | const [virtual] |
Write the content of the index to the specified location.
The actual index file name is determined by the function indexFileName.
Implements ibis::index.
Reimplemented in ibis::slice, ibis::fade, ibis::sbiad, ibis::sapid, ibis::fuzz, ibis::bylt, and ibis::zona.
References ibis::fileManager::flushFile(), ibis::gVerbose, ibis::fileManager::instance(), ibis::index::RELIC, and UnixOpen.
Referenced by ibis::category::append(), ibis::category::category(), and ibis::category::fillIndex().
![]() |