A data structure for representing user queries. More...
#include <query.h>
Classes | |
class | readLock |
A read lock on a query object. More... | |
class | result |
The class ibis::query::result allows user to retrieve query result one row at a time. More... | |
class | weight |
A class to be used for reordering the terms in the where clauses. More... | |
class | writeLock |
A write lock on a query object. More... | |
Public Types | |
enum | QUERY_STATE { UNINITIALIZED, SET_COMPONENTS, SET_RIDS, SET_PREDICATE, SPECIFIED, QUICK_ESTIMATE, FULL_EVALUATE, BUNDLES_TRUNCATED, HITS_TRUNCATED } |
Public Member Functions | |
void | clear () |
Releases the resources held by the query object. | |
void | clearErrorMessage () const |
Reset the last error message to blank. | |
const selectClause & | components () const |
Return a list of names specified in the select clause. | |
void | contractQuery () |
Contracts where clause to preferred bounds. | |
long | countHits () const |
Count the number of hits. | |
const char * | dir () const |
The query token. | |
int | estimate () |
Function to perform estimation. | |
int | evaluate (const bool evalSelect=false) |
Computes the exact hits. | |
void | expandQuery () |
Expands where clause to preferred bounds. | |
long | getExpandedHits (ibis::bitvector &) const |
Get a bitvector containing all rows satisfying the query condition. | |
long | getHitRows (std::vector< uint32_t > &rids) const |
Extract the positions of the bits that are 1s in the solution. | |
const ibis::bitvector * | getHitVector () const |
Return the pointer to the internal hit vector. | |
const char * | getLastError () const |
Return the last error message recorded internally. | |
long | getMaxNumHits () const |
Return the number of records in the upper bound. | |
long | getMinNumHits () const |
Return the number of records in the lower bound. | |
long | getNumHits () const |
Compute the number of records in the exact solution. | |
virtual const char * | getSelectClause () const |
Return the select clause string. | |
QUERY_STATE | getState () const |
Return the current state of query. | |
const RIDSet * | getUserRIDs () const |
Return a const pointer to the copy of the user supplied RID set. | |
virtual const char * | getWhereClause () const |
Return the where clause string. | |
const char * | id () const |
Functions about the identity of the query. | |
long | limit (const char *names, int direction, uint32_t keep, bool updateHits=true) |
Truncate the results to provide the top-K rows. | |
void | logMessage (const char *event, const char *fmt,...) const |
Used to print information about the progress or state of query processing. | |
int | orderby (const char *names, int direction) const |
Re-order the results according to the new "ORDER BY" specification. | |
const part * | partition () const |
Return the pointer to the data partition used to process the query. | |
void | printSelected (std::ostream &out) const |
Print the values of the selected columns to the specified output stream. | |
void | printSelectedWithRID (std::ostream &out) const |
Print the values of the columns in the select clause without functions. | |
query (const char *dir, const ibis::partList &tl) | |
Constructor. | |
query (const char *uid=0, const part *et=0, const char *pref=0) | |
Constructor. Generates a new query on the given data partition et. | |
RIDSet * | readRIDs () const |
Read RIDs from the file named "-rids". | |
std::string | removeComplexConditions () |
Separate out the sub-expressions that are not simple. | |
long | sequentialScan (ibis::bitvector &bv) const |
Return a (new) bitvector that contains the result of directly scanning the raw data to determine which records satisfy the user-specified conditions. | |
int | setPartition (const ibis::part *tbl) |
Resets the data partition used to evaluate the query conditions to the partition specified in the argument. | |
int | setRIDs (const RIDSet &set) |
Specify a list of Row IDs for the query object. | |
virtual int | setSelectClause (const char *str) |
Specifies the select clause for the query. | |
int | setTable (const ibis::part *tbl) |
This is deprecated, will be removed soon. | |
int | setWhereClause (const ibis::qExpr *qexp) |
Specify the where clause through a qExpr object. | |
int | setWhereClause (const std::vector< const char * > &names, const std::vector< double > &lbounds, const std::vector< double > &rbounds) |
Specify the where clause as a set of conjunctive ranges. | |
int | setWhereClause (const char *str) |
Specify the where clause in string form. | |
time_t | timestamp () const |
The time stamp on the data used to process the query. | |
const char * | userName () const |
User started the query. | |
void | writeRIDs (const RIDSet *rids) const |
Write the list of RIDs to a file named "-rids". | |
array_t< signed char > * | getQualifiedBytes (const char *column_name) |
The functions getQualifiedTTT return the values of selected columns in the records that satisfy the specified conditions. | |
array_t< unsigned char > * | getQualifiedUBytes (const char *column_name) |
Retrieve the values of column_name as 8-bit unsigned integers. | |
array_t< int16_t > * | getQualifiedShorts (const char *column_name) |
Retrieve the values of column_name as 16-bit integers. | |
array_t< uint16_t > * | getQualifiedUShorts (const char *column_name) |
Retrieve the values of column_name as 16-bit unsigned integers. | |
array_t< int32_t > * | getQualifiedInts (const char *column_name) |
Retrieve integer values from records satisfying the query conditions. | |
array_t< uint32_t > * | getQualifiedUInts (const char *column_name) |
Retrieve unsigned integer values from records satisfying the query conditions. | |
array_t< int64_t > * | getQualifiedLongs (const char *column_name) |
Retrieve values of column_name as 64-bit integers. | |
array_t< uint64_t > * | getQualifiedULongs (const char *column_name) |
Retrieve values of column_name as 64-bit unsigned integers. | |
array_t< float > * | getQualifiedFloats (const char *column_name) |
Retrieve floating-point values from records satisfying the query conditions. | |
array_t< double > * | getQualifiedDoubles (const char *column_name) |
Retrieve double precision floating-point values from records satisfying the query conditions. | |
std::vector< std::string > * | getQualifiedStrings (const char *column_name) |
Retrieve string values from records satisfying the query conditions. | |
RIDSet * | getRIDs () const |
Return the list of row IDs of the hits. | |
RIDSet * | getRIDs (const ibis::bitvector &mask) const |
Return a list of row IDs that match the mask. | |
const RIDSet * | getRIDsInBundle (const uint32_t bid) const |
Return the list of row IDs of the hits within the specified bundle. | |
template<> | |
int64_t | countEqualPairs (const array_t< int32_t > &val1, const array_t< uint32_t > &val2) const |
template<> | |
int64_t | countEqualPairs (const array_t< uint32_t > &val1, const array_t< int32_t > &val2) const |
template<> | |
int64_t | countDeltaPairs (const array_t< uint32_t > &val1, const array_t< int32_t > &val2, const uint32_t &delta) const |
template<> | |
int64_t | countDeltaPairs (const array_t< int32_t > &val1, const array_t< uint32_t > &val2, const int32_t &delta) const |
template<> | |
int64_t | recordEqualPairs (const array_t< uint32_t > &val1, const array_t< int32_t > &val2, const array_t< uint32_t > &ind1, const array_t< uint32_t > &ind2, const char *filename) const |
template<> | |
int64_t | recordEqualPairs (const array_t< int32_t > &val1, const array_t< uint32_t > &val2, const array_t< uint32_t > &ind1, const array_t< uint32_t > &ind2, const char *filename) const |
template<> | |
int64_t | recordDeltaPairs (const array_t< uint32_t > &val1, const array_t< int32_t > &val2, const array_t< uint32_t > &ind1, const array_t< uint32_t > &ind2, const uint32_t &delta, const char *filename) const |
template<> | |
int64_t | recordDeltaPairs (const array_t< int32_t > &val1, const array_t< uint32_t > &val2, const array_t< uint32_t > &ind1, const array_t< uint32_t > &ind2, const int32_t &delta, const char *filename) const |
Static Public Member Functions | |
static bool | isValidToken (const char *tok) |
Is the given string a valid query token. | |
static void | keepQueryRecords () |
Tell the destructor to leave stored information on disk. | |
static void | removeQueryRecords () |
Tell the destructor to remove all stored information about queries. | |
static unsigned | tokenLength () |
Length of the query token. | |
Protected Member Functions | |
int | computeHits () |
template<typename T1 , typename T2 > | |
int64_t | countDeltaPairs (const array_t< T1 > &val1, const array_t< T2 > &val2, const T1 &delta) const |
Assume the two input arrays are sorted in ascending order, count the number of elements that are within delta of each other. | |
template<typename T1 , typename T2 > | |
int64_t | countEqualPairs (const array_t< T1 > &val1, const array_t< T2 > &val2) const |
Assume the two input arrays are sorted in ascending order, count the number of elements that match. | |
uint32_t | countPages (unsigned wordsize) const |
Count the number of pages accessed to retrieve every value in the hit vector. | |
int | doContract (ibis::qExpr *exp0) const |
Contract range conditions to remove the need of candidate check. | |
void | doEstimate (const qExpr *term, ibis::bitvector &low, ibis::bitvector &high) const |
Use the indexes only. | |
int | doEvaluate (const qExpr *term, ibis::bitvector &hits) const |
Evaluate one term of the query conditions. | |
int | doEvaluate (const qExpr *term, const ibis::bitvector &mask, ibis::bitvector &hits) const |
Evaluate one term of the query conditions. | |
int | doExpand (ibis::qExpr *exp0) const |
Expand range conditions to remove the need of candidate check. | |
int | doScan (const qExpr *term, ibis::bitvector &hits) const |
Read the data partition to resolve the query expression. | |
int | doScan (const qExpr *term, const ibis::bitvector &mask, ibis::bitvector &hits) const |
Read the data partition to resolve the query conditions. | |
void | gainReadAccess (const char *mesg) const |
void | gainWriteAccess (const char *mesg) const |
void | getBounds () |
bool | hasBundles () const |
void | logError (const char *event, const char *fmt,...) const |
void | logWarning (const char *event, const char *fmt,...) const |
int64_t | mergePairs (const char *pairfile) const |
void | orderPairs (const char *pairfile) const |
Sort the content of the file as ibis::rid_t. | |
void | printRIDs (const RIDSet &ridset) const |
Export the Row IDs of the hits to log file. | |
int64_t | processJoin () |
Process the join operation and return the number of pairs. | |
void | readHits () |
Read the results of the query. | |
void | readQuery (const ibis::partList &tl) |
Read the status information from disk. | |
template<typename T1 , typename T2 > | |
int64_t | recordDeltaPairs (const array_t< T1 > &val1, const array_t< T2 > &val2, const array_t< uint32_t > &ind1, const array_t< uint32_t > &ind2, const T1 &delta, const char *pairfile) const |
template<typename T1 , typename T2 > | |
int64_t | recordEqualPairs (const array_t< T1 > &val1, const array_t< T2 > &val2, const array_t< uint32_t > &ind1, const array_t< uint32_t > &ind2, const char *pairfile) const |
void | releaseAccess (const char *mesg) const |
void | removeFiles () |
Remove the files written by this object. | |
void | reorderExpr () |
int64_t | sortEquiJoin (const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask, const char *pairfile) const |
Perform equi-join by sorting the selected values. | |
int64_t | sortEquiJoin (const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask) const |
Performing an equi-join by sorting the selected values first. | |
int64_t | sortJoin (const std::vector< const ibis::deprecatedJoin * > &terms, const ibis::bitvector &mask) const |
int64_t | sortJoin (const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask) const |
int64_t | sortRangeJoin (const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask, const char *pairfile) const |
Performing range join by sorting the selected values. | |
int64_t | sortRangeJoin (const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask) const |
Performing a range join by sorting the selected values. | |
void | writeHits () const |
Write the results of the query. | |
virtual void | writeQuery () |
Write the basic information about the query to disk. | |
Protected Attributes | |
selectClause | comps |
Select clause. | |
whereClause | conds |
Query conditions. | |
ibis::part::readLock * | dslock |
A read lock on the mypart. | |
ibis::bitvector * | hits |
Solution in bitvector form (or lower bound) | |
char | lastError [MAX_LINE+PATH_MAX] |
The warning/error message. | |
QUERY_STATE | state |
Status of the query. | |
ibis::bitvector * | sup |
Estimated upper bound. | |
char * | user |
Name of the user who specified the query. | |
Friends | |
class | readLock |
class | writeLock |
A data structure for representing user queries.
This is the primary entry for user to take advantage of bitmap indexing facilities. A query is a very limited version of the SQL SELECT statement. It is only defined on one data partition and it takes a where clause and a select clause. The where clause is mandatory!
The where clause contains a list of range conditions joined together with logical operators, such as "temperature > 700 and 100 <= pressure < 350". Records whose attribute values satisfy the conditions defined in the where clause are considered hits. A query may retrieve values of variables/columns specified in the select clause. A select clause is optional. If specified, it contains a list of column names. These attributes must not be NULL in order for a record to be a hit. The select clause may also contain column names appearing as the argument to one of the five aggregation functions: avg
, var
, max
, min
and sum
. For example, "temperature, pressure,
average(ho2_concentration)" may be a select statement for a Chemistry application. Note that if one needs to include arithmetic expressions in the select clause, use the function ibis::table::select instead of using this class.
The hits can be computed in two ways by using functions estimate
or evaluate
. The function estimate
can take advantage of the indices to give two approximate solutions, one as an upper bound and the other as a lower bound. The bitmap indices will be automatically built according to the specification if they are not present. The accuracy of the bounds depends on the nature of the indices available. If no index can be constructed, the lower bound would be empty and the upper bound would include every record. When the function evaluate
is called, the exact solution is computed no matter whether the function estimate
has been called or not. The solution produced is recorded as a bit vector. The user may use ibis::bitvector::indexSet to extract the record numbers of the hits or use one of the functions getQualifiedInts
, getQualifiedFloats
, and getQualifiedDoubles
to retrieve the values of the selected attributes. Additionally, one may call either printSelected
or printSelectedWithRID
to print the selected values to the specified I/O stream.
ibis::query::query | ( | const char * | dir, |
const ibis::partList & | tl | ||
) |
Constructor.
Construct a query from the content stored in the named directory.
Reconstructs query from stored information in the named directory dir
. This is only used for recovering from program crashes.
It is used to recover a query from crash, not intended for user to manually construct a query in a directory.
References comps, conds, dir(), ibis::selectClause::empty(), ibis::whereClause::getExpr(), ibis::gVerbose, hits, readHits(), readQuery(), state, ibis::util::strnewdup(), and sup.
ibis::query::query | ( | const char * | uid = 0 , |
const part * | et = 0 , |
||
const char * | pref = 0 |
||
) |
Constructor. Generates a new query on the given data partition et.
Construct a query object from scratch.
If recovery is desired or the query object has its own special prefix, a cache directory is created to store some information about the query such as the query conditions and the resulting solutions. The stored information enables it to be reconstructed in case of crash.
References ibis::gParameters(), ibis::gVerbose, lastError, ibis::util::strnewdup(), and user.
void ibis::query::clear | ( | ) |
Releases the resources held by the query object.
It re-initializes the select clause and the where clause to blank.
References ibis::fileManager::flushDir(), ibis::gParameters(), ibis::gVerbose, ibis::fileManager::instance(), and ibis::util::removeDir().
void ibis::query::contractQuery | ( | ) |
Contracts where clause to preferred bounds.
Similar to function expandQuery, but makes the bounds of the range conditions narrower rather than wider.
References comps, conds, doContract(), dslock, ibis::selectClause::empty(), ibis::whereClause::empty(), ibis::whereClause::getExpr(), hits, removeFiles(), ibis::whereClause::resetString(), state, sup, and writeQuery().
int64_t ibis::query::countDeltaPairs | ( | const array_t< T1 > & | val1, |
const array_t< T2 > & | val2, | ||
const T1 & | delta | ||
) | const [protected] |
Assume the two input arrays are sorted in ascending order, count the number of elements that are within delta of each other.
Note that both template arguments should be elemental types or they must support operators -, +, == and < with mixed types.
References ibis::array_t< T >::size().
int64_t ibis::query::countDeltaPairs | ( | const array_t< uint32_t > & | val1, |
const array_t< int32_t > & | val2, | ||
const uint32_t & | delta | ||
) | const |
This is an explicit specialization of a protected member of ibis::query class.
References ibis::array_t< T >::find(), ibis::gVerbose, and ibis::array_t< T >::size().
int64_t ibis::query::countDeltaPairs | ( | const array_t< int32_t > & | val1, |
const array_t< uint32_t > & | val2, | ||
const int32_t & | delta | ||
) | const |
This is an explicit specialization of a protected member of ibis::query class.
References ibis::array_t< T >::find(), and ibis::array_t< T >::size().
int64_t ibis::query::countEqualPairs | ( | const array_t< T1 > & | val1, |
const array_t< T2 > & | val2 | ||
) | const [protected] |
Assume the two input arrays are sorted in ascending order, count the number of elements that match.
Note that both template arguments should be elemental types or they must support operators == and < with mixed types.
References ibis::gVerbose, and ibis::array_t< T >::size().
int64_t ibis::query::countEqualPairs | ( | const array_t< int32_t > & | val1, |
const array_t< uint32_t > & | val2 | ||
) | const |
This is an explicit specialization of a protected member of ibis::query class.
References ibis::array_t< T >::find(), and ibis::array_t< T >::size().
int64_t ibis::query::countEqualPairs | ( | const array_t< uint32_t > & | val1, |
const array_t< int32_t > & | val2 | ||
) | const |
This is an explicit specialization of a protected member of ibis::query class.
References ibis::array_t< T >::find(), ibis::gVerbose, and ibis::array_t< T >::size().
long ibis::query::countHits | ( | ) | const |
Count the number of hits.
Don't generate the hit vector if not already there. It only works for queries containing a single range condition. Furthermore, this function does not obtain a read lock on the query or the partition. Therefore it is possible for another thread to modify the query object while the evaluation is in progress.
References ibis::bitvector::cnt(), conds, ibis::part::countHits(), ibis::whereClause::empty(), ibis::whereClause::getExpr(), hits, ibis::part::nRows(), and sup.
uint32_t ibis::query::countPages | ( | unsigned | wordsize | ) | const [protected] |
Count the number of pages accessed to retrieve every value in the hit vector.
References ibis::gVerbose, and ibis::fileManager::pageSize().
Referenced by evaluate().
const char* ibis::query::dir | ( | ) | const [inline] |
The query token.
For persistent data
Referenced by ibis::bundle1::bundle1(), ibis::bundles::bundles(), query(), ibis::bundles::write(), and ibis::bundle1::write().
void ibis::query::doEstimate | ( | const qExpr * | term, |
ibis::bitvector & | low, | ||
ibis::bitvector & | high | ||
) | const [protected] |
Use the indexes only.
Treat nil term as matching every row to allow empty where clauses to be interpreted as matching everything (to conform to SQL standard).
References ibis::bitvector::bytes(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::bitvector::flip(), ibis::gVerbose, ibis::compRange::inRange(), ibis::compRange::isConstant(), ibis::math::term::isTrue(), ibis::bitvector::set(), and ibis::bitvector::size().
int ibis::query::doEvaluate | ( | const qExpr * | term, |
ibis::bitvector & | ht | ||
) | const [protected] |
Evaluate one term of the query conditions.
Evaluate the query expression.
Combines the operations on index and the sequential scan in one function.
References ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::flip(), ibis::gVerbose, ibis::compRange::inRange(), ibis::qExpr::isConstant(), ibis::compRange::isConstant(), ibis::math::term::isTrue(), ibis::bitvector::set(), and ibis::bitvector::size().
int ibis::query::doEvaluate | ( | const qExpr * | term, |
const ibis::bitvector & | mask, | ||
ibis::bitvector & | ht | ||
) | const [protected] |
Evaluate one term of the query conditions.
Evaluate the query expression with mask.
Combines the operations on index and the sequential scan in one function.
References ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::bitvector::flip(), ibis::gVerbose, ibis::compRange::inRange(), ibis::qExpr::isConstant(), ibis::compRange::isConstant(), ibis::math::term::isTrue(), ibis::bitvector::set(), and ibis::bitvector::size().
int ibis::query::doScan | ( | const qExpr * | term, |
ibis::bitvector & | hits | ||
) | const [protected] |
Read the data partition to resolve the query expression.
Perform a sequential scan.
References ibis::bitvector::cnt(), ibis::bitvector::flip(), ibis::gVerbose, ibis::compRange::inRange(), ibis::qExpr::isConstant(), ibis::compRange::isConstant(), ibis::math::term::isTrue(), and ibis::bitvector::set().
int ibis::query::doScan | ( | const qExpr * | term, |
const ibis::bitvector & | mask, | ||
ibis::bitvector & | hits | ||
) | const [protected] |
Read the data partition to resolve the query conditions.
Masked sequential scan.
References ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::qExpr::getLeft(), ibis::qExpr::getRight(), ibis::qExpr::getType(), ibis::gVerbose, ibis::compRange::inRange(), ibis::qExpr::isConstant(), ibis::compRange::isConstant(), ibis::math::term::isTrue(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
int ibis::query::estimate | ( | ) |
Function to perform estimation.
It computes a lower and an upper bound of hits. This is done by using the indexes. If necessary it will build new indexes. The lower bound contains only records that are hits and the upper bound contains all hits but may also contain some records that are not hits.
Returns 0 for success, a negative value for error.
References ibis::bitvector::bytes(), ibis::bitvector::cnt(), comps, conds, ibis::horometer::CPUTime(), dslock, ibis::selectClause::empty(), ibis::whereClause::empty(), ibis::whereClause::getExpr(), ibis::whereClause::getString(), ibis::gVerbose, hits, ibis::fileManager::instance(), logMessage(), ibis::part::nColumns(), ibis::part::nRows(), ibis::fileManager::pageCount(), ibis::horometer::realTime(), removeFiles(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), state, ibis::horometer::stop(), sup, and ibis::part::timestamp().
Referenced by getNumHits(), and removeComplexConditions().
int ibis::query::evaluate | ( | const bool | evalSelect = false | ) |
Computes the exact hits.
The same answer shall be computed whether there is any index or not. The argument evalSelect indicates whether the select clause should be evaluated at the same time. If its value is true, the columns specified in the select clause will be retrieved from disk and stored in the temporary location for this query. If not, the qualified values will be retrieved from disk when one of getRIDs, getQualifiedInts, getQualifiedFloats, and getQualifiedDoubles is issued. In the latter case, only the specified column is retrieved. In addition, the values of column at the time of the function are read, which can be potentially different from the time when the function evaluate was called.
Returns 0 for success, a negative value for error.
References ibis::bitvector::bytes(), ibis::bitvector::clusteringFactor(), ibis::bitvector::cnt(), comps, conds, countPages(), ibis::horometer::CPUTime(), ibis::bundle::create(), dslock, ibis::selectClause::empty(), ibis::whereClause::empty(), ibis::part::emptyCache(), ibis::util::envLock, ibis::whereClause::getExpr(), ibis::whereClause::getString(), ibis::gVerbose, hits, ibis::fileManager::iBeat(), ibis::fileManager::instance(), logMessage(), ibis::part::name(), ibis::part::nColumns(), ibis::part::nRows(), ibis::fileManager::pageCount(), ibis::fileManager::pageSize(), ibis::bitvector::randomSize(), ibis::horometer::realTime(), removeFiles(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), state, ibis::horometer::stop(), sup, ibis::part::timestamp(), user, ibis::bundle::write(), writeHits(), and writeQuery().
Referenced by ibis::bord::computeHits(), getQualifiedBytes(), getQualifiedDoubles(), getQualifiedFloats(), getQualifiedInts(), getQualifiedLongs(), getQualifiedShorts(), getQualifiedStrings(), getQualifiedUBytes(), getQualifiedUInts(), getQualifiedULongs(), getQualifiedUShorts(), limit(), ibis::part::quickTest(), and removeComplexConditions().
void ibis::query::expandQuery | ( | ) |
Expands where clause to preferred bounds.
This is to make sure the function estimate will give exact answer. It does nothing if there is no preferred bounds in the indices.
References comps, conds, doExpand(), dslock, ibis::selectClause::empty(), ibis::whereClause::empty(), ibis::whereClause::getExpr(), hits, removeFiles(), ibis::whereClause::resetString(), state, sup, and writeQuery().
long ibis::query::getExpandedHits | ( | ibis::bitvector & | res | ) | const |
Get a bitvector containing all rows satisfying the query condition.
The resulting bitvector includes both active rows and inactive rows.
References ibis::bitvector::clear(), and ibis::bitvector::cnt().
Referenced by ibis::part::stringToBitvector().
long ibis::query::getHitRows | ( | std::vector< uint32_t > & | rids | ) | const |
Extract the positions of the bits that are 1s in the solution.
This is only valid after the query has been evaluated. If it has not been evaluated, it will return a negative number to indicate error. Upon a successful completion of this function, the return value should be the rids.size().
References ibis::bitvector::cnt(), ibis::gVerbose, hits, and sup.
const ibis::bitvector* ibis::query::getHitVector | ( | ) | const [inline] |
Return the pointer to the internal hit vector.
The user should NOT attempt to free the returned pointer. It is intended to be called after calling ibis::query::evaluate.
References hits.
Referenced by ibis::bundle1::bundle1(), ibis::bundles::bundles(), and ibis::part::quickTest().
long int ibis::query::getNumHits | ( | ) | const |
Compute the number of records in the exact solution.
This function will return the number of hits based on the internally stored information or other inexpensive options. It will not perform a full evaluation to compute the numbers of hits. It is intended to be called after calling ibis::query::evaluate. The return value will be -1 if it is not able to determine the number of hits.
References ibis::bitvector::cnt(), conds, ibis::part::countHits(), ibis::whereClause::empty(), estimate(), ibis::whereClause::getExpr(), hits, ibis::part::nRows(), state, and sup.
Referenced by ibis::bundle1::bundle1(), ibis::bundles::bundles(), ibis::bord::computeHits(), and ibis::part::quickTest().
ibis::array_t< signed char > * ibis::query::getQualifiedBytes | ( | const char * | colname | ) |
The functions getQualifiedTTT
return the values of selected columns in the records that satisfy the specified conditions.
An implicit casting will be performed if possible.
The caller must call the operator delete
to free the pointers returned.
Any column in the data partition may be used with getQualifiedTTT
, not just those given in the select clause. The content returned is read from disk when these functions are called.
Retrieve the values of column_name as 8-bit integers.
A null pointer will be returned if the underlying values can not be safely cast into 8-bit integers.
References evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectBytes(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
ibis::array_t< double > * ibis::query::getQualifiedDoubles | ( | const char * | colname | ) |
Retrieve double precision floating-point values from records satisfying the query conditions.
An implicit casting will be performed if the specified column is not of type double.
Note that casting from 64-bit integers to double may cause loss of precision; casting of 32-bit floating-point values to 64-bit version may lead to spurious precision.
References dslock, evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectDoubles(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
ibis::array_t< float > * ibis::query::getQualifiedFloats | ( | const char * | colname | ) |
Retrieve floating-point values from records satisfying the query conditions.
An implicit casting will be performed if possible.
A null pointer will be returned if the underlying values can not be safely cast into 32-bit floating-point values.
References dslock, evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectFloats(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
ibis::array_t< int32_t > * ibis::query::getQualifiedInts | ( | const char * | colname | ) |
Retrieve integer values from records satisfying the query conditions.
An implicit casting will be performed if possible.
A null pointer will be returned if the underlying values can not be safely cast into 32-bit integers.
References evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectInts(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
ibis::array_t< int64_t > * ibis::query::getQualifiedLongs | ( | const char * | colname | ) |
Retrieve values of column_name as 64-bit integers.
An implicit casting will be performed if possible.
A null pointer will be returned if the underlying values can not be safely cast into 64-bit integers.
References evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectLongs(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
ibis::array_t< int16_t > * ibis::query::getQualifiedShorts | ( | const char * | colname | ) |
Retrieve the values of column_name as 16-bit integers.
An implicit casting will be performed if possible.
A null pointer will be returned if the underlying values can not be safely cast into 16-bit integers.
References evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectShorts(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
std::vector< std::string > * ibis::query::getQualifiedStrings | ( | const char * | colname | ) |
Retrieve string values from records satisfying the query conditions.
The argument colname
must be the name of a string-valued column, otherwise a null pointer will be returned.
References dslock, evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectStrings(), state, and ibis::part::timestamp().
ibis::array_t< unsigned char > * ibis::query::getQualifiedUBytes | ( | const char * | colname | ) |
Retrieve the values of column_name as 8-bit unsigned integers.
An implicit casting will be performed if possible.
A null pointer will be returned if the underlying values can not be safely cast into 8-bit unsigned integers.
References evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectUBytes(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
ibis::array_t< uint32_t > * ibis::query::getQualifiedUInts | ( | const char * | colname | ) |
Retrieve unsigned integer values from records satisfying the query conditions.
An implicit casting will be performed if possible.
A null pointer will be returned if the underlying values can not be safely cast into 32-bit unsigned integers.
References evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectUInts(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
ibis::array_t< uint64_t > * ibis::query::getQualifiedULongs | ( | const char * | colname | ) |
Retrieve values of column_name as 64-bit unsigned integers.
An implicit casting will be performed if possible.
A null pointer will be returned if the underlying values can not be safely cast into 64-bit unsigned integers.
References evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectULongs(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
ibis::array_t< uint16_t > * ibis::query::getQualifiedUShorts | ( | const char * | colname | ) |
Retrieve the values of column_name as 16-bit unsigned integers.
An implicit casting will be performed if possible.
A null pointer will be returned if the underlying values can not be safely cast into 16-bit unsigned integers.
References evaluate(), ibis::gVerbose, hits, logMessage(), ibis::part::selectUShorts(), ibis::array_t< T >::size(), state, and ibis::part::timestamp().
ibis::RIDSet * ibis::query::getRIDs | ( | ) | const |
Return the list of row IDs of the hits.
FastBit has a built-in type called ibis::rid_t.
User may use it to provide a global row identifier for each row. We call such row identifiers (RIDs) the external RIDs. In many cases, there are no external RIDs provided by the user, then there is still a set of implicit RIDs numbered from 0 to nRows()-1. This function will retrieve the external RIDs if they are present, otherwise, it will return the implicit RIDs.
References ibis::bitvector::cnt(), ibis::array_t< T >::deepCopy(), ibis::gVerbose, hits, logMessage(), ibis::part::nRows(), ibis::array_t< T >::push_back(), readRIDs(), ibis::array_t< T >::reserve(), ibis::array_t< T >::size(), state, ibis::part::timestamp(), ibis::rid_t::value, and writeRIDs().
Referenced by ibis::part::quickTest().
ibis::RIDSet * ibis::query::getRIDs | ( | const ibis::bitvector & | mask | ) | const |
Return a list of row IDs that match the mask.
The data type for row identifiers is ibis::rid_t, which can be treated as unsigned 64-bit integer.
These identifiers (RIDs) can be either provided by the user (external RIDs) or internally generated from row positions (implicit RIDs). If the user has not provided external RIDs, then this function simply decodes the positions of the bits that are marked 1 and places the positions in the output array.
The return value can be null if this query object is not associated with a data partition or the mask contains no bit marked 1.
References ibis::bitvector::cnt(), ibis::gVerbose, logMessage(), ibis::part::name(), ibis::part::nRows(), and ibis::array_t< T >::size().
bool ibis::query::isValidToken | ( | const char * | tok | ) | [static] |
Is the given string a valid query token.
Return true if it has the expected token format, otherwise false.
References ibis::util::charIndex, ibis::gVerbose, and tokenLength().
long ibis::query::limit | ( | const char * | names, |
int | direction, | ||
uint32_t | keep, | ||
bool | updateHits = true |
||
) |
Truncate the results to provide the top-K rows.
It returns the number of results kept, which is the smaller of the current number of rows and the input argument keep. A negative value is returned in case of error, e.g., the query has not been fully specified. If the 4th argument is true, the internal hit vector is updated to match the truncated solution. Otherwise, the internal hit vector is left unchanged. Since the functions getNumHits and getQualifiedTTT use this internal hit vector, it is generally a good idea to update the hit vector. On the other hand, one may wish to avoid this update if the hit vector is to be kept for some purpose.
References ibis::horometer::CPUTime(), ibis::bundle::create(), evaluate(), ibis::part::evaluateRIDSet(), ibis::bundle::getRIDs(), ibis::gVerbose, hits, logMessage(), ibis::horometer::realTime(), ibis::bundle::size(), ibis::horometer::start(), state, ibis::horometer::stop(), ibis::bundle::truncate(), and ibis::bundle::write().
void ibis::query::logMessage | ( | const char * | event, |
const char * | fmt, | ||
... | |||
) | const |
Used to print information about the progress or state of query processing.
It prefixes each message with a query token.
References ibis::util::getLocalTime().
Referenced by ibis::bundle::create(), estimate(), evaluate(), getMaxNumHits(), getMinNumHits(), getQualifiedBytes(), getQualifiedDoubles(), getQualifiedFloats(), getQualifiedInts(), getQualifiedLongs(), getQualifiedShorts(), getQualifiedStrings(), getQualifiedUBytes(), getQualifiedUInts(), getQualifiedULongs(), getQualifiedUShorts(), getRIDs(), getRIDsInBundle(), getState(), limit(), orderby(), removeComplexConditions(), setPartition(), setRIDs(), setSelectClause(), and setWhereClause().
int ibis::query::orderby | ( | const char * | names, |
int | direction | ||
) | const |
Re-order the results according to the new "ORDER BY" specification.
It returns 0 if it completes successfully. It returns a negative number to indicate error. If direction >= 0, sort the values in ascending order; otherwise, sort them in descending order.
References ibis::horometer::CPUTime(), ibis::bundle::create(), ibis::gVerbose, logMessage(), ibis::horometer::realTime(), ibis::bundle::reorder(), ibis::horometer::start(), state, ibis::horometer::stop(), and ibis::bundle::write().
void ibis::query::orderPairs | ( | const char * | pfile | ) | const [protected] |
Sort the content of the file as ibis::rid_t.
It reads the content of the file one block at a time during the initial sorting of the blocks. It then merges the sorted blocks to produce an overall sorted file. Note that ibis::rid_t is simply a pair of integers. Since the pairs are recorded as pairs of integers too, this should work.
References ibis::fileManager::bytesFree(), ibis::util::getFileSize(), ibis::gVerbose, ibis::fileManager::instance(), PREFERRED_BLOCK_SIZE, and UnixOpen.
void ibis::query::printRIDs | ( | const RIDSet & | ridset | ) | const [protected] |
Export the Row IDs of the hits to log file.
ibis::query::setMyDir
This function prints a list of RIDs to the log file.
References ibis::gVerbose, and ibis::array_t< T >::size().
void ibis::query::printSelected | ( | std::ostream & | out | ) | const |
Print the values of the selected columns to the specified output stream.
The printed values are grouped by the columns without functions. For each group, the functions are evaluated on the columns named in the function. This is equivalent to having implicit "GROUP BY" and "ORDER BY" keywords on all columns that appear without a function in the select clause.
References ibis::bundle::create(), ibis::bundle::print(), and ibis::bundle::write().
void ibis::query::printSelectedWithRID | ( | std::ostream & | out | ) | const |
Print the values of the columns in the select clause without functions.
Only the groups of unique values are printed. For each group, the row IDs (RIDs) of the rows are also printed.
References ibis::bundle::create(), ibis::bundle::printAll(), and ibis::bundle::write().
int64_t ibis::query::processJoin | ( | ) | [protected] |
Process the join operation and return the number of pairs.
This function only counts the number of hits; it does not produce the actual tuples for the results of the join.
Additionally, it performs only self-join, i.e., join a partition with itself. This is only meant to test some algorithms for evaluating joins.
References ibis::index::BINNING, ibis::bitvector::bytes(), ibis::fileManager::bytesFree(), ibis::fileManager::bytesInUse(), ibis::bitvector64::clear(), ibis::bitvector::clusteringFactor(), ibis::bitvector::cnt(), ibis::bitvector64::cnt(), ibis::horometer::CPUTime(), ibis::math::barrel::equivalent(), ibis::qExpr::findRange(), ibis::column::getNullMask(), ibis::gVerbose, ibis::bitvector64::markovSize(), ibis::column::name(), ibis::util::outerProduct(), ibis::horometer::realTime(), ibis::math::barrel::recordVariable(), ibis::index::RELIC, ibis::bitvector64::set(), ibis::bitvector::size(), ibis::bitvector64::size(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::index::type().
void ibis::query::readQuery | ( | const ibis::partList & | tl | ) | [protected] |
Read the status information from disk.
A function to read the query file in a directory -- used by the constructor that takes a directory name as the argument the file contains:
References ibis::rid_t::name::event, ibis::gVerbose, ibis::rid_t::name::run, and ibis::util::strnewdup().
Referenced by query().
ibis::RIDSet * ibis::query::readRIDs | ( | ) | const |
Read RIDs from the file named "-rids".
Return a pointer to ibis::RIDSet.
References ibis::fileManager::getFile(), ibis::fileManager::instance(), and ibis::array_t< T >::size().
Referenced by ibis::bundle1::bundle1(), ibis::bundles::bundles(), getRIDs(), and getRIDsInBundle().
int64_t ibis::query::recordDeltaPairs | ( | const array_t< uint32_t > & | val1, |
const array_t< int32_t > & | val2, | ||
const array_t< uint32_t > & | ind1, | ||
const array_t< uint32_t > & | ind2, | ||
const uint32_t & | delta, | ||
const char * | filename | ||
) | const |
This is an explicit specialization of a protected member of ibis::query class.
References ibis::array_t< T >::find(), ibis::gVerbose, ibis::array_t< T >::size(), and UnixOpen.
int64_t ibis::query::recordDeltaPairs | ( | const array_t< int32_t > & | val1, |
const array_t< uint32_t > & | val2, | ||
const array_t< uint32_t > & | ind1, | ||
const array_t< uint32_t > & | ind2, | ||
const int32_t & | delta, | ||
const char * | filename | ||
) | const |
This is an explicit specialization of a protected member of ibis::query class.
References ibis::array_t< T >::find(), ibis::gVerbose, ibis::array_t< T >::size(), and UnixOpen.
int64_t ibis::query::recordEqualPairs | ( | const array_t< int32_t > & | val1, |
const array_t< uint32_t > & | val2, | ||
const array_t< uint32_t > & | ind1, | ||
const array_t< uint32_t > & | ind2, | ||
const char * | filename | ||
) | const |
This is an explicit specialization of a protected member of ibis::query class.
References ibis::array_t< T >::find(), ibis::gVerbose, ibis::array_t< T >::size(), and UnixOpen.
int64_t ibis::query::recordEqualPairs | ( | const array_t< uint32_t > & | val1, |
const array_t< int32_t > & | val2, | ||
const array_t< uint32_t > & | ind1, | ||
const array_t< uint32_t > & | ind2, | ||
const char * | filename | ||
) | const |
This is an explicit specialization of a protected member of ibis::query class.
References ibis::array_t< T >::find(), ibis::gVerbose, ibis::array_t< T >::size(), and UnixOpen.
std::string ibis::query::removeComplexConditions | ( | ) |
Separate out the sub-expressions that are not simple.
This is intended to allow the overall where clause to be evaluated in separate steps, where the simple conditions are left for this software to handle and the more complex ones are to be handled by another software. The set of simple conditions remains with this query object, and the conditions returned by this function are assumed to be connected with the operator AND. If the top-most operator in the WHERE clause is not an AND operator, the whole clause will be returned if it contains any condition that is not simple; otherwise, an empty string will be returned.
References ibis::whereClause::clear(), conds, ibis::whereClause::empty(), estimate(), evaluate(), ibis::whereClause::getString(), ibis::gVerbose, hits, logMessage(), ibis::part::nRows(), ibis::qExpr::print(), ibis::bitvector::set(), setWhereClause(), state, and sup.
long ibis::query::sequentialScan | ( | ibis::bitvector & | bv | ) | const |
Return a (new) bitvector that contains the result of directly scanning the raw data to determine which records satisfy the user-specified conditions.
It is mostly used for testing purposes. It can be called any time after the where clause is set, and does not change the state of the current query.
References ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::gVerbose, ibis::bitvector::print(), ibis::horometer::realTime(), ibis::horometer::start(), and ibis::horometer::stop().
Referenced by ibis::part::quickTest().
int ibis::query::setPartition | ( | const ibis::part * | tbl | ) |
Resets the data partition used to evaluate the query conditions to the partition specified in the argument.
Integer error code: 0: successful completion of the requested operation.
-1: nil pointer to data partition or empty partition. -2: invalid string for select clause. -3: select clause contains invalid column name. -4: invalid string for where clause. -5: where clause can not be parsed correctly. -6: where clause contains invalid column names or unsupported functions. -7: empty rid list for set rid operation. -8: neither rids nor range conditions are set. -9: encountered some exceptional conditions during query evaluations. -10: no private directory to store bundles. -11: Query not fully evaluated.
References comps, conds, dslock, ibis::whereClause::empty(), ibis::selectClause::empty(), ibis::whereClause::getExpr(), ibis::whereClause::getString(), ibis::gVerbose, hits, logMessage(), ibis::part::name(), ibis::part::nColumns(), ibis::part::nRows(), removeFiles(), state, sup, ibis::whereClause::verify(), ibis::selectClause::verify(), and writeQuery().
Referenced by setTable().
int ibis::query::setRIDs | ( | const RIDSet & | set | ) |
Specify a list of Row IDs for the query object.
Select the records with an RID in the list of RIDs.
References comps, ibis::array_t< T >::deepCopy(), ibis::selectClause::empty(), ibis::gVerbose, hits, logMessage(), removeFiles(), ibis::array_t< T >::size(), state, sup, and writeQuery().
Referenced by ibis::part::quickTest().
int ibis::query::setSelectClause | ( | const char * | str | ) | [virtual] |
Specifies the select clause for the query.
The select clause is a string of attribute names (plus the five predefined functions, avg, var, max, min and sum) separated by spaces, commas (,) or semicolons (;). Repeated calls to this function simply overwrite the previous definition of the select clause. If no select clause is specified, the where clause alone determines whether a record is a hit or not. The select clause will be reordered to make the plain column names without functions appear before those with functions.
References comps, conds, ibis::whereClause::getExpr(), ibis::gVerbose, hits, logMessage(), ibis::part::name(), removeFiles(), state, sup, ibis::selectClause::swap(), ibis::selectClause::verify(), and writeQuery().
Referenced by ibis::part::quickTest().
int ibis::query::setWhereClause | ( | const char * | str | ) |
Specify the where clause in string form.
The where clause is a string representing a list of range conditions.
A where clause is mandatory if a query is to be estimated or evaluated. This function may be called multiple times and each invocation will overwrite the previous where clause.
References comps, conds, ibis::selectClause::empty(), ibis::whereClause::getExpr(), ibis::whereClause::getString(), ibis::gVerbose, hits, logMessage(), ibis::part::name(), ibis::qExpr::printFull(), removeFiles(), state, sup, ibis::whereClause::swap(), ibis::whereClause::verify(), and writeQuery().
Referenced by ibis::bord::computeHits(), ibis::part::quickTest(), removeComplexConditions(), and ibis::part::stringToBitvector().
int ibis::query::setWhereClause | ( | const ibis::qExpr * | qx | ) |
Specify the where clause through a qExpr object.
This function accepts a user constructed query expression object.
It can be used to bypass the parsing of where clause string.
References comps, conds, ibis::selectClause::empty(), ibis::whereClause::getExpr(), ibis::whereClause::getString(), ibis::gVerbose, hits, ibis::part::name(), ibis::qExpr::nItems(), removeFiles(), ibis::whereClause::resetString(), ibis::whereClause::setExpr(), state, sup, ibis::whereClause::swap(), ibis::whereClause::verify(), and writeQuery().
int ibis::query::setWhereClause | ( | const std::vector< const char * > & | names, |
const std::vector< double > & | lbounds, | ||
const std::vector< double > & | rbounds | ||
) |
Specify the where clause as a set of conjunctive ranges.
This function accepts a set of range conditions expressed by the three vectors.
The arrays are expected to be of the same size, and each triplet <names[i], lbounds[i], rbounds[i]> are interpreted as
names[i] between lbounds[i] and rbounds[i]
The range conditions are joined together with the AND operator. If vectors lbounds and rbounds are not the same size, then the missing one is considered to represent an open boundary. For example, if lbounds[4] exists but not rbounds[4], then the range condition is interpreted as
lbounds[4] <= names[4]
References comps, conds, ibis::selectClause::empty(), ibis::whereClause::getExpr(), ibis::gVerbose, hits, ibis::part::name(), removeFiles(), ibis::whereClause::setExpr(), ibis::qExpr::setLeft(), ibis::qExpr::setRight(), state, sup, ibis::whereClause::verify(), and writeQuery().
int64_t ibis::query::sortEquiJoin | ( | const ibis::deprecatedJoin & | cmp, |
const ibis::bitvector & | mask, | ||
const char * | pairfile | ||
) | const [protected] |
Perform equi-join by sorting the selected values.
This version reads the values marked to be 1 in the bitvector mask. It writes the pairs satisfying the join condition to a file named pairfile.
References ibis::CATEGORY, ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::horometer::realTime(), ibis::column::selectValues(), ibis::array_t< T >::size(), ibis::array_t< T >::stableSort(), ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), and ibis::UINT.
int64_t ibis::query::sortEquiJoin | ( | const ibis::deprecatedJoin & | cmp, |
const ibis::bitvector & | mask | ||
) | const [protected] |
Performing an equi-join by sorting the selected values first.
This version reads the values marked to be 1 in the bitvector mask and performs the actual operation of counting the number of pairs with equal values in memory.
References ibis::CATEGORY, ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::horometer::realTime(), ibis::column::selectValues(), ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), and ibis::UINT.
int64_t ibis::query::sortRangeJoin | ( | const ibis::deprecatedJoin & | cmp, |
const ibis::bitvector & | mask | ||
) | const [protected] |
Performing a range join by sorting the selected values.
The sorting is performed through the std::sort algorithm.
References ibis::CATEGORY, ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::math::term::eval(), ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::horometer::realTime(), ibis::column::selectValues(), ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), and ibis::UINT.
void ibis::query::writeQuery | ( | ) | [protected, virtual] |
Write the basic information about the query to disk.
Write the content of the current query into a file.
Referenced by contractQuery(), evaluate(), expandQuery(), setPartition(), setRIDs(), setSelectClause(), and setWhereClause().
![]() |