Class ibis::mensa contains multiple (horizontal) data partitions (ibis::part) to form a logical data table.
More...
#include <mensa.h>
Classes | |
class | cursor |
Public Member Functions | |
virtual int | addPartition (const char *dir) |
Add data partitions defined in the named directory. | |
virtual int | backup (const char *dir, const char *tname=0, const char *tdesc=0) const |
Write the current content to the specified output directory in the raw binary format. | |
virtual int | buildIndex (const char *, const char *) |
The following functions deal with auxiliary data for accelerating query processing, primarily for building indexes. | |
virtual int | buildIndexes (const char *) |
Create indexes for every column of the table. | |
virtual stringList | columnNames () const |
Return the column names in a list. | |
virtual typeList | columnTypes () const |
Return the column types in a list. | |
virtual ibis::table::cursor * | createCursor () const |
Create a cursor object to perform row-wise data access. | |
virtual void | describe (std::ostream &) const |
Print a description of the table to the specified output stream. | |
virtual int | dump (std::ostream &, uint64_t, const char *) const |
Print the first nr rows. | |
virtual int | dump (std::ostream &, const char *) const |
Print the values in ASCII form to the specified output stream. | |
virtual int | dump (std::ostream &, uint64_t, uint64_t, const char *) const |
Print nr rows starting with row offset. | |
virtual void | dumpNames (std::ostream &, const char *) const |
Print all column names on one line. | |
virtual void | estimate (const char *cond, uint64_t &nmin, uint64_t &nmax) const |
Estimate the number of rows satisfying the selection conditions. | |
virtual void | estimate (const ibis::qExpr *cond, uint64_t &nmin, uint64_t &nmax) const |
Estimate the number of rows satisfying the selection conditions. | |
virtual int64_t | getColumnAsBytes (const char *, char *, uint64_t=0, uint64_t=0) const |
Retrieve all values of the named column. | |
virtual int64_t | getColumnAsDoubles (const char *, double *, uint64_t=0, uint64_t=0) const |
virtual int64_t | getColumnAsDoubles (const char *, std::vector< double > &, uint64_t=0, uint64_t=0) const |
Retrieve all values of the named column. | |
virtual int64_t | getColumnAsFloats (const char *, float *, uint64_t=0, uint64_t=0) const |
virtual int64_t | getColumnAsInts (const char *, int32_t *, uint64_t=0, uint64_t=0) const |
Retrieve all values of the named column. | |
virtual int64_t | getColumnAsLongs (const char *, int64_t *, uint64_t=0, uint64_t=0) const |
virtual int64_t | getColumnAsShorts (const char *, int16_t *, uint64_t=0, uint64_t=0) const |
Retrieve all values of the named column. | |
virtual int64_t | getColumnAsStrings (const char *, std::vector< std::string > &, uint64_t=0, uint64_t=0) const |
virtual int64_t | getColumnAsUBytes (const char *, unsigned char *, uint64_t=0, uint64_t=0) const |
Retrieve all values of the named column. | |
virtual int64_t | getColumnAsUInts (const char *, uint32_t *, uint64_t=0, uint64_t=0) const |
Retrieve all values of the named column. | |
virtual int64_t | getColumnAsULongs (const char *, uint64_t *, uint64_t=0, uint64_t=0) const |
virtual int64_t | getColumnAsUShorts (const char *, uint16_t *, uint64_t=0, uint64_t=0) const |
Retrieve all values of the named column. | |
virtual double | getColumnMax (const char *) const |
Compute the maximum of all valid values in the named column. | |
virtual double | getColumnMin (const char *) const |
Compute the minimum of all valid values in the named column. | |
virtual long | getHistogram (const char *, const char *, double, double, double, std::vector< uint32_t > &) const |
virtual long | getHistogram2D (const char *, const char *, double, double, double, const char *, double, double, double, std::vector< uint32_t > &) const |
Compute a two-dimensional histogram on columns cname1 and cname2. | |
virtual long | getHistogram3D (const char *, const char *, double, double, double, const char *, double, double, double, const char *, double, double, double, std::vector< uint32_t > &) const |
Compute a three-dimensional histogram on the named columns. | |
virtual int | getPartitions (std::vector< const ibis::part * > &) const |
Retrieve the list of partitions. | |
virtual table * | groupby (const stringList &) const |
Directly performing group-by on the base data (without selection) is not currently supported. | |
virtual table * | groupby (const char *) const |
Directly performing group-by on the base data (without selection) is not currently supported. | |
virtual const char * | indexSpec (const char *) const |
Retrieve the current indexing option. | |
virtual void | indexSpec (const char *, const char *) |
Replace the current indexing option. | |
mensa (const char *dir) | |
This function expects a valid data directory to find data partitions. | |
mensa (const char *dir1, const char *dir2) | |
This function expects a pair of data directories to define data partitions. | |
virtual uint32_t | nColumns () const |
Number of columns. | |
virtual uint64_t | nRows () const |
The number of rows in this table. | |
virtual void | orderby (const stringList &, const std::vector< bool > &) |
Reordering the rows using the specified columns. | |
virtual void | orderby (const stringList &) |
Reordering the rows using the specified columns. | |
virtual void | orderby (const char *str) |
Reorder the rows. The column names are separated by commas. | |
virtual void | reverseRows () |
Reversing the ordering of the rows on disk requires too much work but has no obvious benefit. | |
virtual table * | select (const char *sel, const char *cond) const |
Given a set of column names and a set of selection conditions, compute another table that represents the selected values. | |
virtual table * | select2 (const char *sel, const char *cond, const char *pts) const |
A variation of the function select defined in ibis::table. | |
Protected Member Functions | |
void | clear () |
Clear the existing content. | |
int64_t | computeHits (const char *cond) const |
Compute the number of hits. | |
Protected Attributes | |
ibis::table::namesTypes | naty |
A combined list of column names. | |
uint64_t | nrows |
ibis::partList | parts |
List of data partitions. | |
Friends | |
class | cursor |
Class ibis::mensa contains multiple (horizontal) data partitions (ibis::part) to form a logical data table.
The base data contained in this table is logically immutable, as reordering rows (through function reorder) does not change the overall content of the table. The functions reverseRows and groupby are not implemented.
ibis::mensa::mensa | ( | const char * | dir | ) | [explicit] |
This function expects a valid data directory to find data partitions.
If the incoming directory is not a valid string, it will use ibis::gParameters() to find data partitions.
References ibis::table::desc_, ibis::util::gatherParts(), ibis::gParameters(), ibis::gVerbose, ibis::table::name_, naty, and parts.
ibis::mensa::mensa | ( | const char * | dir1, |
const char * | dir2 | ||
) |
This function expects a pair of data directories to define data partitions.
If either dir1 or dir2 is not valid, it will attempt to find data partitions using the global parameters ibis::gParameters().
References ibis::table::desc_, ibis::util::gatherParts(), ibis::gParameters(), ibis::gVerbose, ibis::table::name_, naty, and parts.
int ibis::mensa::addPartition | ( | const char * | dir | ) | [virtual] |
Add data partitions defined in the named directory.
It uses opendir and friends to traverse the subdirectories, which means it will only be able to descend into subdirectories on unix and compatible systems.
Reimplemented from ibis::table.
Reimplemented in ibis::liga.
References ibis::util::gatherParts(), ibis::gParameters(), and ibis::gVerbose.
int ibis::mensa::backup | ( | const char * | dir, |
const char * | tname = 0 , |
||
const char * | tdesc = 0 |
||
) | const [virtual] |
Write the current content to the specified output directory in the raw binary format.
May optionally overwrite the name and description of the table.
Implements ibis::table.
References ibis::gVerbose.
int ibis::mensa::buildIndex | ( | const char * | colname, |
const char * | option | ||
) | [virtual] |
The following functions deal with auxiliary data for accelerating query processing, primarily for building indexes.
Create the index for the named column. The existing index will be replaced. If an indexing option is not specified, it will use the internally recorded option for the named column or the table containing the column.
Implements ibis::table.
References ibis::index::create(), ibis::gVerbose, and ibis::column::indexSpec().
int ibis::mensa::buildIndexes | ( | const char * | options | ) | [virtual] |
Create indexes for every column of the table.
Existing indexes will be replaced. If an indexing option is not specified, the internally recorded options will be used.
Implements ibis::table.
ibis::table::stringList ibis::mensa::columnNames | ( | ) | const [virtual] |
Return the column names in a list.
Implements ibis::table.
int ibis::mensa::dump | ( | std::ostream & | out, |
const char * | del | ||
) | const [virtual] |
Print the values in ASCII form to the specified output stream.
The default delimiter is a comma (","), which produces Comma-Separated-Values (CSV).
Implements ibis::table.
References ibis::mensa::cursor::dumpBlock(), and ibis::mensa::cursor::fetch().
Referenced by ibis::mensa::cursor::dumpSome().
int ibis::mensa::dump | ( | std::ostream & | out, |
uint64_t | offset, | ||
uint64_t | nr, | ||
const char * | del | ||
) | const [inline, virtual] |
Print nr rows starting with row offset.
Note that the row number starts with 0, i.e., the first row is row 0.
Implements ibis::table.
References ibis::mensa::cursor::dumpSome(), ibis::mensa::cursor::fetch(), and parts.
void ibis::mensa::estimate | ( | const char * | cond, |
uint64_t & | nmin, | ||
uint64_t & | nmax | ||
) | const [virtual] |
Estimate the number of rows satisfying the selection conditions.
The number of rows is in the range [nmin, nmax] (inclusive).
Implements ibis::table.
References ibis::countQuery::estimate(), ibis::countQuery::getMaxNumHits(), ibis::countQuery::getMinNumHits(), ibis::countQuery::setPartition(), and ibis::countQuery::setWhereClause().
void ibis::mensa::estimate | ( | const ibis::qExpr * | cond, |
uint64_t & | nmin, | ||
uint64_t & | nmax | ||
) | const [virtual] |
Estimate the number of rows satisfying the selection conditions.
The number of rows is in the range [nmin, nmax] (inclusive).
Implements ibis::table.
References ibis::countQuery::estimate(), ibis::countQuery::getMaxNumHits(), ibis::countQuery::getMinNumHits(), ibis::countQuery::setPartition(), and ibis::countQuery::setWhereClause().
int64_t ibis::mensa::getColumnAsBytes | ( | const char * | cname, |
char * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Retrieve all values of the named column.
The member functions of this class only support access to one column at a time. Use the table::cursor class for row-wise accesses.
The arguments begin and end are given in row numbers starting from 0. If begin < end, then rows begin through end-1 are packed into the output array. If begin >= end, then the values from begin to the end of the table are packed into the output array. The default values, where both begin and end are 0, define a range covering all rows of the table.
Implements ibis::table.
References ibis::BYTE, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::part::nRows(), and ibis::UBYTE.
int64_t ibis::mensa::getColumnAsDoubles | ( | const char * | cname, |
std::vector< double > & | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Retrieve all values of the named column.
The member functions of this class only support access to one column at a time. Use the table::cursor class for row-wise accesses.
The arguments begin and end are given in row numbers starting from 0. If begin < end, then rows begin through end-1 are packed into the output array. If begin >= end, then the values from begin to the end of the table are packed into the output array. The default values, where both begin and end are 0, define a range covering all rows of the table.
Implements ibis::table.
References ibis::BYTE, ibis::DOUBLE, ibis::FLOAT, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::gVerbose, ibis::INT, ibis::part::nRows(), ibis::SHORT, ibis::UBYTE, ibis::UINT, and ibis::USHORT.
int64_t ibis::mensa::getColumnAsDoubles | ( | const char * | cn, |
double * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Implements ibis::table.
References ibis::BYTE, ibis::DOUBLE, ibis::FLOAT, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::INT, ibis::part::nRows(), ibis::SHORT, ibis::UBYTE, ibis::UINT, and ibis::USHORT.
int64_t ibis::mensa::getColumnAsFloats | ( | const char * | cn, |
float * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Implements ibis::table.
References ibis::BYTE, ibis::FLOAT, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::part::nRows(), ibis::SHORT, ibis::UBYTE, and ibis::USHORT.
int64_t ibis::mensa::getColumnAsInts | ( | const char * | cname, |
int32_t * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Retrieve all values of the named column.
The member functions of this class only support access to one column at a time. Use the table::cursor class for row-wise accesses.
The arguments begin and end are given in row numbers starting from 0. If begin < end, then rows begin through end-1 are packed into the output array. If begin >= end, then the values from begin to the end of the table are packed into the output array. The default values, where both begin and end are 0, define a range covering all rows of the table.
Implements ibis::table.
References ibis::BYTE, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::INT, ibis::part::nRows(), ibis::SHORT, ibis::UBYTE, ibis::UINT, and ibis::USHORT.
int64_t ibis::mensa::getColumnAsLongs | ( | const char * | cn, |
int64_t * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Implements ibis::table.
References ibis::BYTE, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::part::nRows(), ibis::SHORT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
int64_t ibis::mensa::getColumnAsShorts | ( | const char * | cname, |
int16_t * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Retrieve all values of the named column.
The member functions of this class only support access to one column at a time. Use the table::cursor class for row-wise accesses.
The arguments begin and end are given in row numbers starting from 0. If begin < end, then rows begin through end-1 are packed into the output array. If begin >= end, then the values from begin to the end of the table are packed into the output array. The default values, where both begin and end are 0, define a range covering all rows of the table.
Implements ibis::table.
References ibis::BYTE, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::part::nRows(), ibis::SHORT, ibis::UBYTE, and ibis::USHORT.
int64_t ibis::mensa::getColumnAsStrings | ( | const char * | cn, |
std::vector< std::string > & | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Implements ibis::table.
References ibis::BYTE, ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::part::getColumn(), ibis::util::getString(), ibis::column::getValuesArray(), ibis::gVerbose, ibis::INT, ibis::LONG, ibis::part::nRows(), ibis::SHORT, ibis::TEXT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
int64_t ibis::mensa::getColumnAsUBytes | ( | const char * | cname, |
unsigned char * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Retrieve all values of the named column.
The member functions of this class only support access to one column at a time. Use the table::cursor class for row-wise accesses.
The arguments begin and end are given in row numbers starting from 0. If begin < end, then rows begin through end-1 are packed into the output array. If begin >= end, then the values from begin to the end of the table are packed into the output array. The default values, where both begin and end are 0, define a range covering all rows of the table.
Implements ibis::table.
References ibis::BYTE, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::part::nRows(), and ibis::UBYTE.
int64_t ibis::mensa::getColumnAsUInts | ( | const char * | cname, |
uint32_t * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Retrieve all values of the named column.
The member functions of this class only support access to one column at a time. Use the table::cursor class for row-wise accesses.
The arguments begin and end are given in row numbers starting from 0. If begin < end, then rows begin through end-1 are packed into the output array. If begin >= end, then the values from begin to the end of the table are packed into the output array. The default values, where both begin and end are 0, define a range covering all rows of the table.
Implements ibis::table.
References ibis::BYTE, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::INT, ibis::part::nRows(), ibis::SHORT, ibis::UBYTE, ibis::UINT, and ibis::USHORT.
int64_t ibis::mensa::getColumnAsULongs | ( | const char * | cn, |
uint64_t * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Implements ibis::table.
References ibis::BYTE, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::part::nRows(), ibis::SHORT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
int64_t ibis::mensa::getColumnAsUShorts | ( | const char * | cname, |
uint16_t * | vals, | ||
uint64_t | begin = 0 , |
||
uint64_t | end = 0 |
||
) | const [virtual] |
Retrieve all values of the named column.
The member functions of this class only support access to one column at a time. Use the table::cursor class for row-wise accesses.
The arguments begin and end are given in row numbers starting from 0. If begin < end, then rows begin through end-1 are packed into the output array. If begin >= end, then the values from begin to the end of the table are packed into the output array. The default values, where both begin and end are 0, define a range covering all rows of the table.
Implements ibis::table.
References ibis::BYTE, ibis::part::getColumn(), ibis::column::getValuesArray(), ibis::part::nRows(), ibis::SHORT, ibis::UBYTE, and ibis::USHORT.
double ibis::mensa::getColumnMax | ( | const char * | cname | ) | const [virtual] |
Compute the maximum of all valid values in the named column.
In case of error, such as an invalid column name or an empty table, this function will return FASTBIT_DOUBLE_NULL or -DBL_MAX to ensure that the following test fails: getColumnMin <= getColumnMax.
Implements ibis::table.
References ibis::column::getActualMax().
double ibis::mensa::getColumnMin | ( | const char * | cname | ) | const [virtual] |
Compute the minimum of all valid values in the named column.
In case of error, such as an invalid column name or an empty table, this function will return FASTBIT_DOUBLE_NULL or DBL_MAX to ensure that the following test fails: getColumnMin <= getColumnMax.
Implements ibis::table.
References ibis::column::getActualMin().
long ibis::mensa::getHistogram | ( | const char * | constraints, |
const char * | cname, | ||
double | begin, | ||
double | end, | ||
double | stride, | ||
std::vector< uint32_t > & | counts | ||
) | const [virtual] |
Compute the histogram of the named column. This version uses the user specified bins:
[begin, begin+stride) [begin+stride, begin+2*stride) ....
A record is placed in bin
(x - begin) / stride,
where the first bin is bin 0. The total number of bins is
1 + floor((end - begin) / stride).
Note: a non-positive stride is considered an error.
Note: if end is less than begin, an empty array counts is returned along with return value 0.
Implements ibis::table.
long ibis::mensa::getHistogram2D | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
std::vector< uint32_t > & | counts | ||
) | const [virtual] |
Compute a two-dimensional histogram on columns cname1 and cname2.
The bins along each dimension are defined the same way as in function getHistogram. The array counts stores the two-dimensional bins with the first dimension as the slow-varying dimension, following the C convention for ordering multi-dimensional arrays.
Implements ibis::table.
long ibis::mensa::getHistogram3D | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
const char * | cname3, | ||
double | begin3, | ||
double | end3, | ||
double | stride3, | ||
std::vector< uint32_t > & | counts | ||
) | const [virtual] |
Compute a three-dimensional histogram on the named columns.
The triplets <begin, end, stride> are used the same way as in getHistogram and getHistogram2D. The three-dimensional bins are linearized in counts with the first being the slowest-varying dimension and the third being the fastest-varying dimension, following the C convention for ordering multi-dimensional arrays.
Implements ibis::table.
virtual table* ibis::mensa::groupby | ( | const stringList & | ) | const [inline, virtual] |
Directly performing group-by on the base data (without selection) is not currently supported.
Implements ibis::table.
virtual table* ibis::mensa::groupby | ( | const char * | ) | const [inline, virtual] |
Directly performing group-by on the base data (without selection) is not currently supported.
Reimplemented from ibis::table.
void ibis::mensa::indexSpec | ( | const char * | opt, |
const char * | colname | ||
) | [virtual] |
Replace the current indexing option.
If no column name is specified, it resets the indexing option for the table.
Implements ibis::table.
References ibis::column::indexSpec().
const char * ibis::mensa::indexSpec | ( | const char * | colname | ) | const [virtual] |
Retrieve the current indexing option.
If no column name is specified, it retrieves the indexing option for the table.
Implements ibis::table.
References ibis::column::indexSpec().
uint32_t ibis::mensa::nColumns | ( | ) | const [virtual] |
Number of columns.
It actually returns the number of columns of the first data partition. This is consistent with other functions such as columnTypes and columnNames.
Implements ibis::table.
void ibis::mensa::orderby | ( | const stringList & | names | ) | [virtual] |
Reordering the rows using the specified columns.
Each data partition is reordered separately.
Implements ibis::table.
References ibis::gVerbose.
void ibis::mensa::orderby | ( | const stringList & | names, |
const std::vector< bool > & | asc | ||
) | [virtual] |
Reordering the rows using the specified columns.
Each data partition is reordered separately.
Implements ibis::table.
References ibis::gVerbose.
Referenced by orderby().
virtual void ibis::mensa::reverseRows | ( | ) | [inline, virtual] |
Reversing the ordering of the rows on disk requires too much work but has no obvious benefit.
Implements ibis::table.
ibis::table * ibis::mensa::select | ( | const char * | sel, |
const char * | cond | ||
) | const [virtual] |
Given a set of column names and a set of selection conditions, compute another table that represents the selected values.
Implements ibis::table.
References ibis::table::select().
ibis::table * ibis::mensa::select2 | ( | const char * | sel, |
const char * | cond, | ||
const char * | pts | ||
) | const [virtual] |
A variation of the function select defined in ibis::table.
It accepts an extra argument for the caller to specify a list of names of data partitions that will participate in the select operation. The argument pts may contain wildcard characters accepted by the SQL function 'LIKE', more specifically, '_' and '%'. If the argument pts is a nil pointer or an empty string
References ibis::table::computeHits(), ibis::gVerbose, ibis::table::select(), and ibis::util::strMatch().
![]() |