An expandable table. More...
#include <tafel.h>
Classes | |
struct | column |
In-memory version of a column. More... | |
Public Types | |
typedef std::map< const char *, column *, ibis::lessi > | columnList |
Public Member Functions | |
virtual int | addColumn (const char *cname, ibis::TYPE_T ctype, const char *cdesc, const char *idx) |
Add metadata about a new column. | |
virtual int | append (const char *cname, uint64_t begin, uint64_t end, void *values) |
Copy the incoming values to rows [begin:end) of column cn. | |
virtual int | appendRow (const ibis::table::row &) |
Add one row. | |
virtual int | appendRow (const char *, const char *) |
Append a row stored in ASCII form. | |
virtual int | appendRows (const std::vector< ibis::table::row > &) |
Add multiple rows. | |
virtual uint32_t | capacity () const |
Capacity of the memory cache. | |
virtual void | clearData () |
Remove all data recorded. | |
virtual void | describe (std::ostream &) const |
Print a description of the table to the specified output stream. | |
const columnList & | getColumns () const |
The list of columns stored in memory. | |
virtual uint32_t | mColumns () const |
The number of columns in this table. | |
virtual uint32_t | mRows () const |
The maximum number of rows in any column. | |
virtual int | readCSV (const char *filename, int maxrows, const char *outputdir, const char *delimiters) |
Read the content of the named file as comma-separated values. | |
virtual int | readSQLDump (const char *filename, std::string &tname, int maxrows, const char *outputdir) |
Read a SQL dump from database systems such as MySQL. | |
virtual int32_t | reserveSpace (uint32_t) |
Attempt to reserve enough memory for maxr rows to be stored in memory. | |
virtual int | SQLCreateTable (const char *stmt, std::string &) |
Ingest a complete SQL CREATE TABLE statement. | |
virtual table * | toTable (const char *nm=0, const char *de=0) |
Stop expanding the current set of data records. | |
virtual int | write (const char *dir, const char *tname=0, const char *tdesc=0, const char *idx=0, const char *nvpairs=0) const |
Write the data values and update the metadata file. | |
virtual int | writeMetaData (const char *dir, const char *tname=0, const char *tdesc=0, const char *idx=0, const char *nvpairs=0) const |
Write the metadata file if no metadata file already exists in the given directory. | |
Protected Member Functions | |
template<typename T > | |
void | append (const T *in, ibis::bitvector::word_t be, ibis::bitvector::word_t en, array_t< T > &out, const T &fill, ibis::bitvector &mask) const |
Add values to an array of type T. | |
template<typename T > | |
void | append (const std::vector< std::string > &nm, const std::vector< T > &va, std::vector< array_t< T > * > &buf, std::vector< ibis::bitvector * > &msk) |
Append one row to columns of a particular type. | |
void | appendRaw (const ibis::array_t< unsigned char > *in, ibis::bitvector::word_t be, ibis::bitvector::word_t en, std::vector< std::string > &out, ibis::bitvector &mask) const |
void | appendString (const std::vector< std::string > &nm, const std::vector< std::string > &va, std::vector< std::vector< std::string > * > &buf, std::vector< ibis::bitvector * > &msk) |
Append one row to string-valued columns. | |
void | appendString (const std::vector< std::string > *in, ibis::bitvector::word_t be, ibis::bitvector::word_t en, std::vector< std::string > &out, ibis::bitvector &mask) const |
Copy the incoming strings to out[be:en-1]. | |
int | assignDefaultValue (ibis::tafel::column &col, const char *val) const |
Assign the default value for the given column. | |
void | clear () |
Clear all content. Removes both data and metadata. | |
int32_t | doReserve (uint32_t) |
Reserve space for maxr records in memory. | |
template<typename T > | |
void | locate (ibis::TYPE_T, std::vector< array_t< T > * > &buf, std::vector< ibis::bitvector * > &msk) const |
Locate the buffers and masks associated with a data type. | |
void | locateString (ibis::TYPE_T t, std::vector< std::vector< std::string > * > &buf, std::vector< ibis::bitvector * > &msk) const |
Locate the buffers and masks associated with a string-valued data type. | |
void | normalize () |
Make all short columns catch up with the longest one. | |
int | parseLine (const char *str, const char *del, const char *id) |
Digest a line of text and place the values identified into the corresponding columns. | |
uint32_t | preferredSize () const |
Compute the number of rows that are likely to fit in available memory. | |
int | readSQLStatement (std::istream &, ibis::fileManager::buffer< char > &, ibis::fileManager::buffer< char > &) const |
Read one complete SQL statement from an SQL dump file. | |
Protected Attributes | |
std::vector< column * > | colorder |
Order of columns as they were specified through addColumn . | |
columnList | cols |
List of columns in alphabetical order. | |
ibis::bitvector::word_t | mrows |
Number of rows of this table. |
An expandable table.
It inherits from ibis::tablex only, and therefore does not support any querying functions. It stores all its content in memory, and therefore can only handle a relatively small number of rows.
To perform queries on the underlying data, convert this object into a table object. Call function write to make the in-memory data persistent.
int ibis::tafel::addColumn | ( | const char * | cn, |
ibis::TYPE_T | ct, | ||
const char * | cd, | ||
const char * | idx | ||
) | [virtual] |
Add metadata about a new column.
Return value
Implements ibis::tablex.
References ibis::BLOB, ibis::BYTE, ibis::CATEGORY, colorder, cols, ibis::tafel::column::desc, ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::tafel::column::indexSpec, ibis::INT, ibis::LONG, ibis::tafel::column::name, ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::UNKNOWN_TYPE, ibis::USHORT, and ibis::tafel::column::values.
Referenced by ibis::tablex::parseNamesAndTypes().
void ibis::tafel::append | ( | const T * | in, |
ibis::bitvector::word_t | be, | ||
ibis::bitvector::word_t | en, | ||
array_t< T > & | out, | ||
const T & | fill, | ||
ibis::bitvector & | mask | ||
) | const [protected] |
Add values to an array of type T.
The input values (in) are copied to out[be:en-1]. If the array out has fewer than be elements to start with, it will be filled with value fill. The output mask indicates whether the values in array out are valid. This version works with one column at a time.
References ibis::bitvector::adjustSize(), ibis::bitvector::appendFill(), ibis::util::copy(), ibis::gVerbose, ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
void ibis::tafel::append | ( | const std::vector< std::string > & | nm, |
const std::vector< T > & | va, | ||
std::vector< array_t< T > * > & | buf, | ||
std::vector< ibis::bitvector * > & | msk | ||
) | [protected] |
Append one row to columns of a particular type.
This version works with multiple columns but only one row.
References ibis::array_t< T >::push_back().
int ibis::tafel::appendRow | ( | const char * | line, |
const char * | delimiters | ||
) | [virtual] |
Append a row stored in ASCII form.
The values in ASCII form are assumed to be separated by comma (,) or space, but additional delimiters may be added through the second argument.
Return the number of values added to the new row.
Implements ibis::tablex.
References ibis::gVerbose.
int ibis::tafel::appendRow | ( | const ibis::table::row & | ) | [virtual] |
Add one row.
If an array of names has the same number of elements as the array of values, the names are used as column names. If the names are not specified explicitly, the values are assigned to the columns of the same data type in the order in which they were specified through addColumn, or in the same order as they are recreated from an existing dataset (which is typically alphabetical).
Return the number of values added to the new row.
Note: Like append, this function can not be used to introduce new columns in a table. A new column must be added with addColumn.
Implements ibis::tablex.
References ibis::BYTE, ibis::table::row::bytesnames, ibis::CATEGORY, ibis::table::row::catsnames, ibis::DOUBLE, ibis::table::row::doublesnames, ibis::FLOAT, ibis::table::row::floatsnames, ibis::INT, ibis::table::row::intsnames, ibis::LONG, ibis::table::row::longsnames, ibis::table::row::nColumns(), ibis::SHORT, ibis::table::row::shortsnames, ibis::TEXT, ibis::table::row::textsnames, ibis::UBYTE, ibis::table::row::ubytesnames, ibis::UINT, ibis::table::row::uintsnames, ibis::ULONG, ibis::table::row::ulongsnames, ibis::USHORT, and ibis::table::row::ushortsnames.
int ibis::tafel::appendRows | ( | const std::vector< ibis::table::row > & | ) | [virtual] |
Add multiple rows.
Rows in the incoming vector are processed one after another. The ordering of the values in earlier rows is automatically carried over to the later rows until another set of names is specified.
Return the number of new rows added.
Implements ibis::tablex.
References ibis::BYTE, ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::SHORT, ibis::TEXT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
void ibis::tafel::appendString | ( | const std::vector< std::string > * | in, |
ibis::bitvector::word_t | be, | ||
ibis::bitvector::word_t | en, | ||
std::vector< std::string > & | out, | ||
ibis::bitvector & | mask | ||
) | const [protected] |
Copy the incoming strings to out[be:en-1].
Work with one column at a time.
References ibis::bitvector::adjustSize(), ibis::bitvector::appendFill(), ibis::util::copy(), and ibis::gVerbose.
void ibis::tafel::appendString | ( | const std::vector< std::string > & | nm, |
const std::vector< std::string > & | va, | ||
std::vector< std::vector< std::string > * > & | buf, | ||
std::vector< ibis::bitvector * > & | msk | ||
) | [protected] |
Append one row to string-valued columns.
This version works with multiple columns but only one row.
int ibis::tafel::assignDefaultValue | ( | ibis::tafel::column & | col, |
const char * | val | ||
) | const [protected] |
Assign the default value for the given column.
Returns 0 on success and a negative number for error.
References ibis::BLOB, ibis::BYTE, ibis::CATEGORY, ibis::tafel::column::defval, ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::LONG, ibis::tafel::column::name, ibis::util::readString(), ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
uint32_t ibis::tafel::capacity | ( | ) | const [virtual] |
Capacity of the memory cache.
Report the maximum number of rows that can be stored with this object before more memory will be allocated. A return value of zero (0) may also indicate that it does not know about its capacity.
Reimplemented from ibis::tablex.
References ibis::BYTE, ibis::CATEGORY, ibis::bitvector::clear(), ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::tafel::column::mask, ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
void ibis::tafel::clearData | ( | ) | [virtual] |
Remove all data recorded.
Keeps the information about columns. It is intended to prepare for new rows after invoking the function write.
Implements ibis::tablex.
References ibis::BLOB, ibis::BYTE, ibis::CATEGORY, ibis::array_t< T >::clear(), ibis::bitvector::clear(), ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::tafel::column::mask, ibis::SHORT, ibis::tafel::column::starts, ibis::TEXT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
int32_t ibis::tafel::doReserve | ( | uint32_t | maxr | ) | [protected] |
Reserve space for maxr records in memory.
This function does not perform error checking. The public version of it, reserveSpace, does.
References ibis::BLOB, ibis::BYTE, ibis::array_t< T >::capacity(), ibis::CATEGORY, ibis::bitvector::clear(), ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::LONG, ibis::tafel::column::mask, ibis::array_t< T >::reserve(), ibis::array_t< T >::resize(), ibis::SHORT, ibis::tafel::column::starts, ibis::TEXT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
int ibis::tafel::parseLine | ( | const char * | str, |
const char * | del, | ||
const char * | id | ||
) | [protected] |
Digest a line of text and place the values identified into the corresponding columns.
The actual values are extracted by ibis::util::readInt, ibis::util::readUInt, ibis::util::readDouble and ibis::util::readString. When any of these functions returns an error condition, this function assumes the value to be recorded is a NULL. The presence of a NULL value is marked by a 0-bit in the mask associated with the column. The actual value in the associated buffer is the largest integer value for an integer column and a quiet NaN for a floating-point valued column.
References ibis::BLOB, ibis::BYTE, ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::LONG, ibis::tafel::column::mask, ibis::tafel::column::name, ibis::array_t< T >::push_back(), ibis::util::readDouble(), ibis::util::readInt(), ibis::util::readString(), ibis::util::readUInt(), ibis::array_t< T >::resize(), ibis::SHORT, ibis::tafel::column::starts, ibis::TEXT, ibis::tafel::column::type, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
uint32_t ibis::tafel::preferredSize | ( | ) | const [protected] |
Compute the number of rows that are likely to fit in available memory.
It counts each string-valued column as costing only 16 bytes per row. This can be a significant underestimate of the actual cost. Memory fragmentation may also significantly reduce the available space.
References ibis::BYTE, ibis::fileManager::bytesFree(), ibis::util::coarsen(), ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::SHORT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
int ibis::tafel::readCSV | ( | const char * | inputfile, |
int | maxrows, | ||
const char * | outputdir, | ||
const char * | delimiters | ||
) | [virtual] |
Read the content of the named file as comma-separated values.
Append the records to this table. If the argument maxrows is greater than 0, this function will reserve space to read this many records. If the total number of records is more than maxrows and the output directory name is specified, then the records will be written to outputdir and the memory is made available for later records. If outputdir is not specified, this function attempts to expand the memory allocated, which may run out of memory. Furthermore, repeated allocations can be time-consuming.
By default the records are delimited by comma (,) and blank space. One may specify alternative delimiters using the last argument.
Upon successful completion of this function, the return value is the number of rows processed. However, not all of them may remain in memory because earlier rows may have been written to disk.
Implements ibis::tablex.
References ibis::fileManager::buffer< T >::address(), ibis::util::coarsen(), ibis::horometer::CPUTime(), ibis::gVerbose, ibis::horometer::realTime(), ibis::fileManager::buffer< T >::resize(), ibis::fileManager::buffer< T >::size(), ibis::horometer::start(), and ibis::horometer::stop().
int ibis::tafel::readSQLDump | ( | const char * | inputfile, |
std::string & | tname, | ||
int | maxrows, | ||
const char * | outputdir | ||
) | [virtual] |
Read a SQL dump from database systems such as MySQL.
The entire file will be read into memory in one shot unless both maxrows and outputdir are specified. In cases where both maxrows and outputdir are specified, this function reads a maximum of maxrows rows before writing the data to outputdir under the name tname, which leaves no more than maxrows rows in memory. The value returned from this function is the number of rows processed including those written to disk. Use function mRows to determine how many are still in memory.
If the SQL dump file contains a statement to create a table, then the existing metadata is overwritten. Otherwise, it reads insert statements and converts the ASCII data into binary format in memory.
Implements ibis::tablex.
References ibis::fileManager::buffer< T >::address(), ibis::util::coarsen(), ibis::horometer::CPUTime(), ibis::util::delimiters, ibis::gVerbose, ibis::util::readString(), ibis::horometer::realTime(), ibis::horometer::start(), and ibis::horometer::stop().
int ibis::tafel::readSQLStatement | ( | std::istream & | sqlfile, |
ibis::fileManager::buffer< char > & | stmt, | ||
ibis::fileManager::buffer< char > & | line | ||
) | const [protected] |
Read one complete SQL statement from an SQL dump file.
It will read one line at a time until a semicolon ';' is found. It will expand the buffers as needed. The return value is either the number of bytes in the SQL statement or an error code (less than 0).
References ibis::fileManager::buffer< T >::address(), ibis::gVerbose, ibis::fileManager::buffer< T >::resize(), and ibis::fileManager::buffer< T >::size().
int32_t ibis::tafel::reserveSpace | ( | uint32_t | maxr | ) | [virtual] |
Attempt to reserve enough memory for maxr rows to be stored in memory.
This function will not reserve space for more than 1 billion rows. If maxr is less than mrows, it will simply return mrows. It calls doReserve to perform the actual reservations. If doReserve throws an exception, it will reduce the value of maxr and try again. It will give up after 5 tries and return -1; otherwise, it returns the actual capacity allocated.
Reimplemented from ibis::tablex.
References ibis::BYTE, ibis::fileManager::bytesFree(), ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::LONG, ibis::SHORT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
int ibis::tafel::SQLCreateTable | ( | const char * | stmt, |
std::string & | tname | ||
) | [virtual] |
Ingest a complete SQL CREATE TABLE statement.
Creates all metadata specified. It extracts the table name (into tname) to be used later by functions such as write and writeMetaData.
The statement is expected to be in the form of "create table tname (column1, column2, ...)". It can not contain embedded comments.
Because the SQL standard supports many more data types than FastBit does, many SQL column types are mapped in a crude manner. Here is the current list.
References ibis::BLOB, ibis::BYTE, ibis::CATEGORY, ibis::tafel::column::defval, ibis::DOUBLE, ibis::FLOAT, ibis::gVerbose, ibis::INT, ibis::LONG, ibis::tafel::column::name, ibis::util::readString(), ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
ibis::table * ibis::tafel::toTable | ( | const char * | nm = 0 , |
const char * | de = 0 |
||
) | [virtual] |
Stop expanding the current set of data records.
Convert a tablex object into a table object, so that it can participate in queries. The data records held by the tablex object are transferred to the table object; however, the metadata remains with this object.
Implements ibis::tablex.
References ibis::gVerbose, ibis::tafel::column::name, ibis::array_t< T >::resize(), ibis::column::setNullMask(), ibis::tafel::column::type, ibis::UNKNOWN_TYPE, and ibis::tafel::column::values.
int ibis::tafel::write | ( | const char * | dir, |
const char * | tname = 0 , |
||
const char * | tdesc = 0 , |
||
const char * | idx = 0 , |
||
const char * | nvpairs = 0 |
||
) | const [virtual] |
Write the data values and update the metadata file.
Return error code:
Implements ibis::tablex.
References ibis::BLOB, ibis::BYTE, ibis::CATEGORY, ibis::util::checksum(), ibis::horometer::CPUTime(), ibis::tafel::column::defval, ibis::part::description(), ibis::DOUBLE, ibis::part::emptyCache(), ibis::FLOAT, ibis::fileManager::flushDir(), ibis::part::getColumn(), ibis::gParameters(), ibis::gVerbose, ibis::tafel::column::indexSpec, ibis::part::indexSpec(), ibis::fileManager::instance(), ibis::INT, ibis::LONG, ibis::tafel::column::mask, ibis::part::metaTags(), ibis::part::name(), ibis::part::nColumns(), ibis::part::nRows(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::SHORT, ibis::horometer::start(), ibis::tafel::column::starts, ibis::horometer::stop(), ibis::TEXT, ibis::column::type(), ibis::tafel::column::type, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, UnixOpen, ibis::USHORT, ibis::tafel::column::values, ibis::part::writeColumn(), ibis::part::writeRaw(), and ibis::part::writeString().
int ibis::tafel::writeMetaData | ( | const char * | dir, |
const char * | tname = 0 , |
||
const char * | tdesc = 0 , |
||
const char * | idx = 0 , |
||
const char * | nvpairs = 0 |
||
) | const [virtual] |
Write the metadata file if no metadata file already exists in the given directory.
Return error code:
Implements ibis::tablex.
References ibis::util::checksum(), ibis::horometer::CPUTime(), ibis::tafel::column::desc, ibis::fileManager::flushDir(), ibis::util::getFileSize(), ibis::gParameters(), ibis::gVerbose, ibis::tafel::column::indexSpec, ibis::fileManager::instance(), ibis::tafel::column::name, ibis::horometer::realTime(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::tafel::column::type, and ibis::TYPESTRING.
![]() |