table.h
Go to the documentation of this file.
00001 // File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 // Copyright 2007-2012 the Regents of the University of California
00004 #ifndef IBIS_TABLE_H
00005 #define IBIS_TABLE_H
00006 
00016 #include <iostream>     // std::ostream
00017 #include <vector>       // std::vector
00018 #include <map>          // std::map
00019 #include <string>       // std::string
00020 #include "const.h"      // intxx_t, uintxx_t, ... from stdint.h
00021 
00022 namespace ibis {
00023 
          /// Identifiers for the column data types used throughout this header
          /// (see ibis::table::typeList and ibis::tablex::addColumn).
          /// Only UNKNOWN_TYPE is given an explicit value (0); every other
          /// enumerator takes the next integer in declaration order, so inserting
          /// or reordering entries changes their numeric values.
          /// NOTE(review): the names suggest BYTE..ULONG are signed/unsigned
          /// integer pairs, FLOAT/DOUBLE floating-point values, CATEGORY/TEXT
          /// string-valued columns, BLOB opaque bytes and UDT a user-defined
          /// type -- confirm against the implementation.
00025     enum TYPE_T {
00027         UNKNOWN_TYPE=0,
00029         OID,
00030         BYTE,   
00031         UBYTE,  
00032         SHORT,  
00033         USHORT, 
00034         INT,    
00035         UINT,   
00036         LONG,   
00037         ULONG,  
00038         FLOAT,  
00039         DOUBLE, 
00040 
00041 
00042         CATEGORY,
00046         TEXT,
00051         BLOB,
00053         UDT
00054     };
          /// Array of C strings defined elsewhere.
          /// NOTE(review): presumably one human-readable name per TYPE_T value,
          /// indexed by the enumerator -- confirm.
00056     FASTBIT_CXX_DLLSPEC extern const char** TYPESTRING;
          /// Character string defined elsewhere.
          /// NOTE(review): presumably one code character per TYPE_T value,
          /// indexed by the enumerator -- confirm.
00058     FASTBIT_CXX_DLLSPEC extern const char* TYPECODE;
00059 
          // Forward declarations; table, tablex and tableList are defined later
          // in this header.
00060     class table;
00061     class tablex;
00062     class tableList;
00063 } // namespace ibis
00064 
/// Abstract interface to a data table with named, typed columns.
/// Instances are obtained as pointers from the static create() functions or
/// from operations such as select() and groupby(); the class cannot be copied
/// or assigned.  Most member functions are pure virtual and have to be
/// supplied by a derived class.
00074 class FASTBIT_CXX_DLLSPEC ibis::table {
00075 public:
          /// Factory functions returning a pointer to a new table object built
          /// from, respectively, one ibis::part, a list of parts, one directory
          /// name, or two directory names.
          /// NOTE(review): presumably the caller owns the returned pointer and a
          /// null pointer signals failure -- confirm against the implementation.
00079     static ibis::table* create(ibis::part&);
00083     static ibis::table* create(const ibis::partList&);
00087     static ibis::table* create(const char* dir);
00093     static ibis::table* create(const char* dir1, const char* dir2);
00094 
          /// Virtual destructor so that derived tables can be deleted through a
          /// pointer to this base class (as ibis::tableList does).
00096     virtual ~table() {};
00097 
          /// Name of the table; the default implementation returns the buffer of
          /// the member name_.
00100     virtual const char* name() const {return name_.c_str();}
          /// Description of the table; the default implementation returns the
          /// buffer of the member desc_.
00102     virtual const char* description() const {return desc_.c_str();}
          /// Number of rows in the table.
00104     virtual uint64_t nRows() const =0;
          /// Number of columns in the table.
00106     virtual uint32_t nColumns() const =0;
00107 
          /// A list of C strings (pointers only, the characters are not copied
          /// into the list).
00111     typedef ibis::array_t<const char*> stringList;
          /// A list of column types.
00113     typedef ibis::array_t<ibis::TYPE_T> typeList;
          /// A list of untyped buffer pointers (see allocateBuffer/freeBuffers).
00117     typedef ibis::array_t<void *> bufferList;
          /// Association of column names with types, ordered by ibis::lessi.
          /// The keys are raw pointers, so the strings must outlive the map.
00119     typedef std::map<const char*, ibis::TYPE_T, ibis::lessi> namesTypes;
00120 
          /// Names and types of the columns, returned by value.
00121     virtual stringList columnNames() const =0; 
00122     virtual typeList columnTypes() const =0; 
00123 
          /// Write a description of the table to the given stream.
00125     virtual void describe(std::ostream&) const =0;
          /// Write the column names to out, using del (default ", ") as the
          /// delimiter argument.
00127     virtual void dumpNames(std::ostream& out, const char* del=", ") const =0;
          /// Write table content to out with delimiter del.  The overloads
          /// additionally take a row count nr, or an offset together with nr.
          /// NOTE(review): presumably the int result is an error code
          /// (negative on failure) -- confirm.
00131     virtual int dump(std::ostream& out, const char* del=", ") const =0;
00133     virtual int dump(std::ostream& out, uint64_t nr,
00134                      const char* del=", ") const =0;
00137     virtual int dump(std::ostream& out, uint64_t offset, uint64_t nr,
00138                      const char* del=", ") const =0;
          /// Back up the table to directory dir, optionally with a table name
          /// and description (both default to null).
00142     virtual int backup(const char* dir, const char* tname=0,
00143                        const char* tdesc=0) const =0;
00144 
          /// Estimate how many rows satisfy the condition, given either as a
          /// string or as a parsed ibis::qExpr.  The two bounds are returned
          /// through the reference arguments nmin and nmax.
00147     virtual void estimate(const char* cond,
00148                           uint64_t& nmin, uint64_t& nmax) const =0;
00151     virtual void estimate(const ibis::qExpr* cond,
00152                           uint64_t& nmin, uint64_t& nmax) const =0;
          /// Produce a new table from the select clause sel and the condition
          /// cond.  The string-condition form is pure virtual; the qExpr form is
          /// virtual but not pure, so a definition exists outside this header.
          /// NOTE(review): presumably the caller owns the returned table.
00155     virtual table* select(const char* sel, const char* cond) const =0;
00158     virtual table* select(const char* sel, const ibis::qExpr* cond) const;
00159 
          /// Static forms of select() that operate on a list of data
          /// partitions instead of a table object.
00161     static table* select(const ibis::constPartList& parts,
00162                          const char* sel, const char* cond);
00164     static table* select(const ibis::constPartList& parts,
00165                          const char* sel, const ibis::qExpr* cond);
          /// Compute the number of hits of a condition over a list of data
          /// partitions.
          /// NOTE(review): the signed return type suggests negative values
          /// report errors -- confirm.
00167     static int64_t computeHits(const ibis::constPartList& parts,
00168                                const char* cond);
00170     static int64_t computeHits(const ibis::constPartList& parts,
00171                                const ibis::qExpr* cond);
00172 
          /// Group-by operation returning a new table.  The list form is pure
          /// virtual; the single-string form is virtual but not pure.
00179     virtual table* groupby(const stringList&) const =0;
00182     virtual table* groupby(const char*) const;
          /// Reorder the rows of this table (non-const: the table itself is
          /// modified).  The second overload takes one bool per key.
          /// NOTE(review): presumably true selects ascending order -- confirm.
00190     virtual void orderby(const stringList&)=0;
00191     virtual void orderby(const stringList&, const std::vector<bool>&)=0;
          /// String form of orderby(); virtual but not pure.
00193     virtual void orderby(const char*);
          /// Reverse the order of the rows.
00195     virtual void reverseRows()=0;
00196 
          /// Add a data partition identified by a string.  The default
          /// implementation does nothing and returns -1.
00209     virtual int addPartition(const char*) {return -1;}
          /// Retrieve the data partitions.  The default implementation leaves
          /// the argument untouched and returns -1.
00211     virtual int getPartitions(ibis::constPartList&) const {
00212         return -1;}
00213 
          /// Build an index for the named column, with an optional option
          /// string (default null).
00228     virtual int buildIndex(const char* colname, const char* option=0) =0;
          /// Build indexes for the table, with an optional option string
          /// (default null).
00233     virtual int buildIndexes(const char* options=0) =0;
          /// Get the indexing specification; colname defaults to null.
00236     virtual const char* indexSpec(const char* colname=0) const =0;
          /// Set the indexing specification to opt; colname defaults to null.
00239     virtual void indexSpec(const char* opt, const char* colname=0) =0;
          /// Combine categorical columns named in the list.  The default
          /// implementation does nothing and returns 0.
00251     virtual int combineCategories(const stringList&) {return 0;}
00253 
          /// Bulk column accessors.  Each takes the column name cname, a
          /// destination vals (a caller-supplied array, or a std::vector in
          /// the last two overloads) and the positions begin and end, which
          /// both default to 0.
          /// NOTE(review): presumably the return value is the number of values
          /// written (negative on error), array destinations must be large
          /// enough for the requested rows, and begin == end == 0 selects all
          /// rows -- confirm against the implementation.
00271     virtual int64_t
00272         getColumnAsBytes(const char* cname, char* vals,
00273                          uint64_t begin=0, uint64_t end=0) const =0;
00274     virtual int64_t
00275         getColumnAsUBytes(const char* cname, unsigned char* vals,
00276                           uint64_t begin=0, uint64_t end=0) const =0;
00277     virtual int64_t
00278         getColumnAsShorts(const char* cname, int16_t* vals,
00279                           uint64_t begin=0, uint64_t end=0) const =0;
00280     virtual int64_t
00281         getColumnAsUShorts(const char* cname, uint16_t* vals,
00282                            uint64_t begin=0, uint64_t end=0) const =0;
00283     virtual int64_t
00284         getColumnAsInts(const char* cname, int32_t* vals,
00285                         uint64_t begin=0, uint64_t end=0) const =0;
00286     virtual int64_t
00287         getColumnAsUInts(const char* cname, uint32_t* vals,
00288                          uint64_t begin=0, uint64_t end=0) const =0;
00289     virtual int64_t
00290         getColumnAsLongs(const char* cname, int64_t* vals,
00291                          uint64_t begin=0, uint64_t end=0) const =0;
00292     virtual int64_t
00293         getColumnAsULongs(const char* cname, uint64_t* vals,
00294                           uint64_t begin=0, uint64_t end=0) const =0;
00295     virtual int64_t
00296         getColumnAsFloats(const char* cname, float* vals,
00297                           uint64_t begin=0, uint64_t end=0) const =0;
00298     virtual int64_t
00299         getColumnAsDoubles(const char* cname, double* vals,
00300                            uint64_t begin=0, uint64_t end=0) const =0;
00301     virtual int64_t
00302         getColumnAsDoubles(const char* cname, std::vector<double>& vals,
00303                            uint64_t begin=0, uint64_t end=0) const =0;
00307     virtual int64_t
00308         getColumnAsStrings(const char* cname, std::vector<std::string>& vals,
00309                            uint64_t begin=0, uint64_t end=0) const =0;
00310 
          /// Minimum and maximum of the named column, reported as double.
00316     virtual double getColumnMin(const char* cname) const =0;
00322     virtual double getColumnMax(const char* cname) const =0;
00324 
          /// Histogram functions in one, two and three dimensions.  Each
          /// dimension is described by a column name and the triple
          /// (begin, end, stride); constraints is a condition string and the
          /// bin counts are returned through counts.
          /// NOTE(review): the ordering of bins inside counts for the 2D/3D
          /// forms is not visible here -- check the implementation.
00338     virtual long getHistogram(const char* constraints,
00339                               const char* cname,
00340                               double begin, double end, double stride,
00341                               std::vector<uint32_t>& counts) const =0;
00348     virtual long getHistogram2D(const char* constraints,
00349                                 const char* cname1,
00350                                 double begin1, double end1, double stride1,
00351                                 const char* cname2,
00352                                 double begin2, double end2, double stride2,
00353                                 std::vector<uint32_t>& counts) const =0;
00360     virtual long getHistogram3D(const char* constraints,
00361                                 const char* cname1,
00362                                 double begin1, double end1, double stride1,
00363                                 const char* cname2,
00364                                 double begin2, double end2, double stride2,
00365                                 const char* cname3,
00366                                 double begin3, double end3, double stride3,
00367                                 std::vector<uint32_t>& counts) const =0;
00369 
          /// One row of a table, stored as a pair of vectors per data type:
          /// xxxnames holds column names and xxxvalues holds the values.
          /// NOTE(review): presumably names and values of the same type are
          /// matched by position -- confirm with the code that fills rows.
00371     struct row {
00372         std::vector<std::string>   bytesnames; 
00373         std::vector<signed char>   bytesvalues;
00374         std::vector<std::string>   ubytesnames; 
00375         std::vector<unsigned char> ubytesvalues;
00376         std::vector<std::string>   shortsnames; 
00377         std::vector<int16_t>       shortsvalues;
00378         std::vector<std::string>   ushortsnames; 
00379         std::vector<uint16_t>      ushortsvalues;
00380         std::vector<std::string>   intsnames; 
00381         std::vector<int32_t>       intsvalues;
00382         std::vector<std::string>   uintsnames; 
00383         std::vector<uint32_t>      uintsvalues;
00384         std::vector<std::string>   longsnames; 
00385         std::vector<int64_t>       longsvalues;
00386         std::vector<std::string>   ulongsnames; 
00387         std::vector<uint64_t>      ulongsvalues;
00388         std::vector<std::string>   floatsnames; 
00389         std::vector<float>         floatsvalues;
00390         std::vector<std::string>   doublesnames; 
00391         std::vector<double>        doublesvalues;
00392         std::vector<std::string>   catsnames; 
00393         std::vector<std::string>   catsvalues;
00394         std::vector<std::string>   textsnames; 
00395         std::vector<std::string>   textsvalues;
00396         std::vector<std::string>   blobsnames; 
00397         std::vector<std::string>   blobsvalues;
00398 
              /// Empty all name vectors and all value vectors (defined inline
              /// at the end of this header).
00400         void clear();
              /// Empty the value vectors only; the names are kept (defined
              /// inline at the end of this header).
00402         void clearValues();
              /// Total number of values held, i.e. the sum of the sizes of
              /// the thirteen value vectors; the name vectors are not counted.
              /// The size_t sum is implicitly converted to uint32_t.
00404         uint32_t nColumns() const {
00405             return bytesvalues.size() + ubytesvalues.size() +
00406                 shortsvalues.size() + ushortsvalues.size() +
00407                 intsvalues.size() + uintsvalues.size() +
00408                 longsvalues.size() + ulongsvalues.size() +
00409                 floatsvalues.size() + doublesvalues.size() +
00410                 catsvalues.size() + textsvalues.size() + blobsvalues.size();}
00411     }; // struct row
00412 
00413     // Cursor class for row-wise data accesses.
00414     class cursor;
          /// Create a cursor object for this table.
          /// NOTE(review): presumably the caller owns the returned cursor.
00416     virtual cursor* createCursor() const =0;
00417 
          /// Parse a string of names into a list of strings.  The input is a
          /// non-const buffer and out stores raw pointers.
          /// NOTE(review): presumably the input is tokenized in place and out
          /// points into it, so in must outlive out -- confirm.
00418     static void parseNames(char* in, stringList& out);
          /// Like parseNames(), but also fills direc with one bool per key.
00419     static void parseOrderby(char* in, stringList& out,
00420                              std::vector<bool>& direc);
00421 
          /// Allocate and release untyped buffers for values of a given type.
          /// NOTE(review): buffers presumably have to be released with the same
          /// type they were allocated for -- confirm.
00422     static void* allocateBuffer(ibis::TYPE_T, size_t);
00423     static void freeBuffer(void* buffer, ibis::TYPE_T type);
00424     static void freeBuffers(bufferList&, typeList&);
00425 
00426 protected:
00427 
00428     std::string name_;  // returned by the default name()
00429     std::string desc_;  // returned by the default description()
00430 
          /// Default constructor: empty name and description.
00432     table() {};
          /// Construct with a name and a description.  A null na yields an
          /// empty name; a null de falls back to na, and to the empty string
          /// when na is null as well.
00434     table(const char* na, const char* de)
00435         : name_(na?na:""), desc_(de?de:na?na:"") {};
00436 
00437 private:
00438     // re-enforce the prohibitions on copying and assignment.
          // Declared private and not defined in this header.
00439     table(const table&);
00440     table& operator=(const table&);
00441 }; // class ibis::table
00442 
/// Abstract interface for building a table: columns are declared with
/// addColumn(), data is appended or read from files, and the result is
/// written out with write() or converted with toTable().  Objects are
/// obtained from create() and cannot be copied or assigned.
00455 class FASTBIT_CXX_DLLSPEC ibis::tablex {
00456 public:
          /// Factory function returning a pointer to a new tablex object.
          /// NOTE(review): presumably the caller owns the returned pointer.
00458     static ibis::tablex* create();
00459 //     /// Make the incoming table expandable.  Not yet implemented
00460 //     static ibis::tablex* makeExtensible(ibis::table* t);
00461 
00462     virtual ~tablex() {}; // nothing to do.
00463 
          /// Declare a column with name cname and type ctype; the description
          /// cdesc and the index specification idx are optional (default null).
00465     virtual int addColumn(const char* cname, ibis::TYPE_T ctype,
00466                           const char* cdesc=0, const char* idx=0) =0;
00467 
          /// Append values to the column cname for the positions begin..end.
          /// The buffer is untyped.
          /// NOTE(review): presumably values must hold elements of the type
          /// the column was declared with -- confirm.
00488     virtual int append(const char* cname, uint64_t begin, uint64_t end,
00489                        void* values) =0;
00490 
          /// Append one row given as an ibis::table::row.
00520     virtual int appendRow(const ibis::table::row&) =0;
          /// Append one row given as a line of text; delimiters defaults to
          /// null.
00526     virtual int appendRow(const char* line, const char* delimiters=0) = 0;
          /// Append a sequence of rows.
00534     virtual int appendRows(const std::vector<ibis::table::row>&) =0;
00535 
          /// Read rows from a CSV file.  maxrows, outputdir and delimiters are
          /// optional and default to 0 / null.
00558     virtual int readCSV(const char* inputfile, int maxrows=0,
00559                         const char* outputdir=0, const char* delimiters=0) =0;
          /// Read rows from a SQL dump file.  tname is passed by non-const
          /// reference.
          /// NOTE(review): presumably tname receives the table name found in
          /// the dump -- confirm.
00573     virtual int readSQLDump(const char* inputfile, std::string& tname,
00574                             int maxrows=0, const char* outputdir=0) =0;
00575 
          /// Read column names and types from a file, or parse them from a
          /// string.  Both are virtual but not pure; definitions exist outside
          /// this header.
00577     virtual int readNamesAndTypes(const char* filename);
00579     virtual int parseNamesAndTypes(const char* txt);
00580 
          /// Write the content to directory dir.  The table name, description,
          /// index specification and name-value pairs are optional (default
          /// null).
00618     virtual int write(const char* dir, const char* tname=0,
00619                       const char* tdesc=0, const char* idx=0,
00620                       const char* nvpairs=0) const =0;
          /// Same argument list as write(), for the metadata only.
00634     virtual int writeMetaData(const char* dir, const char* tname=0,
00635                               const char* tdesc=0, const char* idx=0,
00636                               const char* nvpairs=0) const =0;
00637 
          /// Discard the data held by this object.
00641     virtual void clearData() =0;
          /// Reserve space for a number of rows.  The default implementation
          /// ignores the request and returns 0.
00653     virtual int32_t reserveSpace(uint32_t) {return 0;}
          /// Capacity of the object.  The default implementation returns 0.
00663     virtual uint32_t capacity() const {return 0;}
00664 
          /// Number of rows and columns currently held, and a textual
          /// description written to the given stream.
00666     virtual uint32_t mRows() const =0;
00668     virtual uint32_t mColumns() const =0;
00670     virtual void describe(std::ostream&) const =0;
00671 
          /// Produce an ibis::table from the content; the name nm and the
          /// description de are optional (default null).
          /// NOTE(review): presumably the caller owns the returned table.
00677     virtual table* toTable(const char* nm=0, const char* de=0) =0;
00678 
00679 protected:
00680     tablex() {}; // Derived classes need this.
00681 
00682 private:
00683     tablex(const tablex&); // no copying
00684     tablex& operator=(const tablex&); // no assignment
00685 }; // class ibis::tablex
00686 
00690 class FASTBIT_CXX_DLLSPEC ibis::tableList {
00691 public:
00692     typedef std::map< const char*, ibis::table*, ibis::lessi > tableSet;
00693     typedef tableSet::const_iterator iterator;
00694 
00697     bool empty() const {return tables.empty();}
00699     uint32_t size() const {return tables.size();}
00701     iterator begin() const {return tables.begin();}
00705     iterator end() const {return tables.end();}
00706 
00709     const ibis::table* operator[](const char* tname) const {
00710         tableSet::const_iterator it = tables.find(tname);
00711         if (it != tables.end())
00712             return (*it).second;
00713         else
00714             return 0;
00715     }
00716 
00722     void add(ibis::table*& tb) {
00723         tableSet::iterator it = tables.find(tb->name());
00724         if (it == tables.end()) {
00725             tables[tb->name()] = tb;
00726             tb=0;
00727         }
00728         else {
00729             ibis::table* tmp = (*it).second;
00730             tables[tb->name()] = tb;
00731             tb = tmp;
00732         }
00733     }
00734 
00738     void remove(const char* tname) {
00739         tableSet::iterator it = tables.find(tname);
00740         if (it != tables.end()) {
00741             ibis::table* tmp = (*it).second;
00742             tables.erase(it);
00743             delete tmp;
00744         }
00745     }
00746 
00748     tableList() {};
00749 
00751     ~tableList() {
00752         while (! tables.empty()) {
00753             tableSet::iterator it = tables.begin();
00754             ibis::table* tmp = (*it).second;
00755             tables.erase(it);
00756             delete tmp;
00757         }
00758     }
00759 
00760 private:
00762     tableSet tables;
00763 
00764     // Can not copy or assign.
00765     tableList(const tableList&);
00766     tableList& operator=(const tableList&);
00767 }; // ibis::tableList
00768 
/// Abstract interface for row-wise access to a table.  Objects are created
/// through ibis::table::createCursor(); every operation is pure virtual.
00774 class FASTBIT_CXX_DLLSPEC ibis::table::cursor {
00775 public:
00776     virtual ~cursor() {};
          /// Dimensions, column types and column names as seen by the cursor.
00777     virtual uint64_t nRows() const =0;
00778     virtual uint32_t nColumns() const =0;
00779     virtual ibis::table::typeList columnTypes() const =0;
00780     virtual ibis::table::stringList columnNames() const =0;
          /// Fetch a row: either without an argument or at the explicit row
          /// number rownum.
          /// NOTE(review): presumably the argument-less form advances to the
          /// next row and a non-zero result signals failure or end of data --
          /// confirm.
00783     virtual int fetch() =0;
00787     virtual int fetch(uint64_t rownum) =0;
          /// Row number the cursor currently refers to.
00792     virtual uint64_t getCurrentRowNumber() const =0;
00793 
          /// Same as the two fetch() functions above, additionally passing an
          /// ibis::table::row by non-const reference.
          /// NOTE(review): presumably the row receives the fetched values.
00796     virtual int fetch(ibis::table::row&) =0;
00799     virtual int fetch(uint64_t rownum, ibis::table::row&) =0;
00800 
          /// Write the current row to out, with del (default ", ") as the
          /// delimiter argument.
00802     virtual int dump(std::ostream& out, const char* del=", ") const =0;
00803 
          /// Retrieve one value of the current row by column name.  The value
          /// is returned through the reference argument.
          /// NOTE(review): presumably the int result is an error code
          /// (negative when the column is missing or cannot be converted) --
          /// confirm.
00807     virtual int getColumnAsByte(const char* cname, char&) const =0;
00808     virtual int getColumnAsUByte(const char* cname, unsigned char&) const =0;
00809     virtual int getColumnAsShort(const char* cname, int16_t&) const =0;
00810     virtual int getColumnAsUShort(const char* cname, uint16_t&) const =0;
00811     virtual int getColumnAsInt(const char* cname, int32_t&) const =0;
00812     virtual int getColumnAsUInt(const char* cname, uint32_t&) const =0;
00813     virtual int getColumnAsLong(const char* cname, int64_t&) const =0;
00814     virtual int getColumnAsULong(const char* cname, uint64_t&) const =0;
00815     virtual int getColumnAsFloat(const char* cname, float&) const =0;
00816     virtual int getColumnAsDouble(const char* cname, double&) const =0;
00817     virtual int getColumnAsString(const char* cname, std::string&) const =0;
00818 
          /// The same accessors, addressing the column by its number cnum
          /// instead of its name.
          /// NOTE(review): presumably cnum is a zero-based position in the
          /// order reported by columnNames() -- confirm.
00824     virtual int getColumnAsByte(uint32_t cnum, char& val) const =0;
00825     virtual int getColumnAsUByte(uint32_t cnum, unsigned char& val) const =0;
00826     virtual int getColumnAsShort(uint32_t cnum, int16_t& val) const =0;
00827     virtual int getColumnAsUShort(uint32_t cnum, uint16_t& val) const =0;
00828     virtual int getColumnAsInt(uint32_t cnum, int32_t& val) const =0;
00829     virtual int getColumnAsUInt(uint32_t cnum, uint32_t& val) const =0;
00830     virtual int getColumnAsLong(uint32_t cnum, int64_t& val) const =0;
00831     virtual int getColumnAsULong(uint32_t cnum, uint64_t& val) const =0;
00832     virtual int getColumnAsFloat(uint32_t cnum, float& val) const =0;
00833     virtual int getColumnAsDouble(uint32_t cnum, double& val) const =0;
00834     virtual int getColumnAsString(uint32_t cnum, std::string& val) const =0;
00835 
00836 protected:
          // Only derived classes may construct a cursor.  Copying and
          // assignment are declared but not defined.
00837     cursor() {};
00838     cursor(const cursor&); // not implemented
00839     cursor& operator=(const cursor&) ; // not implemented
00840 }; // ibis::table::cursor
00841 
00842 inline void ibis::table::row::clear() {
00843     bytesnames.clear();
00844     bytesvalues.clear();
00845     ubytesnames.clear();
00846     ubytesvalues.clear();
00847     shortsnames.clear();
00848     shortsvalues.clear();
00849     ushortsnames.clear();
00850     ushortsvalues.clear();
00851     intsnames.clear();
00852     intsvalues.clear();
00853     uintsnames.clear();
00854     uintsvalues.clear();
00855     longsnames.clear();
00856     longsvalues.clear();
00857     ulongsnames.clear();
00858     ulongsvalues.clear();
00859     floatsnames.clear();
00860     floatsvalues.clear();
00861     doublesnames.clear();
00862     doublesvalues.clear();
00863     catsnames.clear();
00864     catsvalues.clear();
00865     textsnames.clear();
00866     textsvalues.clear();
00867     blobsnames.clear();
00868     blobsvalues.clear();
00869 } // ibis::table::row::clear
00870 
00871 inline void ibis::table::row::clearValues() {
00872     bytesvalues.clear();
00873     ubytesvalues.clear();
00874     shortsvalues.clear();
00875     ushortsvalues.clear();
00876     intsvalues.clear();
00877     uintsvalues.clear();
00878     longsvalues.clear();
00879     ulongsvalues.clear();
00880     floatsvalues.clear();
00881     doublesvalues.clear();
00882     catsvalues.clear();
00883     textsvalues.clear();
00884     blobsvalues.clear();
00885 } // ibis::table::row::clearValues
00886 #endif // IBIS_TABLE_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive