table.h
Go to the documentation of this file.
00001 // File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 // Copyright 2007-2011 the Regents of the University of California
00004 #ifndef IBIS_TABLE_H
00005 #define IBIS_TABLE_H
00006 
00016 #include <iostream>     // std::ostream
00017 #include <vector>       // std::vector
00018 #include <map>          // std::map
00019 #include <string>       // std::string
00020 #include "const.h"      // intxx_t, uintxx_t, ... from stdint.h
00021 
00022 namespace ibis {
00023 
00025     enum TYPE_T {
00027         UNKNOWN_TYPE=0,
00029         OID,
00030         BYTE,   
00031         UBYTE,  
00032         SHORT,  
00033         USHORT, 
00034         INT,    
00035         UINT,   
00036         LONG,   
00037         ULONG,  
00038         FLOAT,  
00039         DOUBLE, 
00040 
00041 
00042         CATEGORY,
00046         TEXT,
00051         BLOB
00052     };
00054     FASTBIT_CXX_DLLSPEC extern const char** TYPESTRING;
00056     FASTBIT_CXX_DLLSPEC extern const char* TYPECODE;
00057 
00058     class table;
00059     class tablex;
00060     class tableList;
00061 } // namespace ibis
00062 
00072 class FASTBIT_CXX_DLLSPEC ibis::table {
00073 public:
00077     static ibis::table* create(ibis::part&);
00081     static ibis::table* create(const ibis::partList&);
00085     static ibis::table* create(const char* dir);
00091     static ibis::table* create(const char* dir1, const char* dir2);
00092 
00094     virtual ~table() {};
00095 
00098     virtual const char* name() const {return name_.c_str();}
00100     virtual const char* description() const {return desc_.c_str();}
00102     virtual uint64_t nRows() const =0;
00104     virtual uint32_t nColumns() const =0;
00105 
00109     typedef ibis::array_t<const char*> stringList;
00111     typedef ibis::array_t<ibis::TYPE_T> typeList;
00115     typedef ibis::array_t<void *> bufferList;
00117     typedef std::map<const char*, ibis::TYPE_T, ibis::lessi> namesTypes;
00118 
00119     virtual stringList columnNames() const =0; 
00120     virtual typeList columnTypes() const =0; 
00121 
00123     virtual void describe(std::ostream&) const =0;
00125     virtual void dumpNames(std::ostream& out, const char* del=", ") const =0;
00129     virtual int dump(std::ostream& out, const char* del=", ") const =0;
00131     virtual int dump(std::ostream& out, uint64_t nr,
00132                      const char* del=", ") const =0;
00135     virtual int dump(std::ostream& out, uint64_t offset, uint64_t nr,
00136                      const char* del=", ") const =0;
00140     virtual int backup(const char* dir, const char* tname=0,
00141                        const char* tdesc=0) const =0;
00142 
00145     virtual void estimate(const char* cond,
00146                           uint64_t& nmin, uint64_t& nmax) const =0;
00149     virtual void estimate(const ibis::qExpr* cond,
00150                           uint64_t& nmin, uint64_t& nmax) const =0;
00153     virtual table* select(const char* sel, const char* cond) const =0;
00156     virtual table* select(const char* sel, const ibis::qExpr* cond) const;
00157 
00159     static table* select(const std::vector<const ibis::part*>& parts,
00160                          const char* sel, const char* cond);
00162     static table* select(const std::vector<const ibis::part*>& parts,
00163                          const char* sel, const ibis::qExpr* cond);
00165     static int64_t computeHits(const std::vector<const ibis::part*>& parts,
00166                                const char* cond);
00168     static int64_t computeHits(const std::vector<const ibis::part*>& parts,
00169                                const ibis::qExpr* cond);
00170 
00177     virtual table* groupby(const stringList&) const =0;
00180     virtual table* groupby(const char*) const;
00188     virtual void orderby(const stringList&)=0;
00189     virtual void orderby(const stringList&, const std::vector<bool>&)=0;
00191     virtual void orderby(const char*);
00193     virtual void reverseRows()=0;
00194 
00207     virtual int addPartition(const char*) {return -1;}
00209     virtual int getPartitions(std::vector<const ibis::part*>&) const {
00210         return -1;}
00211 
00226     virtual int buildIndex(const char* colname, const char* option=0) =0;
00231     virtual int buildIndexes(const char* options=0) =0;
00234     virtual const char* indexSpec(const char* colname=0) const =0;
00237     virtual void indexSpec(const char* opt, const char* colname=0) =0;
00239 
00257     virtual int64_t
00258         getColumnAsBytes(const char* cname, char* vals,
00259                          uint64_t begin=0, uint64_t end=0) const =0;
00260     virtual int64_t
00261         getColumnAsUBytes(const char* cname, unsigned char* vals,
00262                           uint64_t begin=0, uint64_t end=0) const =0;
00263     virtual int64_t
00264         getColumnAsShorts(const char* cname, int16_t* vals,
00265                           uint64_t begin=0, uint64_t end=0) const =0;
00266     virtual int64_t
00267         getColumnAsUShorts(const char* cname, uint16_t* vals,
00268                            uint64_t begin=0, uint64_t end=0) const =0;
00269     virtual int64_t
00270         getColumnAsInts(const char* cname, int32_t* vals,
00271                         uint64_t begin=0, uint64_t end=0) const =0;
00272     virtual int64_t
00273         getColumnAsUInts(const char* cname, uint32_t* vals,
00274                          uint64_t begin=0, uint64_t end=0) const =0;
00275     virtual int64_t
00276         getColumnAsLongs(const char* cname, int64_t* vals,
00277                          uint64_t begin=0, uint64_t end=0) const =0;
00278     virtual int64_t
00279         getColumnAsULongs(const char* cname, uint64_t* vals,
00280                           uint64_t begin=0, uint64_t end=0) const =0;
00281     virtual int64_t
00282         getColumnAsFloats(const char* cname, float* vals,
00283                           uint64_t begin=0, uint64_t end=0) const =0;
00284     virtual int64_t
00285         getColumnAsDoubles(const char* cname, double* vals,
00286                            uint64_t begin=0, uint64_t end=0) const =0;
00287     virtual int64_t
00288         getColumnAsDoubles(const char* cname, std::vector<double>& vals,
00289                            uint64_t begin=0, uint64_t end=0) const =0;
00293     virtual int64_t
00294         getColumnAsStrings(const char* cname, std::vector<std::string>& vals,
00295                            uint64_t begin=0, uint64_t end=0) const =0;
00296 
00302     virtual double getColumnMin(const char* cname) const =0;
00308     virtual double getColumnMax(const char* cname) const =0;
00310 
00324     virtual long getHistogram(const char* constraints,
00325                               const char* cname,
00326                               double begin, double end, double stride,
00327                               std::vector<uint32_t>& counts) const =0;
00334     virtual long getHistogram2D(const char* constraints,
00335                                 const char* cname1,
00336                                 double begin1, double end1, double stride1,
00337                                 const char* cname2,
00338                                 double begin2, double end2, double stride2,
00339                                 std::vector<uint32_t>& counts) const =0;
00346     virtual long getHistogram3D(const char* constraints,
00347                                 const char* cname1,
00348                                 double begin1, double end1, double stride1,
00349                                 const char* cname2,
00350                                 double begin2, double end2, double stride2,
00351                                 const char* cname3,
00352                                 double begin3, double end3, double stride3,
00353                                 std::vector<uint32_t>& counts) const =0;
00355 
00357     struct row {
00358         std::vector<std::string>   bytesnames; 
00359         std::vector<signed char>   bytesvalues;
00360         std::vector<std::string>   ubytesnames; 
00361         std::vector<unsigned char> ubytesvalues;
00362         std::vector<std::string>   shortsnames; 
00363         std::vector<int16_t>       shortsvalues;
00364         std::vector<std::string>   ushortsnames; 
00365         std::vector<uint16_t>      ushortsvalues;
00366         std::vector<std::string>   intsnames; 
00367         std::vector<int32_t>       intsvalues;
00368         std::vector<std::string>   uintsnames; 
00369         std::vector<uint32_t>      uintsvalues;
00370         std::vector<std::string>   longsnames; 
00371         std::vector<int64_t>       longsvalues;
00372         std::vector<std::string>   ulongsnames; 
00373         std::vector<uint64_t>      ulongsvalues;
00374         std::vector<std::string>   floatsnames; 
00375         std::vector<float>         floatsvalues;
00376         std::vector<std::string>   doublesnames; 
00377         std::vector<double>        doublesvalues;
00378         std::vector<std::string>   catsnames; 
00379         std::vector<std::string>   catsvalues;
00380         std::vector<std::string>   textsnames; 
00381         std::vector<std::string>   textsvalues;
00382         std::vector<std::string>   blobsnames; 
00383         std::vector<std::string>   blobsvalues;
00384 
00386         void clear();
00388         void clearValues();
00390         uint32_t nColumns() const {
00391             return bytesvalues.size() + ubytesvalues.size() +
00392                 shortsvalues.size() + ushortsvalues.size() +
00393                 intsvalues.size() + uintsvalues.size() +
00394                 longsvalues.size() + ulongsvalues.size() +
00395                 floatsvalues.size() + doublesvalues.size() +
00396                 catsvalues.size() + textsvalues.size() + blobsvalues.size();}
00397     }; // struct row
00398 
00399     // Cursor class for row-wise data accesses.
00400     class cursor;
00402     virtual cursor* createCursor() const =0;
00403 
00404     static void parseNames(char* in, stringList& out);
00405     static void parseNames(char* in, stringList& out, std::vector<bool>& direc);
00406 
00407     static void* allocateBuffer(ibis::TYPE_T, size_t);
00408     static void freeBuffer(void* buffer, ibis::TYPE_T type);
00409     static void freeBuffers(bufferList&, typeList&);
00410 
00411 protected:
00412 
00413     std::string name_;  
00414     std::string desc_;  
00415 
00417     table() {};
00419     table(const char* na, const char* de)
00420         : name_(na?na:""), desc_(de?de:na?na:"") {};
00421 
00422 private:
00423     // re-enforce the prohibitions on copying and assignment.
00424     table(const table&);
00425     table& operator=(const table&);
00426 }; // class ibis::table
00427 
00440 class FASTBIT_CXX_DLLSPEC ibis::tablex {
00441 public:
00443     static ibis::tablex* create();
00444 //     /// Make the incoming table expandable.  Not yet implemented
00445 //     static ibis::tablex* makeExtensible(ibis::table* t);
00446 
00447     virtual ~tablex() {}; // nothing to do.
00448 
00450     virtual int addColumn(const char* cname, ibis::TYPE_T ctype,
00451                           const char* cdesc=0, const char* idx=0) =0;
00452 
00473     virtual int append(const char* cname, uint64_t begin, uint64_t end,
00474                        void* values) =0;
00475 
00505     virtual int appendRow(const ibis::table::row&) =0;
00511     virtual int appendRow(const char* line, const char* delimiters=0) = 0;
00519     virtual int appendRows(const std::vector<ibis::table::row>&) =0;
00520 
00543     virtual int readCSV(const char* inputfile, int maxrows=0,
00544                         const char* outputdir=0, const char* delimiters=0) =0;
00558     virtual int readSQLDump(const char* inputfile, std::string& tname,
00559                             int maxrows=0, const char* outputdir=0) =0;
00560 
00562     virtual int readNamesAndTypes(const char* filename);
00564     virtual int parseNamesAndTypes(const char* txt);
00565 
00603     virtual int write(const char* dir, const char* tname=0,
00604                       const char* tdesc=0, const char* idx=0,
00605                       const char* nvpairs=0) const =0;
00619     virtual int writeMetaData(const char* dir, const char* tname=0,
00620                               const char* tdesc=0, const char* idx=0,
00621                               const char* nvpairs=0) const =0;
00622 
00626     virtual void clearData() =0;
00638     virtual int32_t reserveSpace(uint32_t) {return 0;}
00648     virtual uint32_t capacity() const {return 0;}
00649 
00651     virtual uint32_t mRows() const =0;
00653     virtual uint32_t mColumns() const =0;
00655     virtual void describe(std::ostream&) const =0;
00656 
00662     virtual table* toTable(const char* nm=0, const char* de=0) =0;
00663 
00664 protected:
00665     tablex() {}; // Derived classes need this.
00666 
00667 private:
00668     tablex(const tablex&); // no copying
00669     tablex& operator=(const tablex&); // no assignment
00670 }; // class ibis::tablex
00671 
00675 class FASTBIT_CXX_DLLSPEC ibis::tableList {
00676 public:
00677     typedef std::map< const char*, ibis::table*, ibis::lessi > tableSet;
00678     typedef tableSet::const_iterator iterator;
00679 
00682     bool empty() const {return tables.empty();}
00684     uint32_t size() const {return tables.size();}
00686     iterator begin() const {return tables.begin();}
00690     iterator end() const {return tables.end();}
00691 
00694     const ibis::table* operator[](const char* tname) const {
00695         tableSet::const_iterator it = tables.find(tname);
00696         if (it != tables.end())
00697             return (*it).second;
00698         else
00699             return 0;
00700     }
00701 
00707     void add(ibis::table*& tb) {
00708         tableSet::iterator it = tables.find(tb->name());
00709         if (it == tables.end()) {
00710             tables[tb->name()] = tb;
00711             tb=0;
00712         }
00713         else {
00714             ibis::table* tmp = (*it).second;
00715             tables[tb->name()] = tb;
00716             tb = tmp;
00717         }
00718     }
00719 
00723     void remove(const char* tname) {
00724         tableSet::iterator it = tables.find(tname);
00725         if (it != tables.end()) {
00726             ibis::table* tmp = (*it).second;
00727             tables.erase(it);
00728             delete tmp;
00729         }
00730     }
00731 
00733     tableList() {};
00734 
00736     ~tableList() {
00737         while (! tables.empty()) {
00738             tableSet::iterator it = tables.begin();
00739             ibis::table* tmp = (*it).second;
00740             tables.erase(it);
00741             delete tmp;
00742         }
00743     }
00744 
00745 private:
00747     tableSet tables;
00748 
00749     // Can not copy or assign.
00750     tableList(const tableList&);
00751     tableList& operator=(const tableList&);
00752 }; // ibis::tableList
00753 
00759 class FASTBIT_CXX_DLLSPEC ibis::table::cursor {
00760 public:
00761     virtual ~cursor() {};
00762     virtual uint64_t nRows() const =0;
00763     virtual uint32_t nColumns() const =0;
00764     virtual ibis::table::typeList columnTypes() const =0;
00765     virtual ibis::table::stringList columnNames() const =0;
00768     virtual int fetch() =0;
00772     virtual int fetch(uint64_t rownum) =0;
00777     virtual uint64_t getCurrentRowNumber() const =0;
00778 
00781     virtual int fetch(ibis::table::row&) =0;
00784     virtual int fetch(uint64_t rownum, ibis::table::row&) =0;
00785 
00787     virtual int dump(std::ostream& out, const char* del=", ") const =0;
00788 
00792     virtual int getColumnAsByte(const char* cname, char&) const =0;
00793     virtual int getColumnAsUByte(const char* cname, unsigned char&) const =0;
00794     virtual int getColumnAsShort(const char* cname, int16_t&) const =0;
00795     virtual int getColumnAsUShort(const char* cname, uint16_t&) const =0;
00796     virtual int getColumnAsInt(const char* cname, int32_t&) const =0;
00797     virtual int getColumnAsUInt(const char* cname, uint32_t&) const =0;
00798     virtual int getColumnAsLong(const char* cname, int64_t&) const =0;
00799     virtual int getColumnAsULong(const char* cname, uint64_t&) const =0;
00800     virtual int getColumnAsFloat(const char* cname, float&) const =0;
00801     virtual int getColumnAsDouble(const char* cname, double&) const =0;
00802     virtual int getColumnAsString(const char* cname, std::string&) const =0;
00803 
00809     virtual int getColumnAsByte(uint32_t cnum, char& val) const =0;
00810     virtual int getColumnAsUByte(uint32_t cnum, unsigned char& val) const =0;
00811     virtual int getColumnAsShort(uint32_t cnum, int16_t& val) const =0;
00812     virtual int getColumnAsUShort(uint32_t cnum, uint16_t& val) const =0;
00813     virtual int getColumnAsInt(uint32_t cnum, int32_t& val) const =0;
00814     virtual int getColumnAsUInt(uint32_t cnum, uint32_t& val) const =0;
00815     virtual int getColumnAsLong(uint32_t cnum, int64_t& val) const =0;
00816     virtual int getColumnAsULong(uint32_t cnum, uint64_t& val) const =0;
00817     virtual int getColumnAsFloat(uint32_t cnum, float& val) const =0;
00818     virtual int getColumnAsDouble(uint32_t cnum, double& val) const =0;
00819     virtual int getColumnAsString(uint32_t cnum, std::string& val) const =0;
00820 
00821 protected:
00822     cursor() {};
00823     cursor(const cursor&); // not implemented
00824     cursor& operator=(const cursor&) ; // not implemented
00825 }; // ibis::table::cursor
00826 
00827 inline void ibis::table::row::clear() {
00828     bytesnames.clear();
00829     bytesvalues.clear();
00830     ubytesnames.clear();
00831     ubytesvalues.clear();
00832     shortsnames.clear();
00833     shortsvalues.clear();
00834     ushortsnames.clear();
00835     ushortsvalues.clear();
00836     intsnames.clear();
00837     intsvalues.clear();
00838     uintsnames.clear();
00839     uintsvalues.clear();
00840     longsnames.clear();
00841     longsvalues.clear();
00842     ulongsnames.clear();
00843     ulongsvalues.clear();
00844     floatsnames.clear();
00845     floatsvalues.clear();
00846     doublesnames.clear();
00847     doublesvalues.clear();
00848     catsnames.clear();
00849     catsvalues.clear();
00850     textsnames.clear();
00851     textsvalues.clear();
00852     blobsnames.clear();
00853     blobsvalues.clear();
00854 } // ibis::table::row::clear
00855 
00856 inline void ibis::table::row::clearValues() {
00857     bytesvalues.clear();
00858     ubytesvalues.clear();
00859     shortsvalues.clear();
00860     ushortsvalues.clear();
00861     intsvalues.clear();
00862     uintsvalues.clear();
00863     longsvalues.clear();
00864     ulongsvalues.clear();
00865     floatsvalues.clear();
00866     doublesvalues.clear();
00867     catsvalues.clear();
00868     textsvalues.clear();
00869     blobsvalues.clear();
00870 } // ibis::table::row::clearValues
00871 #endif // IBIS_TABLE_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive