mensa.h
Go to the documentation of this file.
00001 // File: $Id$
00002 // Author: John Wu <John.Wu at acm.org>
00003 //      Lawrence Berkeley National Laboratory
00004 // Copyright 2007-2012 the Regents of the University of California
00005 #ifndef IBIS_MENSA_H
00006 #define IBIS_MENSA_H
00007 #include "table.h"      // ibis::table
00008 #include "array_t.h"    // ibis::array_t
00009 
// Forward declarations of the two table classes declared later in this
// header.
namespace ibis {
    class mensa;
    class liga;
} // namespace ibis
00021 
00029 class ibis::mensa : public ibis::table {
00030 public:
00031     mensa() : nrows(0) {};
00032     explicit mensa(const char* dir);
00033     mensa(const char* dir1, const char* dir2);
00034     virtual ~mensa() {clear();}
00035 
00036     virtual uint64_t nRows() const {return nrows;}
00037     virtual uint32_t nColumns() const;
00038 
00039     virtual typeList columnTypes() const;
00040     virtual stringList columnNames() const;
00041     virtual int addPartition(const char* dir);
00042 
00043     virtual void describe(std::ostream&) const;
00044     virtual void dumpNames(std::ostream&, const char*) const;
00045     virtual int dump(std::ostream&, const char*) const;
00046     virtual int dump(std::ostream&, uint64_t, const char*) const;
00047     virtual int dump(std::ostream&, uint64_t, uint64_t, const char*) const;
00048     virtual int backup(const char* dir, const char* tname=0,
00049                        const char* tdesc=0) const;
00050 
00051     virtual int64_t
00052     getColumnAsBytes(const char*, char*, uint64_t =0, uint64_t =0) const;
00053     virtual int64_t
00054     getColumnAsUBytes(const char*, unsigned char*,
00055                       uint64_t =0, uint64_t =0) const;
00056     virtual int64_t
00057     getColumnAsShorts(const char*, int16_t*, uint64_t =0, uint64_t =0) const;
00058     virtual int64_t
00059     getColumnAsUShorts(const char*, uint16_t*, uint64_t =0, uint64_t =0) const;
00060     virtual int64_t
00061     getColumnAsInts(const char*, int32_t*, uint64_t =0, uint64_t =0) const;
00062     virtual int64_t
00063     getColumnAsUInts(const char*, uint32_t*, uint64_t =0, uint64_t =0) const;
00064     virtual int64_t
00065     getColumnAsLongs(const char*, int64_t*, uint64_t =0, uint64_t =0) const;
00066     virtual int64_t
00067     getColumnAsULongs(const char*, uint64_t*, uint64_t =0, uint64_t =0) const;
00068     virtual int64_t
00069     getColumnAsFloats(const char*, float*, uint64_t =0, uint64_t =0) const;
00070     virtual int64_t
00071     getColumnAsDoubles(const char*, double*, uint64_t =0, uint64_t =0) const;
00072     virtual int64_t
00073     getColumnAsDoubles(const char*, std::vector<double>&,
00074                        uint64_t =0, uint64_t =0) const;
00075     virtual int64_t
00076     getColumnAsStrings(const char*, std::vector<std::string>&,
00077                        uint64_t =0, uint64_t =0) const;
00078     virtual double getColumnMin(const char*) const;
00079     virtual double getColumnMax(const char*) const;
00080 
00081     virtual long getHistogram(const char*, const char*,
00082                               double, double, double,
00083                               std::vector<uint32_t>&) const;
00084     virtual long getHistogram2D(const char*, const char*,
00085                                 double, double, double,
00086                                 const char*,
00087                                 double, double, double,
00088                                 std::vector<uint32_t>&) const;
00089     virtual long getHistogram3D(const char*, const char*,
00090                                 double, double, double,
00091                                 const char*,
00092                                 double, double, double,
00093                                 const char*,
00094                                 double, double, double,
00095                                 std::vector<uint32_t>&) const;
00096 
00097     virtual void estimate(const char* cond,
00098                           uint64_t& nmin, uint64_t& nmax) const;
00099     virtual void estimate(const ibis::qExpr* cond,
00100                           uint64_t& nmin, uint64_t& nmax) const;
00101     using table::select;
00102     virtual table* select(const char* sel, const char* cond) const;
00103     virtual table* select2(const char* sel, const char* cond,
00104                            const char* pts) const;
00105 
00106     virtual void orderby(const stringList&, const std::vector<bool>&);
00107     virtual void orderby(const stringList&);
00108     virtual void orderby(const char *str) {ibis::table::orderby(str);}
00111     virtual void reverseRows() {};
00114     virtual table* groupby(const stringList&) const {return 0;}
00117     virtual table* groupby(const char *) const {return 0;}
00118 
00119     virtual int buildIndex(const char*, const char*);
00120     virtual int buildIndexes(const char*);
00121     virtual const char* indexSpec(const char*) const;
00122     virtual void indexSpec(const char*, const char*);
00123     virtual int getPartitions(ibis::constPartList &) const;
00124     virtual int combineCategories(const ibis::table::stringList&);
00125 
00126     // Cursor class for row-wise data accesses.
00127     class cursor;
00129     virtual ibis::table::cursor* createCursor() const;
00130 
00131 protected:
00133     ibis::partList parts;
00135     ibis::table::namesTypes naty;
00136     uint64_t nrows;
00137 
00139     void clear();
00141     int64_t computeHits(const char* cond) const {
00142         return ibis::table::computeHits
00143             (reinterpret_cast<const ibis::constPartList&>(parts),
00144              cond);}
00145 
00146 private:
00147     // disallow copying.
00148     mensa(const mensa&);
00149     mensa& operator=(const mensa&);
00150 
00151     friend class cursor;
00152 }; // ibis::mensa
00153 
00154 class ibis::mensa::cursor : public ibis::table::cursor {
00155 public:
00156     cursor(const ibis::mensa& t);
00157     virtual ~cursor() {clearBuffers();};
00158 
00159     virtual uint64_t nRows() const {return tab.nRows();}
00160     virtual uint32_t nColumns() const {return tab.nColumns();}
00161     virtual ibis::table::stringList columnNames() const {
00162         return tab.columnNames();}
00163     virtual ibis::table::typeList columnTypes() const {
00164         return tab.columnTypes();}
00165     virtual int fetch();
00166     virtual int fetch(uint64_t);
00167     virtual int fetch(ibis::table::row&);
00168     virtual int fetch(uint64_t, ibis::table::row&);
00169     virtual uint64_t getCurrentRowNumber() const {return curRow;}
00170     virtual int dump(std::ostream& out, const char* del) const;
00171 
00172     int dumpBlock(std::ostream& out, const char* del);
00173     int dumpSome(std::ostream& out, uint64_t nr, const char* del);
00174 
00175     virtual int getColumnAsByte(const char*, char&) const;
00176     virtual int getColumnAsUByte(const char*, unsigned char&) const;
00177     virtual int getColumnAsShort(const char*, int16_t&) const;
00178     virtual int getColumnAsUShort(const char*, uint16_t&) const;
00179     virtual int getColumnAsInt(const char*, int32_t&) const;
00180     virtual int getColumnAsUInt(const char*, uint32_t&) const;
00181     virtual int getColumnAsLong(const char*, int64_t&) const;
00182     virtual int getColumnAsULong(const char*, uint64_t&) const;
00183     virtual int getColumnAsFloat(const char*, float&) const;
00184     virtual int getColumnAsDouble(const char*, double&) const;
00185     virtual int getColumnAsString(const char*, std::string&) const;
00186 
00187     virtual int getColumnAsByte(uint32_t, char&) const;
00188     virtual int getColumnAsUByte(uint32_t, unsigned char&) const;
00189     virtual int getColumnAsShort(uint32_t, int16_t&) const;
00190     virtual int getColumnAsUShort(uint32_t, uint16_t&) const;
00191     virtual int getColumnAsInt(uint32_t, int32_t&) const;
00192     virtual int getColumnAsUInt(uint32_t, uint32_t&) const;
00193     virtual int getColumnAsLong(uint32_t, int64_t&) const;
00194     virtual int getColumnAsULong(uint32_t, uint64_t&) const;
00195     virtual int getColumnAsFloat(uint32_t, float&) const;
00196     virtual int getColumnAsDouble(uint32_t, double&) const;
00197     virtual int getColumnAsString(uint32_t, std::string&) const;
00198 
00199 protected:
00205     struct bufferElement {
00206         const char* cname; 
00207         ibis::TYPE_T ctype; 
00208         mutable void* cval; 
00209 
00210         bufferElement() : cname(0), ctype(ibis::UNKNOWN_TYPE), cval(0) {}
00211         ~bufferElement();
00212     }; // bufferElement
00213     typedef std::map<const char*, uint32_t, ibis::lessi> bufferMap;
00214     std::vector<bufferElement> buffer;
00215     bufferMap bufmap;
00216     const ibis::mensa& tab;
00217     unsigned curPart;
00218     unsigned preferred_block_size;
00219     uint64_t pBegin; 
00220     uint64_t bBegin; 
00221     uint64_t bEnd;   
00222     int64_t  curRow; 
00223 
00224     void clearBuffers();
00225     int  fillBuffers() const;
00226     int  fillBuffer(uint32_t) const;
00227     void fillRow(ibis::table::row& res) const;
00228     int  dumpIJ(std::ostream&, uint32_t, uint32_t) const;
00229 
00230 private:
00231     cursor();
00232     cursor(const cursor&);
00233     cursor& operator=(const cursor&);
00234 }; // ibis::mensa::cursor
00235 
00244 class ibis::liga : public ibis::mensa {
00245 public:
00246     liga(ibis::part&);
00247     liga(const ibis::partList&);
00248     ~liga();
00249 
00252     virtual int addPartition(const char*) {return -1;}
00253 
00254 private:
00255     liga();
00256     liga(const liga&);
00257     liga& operator=(const liga&);
00258 }; // ibis::liga
00259 
00260 inline int
00261 ibis::mensa::cursor::getColumnAsByte(const char* cn, char& val) const {
00262     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00263         return -1;
00264     bufferMap::const_iterator it = bufmap.find(cn);
00265     if (it != bufmap.end())
00266         return getColumnAsByte((*it).second, val);
00267     else
00268         return -2;
00269 } // ibis::mensa::cursor::getColumnAsByte
00270 
00271 inline int
00272 ibis::mensa::cursor::getColumnAsUByte(const char* cn,
00273                                       unsigned char& val) const {
00274     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00275         return -1;
00276     bufferMap::const_iterator it = bufmap.find(cn);
00277     if (it != bufmap.end())
00278         return getColumnAsUByte((*it).second, val);
00279     else
00280         return -2;
00281 } // ibis::mensa::cursor::getColumnAsUByte
00282 
00283 inline int
00284 ibis::mensa::cursor::getColumnAsShort(const char* cn, int16_t& val) const {
00285     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00286         return -1;
00287     bufferMap::const_iterator it = bufmap.find(cn);
00288     if (it != bufmap.end())
00289         return getColumnAsShort((*it).second, val);
00290     else
00291         return -2;
00292 } // ibis::mensa::cursor::getColumnAsShort
00293 
00294 inline int
00295 ibis::mensa::cursor::getColumnAsUShort(const char* cn, uint16_t& val) const {
00296     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00297         return -1;
00298     bufferMap::const_iterator it = bufmap.find(cn);
00299     if (it != bufmap.end())
00300         return getColumnAsUShort((*it).second, val);
00301     else
00302         return -2;
00303 } // ibis::mensa::cursor::getColumnAsUShort
00304 
00305 inline int
00306 ibis::mensa::cursor::getColumnAsInt(const char* cn, int32_t& val) const {
00307     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00308         return -1;
00309     bufferMap::const_iterator it = bufmap.find(cn);
00310     if (it != bufmap.end())
00311         return getColumnAsInt((*it).second, val);
00312     else
00313         return -2;
00314 } // ibis::mensa::cursor::getColumnAsInt
00315 
00316 inline int
00317 ibis::mensa::cursor::getColumnAsUInt(const char* cn, uint32_t& val) const {
00318     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00319         return -1;
00320     bufferMap::const_iterator it = bufmap.find(cn);
00321     if (it != bufmap.end())
00322         return getColumnAsUInt((*it).second, val);
00323     else
00324         return -2;
00325 } // ibis::mensa::cursor::getColumnAsUInt
00326 
00327 inline int
00328 ibis::mensa::cursor::getColumnAsLong(const char* cn, int64_t& val) const {
00329     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00330         return -1;
00331     bufferMap::const_iterator it = bufmap.find(cn);
00332     if (it != bufmap.end())
00333         return getColumnAsLong((*it).second, val);
00334     else
00335         return -2;
00336 } // ibis::mensa::cursor::getColumnAsLong
00337 
00338 inline int
00339 ibis::mensa::cursor::getColumnAsULong(const char* cn, uint64_t& val) const {
00340     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00341         return -1;
00342     bufferMap::const_iterator it = bufmap.find(cn);
00343     if (it != bufmap.end())
00344         return getColumnAsULong((*it).second, val);
00345     else
00346         return -2;
00347 } // ibis::mensa::cursor::getColumnAsULong
00348 
00349 inline int
00350 ibis::mensa::cursor::getColumnAsFloat(const char* cn, float& val) const {
00351     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00352         return -1;
00353     bufferMap::const_iterator it = bufmap.find(cn);
00354     if (it != bufmap.end())
00355         return getColumnAsFloat((*it).second, val);
00356     else
00357         return -2;
00358 } // ibis::mensa::cursor::getColumnAsFloat
00359 
00360 inline int
00361 ibis::mensa::cursor::getColumnAsDouble(const char* cn, double& val) const {
00362     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00363         return -1;
00364     bufferMap::const_iterator it = bufmap.find(cn);
00365     if (it != bufmap.end())
00366         return getColumnAsDouble((*it).second, val);
00367     else
00368         return -2;
00369 } // ibis::mensa::cursor::getColumnAsDouble
00370 
00371 inline int
00372 ibis::mensa::cursor::getColumnAsString(const char* cn,
00373                                        std::string& val) const {
00374     if (curRow < 0 || curPart >= tab.parts.size() || cn == 0 || *cn == 0)
00375         return -1;
00376     bufferMap::const_iterator it = bufmap.find(cn);
00377     if (it != bufmap.end())
00378         return getColumnAsString((*it).second, val);
00379     else
00380         return -2;
00381 } // ibis::mensa::cursor::getColumnAsString
00382 
00383 inline int
00384 ibis::mensa::dump(std::ostream& out, uint64_t nr, const char* del) const {
00385     if (parts.empty() || nr == 0) return 0;
00386     ibis::mensa::cursor cur(*this);
00387     int ierr = cur.dumpSome(out, nr, del);
00388     return ierr;
00389 } // ibis::mensa::dump
00390 
00391 inline int
00392 ibis::mensa::dump(std::ostream& out, uint64_t off, uint64_t nr,
00393                   const char* del) const {
00394     if (parts.empty() || nr == 0 || off > nrows) return 0;
00395     ibis::mensa::cursor cur(*this);
00396     int ierr = cur.fetch(off);
00397     if (ierr < 0) return ierr;
00398 
00399     ierr = cur.dumpSome(out, nr, del);
00400     return ierr;
00401 } // ibis::mensa::dump
00402 #endif // IBIS_MENSA_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive