mensa.h
Go to the documentation of this file.
00001 // File: $Id$
00002 // Author: John Wu <John.Wu at acm.org>
00003 //      Lawrence Berkeley National Laboratory
00004 // Copyright 2007-2011 the Regents of the University of California
00005 #ifndef IBIS_MENSA_H
00006 #define IBIS_MENSA_H
00007 #include "table.h"      // ibis::table
00008 #include "array_t.h"    // ibis::array_t
00009 
00017 namespace ibis {
00018     class mensa; // derived from ibis::table; full declaration follows below
00019     class liga;  // derived from ibis::mensa; full declaration follows below
00020 } // namespace ibis
00021 
00029 class ibis::mensa : public ibis::table {
          // A concrete ibis::table whose state consists of a list of data
          // partitions (parts), a column name/type map (naty) and a row count
          // (nrows).  Copying is disabled (see the private section).
00030 public:
          // Creates an empty table; only nrows is explicitly initialized (to 0).
00031     mensa() : nrows(0) {};
          // Constructors taking one or two directory names.
          // NOTE(review): presumably the directories hold the data partitions
          // to load -- confirm against the implementation file.
00032     explicit mensa(const char* dir);
00033     mensa(const char* dir1, const char* dir2);
          // The destructor releases the object's state through clear().
00034     virtual ~mensa() {clear();}
00035 
          // The row count is answered from the member nrows.
00036     virtual uint64_t nRows() const {return nrows;}
00037     virtual uint32_t nColumns() const;
00038 
00039     virtual typeList columnTypes() const;
00040     virtual stringList columnNames() const;
          // NOTE(review): presumably adds the partition(s) found in dir to
          // parts and returns a status code -- confirm in the implementation.
00041     virtual int addPartition(const char* dir);
00042 
          // Text output.  The dump overloads taking one or two uint64_t
          // arguments are defined inline near the end of this header, where
          // the arguments are named nr (alone) and off, nr (as a pair).
00043     virtual void describe(std::ostream&) const;
00044     virtual void dumpNames(std::ostream&, const char*) const;
00045     virtual int dump(std::ostream&, const char*) const;
00046     virtual int dump(std::ostream&, uint64_t, const char*) const;
00047     virtual int dump(std::ostream&, uint64_t, uint64_t, const char*) const;
00048     virtual int backup(const char* dir, const char* tname=0,
00049                        const char* tdesc=0) const;
00050 
          // Bulk column readers.  Each takes a const char* (presumably the
          // column name -- the cursor accessors below use that convention), an
          // output buffer or container of the named element type, and two
          // uint64_t arguments that default to 0.
          // NOTE(review): the two uint64_t look like a begin/end row range --
          // confirm against ibis::table.
00051     virtual int64_t
00052     getColumnAsBytes(const char*, char*, uint64_t =0, uint64_t =0) const;
00053     virtual int64_t
00054     getColumnAsUBytes(const char*, unsigned char*, uint64_t =0, uint64_t =0) const;
00055     virtual int64_t
00056     getColumnAsShorts(const char*, int16_t*, uint64_t =0, uint64_t =0) const;
00057     virtual int64_t
00058     getColumnAsUShorts(const char*, uint16_t*, uint64_t =0, uint64_t =0) const;
00059     virtual int64_t
00060     getColumnAsInts(const char*, int32_t*, uint64_t =0, uint64_t =0) const;
00061     virtual int64_t
00062     getColumnAsUInts(const char*, uint32_t*, uint64_t =0, uint64_t =0) const;
00063     virtual int64_t
00064     getColumnAsLongs(const char*, int64_t*, uint64_t =0, uint64_t =0) const;
00065     virtual int64_t
00066     getColumnAsULongs(const char*, uint64_t*, uint64_t =0, uint64_t =0) const;
00067     virtual int64_t
00068     getColumnAsFloats(const char*, float*, uint64_t =0, uint64_t =0) const;
00069     virtual int64_t
00070     getColumnAsDoubles(const char*, double*, uint64_t =0, uint64_t =0) const;
00071     virtual int64_t
00072     getColumnAsDoubles(const char*, std::vector<double>&,
00073                        uint64_t =0, uint64_t =0) const;
00074     virtual int64_t
00075     getColumnAsStrings(const char*, std::vector<std::string>&,
00076                        uint64_t =0, uint64_t =0) const;
00077     virtual double getColumnMin(const char*) const;
00078     virtual double getColumnMax(const char*) const;
00079 
          // One-, two- and three-dimensional histograms.  After the leading
          // const char*, each dimension contributes one const char* and three
          // doubles; the result goes into the uint32_t vector.
          // NOTE(review): presumably (condition; then column, begin, end,
          // stride per dimension) -- parameter names are not visible here.
00080     virtual long getHistogram(const char*, const char*,
00081                               double, double, double,
00082                               std::vector<uint32_t>&) const;
00083     virtual long getHistogram2D(const char*, const char*,
00084                                 double, double, double,
00085                                 const char*,
00086                                 double, double, double,
00087                                 std::vector<uint32_t>&) const;
00088     virtual long getHistogram3D(const char*, const char*,
00089                                 double, double, double,
00090                                 const char*,
00091                                 double, double, double,
00092                                 const char*,
00093                                 double, double, double,
00094                                 std::vector<uint32_t>&) const;
00095 
          // The condition is given either as text or as an ibis::qExpr; nmin
          // and nmax are non-const references.
          // NOTE(review): presumably they receive lower/upper bounds on the
          // number of matching rows -- confirm.
00096     virtual void estimate(const char* cond,
00097                           uint64_t& nmin, uint64_t& nmax) const;
00098     virtual void estimate(const ibis::qExpr* cond,
00099                           uint64_t& nmin, uint64_t& nmax) const;
          // Keeps the base-class select overloads visible next to the one
          // declared here, which would otherwise hide them.
00100     using table::select;
00101     virtual table* select(const char* sel, const char* cond) const;
00102     virtual table* select2(const char* sel, const char* cond,
00103                            const char* pts) const;
00104 
00105     virtual void orderby(const stringList&, const std::vector<bool>&);
00106     virtual void orderby(const stringList&);
          // The string form is forwarded unchanged to ibis::table::orderby.
00107     virtual void orderby(const char *str) {ibis::table::orderby(str);}
          // Does nothing in this class (empty body).
00110     virtual void reverseRows() {};
          // Both groupby overloads return a null pointer in this class.
00113     virtual table* groupby(const stringList&) const {return 0;}
00116     virtual table* groupby(const char *) const {return 0;}
00117 
00118     virtual int buildIndex(const char*, const char*);
00119     virtual int buildIndexes(const char*);
          // indexSpec is overloaded: the one-argument form returns a string,
          // the two-argument form returns nothing.
          // NOTE(review): presumably getter and setter -- confirm.
00120     virtual const char* indexSpec(const char*) const;
00121     virtual void indexSpec(const char*, const char*);
00122     virtual int getPartitions(std::vector<const ibis::part*> &) const;
00123 
00124     // Cursor class for row-wise data accesses.
00125     class cursor;
          // Returns a pointer typed as the base-class cursor.
          // NOTE(review): presumably a newly allocated ibis::mensa::cursor
          // owned by the caller -- confirm in the implementation.
00127     virtual ibis::table::cursor* createCursor() const;
00128 
00129 protected:
          // The data partitions of this table; the inline cursor accessors and
          // dump functions in this header check parts.size() / parts.empty().
00131     ibis::partList parts;
          // Column name/type information (type ibis::table::namesTypes).
00133     ibis::table::namesTypes naty;
          // Row count returned by nRows().
00134     uint64_t nrows;
00135 
          // Invoked by the destructor.
00137     void clear();
          // Forwards cond to ibis::table::computeHits, passing parts viewed as
          // a vector of pointers to const ibis::part.
          // NOTE(review): the reinterpret_cast assumes ibis::partList has the
          // same layout as std::vector<const ibis::part*> -- confirm.
00139     int64_t computeHits(const char* cond) const {
00140         return ibis::table::computeHits
00141             (reinterpret_cast<const std::vector<const ibis::part*>&>(parts),
00142              cond);}
00143 
00144 private:
00145     // disallow copying.
00146     mensa(const mensa&);
00147     mensa& operator=(const mensa&);
00148 
          // The nested cursor reads protected members of its table (the
          // inline accessors in this header use tab.parts).
00149     friend class cursor;
00150 }; // ibis::mensa
00151 
00152 class ibis::mensa::cursor : public ibis::table::cursor {
          // Row-wise reader over an ibis::mensa.  It keeps a reference to the
          // table (tab), a vector of per-column buffers (buffer) located by
          // name through bufmap, and position bookkeeping (curPart, pBegin,
          // bBegin, bEnd, curRow).
00153 public:
          // NOTE(review): presumably binds tab to t -- tab is a reference
          // member, so it must be initialized by this constructor.
00154     cursor(const ibis::mensa& t);
          // The destructor releases the buffers through clearBuffers().
00155     virtual ~cursor() {clearBuffers();};
00156 
          // Table-level queries are answered by the referenced table.
00157     virtual uint64_t nRows() const {return tab.nRows();}
00158     virtual uint32_t nColumns() const {return tab.nColumns();}
00159     virtual ibis::table::stringList columnNames() const {
00160         return tab.columnNames();}
00161     virtual ibis::table::typeList columnTypes() const {
00162         return tab.columnTypes();}
          // Four fetch overloads: with or without a uint64_t argument, and with
          // or without an ibis::table::row to receive values.
          // NOTE(review): presumably "advance to the next row" versus "go to
          // the given row number" -- confirm.  ibis::mensa::dump in this header
          // treats a negative return value from fetch(uint64_t) as an error.
00163     virtual int fetch();
00164     virtual int fetch(uint64_t);
00165     virtual int fetch(ibis::table::row&);
00166     virtual int fetch(uint64_t, ibis::table::row&);
          // curRow is a signed int64_t while the return type is unsigned: a
          // negative curRow (which the inline accessors below treat as "no
          // usable row") converts to a very large value here.
00167     virtual uint64_t getCurrentRowNumber() const {return curRow;}
00168     virtual int dump(std::ostream& out, const char* del) const;
00169 
          // Non-virtual, non-const output helpers; dumpSome is what the inline
          // ibis::mensa::dump overloads in this header call.
00170     int dumpBlock(std::ostream& out, const char* del);
00171     int dumpSome(std::ostream& out, uint64_t nr, const char* del);
00172 
          // Name-based single-value accessors.  They are defined inline later
          // in this header: each validates the cursor state and the name,
          // looks the name up in bufmap and calls the uint32_t overload below.
00173     virtual int getColumnAsByte(const char*, char&) const;
00174     virtual int getColumnAsUByte(const char*, unsigned char&) const;
00175     virtual int getColumnAsShort(const char*, int16_t&) const;
00176     virtual int getColumnAsUShort(const char*, uint16_t&) const;
00177     virtual int getColumnAsInt(const char*, int32_t&) const;
00178     virtual int getColumnAsUInt(const char*, uint32_t&) const;
00179     virtual int getColumnAsLong(const char*, int64_t&) const;
00180     virtual int getColumnAsULong(const char*, uint64_t&) const;
00181     virtual int getColumnAsFloat(const char*, float&) const;
00182     virtual int getColumnAsDouble(const char*, double&) const;
00183     virtual int getColumnAsString(const char*, std::string&) const;
00184 
          // Accessors keyed by a uint32_t, the value type stored in bufmap.
          // NOTE(review): presumably an index into buffer -- confirm.
00185     virtual int getColumnAsByte(uint32_t, char&) const;
00186     virtual int getColumnAsUByte(uint32_t, unsigned char&) const;
00187     virtual int getColumnAsShort(uint32_t, int16_t&) const;
00188     virtual int getColumnAsUShort(uint32_t, uint16_t&) const;
00189     virtual int getColumnAsInt(uint32_t, int32_t&) const;
00190     virtual int getColumnAsUInt(uint32_t, uint32_t&) const;
00191     virtual int getColumnAsLong(uint32_t, int64_t&) const;
00192     virtual int getColumnAsULong(uint32_t, uint64_t&) const;
00193     virtual int getColumnAsFloat(uint32_t, float&) const;
00194     virtual int getColumnAsDouble(uint32_t, double&) const;
00195     virtual int getColumnAsString(uint32_t, std::string&) const;
00196 
00197 protected:
          // One buffer slot per column: a name, a type tag and an untyped
          // pointer to the values.  The default constructor sets the pointers
          // to 0 and the type to ibis::UNKNOWN_TYPE; the destructor is defined
          // outside this header.
00203     struct bufferElement {
00204         const char* cname; // column name (not owned as far as this header shows)
00205         ibis::TYPE_T ctype; // type tag of the column
00206         mutable void* cval; // mutable: assignable through const access; presumably filled by the const fillBuffer() -- confirm
00207 
00208         bufferElement() : cname(0), ctype(ibis::UNKNOWN_TYPE), cval(0) {}
00209         ~bufferElement();
00210     }; // bufferElement
          // Maps a column name to the uint32_t handed to the index-based
          // accessors; keys are ordered by ibis::lessi.
          // NOTE(review): lessi is presumably a case-insensitive comparison
          // -- confirm.
00211     typedef std::map<const char*, uint32_t, ibis::lessi> bufferMap;
00212     std::vector<bufferElement> buffer;
00213     bufferMap bufmap;
          // The table being read; held by reference, so the table has to stay
          // alive for as long as the cursor is used.
00214     const ibis::mensa& tab;
          // Compared against tab.parts.size() by the inline accessors.
00215     unsigned curPart;
00216     unsigned preferred_block_size;
00217     uint64_t pBegin; // NOTE(review): presumably first row number of the current partition -- confirm
00218     uint64_t bBegin; // NOTE(review): presumably first row number held in the buffers -- confirm
00219     uint64_t bEnd;   // NOTE(review): presumably end of the buffered row range -- confirm
00220     int64_t  curRow; // current row; the inline accessors return -1 while it is negative
00221 
00222     void clearBuffers();
00223     int  fillBuffers() const;
00224     int  fillBuffer(uint32_t) const;
00225     void fillRow(ibis::table::row& res) const;
00226     int  dumpIJ(std::ostream&, uint32_t, uint32_t) const;
00227 
00228 private:
          // Declared private.
          // NOTE(review): presumably to forbid default construction and
          // copying, as ibis::mensa does for copying -- confirm that no
          // definitions exist.
00229     cursor();
00230     cursor(const cursor&);
00231     cursor& operator=(const cursor&);
00232 }; // ibis::mensa::cursor
00233 
00242 class ibis::liga : public ibis::mensa {
          // An ibis::mensa constructed from ibis::part objects (a single part
          // or an ibis::partList) instead of directory names.
00243 public:
          // NOTE(review): both constructors are usable for implicit
          // conversion; whether the parts are owned afterwards cannot be told
          // from this header -- confirm in the implementation.
00244     liga(ibis::part&);
00245     liga(const ibis::partList&);
          // Virtual through the base-class destructor.
00246     ~liga();
00247 
          // Adding a partition by directory name always fails with -1 here.
00250     virtual int addPartition(const char*) {return -1;}
00251 
00252 private:
          // Declared private.
          // NOTE(review): presumably to forbid default construction and
          // copying -- confirm that no definitions exist.
00253     liga();
00254     liga(const liga&);
00255     liga& operator=(const liga&);
00256 }; // ibis::liga
00257 
00258 // Name-based accessor: resolves cn through bufmap and defers to the
00259 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00260 inline int
00261 ibis::mensa::cursor::getColumnAsByte(const char* cn, char& val) const {
00262     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00263                          cn != 0 && *cn != 0);
00264     if (!usable) return -1;
00265     const bufferMap::const_iterator pos = bufmap.find(cn);
00266     return (pos == bufmap.end() ? -2 : getColumnAsByte(pos->second, val));
00267 } // ibis::mensa::cursor::getColumnAsByte
00268 
00269 // Name-based accessor: resolves cn through bufmap and defers to the
00270 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00271 inline int
00272 ibis::mensa::cursor::getColumnAsUByte(const char* cn,
00273                                       unsigned char& val) const {
00274     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00275                          cn != 0 && *cn != 0);
00276     if (!usable) return -1;
00277     const bufferMap::const_iterator pos = bufmap.find(cn);
00278     return (pos == bufmap.end() ? -2 : getColumnAsUByte(pos->second, val));
00279 } // ibis::mensa::cursor::getColumnAsUByte
00280 
00281 // Name-based accessor: resolves cn through bufmap and defers to the
00282 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00283 inline int
00284 ibis::mensa::cursor::getColumnAsShort(const char* cn, int16_t& val) const {
00285     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00286                          cn != 0 && *cn != 0);
00287     if (!usable) return -1;
00288     const bufferMap::const_iterator pos = bufmap.find(cn);
00289     return (pos == bufmap.end() ? -2 : getColumnAsShort(pos->second, val));
00290 } // ibis::mensa::cursor::getColumnAsShort
00291 
00292 // Name-based accessor: resolves cn through bufmap and defers to the
00293 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00294 inline int
00295 ibis::mensa::cursor::getColumnAsUShort(const char* cn, uint16_t& val) const {
00296     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00297                          cn != 0 && *cn != 0);
00298     if (!usable) return -1;
00299     const bufferMap::const_iterator pos = bufmap.find(cn);
00300     return (pos == bufmap.end() ? -2 : getColumnAsUShort(pos->second, val));
00301 } // ibis::mensa::cursor::getColumnAsUShort
00302 
00303 // Name-based accessor: resolves cn through bufmap and defers to the
00304 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00305 inline int
00306 ibis::mensa::cursor::getColumnAsInt(const char* cn, int32_t& val) const {
00307     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00308                          cn != 0 && *cn != 0);
00309     if (!usable) return -1;
00310     const bufferMap::const_iterator pos = bufmap.find(cn);
00311     return (pos == bufmap.end() ? -2 : getColumnAsInt(pos->second, val));
00312 } // ibis::mensa::cursor::getColumnAsInt
00313 
00314 // Name-based accessor: resolves cn through bufmap and defers to the
00315 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00316 inline int
00317 ibis::mensa::cursor::getColumnAsUInt(const char* cn, uint32_t& val) const {
00318     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00319                          cn != 0 && *cn != 0);
00320     if (!usable) return -1;
00321     const bufferMap::const_iterator pos = bufmap.find(cn);
00322     return (pos == bufmap.end() ? -2 : getColumnAsUInt(pos->second, val));
00323 } // ibis::mensa::cursor::getColumnAsUInt
00324 
00325 // Name-based accessor: resolves cn through bufmap and defers to the
00326 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00327 inline int
00328 ibis::mensa::cursor::getColumnAsLong(const char* cn, int64_t& val) const {
00329     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00330                          cn != 0 && *cn != 0);
00331     if (!usable) return -1;
00332     const bufferMap::const_iterator pos = bufmap.find(cn);
00333     return (pos == bufmap.end() ? -2 : getColumnAsLong(pos->second, val));
00334 } // ibis::mensa::cursor::getColumnAsLong
00335 
00336 // Name-based accessor: resolves cn through bufmap and defers to the
00337 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00338 inline int
00339 ibis::mensa::cursor::getColumnAsULong(const char* cn, uint64_t& val) const {
00340     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00341                          cn != 0 && *cn != 0);
00342     if (!usable) return -1;
00343     const bufferMap::const_iterator pos = bufmap.find(cn);
00344     return (pos == bufmap.end() ? -2 : getColumnAsULong(pos->second, val));
00345 } // ibis::mensa::cursor::getColumnAsULong
00346 
00347 // Name-based accessor: resolves cn through bufmap and defers to the
00348 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00349 inline int
00350 ibis::mensa::cursor::getColumnAsFloat(const char* cn, float& val) const {
00351     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00352                          cn != 0 && *cn != 0);
00353     if (!usable) return -1;
00354     const bufferMap::const_iterator pos = bufmap.find(cn);
00355     return (pos == bufmap.end() ? -2 : getColumnAsFloat(pos->second, val));
00356 } // ibis::mensa::cursor::getColumnAsFloat
00357 
00358 // Name-based accessor: resolves cn through bufmap and defers to the
00359 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00360 inline int
00361 ibis::mensa::cursor::getColumnAsDouble(const char* cn, double& val) const {
00362     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00363                          cn != 0 && *cn != 0);
00364     if (!usable) return -1;
00365     const bufferMap::const_iterator pos = bufmap.find(cn);
00366     return (pos == bufmap.end() ? -2 : getColumnAsDouble(pos->second, val));
00367 } // ibis::mensa::cursor::getColumnAsDouble
00368 
00369 // Name-based accessor: resolves cn through bufmap and defers to the
00370 // index-based overload.  -1: unusable cursor state or name; -2: unknown name.
00371 inline int
00372 ibis::mensa::cursor::getColumnAsString(const char* cn,
00373                                        std::string& val) const {
00374     const bool usable = (curRow >= 0 && curPart < tab.parts.size() &&
00375                          cn != 0 && *cn != 0);
00376     if (!usable) return -1;
00377     const bufferMap::const_iterator pos = bufmap.find(cn);
00378     return (pos == bufmap.end() ? -2 : getColumnAsString(pos->second, val));
00379 } // ibis::mensa::cursor::getColumnAsString
00380 
00381 // Forwards out, nr and del to dumpSome on a freshly constructed cursor.
00382 inline int
00383 ibis::mensa::dump(std::ostream& out, uint64_t nr, const char* del) const {
00384     if (nr == 0 || parts.empty()) return 0; // nothing to dump
00385     ibis::mensa::cursor rows(*this);
00386     return rows.dumpSome(out, nr, del);
00387 } // ibis::mensa::dump
00388 
00389 inline int
00390 ibis::mensa::dump(std::ostream& out, uint64_t off, uint64_t nr,
00391                   const char* del) const {
00392     if (parts.empty() || nr == 0 || off > nrows) return 0;
00393     ibis::mensa::cursor cur(*this);
00394     int ierr = cur.fetch(off);
00395     if (ierr < 0) return ierr;
00396 
00397     ierr = cur.dumpSome(out, nr, del);
00398     return ierr;
00399 } // ibis::mensa::dump
00400 #endif // IBIS_MENSA_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive