column.h
Go to the documentation of this file.
00001 //File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 // Copyright 2000-2011 the Regents of the University of California
00004 #ifndef IBIS_COLUMN_H
00005 #define IBIS_COLUMN_H
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 #include "table.h"      // ibis::TYPE_T
00014 #include "qExpr.h"      // ibis::qContinuousRange
00015 #include "bitvector.h"
00016 #include <string>
00017 
00018 namespace ibis { // forward declarations added to namespace ibis
00019     // Classes derived from ibis::column; implemented in category.cpp.
00020     class blob;       // text that may contain null characters
00021     class category;   // categorical values (low-cardinality text fields)
00022     class text;       // text fields of arbitrary cardinality
00023 
00024     // Containers for the selected values of the different column types;
00025     // implemented in colValues.cpp.
00026     class colBytes;
00027     class colShorts;
00028     class colInts;
00029     class colLongs;
00030     class colUBytes;
00031     class colUShorts;
00032     class colUInts;
00033     class colULongs;
00034     class colFloats;
00035     class colDoubles;
00036     class colStrings;
00037 } // namespace ibis
00038 
00049 class FASTBIT_CXX_DLLSPEC ibis::column {
      // Describes a single column.  The object stores the column's type,
      // name, description, index specification, sorted flag and lower/upper
      // bounds, together with the part pointer returned by partition() and
      // an index pointer that const member functions may update (idx is
      // mutable).  Most member functions are only declared in this header;
      // the notes below are limited to what the declarations show.
00050 public:
00051 
      // Virtual destructor: the class declares many virtual functions and
      // is meant to be used as a base class.
00052     virtual ~column();
      // Construct from a part and a FILE*.
      // NOTE(review): presumably reads the column description from `file`
      // -- confirm against the definition.
00054     column(const part* tbl, FILE* file);
      // Construct from an explicit type, name, description and bounds.
      // With the default arguments low (DBL_MAX) is greater than high
      // (-DBL_MAX).
00056     column(const part* tbl, ibis::TYPE_T t, const char* name,
00057            const char* desc="", double low=DBL_MAX, double high=-DBL_MAX);
00058     column(const column& rhs); // copy constructor
00059 
      // Inline accessors.  name() and description() return pointers into
      // the std::string members, so those pointers become invalid once the
      // matching setter assigns a new value.
00062     ibis::TYPE_T type() const {return m_type;}
00064     const char* name() const {return m_name.c_str();}
      // Rename the column.  A null pointer is stored as the empty string,
      // because assigning a null char pointer to std::string is undefined.
00066     void name(const char* nm) {m_name = (nm != 0 ? nm : "");}
00068     const char* description() const {return m_desc.c_str();}
00070     const double& lowerBound() const {return lower;}
00072     const double& upperBound() const {return upper;}
00073 
      // Type queries; their inline definitions follow the class.
00074     int elementSize() const;
00075     bool isFloat() const;
00076     bool isInteger() const;
00077     bool isSignedInteger() const;
00078     bool isUnsignedInteger() const;
00079     bool isNumeric() const;
00080     bool isSorted() const {return m_sorted;} 
      // Replace the description.  A null pointer is stored as the empty
      // string (same reason as name(const char*)).
00081     void description(const char* d) {m_desc = (d != 0 ? d : "");}
00082     void lowerBound(double d) {lower = d;}
00083     void upperBound(double d) {upper = d;}
00084     const part* partition() const {return thePart;}
00085     void isSorted(bool);
00086 
00087     // functions related to index/bin
00088     const char* indexSpec() const; 
00089     uint32_t numBins() const; 
00090 
      // Replace the index specification string held in m_bins.  A null
      // pointer is stored as the empty string.
00091     void indexSpec(const char* spec) {m_bins = (spec != 0 ? spec : "");}
00093     void preferredBounds(std::vector<double>&) const;
00095     void binWeights(std::vector<uint32_t>&) const;
00096 
      // Three computeMinMax overloads.  Only the last one is const; it
      // reports its result through the min and max reference arguments.
00100     virtual void computeMinMax();
00101     virtual void computeMinMax(const char *dir);
00105     virtual void computeMinMax(const char *dir,
00106                                double& min, double &max) const;
00107 
      // Index management.  loadIndex is declared throw(), i.e. no exception
      // may leave it.
00108     virtual void loadIndex(const char* iopt=0, int ropt=0) const throw ();
00109     virtual void unloadIndex() const;
00110     virtual long indexSize() const;
00111 
00112     uint32_t indexedRows() const;
00113     void indexSpeedTest() const;
00114     void purgeIndexFile(const char *dir=0) const;
00115 
      // File names and null mask.  The name functions take a caller-supplied
      // std::string by reference in addition to returning a const char*.
      // NOTE(review): presumably the returned pointer refers to that string
      // -- confirm against the definitions.
00116     const char* dataFileName(std::string& fname, const char *dir=0) const;
00117     const char* nullMaskName(std::string& fname) const;
00118     void getNullMask(bitvector& mask) const;
00119     int  setNullMask(const bitvector&);
00120 
      // Defaults for string-valued access: getString leaves its output
      // argument untouched and findString returns a null pointer.
00123     virtual void getString(uint32_t, std::string &) const {};
00127     virtual const char* findString(const char*) const
00128     {return static_cast<const char*>(0);}
00129 
      // Whole-column retrieval.
      // NOTE(review): ownership of the returned pointers is not visible in
      // this header -- confirm before deleting or caching them.
00130     array_t<int32_t>* getIntArray() const;
00131     array_t<float>*   getFloatArray() const;
00132     array_t<double>*  getDoubleArray() const;
00133     virtual int getValuesArray(void* vals) const;
00134     virtual ibis::fileManager::storage* getRawData() const;
00135 
      // Typed selection: one virtual function per element type, each taking
      // a bitvector mask and returning a pointer to a container of values.
      // NOTE(review): ownership of the returned containers is not visible
      // here -- confirm against the definitions.
00136     virtual array_t<signed char>*   selectBytes(const bitvector& mask) const;
00137     virtual array_t<unsigned char>* selectUBytes(const bitvector& mask) const;
00138     virtual array_t<int16_t>*  selectShorts(const bitvector& mask) const;
00139     virtual array_t<uint16_t>* selectUShorts(const bitvector& mask) const;
00140     virtual array_t<int32_t>*  selectInts(const bitvector& mask) const;
00141     virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00142     virtual array_t<int64_t>*  selectLongs(const bitvector& mask) const;
00143     virtual array_t<uint64_t>* selectULongs(const bitvector& mask) const;
00144     virtual array_t<float>*    selectFloats(const bitvector& mask) const;
00145     virtual array_t<double>*   selectDoubles(const bitvector& mask) const;
00146     virtual std::vector<std::string>*
00147         selectStrings(const bitvector& mask) const;
00148 
      // Untyped selection into a caller-supplied object passed as void*.
      // NOTE(review): the expected dynamic type of `vals` is not visible in
      // this header -- confirm against the definition.
00149     long selectValues(const bitvector& mask, void* vals) const;
00150     long selectValues(const bitvector& mask,
00151                       void* vals, array_t<uint32_t>& inds) const;
00152 
      // Output and logging.  The log functions take a printf-style format
      // string followed by variadic arguments.
00154     virtual void write(FILE* file) const;
00156     virtual void print(std::ostream& out) const;
00158     void logMessage(const char* event, const char* fmt, ...) const;
00160     void logWarning(const char* event, const char* fmt, ...) const;
00161 
      // Both functions may modify the range expression passed in.
00164     int expandRange(ibis::qContinuousRange& rng) const;
00167     int contractRange(ibis::qContinuousRange& rng) const;
00168 
      // estimateRange with two output bitvectors; one overload for each of
      // the four range-expression types (qContinuousRange, qDiscreteRange,
      // qIntHod, qUIntHod).
00178     virtual long estimateRange(const ibis::qContinuousRange& cmp,
00179                                ibis::bitvector& low,
00180                                ibis::bitvector& high) const;
00182     virtual long estimateRange(const ibis::qDiscreteRange& cmp,
00183                                ibis::bitvector& low,
00184                                ibis::bitvector& high) const;
00186     virtual long estimateRange(const ibis::qIntHod& cmp,
00187                                ibis::bitvector& low,
00188                                ibis::bitvector& high) const;
00190     virtual long estimateRange(const ibis::qUIntHod& cmp,
00191                                ibis::bitvector& low,
00192                                ibis::bitvector& high) const;
00193 
      // evaluateRange: takes an input mask and writes the result into res;
      // same four range-expression types as above.
00196     virtual long evaluateRange(const ibis::qContinuousRange& cmp,
00197                                const ibis::bitvector& mask,
00198                                ibis::bitvector& res) const;
00200     virtual long evaluateRange(const ibis::qDiscreteRange& cmp,
00201                                const ibis::bitvector& mask,
00202                                ibis::bitvector& res) const;
00204     virtual long evaluateRange(const ibis::qIntHod& cmp,
00205                                const ibis::bitvector& mask,
00206                                ibis::bitvector& res) const;
00208     virtual long evaluateRange(const ibis::qUIntHod& cmp,
00209                                const ibis::bitvector& mask,
00210                                ibis::bitvector& res) const;
00211 
      // Text searches.  Each search comes in two forms: one that fills a
      // bitvector and one that only returns a long.
00212     virtual long stringSearch(const char*, ibis::bitvector&) const;
00213     virtual long stringSearch(const std::vector<std::string>&,
00214                               ibis::bitvector&) const;
00215     virtual long stringSearch(const char*) const;
00216     virtual long stringSearch(const std::vector<std::string>&) const;
00217     virtual long keywordSearch(const char*, ibis::bitvector&) const;
00218     virtual long keywordSearch(const char*) const;
00219     virtual long patternSearch(const char*) const;
00220     virtual long patternSearch(const char*, ibis::bitvector &) const;
00221 
      // estimateRange without output bitvectors; only a long is returned.
00225     virtual long estimateRange(const ibis::qContinuousRange& cmp) const;
00227     virtual long estimateRange(const ibis::qDiscreteRange& cmp) const;
00229     virtual long estimateRange(const ibis::qIntHod& cmp) const;
00231     virtual long estimateRange(const ibis::qUIntHod& cmp) const;
00232 
      // Cost estimates.  The qString and qMultiString versions are defined
      // here and return 0.
00234     virtual double estimateCost(const ibis::qContinuousRange& cmp) const;
00236     virtual double estimateCost(const ibis::qDiscreteRange& cmp) const;
00238     virtual double estimateCost(const ibis::qIntHod& cmp) const;
00240     virtual double estimateCost(const ibis::qUIntHod& cmp) const;
00242     virtual double estimateCost(const ibis::qString&) const {
00243         return 0;}
00245     virtual double estimateCost(const ibis::qMultiString&) const {
00246         return 0;}
00247 
      // getUndecidable: fills the bitvector iffy and returns a float; one
      // overload per range-expression type.
00248     virtual float getUndecidable(const ibis::qContinuousRange& cmp,
00249                                  ibis::bitvector& iffy) const;
00251     virtual float getUndecidable(const ibis::qDiscreteRange& cmp,
00252                                  ibis::bitvector& iffy) const;
00254     virtual float getUndecidable(const ibis::qIntHod& cmp,
00255                                  ibis::bitvector& iffy) const;
00257     virtual float getUndecidable(const ibis::qUIntHod& cmp,
00258                                  ibis::bitvector& iffy) const;
00259 
      // Functions that add, write, save or truncate column data.  They are
      // only declared here; see their definitions for the exact contracts.
00261     virtual long append(const char* dt, const char* df, const uint32_t nold,
00262                         const uint32_t nnew, uint32_t nbuf, char* buf);
00263 
00264     virtual long append(const void* vals, const ibis::bitvector& msk);
00265     virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
00266                            ibis::bitvector& mask, const void *va1,
00267                            void *va2=0);
00268     template <typename T>
00269     long castAndWrite(const array_t<double>& vals, ibis::bitvector& mask,
00270                       const T special);
00271     virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
00272                               char *buf, uint32_t nbuf);
00273     virtual long truncateData(const char* dir, uint32_t nent,
00274                               ibis::bitvector& mask) const;
00275 
      // Statistics over the column values.
00281     virtual double getActualMin() const;
00284     virtual double getActualMax() const;
00286     virtual double getSum() const;
00293     long getCumulativeDistribution(std::vector<double>& bounds,
00294                                    std::vector<uint32_t>& counts) const;
00306     long getDistribution(std::vector<double>& bbs,
00307                          std::vector<uint32_t>& counts) const;
      // Public nested classes; they are defined after this class.
00309     class info;
00310     class indexLock;
00311     class mutexLock;
00312 
00313 protected:
00314     // protected member variables
00315     const part* thePart; // returned by partition()
00316     ibis::bitvector mask_; // NOTE(review): presumably the null mask -- confirm
00317     ibis::TYPE_T m_type; // returned by type()
00318     std::string m_name; // returned by name()
00319     std::string m_desc; // returned by description()
00320     std::string m_bins; // assigned by indexSpec(const char*)
00321     bool m_sorted;      // returned by isSorted()
00322     double lower;       // returned by lowerBound()
00323     double upper;       // returned by upperBound()
00324 
      // Index object; mutable so that const member functions can change it.
      // indexLock::getIndex() returns this pointer.
00325     mutable ibis::index* idx;
      // NOTE(review): presumably counts the current users of idx -- confirm.
00327     mutable ibis::util::sharedInt32 idxcnt;
00328 
00330     void logError(const char* event, const char* fmt, ...) const;
00333     long string2int(int fptr, dictionary& dic, uint32_t nbuf, char* buf,
00334                     array_t<uint32_t>& out) const;
00336     double computeMin() const;
00338     double computeMax() const;
00340     double computeSum() const;
00343     void actualMinMax(const char *fname, const ibis::bitvector& mask,
00344                       double &min, double &max) const;
00346     template <typename T>
00347     void actualMinMax(const array_t<T>& vals, const ibis::bitvector& mask,
00348                       double& min, double& max) const;
00350     template <typename T>
00351     T computeMin(const array_t<T>& vals,
00352                  const ibis::bitvector& mask) const;
00354     template <typename T>
00355     T computeMax(const array_t<T>& vals,
00356                  const ibis::bitvector& mask) const;
00358     template <typename T>
00359     double computeSum(const array_t<T>& vals,
00360                       const ibis::bitvector& mask) const;
00361 
      // searchSorted overloads and their typed helpers.  The IC* helpers
      // take an array of values, the OOC* helpers take a file name.
      // NOTE(review): the suffixes presumably stand for in-core/out-of-core
      // and continuous/discrete range -- confirm against the definitions.
00363     virtual int searchSorted(const ibis::qContinuousRange&,
00364                              ibis::bitvector&) const;
00366     virtual int searchSorted(const ibis::qDiscreteRange&,
00367                              ibis::bitvector&) const;
00369     virtual int searchSorted(const ibis::qIntHod&,
00370                              ibis::bitvector&) const;
00372     virtual int searchSorted(const ibis::qUIntHod&,
00373                              ibis::bitvector&) const;
00375     template <typename T> int
00376         searchSortedICC(const array_t<T>& vals,
00377                         const ibis::qContinuousRange& rng,
00378                         ibis::bitvector& hits) const;
00380     template <typename T> int
00381         searchSortedICD(const array_t<T>& vals,
00382                         const ibis::qDiscreteRange& rng,
00383                         ibis::bitvector& hits) const;
00385     template <typename T> int
00386         searchSortedICD(const array_t<T>& vals,
00387                         const ibis::qIntHod& rng,
00388                         ibis::bitvector& hits) const;
00390     template <typename T> int
00391         searchSortedICD(const array_t<T>& vals,
00392                         const ibis::qUIntHod& rng,
00393                         ibis::bitvector& hits) const;
00395     template <typename T> int
00396         searchSortedOOCC(const char* fname,
00397                          const ibis::qContinuousRange& rng,
00398                          ibis::bitvector& hits) const;
00400     template <typename T> int
00401         searchSortedOOCD(const char* fname,
00402                          const ibis::qDiscreteRange& rng,
00403                          ibis::bitvector& hits) const;
00405     template <typename T> int
00406         searchSortedOOCD(const char* fname,
00407                          const ibis::qIntHod& rng,
00408                          ibis::bitvector& hits) const;
00410     template <typename T> int
00411         searchSortedOOCD(const char* fname,
00412                          const ibis::qUIntHod& rng,
00413                          ibis::bitvector& hits) const;
00414 
00416     template <typename T> uint32_t
00417         findLower(int fdes, const uint32_t nr, const T tgt) const;
00419     template <typename T> uint32_t
00420         findUpper(int fdes, const uint32_t nr, const T tgt) const;
00421     template <typename T>
00422         long selectValuesT(const bitvector& mask, array_t<T>& vals) const;
00423     template <typename T>
00424         long selectValuesT(const bitvector& mask,
00425                            array_t<T>& vals, array_t<uint32_t>& inds) const;
00426     template <typename T>
00427         long selectToStrings(const bitvector& mask,
00428                              std::vector<std::string>& str) const;
00430     template <typename T>
00431         long appendValues(const array_t<T>&, const ibis::bitvector&);
00433     long appendStrings(const std::vector<std::string>&, const ibis::bitvector&);
00434 
      // Protected nested lock classes.  All five lock classes are friends
      // so that they can reach the private rwlock and mutex members.
00435     class readLock;
00436     class writeLock;
00437     class softWriteLock;
00438     friend class readLock;
00439     friend class writeLock;
00440     friend class indexLock;
00441     friend class mutexLock;
00442     friend class softWriteLock;
00443 
00444 private:
      // NOTE(review): presumably used by readLock, writeLock and
      // softWriteLock -- their definitions are not in this header.
00447     mutable pthread_rwlock_t rwlock;
      // Locked and unlocked by mutexLock.
00449     mutable pthread_mutex_t mutex;
00450 
00451     column& operator=(const column&); // no assignment
00452 }; // ibis::column
00453 
00456 class FASTBIT_CXX_DLLSPEC ibis::column::info {
      // Snapshot of a column's name, description, bounds and type.  The two
      // string fields are raw pointers copied from column::name() and
      // column::description(); they point into the column's own strings and
      // are therefore only usable while that column exists and its name and
      // description have not been reassigned.
00457  public:
00458     const char* name;           // from column::name()
00459     const char* description;    // from column::description()
00460     const double expectedMin;   // from column::lowerBound()
00461     const double expectedMax;   // from column::upperBound()
00462     const ibis::TYPE_T type;    // from column::type()
      // Capture the current values of the given column.
00463     info(const ibis::column& col)
00464         : name(col.name()),
00465           description(col.description()),
00466           expectedMin(col.lowerBound()),
00467           expectedMax(col.upperBound()),
00468           type(col.type()) {}
      // Member-wise copy; the pointers are copied, not the strings.
00469     info(const info& rhs)
00470         : name(rhs.name), description(rhs.description),
00471           expectedMin(rhs.expectedMin), expectedMax(rhs.expectedMax),
00472           type(rhs.type) {}
00473  private:
00474     info();                         // private: no default construction
00475     info& operator=(const info&);   // private: no assignment
00475 }; // ibis::column::info
00476 
00480 class ibis::column::indexLock {
      // Guard object tied to one column.  The constructor and destructor are
      // defined elsewhere.
      // NOTE(review): presumably they keep the column's index available for
      // the lifetime of this object -- confirm against the definitions.
00481 public:
00482     indexLock(const ibis::column* col, const char* m);
00483     ~indexLock();
      // Current value of the guarded column's idx pointer.
00484     const ibis::index* getIndex() const { return theColumn->idx; }
00485 
00486 private:
00487     indexLock();                            // private: no default construction
00488     indexLock(const indexLock&);            // private: no copying
00489     indexLock& operator=(const indexLock&); // private: no assignment
00490 
00491     const ibis::column* theColumn; // column whose idx getIndex() returns
00492     const char* mesg; // NOTE(review): presumably a label for log output
00493 }; // ibis::column::indexLock
00494 
00496 class ibis::column::mutexLock {
      // Scoped lock on a column's mutex.  The constructor calls
      // pthread_mutex_lock on col->mutex and the destructor calls
      // pthread_mutex_unlock on the same mutex.  A non-zero return code from
      // either call is reported through logWarning and otherwise ignored.
      // NOTE(review): the destructor unlocks even when the lock call in the
      // constructor reported an error.
00497 public:
      // col -- column whose mutex is locked; it is dereferenced without a
      //        check, so it must not be null.
      // m   -- text inserted into the log messages; the pointer is kept for
      //        the destructor, so it must stay valid for this object's life.
00498     mutexLock(const ibis::column* col, const char* m)
00499         : theColumn(col), mesg(m) {
00500         if (ibis::gVerbose > 9)
00501             col->logMessage("gainExclusiveAccess",
00502                             "pthread_mutex_lock for %s", m);
00503         int ierr = pthread_mutex_lock(&(col->mutex));
00504         if (0 != ierr)
00505             col->logWarning("gainExclusiveAccess", "pthread_mutex_lock for %s "
00506                             "returned %d (%s)", m, ierr, strerror(ierr));
00507     }
00508     ~mutexLock() {
00509         if (ibis::gVerbose > 9)
00510             theColumn->logMessage("releaseExclusiveAccess",
00511                                   "pthread_mutex_unlock for %s", mesg);
00512         int ierr = pthread_mutex_unlock(&(theColumn->mutex));
00513         if (0 != ierr)
00514             theColumn->logWarning("releaseExclusiveAccess",
00515                                   "pthread_mutex_unlock for %s returned %d "
00516                                   "(%s)", mesg, ierr, strerror(ierr));
00517     }
00518 
00519 private:
00520     const ibis::column* theColumn; // column whose mutex is held
00521     const char* mesg;              // text used in the log messages
00522 
      // Declared but deliberately left without a body, like the other lock
      // classes in this file.  An empty-bodied definition would leave
      // theColumn and mesg uninitialized, and the destructor would then
      // unlock through an indeterminate pointer.
00523     mutexLock(); // no default constructor
00524     mutexLock(const mutexLock&); // can not copy
00525     mutexLock& operator=(const mutexLock&);
00526 }; // ibis::column::mutexLock
00527 
00529 class ibis::column::writeLock {
      // Guard object tied to one column; constructor and destructor are
      // defined elsewhere.
      // NOTE(review): presumably acquires the column's rwlock for writing on
      // construction and releases it on destruction -- confirm.
00530 public:
00531     ~writeLock();
00532     writeLock(const ibis::column* col, const char* m);
00533 
00534 private:
00535     writeLock();                            // private: no default construction
00536     writeLock(const writeLock&);            // private: no copying
00537     writeLock& operator=(const writeLock&); // private: no assignment
00538 
00539     const ibis::column* theColumn;
00540     const char* mesg;
00541 }; // ibis::column::writeLock
00542 
00544 class ibis::column::softWriteLock {
      // Guard object tied to one column; constructor and destructor are
      // defined elsewhere.  Callers must check isLocked() after construction.
      // NOTE(review): presumably a non-blocking attempt to take the column's
      // write lock -- confirm against the definition.
00545 public:
00546     ~softWriteLock();
00547     softWriteLock(const ibis::column* col, const char* m);
      // True exactly when the stored `locked` value is zero.
00548     bool isLocked() const { return 0 == locked; }
00549 
00550 private:
00551     softWriteLock();                                // private: no default construction
00552     softWriteLock(const softWriteLock&);            // private: no copying
00553     softWriteLock& operator=(const softWriteLock&); // private: no assignment
00554 
00555     const ibis::column* theColumn;
00556     const char* mesg;
00557     const int locked; // zero means "locked", see isLocked()
00558 }; // ibis::column::softWriteLock
00559 
00561 class ibis::column::readLock {
      // Guard object tied to one column; constructor and destructor are
      // defined elsewhere.
      // NOTE(review): presumably acquires the column's rwlock for reading on
      // construction and releases it on destruction -- confirm.
00562 public:
00563     ~readLock();
00564     readLock(const ibis::column* col, const char* m);
00565 
00566 private:
00567     readLock();                           // private: no default construction
00568     readLock(const readLock&);            // private: no copying
00569     readLock& operator=(const readLock&); // private: no assignment
00570 
00571     const ibis::column* theColumn;
00572     const char* mesg;
00573 }; // ibis::column::readLock
00574 
00576 inline int ibis::column::elementSize() const {
      // Size in bytes of one element of this column's type.  CATEGORY and
      // TEXT have no fixed size per element and yield 0; every type not
      // listed below yields -1.
00577     switch (m_type) {
00578     case ibis::OID:    return sizeof(rid_t);
00579     case ibis::BYTE:   return sizeof(char);
00580     case ibis::UBYTE:  return sizeof(unsigned char);
00581     case ibis::SHORT:  return sizeof(int16_t);
00582     case ibis::USHORT: return sizeof(uint16_t);
00583     case ibis::INT:    return sizeof(int32_t);
00584     case ibis::UINT:   return sizeof(uint32_t);
00585     case ibis::LONG:   return sizeof(int64_t);
00586     case ibis::ULONG:  return sizeof(uint64_t);
00587     case ibis::FLOAT:  return sizeof(float);
00588     case ibis::DOUBLE: return sizeof(double);
00589     case ibis::CATEGORY: // shares the result of TEXT below
00590     case ibis::TEXT:   return 0;
00591     default:           return -1;
00592     }
00595 } // ibis::column::elementSize
00596 
00598 inline bool ibis::column::isFloat() const {
      // True for the two floating-point column types, FLOAT and DOUBLE.
00599     return (ibis::DOUBLE == m_type) || (ibis::FLOAT == m_type);
00600 } // ibis::column::isFloat
00601 
00603 inline bool ibis::column::isInteger() const {
      // True for the eight integer column types: BYTE, SHORT, INT, LONG
      // (covered by isSignedInteger) and UBYTE, USHORT, UINT, ULONG (covered
      // by isUnsignedInteger).
00604     return isSignedInteger() || isUnsignedInteger();
00608 } // ibis::column::isInteger
00609 
00611 inline bool ibis::column::isSignedInteger() const {
      // True for BYTE, SHORT, INT and LONG.
00612     const bool narrow = (ibis::BYTE == m_type || ibis::SHORT == m_type);
00613     return narrow || ibis::INT == m_type || ibis::LONG == m_type;
00614 } // ibis::column::isSignedInteger
00615 
00617 inline bool ibis::column::isUnsignedInteger() const {
      // True for UBYTE, USHORT, UINT and ULONG.
00618     const bool narrow = (ibis::UBYTE == m_type || ibis::USHORT == m_type);
00619     return narrow || ibis::UINT == m_type || ibis::ULONG == m_type;
00620 } // ibis::column::isUnsignedInteger
00621 
00623 inline bool ibis::column::isNumeric() const {
      // True for every integer type (see isInteger) and for FLOAT and DOUBLE
      // (see isFloat); that is the same set of ten types as spelled out one
      // by one.
00624     return isInteger() || isFloat();
00629 } // ibis::column::isNumeric
00630 
00631 // Stream-insertion operator for ibis::column.  It forwards to the virtual
      // column::print, so the output is whatever the dynamic type of prop
      // prints, and returns the stream to allow chaining.
00632 inline std::ostream& operator<<(std::ostream& out, const ibis::column& prop) {
00633     prop.print(out);
00634     return out;
00635 }
00636 
00637 namespace ibis { // for template specialization
      // Declarations of explicit specializations of the member template
      // column::selectToStrings for signed char and unsigned char.  Declaring
      // them here keeps the generic template from being instantiated for
      // these two types in translation units that include this header.
00638     template <> long column::selectToStrings<signed char>
00639     (const bitvector& mask, std::vector<std::string>& str) const;
00640     template <> long column::selectToStrings<unsigned char>
00641     (const bitvector& mask, std::vector<std::string>& str) const;
00642 }
00643 #endif // IBIS_COLUMN_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive