column.h
Go to the documentation of this file.
00001 //File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 // Copyright 2000-2012 the Regents of the University of California
00004 #ifndef IBIS_COLUMN_H
00005 #define IBIS_COLUMN_H
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 #include "table.h"      // ibis::TYPE_T
00014 #include "qExpr.h"      // ibis::qContinuousRange
00015 #include "bitvector.h"
00016 #include <string>
00017 
// Forward declarations that extend namespace ibis.
namespace ibis {
    // Text-oriented classes derived from ibis::column; they are implemented
    // in category.cpp.
    class category; // categorical values, i.e., low-cardinality text fields
    class text;     // text fields of arbitrary cardinality
    class blob;     // text that may contain null characters

    // Holders for the selected values of the different column types; they
    // are implemented in colValues.cpp.  Signed/unsigned pairs share a line.
    class colBytes;     class colUBytes;
    class colShorts;    class colUShorts;
    class colInts;      class colUInts;
    class colLongs;     class colULongs;
    class colFloats;    class colDoubles;
    class colStrings;
} // namespace ibis
00038 
/// ibis::column describes one column of a data partition (ibis::part).
/// As declared below, an object carries a pointer to its partition, a null
/// mask, a type tag, a name, a description, an index specification string,
/// a sorted flag and a [lower, upper] pair of bounds, plus a lazily managed
/// index pointer guarded by a read-write lock and a mutex.
///
/// NOTE(review): this text is a Doxygen source listing; the five-digit
/// numbers at the start of each line are listing artifacts, and the
/// original documentation comments were stripped.  The comments below state
/// only what the declarations show; anything else is marked as an
/// assumption.
///
/// NOTE(review): the class is exported (FASTBIT_CXX_DLLSPEC) and has many
/// virtual functions; reordering virtual functions or data members would
/// change the binary layout, so keep the declaration order intact.
00049 class FASTBIT_CXX_DLLSPEC ibis::column {
00050 public:
00051 
    /// Virtual destructor; this class is used as a base class (category,
    /// text and blob are forward declared as derived classes in this file).
00052     virtual ~column();
    /// Construct a column of partition tbl using an already opened FILE.
    /// Presumably the metadata is read from the file -- the implementation
    /// is not part of this header; confirm there.
00054     column(const part* tbl, FILE* file);
    /// Construct a column from explicitly supplied type, name, description
    /// and bounds.  The default bounds (low=DBL_MAX, high=-DBL_MAX) form an
    /// inverted range, i.e., low > high.
00056     column(const part* tbl, ibis::TYPE_T t, const char* name,
00057            const char* desc="", double low=DBL_MAX, double high=-DBL_MAX);
    /// Copy constructor.  Copy assignment is disabled (see the private
    /// section at the end of the class).
00058     column(const column& rhs); 
00059 
    /// Return the type tag stored in m_type.
00062     ibis::TYPE_T type() const {return m_type;}
    /// Return the name as a C string.  The pointer refers to the internal
    /// std::string m_name.
00064     const char* name() const {return m_name.c_str();}
    /// Replace the name with the content of nm.
00066     void name(const char* nm) {m_name = nm;}
    /// Return the description as a C string backed by m_desc.
00068     const char* description() const {return m_desc.c_str();}
    /// Return a reference to the stored lower bound.
00070     const double& lowerBound() const {return lower;}
    /// Return a reference to the stored upper bound.
00072     const double& upperBound() const {return upper;}
00073 
    // Type queries.  These six functions are defined inline after the class
    // definition in this header.
00074     int elementSize() const;
00075     bool isFloat() const;
00076     bool isInteger() const;
00077     bool isSignedInteger() const;
00078     bool isUnsignedInteger() const;
00079     bool isNumeric() const;
    /// Return the value of the m_sorted flag.
00080     bool isSorted() const {return m_sorted;} 
    /// Replace the description with the content of d.
00081     void description(const char* d) {m_desc = d;}
    /// Overwrite the stored lower bound.
00082     void lowerBound(double d) {lower = d;}
    /// Overwrite the stored upper bound.
00083     void upperBound(double d) {upper = d;}
    /// Return the partition pointer given to the constructor (thePart).
00084     const part* partition() const {return thePart;}
    /// Setter counterpart of isSorted(); defined outside of this header.
00085     void isSorted(bool);
00086 
00087     // function related to index/bin
00088     const char* indexSpec() const; 
00089     uint32_t numBins() const; 
00090 
    /// Store spec as the index specification string (m_bins).
00091     void indexSpec(const char* spec) {m_bins=spec;}
    // The next two functions write their results into the vector argument.
    // NOTE(review): the exact meaning of the values is defined by the
    // implementation, which is not visible here.
00093     void preferredBounds(std::vector<double>&) const;
00095     void binWeights(std::vector<uint32_t>&) const;
00096 
    // Three overloads for computing the minimum and maximum values.  Only
    // the last one is const; it reports through the min/max output
    // arguments.  NOTE(review): presumably the non-const overloads update
    // lower/upper -- confirm in the implementation.
00097     virtual void computeMinMax();
00098     virtual void computeMinMax(const char *dir);
00099     virtual void computeMinMax(const char *dir,
00100                                double& min, double &max) const;
00101 
    // Index management.  These functions are const yet deal with the index
    // because idx and idxcnt are declared mutable.  loadIndex carries an
    // empty exception specification, i.e., it promises not to throw.
00102     virtual void loadIndex(const char* iopt=0, int ropt=0) const throw ();
00103     virtual void unloadIndex() const;
00104     virtual long indexSize() const;
00105 
00106     uint32_t indexedRows() const;
00107     void indexSpeedTest() const;
00108     void purgeIndexFile(const char *dir=0) const;
00109 
    // File names and null mask.  The two name functions take a std::string
    // to work with and return a C string.  NOTE(review): presumably the
    // returned pointer refers to fname's buffer -- confirm before keeping
    // it beyond the lifetime of fname.
00110     const char* dataFileName(std::string& fname, const char *dir=0) const;
00111     const char* nullMaskName(std::string& fname) const;
00112     void getNullMask(bitvector& mask) const;
00113     int  setNullMask(const bitvector&);
00114 
    /// Default implementation does nothing; the output string is left
    /// untouched.
00117     virtual void getString(uint32_t, std::string &) const {};
    /// Default implementation always returns a null pointer.
00121     virtual const char* findString(const char*) const
00122     {return static_cast<const char*>(0);}
00123 
    // Access to the values of this column as a whole.
    // NOTE(review): ownership of the returned pointers is not visible from
    // this header -- confirm who is responsible for freeing them.
00124     array_t<int32_t>* getIntArray() const;
00125     array_t<float>*   getFloatArray() const;
00126     array_t<double>*  getDoubleArray() const;
00127     virtual int getValuesArray(void* vals) const;
00128     virtual ibis::fileManager::storage* getRawData() const;
00129 
    // Access to the values selected by a mask, one function per element
    // type.  NOTE(review): as above, ownership of the returned objects must
    // be confirmed in the implementation.
00130     virtual array_t<signed char>*   selectBytes(const bitvector& mask) const;
00131     virtual array_t<unsigned char>* selectUBytes(const bitvector& mask) const;
00132     virtual array_t<int16_t>*  selectShorts(const bitvector& mask) const;
00133     virtual array_t<uint16_t>* selectUShorts(const bitvector& mask) const;
00134     virtual array_t<int32_t>*  selectInts(const bitvector& mask) const;
00135     virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00136     virtual array_t<int64_t>*  selectLongs(const bitvector& mask) const;
00137     virtual array_t<uint64_t>* selectULongs(const bitvector& mask) const;
00138     virtual array_t<float>*    selectFloats(const bitvector& mask) const;
00139     virtual array_t<double>*   selectDoubles(const bitvector& mask) const;
00140     virtual std::vector<std::string>*
00141         selectStrings(const bitvector& mask) const;
00142 
    // Type-erased variants: the destination is passed as void*.
    // NOTE(review): presumably the pointer must refer to an array_t whose
    // element type matches type() -- confirm in the implementation.
00143     long selectValues(const bitvector&, void*) const;
00144     long selectValues(const bitvector&, void*, array_t<uint32_t>&) const;
00145     long selectValues(const ibis::qContinuousRange&, void*) const;
00146 
    /// Write information about this column to an open FILE.
00148     virtual void write(FILE* file) const;
    /// Print this column to an output stream; used by the operator<<
    /// defined at the end of this header.
00150     virtual void print(std::ostream& out) const;
    // printf-style logging helpers.  logError is declared in the protected
    // section.
00152     void logMessage(const char* event, const char* fmt, ...) const;
00154     void logWarning(const char* event, const char* fmt, ...) const;
00155 
    // Both functions take the range by non-const reference, so they may
    // modify it.  NOTE(review): the meaning of the int return value is not
    // visible here.
00158     int expandRange(ibis::qContinuousRange& rng) const;
00161     int contractRange(ibis::qContinuousRange& rng) const;
00162 
    // evaluateRange: one overload per range-expression type
    // (qContinuousRange, qDiscreteRange, qIntHod, qUIntHod).  Each takes an
    // input mask and an output bitvector res.
00163     virtual long evaluateRange(const ibis::qContinuousRange& cmp,
00164                                const ibis::bitvector& mask,
00165                                ibis::bitvector& res) const;
00167     virtual long evaluateRange(const ibis::qDiscreteRange& cmp,
00168                                const ibis::bitvector& mask,
00169                                ibis::bitvector& res) const;
00171     virtual long evaluateRange(const ibis::qIntHod& cmp,
00172                                const ibis::bitvector& mask,
00173                                ibis::bitvector& res) const;
00175     virtual long evaluateRange(const ibis::qUIntHod& cmp,
00176                                const ibis::bitvector& mask,
00177                                ibis::bitvector& res) const;
00178 
    // String, keyword and pattern searches.  Each comes in two flavors: one
    // with an output bitvector and one without.  NOTE(review): presumably
    // the flavor without a bitvector only produces a count or an estimate
    // -- confirm in the implementation.
00179     virtual long stringSearch(const char*, ibis::bitvector&) const;
00180     virtual long stringSearch(const std::vector<std::string>&,
00181                               ibis::bitvector&) const;
00182     virtual long stringSearch(const char*) const;
00183     virtual long stringSearch(const std::vector<std::string>&) const;
00184     virtual long keywordSearch(const char*, ibis::bitvector&) const;
00185     virtual long keywordSearch(const char*) const;
00186     virtual long patternSearch(const char*) const;
00187     virtual long patternSearch(const char*, ibis::bitvector &) const;
00188 
    // Takes a range condition, an input bitvector, a type-erased output
    // pointer and an output bitvector.
00189     virtual long evaluateAndSelect(const ibis::qContinuousRange&,
00190                                    const ibis::bitvector&, void*,
00191                                    ibis::bitvector&) const;
00192 
    // estimateRange with two output bitvectors, low and high, one overload
    // per range-expression type.  NOTE(review): presumably low and high
    // bracket the exact answer -- confirm in the implementation.
00202     virtual long estimateRange(const ibis::qContinuousRange& cmp,
00203                                ibis::bitvector& low,
00204                                ibis::bitvector& high) const;
00206     virtual long estimateRange(const ibis::qDiscreteRange& cmp,
00207                                ibis::bitvector& low,
00208                                ibis::bitvector& high) const;
00210     virtual long estimateRange(const ibis::qIntHod& cmp,
00211                                ibis::bitvector& low,
00212                                ibis::bitvector& high) const;
00214     virtual long estimateRange(const ibis::qUIntHod& cmp,
00215                                ibis::bitvector& low,
00216                                ibis::bitvector& high) const;
00217 
    // estimateRange without output bitvectors; only a number is returned.
00218     virtual long estimateRange(const ibis::qContinuousRange& cmp) const;
00219     virtual long estimateRange(const ibis::qDiscreteRange& cmp) const;
00221     virtual long estimateRange(const ibis::qIntHod& cmp) const;
00223     virtual long estimateRange(const ibis::qUIntHod& cmp) const;
00224 
    // estimateCost: one overload per query-expression type.  The two string
    // overloads are defined here and always return 0.
00226     virtual double estimateCost(const ibis::qContinuousRange& cmp) const;
00228     virtual double estimateCost(const ibis::qDiscreteRange& cmp) const;
00230     virtual double estimateCost(const ibis::qIntHod& cmp) const;
00232     virtual double estimateCost(const ibis::qUIntHod& cmp) const;
00234     virtual double estimateCost(const ibis::qString&) const {
00235         return 0;}
00237     virtual double estimateCost(const ibis::qMultiString&) const {
00238         return 0;}
00239 
    // getUndecidable: one overload per range-expression type, each with an
    // output bitvector iffy.  NOTE(review): the meaning of the float return
    // value is not visible from this header.
00240     virtual float getUndecidable(const ibis::qContinuousRange& cmp,
00241                                  ibis::bitvector& iffy) const;
00243     virtual float getUndecidable(const ibis::qDiscreteRange& cmp,
00244                                  ibis::bitvector& iffy) const;
00246     virtual float getUndecidable(const ibis::qIntHod& cmp,
00247                                  ibis::bitvector& iffy) const;
00249     virtual float getUndecidable(const ibis::qUIntHod& cmp,
00250                                  ibis::bitvector& iffy) const;
00251 
    // Functions that modify the data of this column.
    // NOTE(review): presumably dt/df name the destination and source
    // directories and nold/nnew are row counts -- confirm with the
    // implementation before relying on this.
00253     virtual long append(const char* dt, const char* df, const uint32_t nold,
00254                         const uint32_t nnew, uint32_t nbuf, char* buf);
00255 
00256     virtual long append(const void* vals, const ibis::bitvector& msk);
00257     virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
00258                            ibis::bitvector& mask, const void *va1,
00259                            void *va2=0);
00260     template <typename T>
00261     long castAndWrite(const array_t<double>& vals, ibis::bitvector& mask,
00262                       const T special);
00263     virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
00264                               char *buf, uint32_t nbuf);
00265     virtual long truncateData(const char* dir, uint32_t nent,
00266                               ibis::bitvector& mask) const;
00267 
    // Statistics about the values of this column.  The two distribution
    // functions report through their vector arguments.
00273     virtual double getActualMin() const;
00276     virtual double getActualMax() const;
00278     virtual double getSum() const;
00285     long getCumulativeDistribution(std::vector<double>& bounds,
00286                                    std::vector<uint32_t>& counts) const;
00298     long getDistribution(std::vector<double>& bbs,
00299                          std::vector<uint32_t>& counts) const;
    // Public nested classes; all three are defined after this class in this
    // header.
00301     class info;
00302     class indexLock;
00303     class mutexLock;
00304 
00305 protected:
00306     // protected member variables
    // thePart is what partition() returns.
00307     const part* thePart; 
    // NOTE(review): presumably the null mask handled by getNullMask and
    // setNullMask -- confirm in the implementation.
00308     ibis::bitvector mask_;
    // m_type, m_name and m_desc are what type(), name() and description()
    // return; m_bins holds the string stored by indexSpec(const char*).
00309     ibis::TYPE_T m_type;
00310     std::string m_name; 
00311     std::string m_desc; 
00312     std::string m_bins; 
    // m_sorted, lower and upper are what isSorted(), lowerBound() and
    // upperBound() return.
00313     bool m_sorted;      
00314     double lower;       
00315     double upper;       
00316 
    // The index object.  Declared mutable so that const member functions
    // can change it; indexLock::getIndex hands out this pointer.
00317     mutable ibis::index* idx;
    // A shared 32-bit counter associated with the index.
    // NOTE(review): presumably counts the active users of idx -- confirm.
00319     mutable ibis::util::sharedInt32 idxcnt;
00320 
    /// printf-style helper for reporting errors.
    /// NOTE(review): whether it throws after logging is not visible here.
00322     void logError(const char* event, const char* fmt, ...) const;
    /// Works on file descriptor fptr with a dictionary and a caller
    /// supplied buffer; integers are written to out.
00325     long string2int(int fptr, dictionary& dic, uint32_t nbuf, char* buf,
00326                     array_t<uint32_t>& out) const;
    // Helpers for computing the minimum, the maximum and the sum.
00328     double computeMin() const;
00330     double computeMax() const;
00332     double computeSum() const;
    // actualMinMax comes in two forms: one takes a file name, the other an
    // in-memory array.  Both report through min and max.
00335     void actualMinMax(const char *fname, const ibis::bitvector& mask,
00336                       double &min, double &max) const;
00338     template <typename T>
00339     void actualMinMax(const array_t<T>& vals, const ibis::bitvector& mask,
00340                       double& min, double& max) const;
00342     template <typename T>
00343     T computeMin(const array_t<T>& vals,
00344                  const ibis::bitvector& mask) const;
00346     template <typename T>
00347     T computeMax(const array_t<T>& vals,
00348                  const ibis::bitvector& mask) const;
00350     template <typename T>
00351     double computeSum(const array_t<T>& vals,
00352                       const ibis::bitvector& mask) const;
00353 
    // searchSorted: one overload per range-expression type.
    // NOTE(review): presumably used when isSorted() is true -- confirm at
    // the call sites.
00355     virtual int searchSorted(const ibis::qContinuousRange&,
00356                              ibis::bitvector&) const;
00358     virtual int searchSorted(const ibis::qDiscreteRange&,
00359                              ibis::bitvector&) const;
00361     virtual int searchSorted(const ibis::qIntHod&,
00362                              ibis::bitvector&) const;
00364     virtual int searchSorted(const ibis::qUIntHod&,
00365                              ibis::bitvector&) const;
    // The searchSortedIC* templates take the values as an in-memory array,
    // while the searchSortedOOC* templates take a file name.
    // NOTE(review): presumably IC = in-core, OOC = out-of-core, and the
    // last letter C/D = continuous/discrete range -- confirm.
00367     template <typename T> int
00368         searchSortedICC(const array_t<T>& vals,
00369                         const ibis::qContinuousRange& rng,
00370                         ibis::bitvector& hits) const;
00372     template <typename T> int
00373         searchSortedICD(const array_t<T>& vals,
00374                         const ibis::qDiscreteRange& rng,
00375                         ibis::bitvector& hits) const;
00377     template <typename T> int
00378         searchSortedICD(const array_t<T>& vals,
00379                         const ibis::qIntHod& rng,
00380                         ibis::bitvector& hits) const;
00382     template <typename T> int
00383         searchSortedICD(const array_t<T>& vals,
00384                         const ibis::qUIntHod& rng,
00385                         ibis::bitvector& hits) const;
00387     template <typename T> int
00388         searchSortedOOCC(const char* fname,
00389                          const ibis::qContinuousRange& rng,
00390                          ibis::bitvector& hits) const;
00392     template <typename T> int
00393         searchSortedOOCD(const char* fname,
00394                          const ibis::qDiscreteRange& rng,
00395                          ibis::bitvector& hits) const;
00397     template <typename T> int
00398         searchSortedOOCD(const char* fname,
00399                          const ibis::qIntHod& rng,
00400                          ibis::bitvector& hits) const;
00402     template <typename T> int
00403         searchSortedOOCD(const char* fname,
00404                          const ibis::qUIntHod& rng,
00405                          ibis::bitvector& hits) const;
00406 
    // Both take a file descriptor, a count nr and a target value.
    // NOTE(review): presumably they locate tgt among nr sorted values
    // stored in the file -- confirm in the implementation.
00408     template <typename T> uint32_t
00409         findLower(int fdes, const uint32_t nr, const T tgt) const;
00411     template <typename T> uint32_t
00412         findUpper(int fdes, const uint32_t nr, const T tgt) const;
00413 
    // Typed helpers taking a C string (presumably a file name), a mask and
    // output containers.  selectToStrings is explicitly specialized for
    // signed char and unsigned char at the end of this header.
00414     template <typename T>
00415         long selectValuesT(const char*, const bitvector&, array_t<T>&) const;
00416     template <typename T>
00417         long selectValuesT(const char*, const bitvector& mask,
00418                            array_t<T>& vals, array_t<uint32_t>& inds) const;
00419     template <typename T>
00420         long selectToStrings(const char*, const bitvector&,
00421                              std::vector<std::string>&) const;
00422 
    // Typed helpers for appending in-memory values along with a bitvector.
00424     template <typename T>
00425         long appendValues(const array_t<T>&, const ibis::bitvector&);
00427     long appendStrings(const std::vector<std::string>&, const ibis::bitvector&);
00428 
    // Protected nested lock classes, defined after this class in this
    // header.  All five lock classes are friends so that they can reach the
    // private rwlock/mutex and the protected idx of a column.
00429     class readLock;
00430     class writeLock;
00431     class softWriteLock;
00432     friend class readLock;
00433     friend class writeLock;
00434     friend class indexLock;
00435     friend class mutexLock;
00436     friend class softWriteLock;
00437 
00438 private:
    // The two synchronization primitives are mutable so that they can be
    // used on const objects; mutexLock locks and unlocks the mutex.
    // NOTE(review): rwlock is presumably what readLock, writeLock and
    // softWriteLock operate on -- their definitions are not in this header.
00441     mutable pthread_rwlock_t rwlock;
00443     mutable pthread_mutex_t mutex;
00444 
00445     column& operator=(const column&); // no assignment
00446 }; // ibis::column
00447 
00450 class FASTBIT_CXX_DLLSPEC ibis::column::info {
00451  public:
00452     const char* name;           
00453     const char* description;    
00454     const double expectedMin;   
00455     const double expectedMax;   
00456     const ibis::TYPE_T type;    
00457     info(const ibis::column& col);
00458     info(const info& rhs)
00459         : name(rhs.name), description(rhs.description),
00460         expectedMin(rhs.expectedMin),
00461         expectedMax(rhs.expectedMax),
00462         type(rhs.type) {};
00463 
00464  private:
00465     info();
00466     info& operator=(const info&);
00467 }; // ibis::column::info
00468 
00472 class ibis::column::indexLock {
00473 public:
00474     ~indexLock();
00475     indexLock(const ibis::column* col, const char* m);
00476     const ibis::index* getIndex() const {return theColumn->idx;};
00477 
00478 private:
00479     const ibis::column* theColumn;
00480     const char* mesg;
00481 
00482     indexLock();
00483     indexLock(const indexLock&);
00484     indexLock& operator=(const indexLock&);
00485 }; // ibis::column::indexLock
00486 
00488 class ibis::column::mutexLock {
00489 public:
00490     mutexLock(const ibis::column* col, const char* m)
00491         : theColumn(col), mesg(m) {
00492         if (ibis::gVerbose > 9)
00493             col->logMessage("gainExclusiveAccess",
00494                             "pthread_mutex_lock for %s", m);
00495         int ierr = pthread_mutex_lock(&(col->mutex));
00496         if (0 != ierr)
00497             col->logWarning("gainExclusiveAccess", "pthread_mutex_lock for %s "
00498                             "returned %d (%s)", m, ierr, strerror(ierr));
00499     }
00500     ~mutexLock() {
00501         if (ibis::gVerbose > 9)
00502             theColumn->logMessage("releaseExclusiveAccess",
00503                                   "pthread_mutex_unlock for %s", mesg);
00504         int ierr = pthread_mutex_unlock(&(theColumn->mutex));
00505         if (0 != ierr)
00506             theColumn->logWarning("releaseExclusiveAccess",
00507                                   "pthread_mutex_unlock for %s returned %d "
00508                                   "(%s)", mesg, ierr, strerror(ierr));
00509     }
00510 
00511 private:
00512     const ibis::column* theColumn;
00513     const char* mesg;
00514 
00515     mutexLock() {}; // no default constructor
00516     mutexLock(const mutexLock&) {}; // can not copy
00517     mutexLock& operator=(const mutexLock&);
00518 }; // ibis::column::mutexLock
00519 
00521 class ibis::column::writeLock {
00522 public:
00523     writeLock(const ibis::column* col, const char* m);
00524     ~writeLock();
00525 
00526 private:
00527     const ibis::column* theColumn;
00528     const char* mesg;
00529 
00530     writeLock();
00531     writeLock(const writeLock&);
00532     writeLock& operator=(const writeLock&);
00533 }; // ibis::column::writeLock
00534 
00536 class ibis::column::softWriteLock {
00537 public:
00538     softWriteLock(const ibis::column* col, const char* m);
00539     ~softWriteLock();
00540     bool isLocked() const {return(locked==0);}
00541 
00542 private:
00543     const ibis::column* theColumn;
00544     const char* mesg;
00545     const int locked;
00546 
00547     softWriteLock();
00548     softWriteLock(const softWriteLock&);
00549     softWriteLock& operator=(const softWriteLock&);
00550 }; // ibis::column::softWriteLock
00551 
00553 class ibis::column::readLock {
00554 public:
00555     readLock(const ibis::column* col, const char* m);
00556     ~readLock();
00557 
00558 private:
00559     const ibis::column* theColumn;
00560     const char* mesg;
00561 
00562     readLock();
00563     readLock(const readLock&);
00564     readLock& operator=(const readLock&);
00565 }; // ibis::column::readLock
00566 
00568 inline int ibis::column::elementSize() const {
00569     int sz = -1;
00570     switch (m_type) {
00571     case ibis::OID: sz = sizeof(rid_t); break;
00572     case ibis::INT: sz = sizeof(int32_t); break;
00573     case ibis::UINT: sz = sizeof(uint32_t); break;
00574     case ibis::LONG: sz = sizeof(int64_t); break;
00575     case ibis::ULONG: sz = sizeof(uint64_t); break;
00576     case ibis::FLOAT: sz = sizeof(float); break;
00577     case ibis::DOUBLE: sz = sizeof(double); break;
00578     case ibis::BYTE: sz = sizeof(char); break;
00579     case ibis::UBYTE: sz = sizeof(unsigned char); break;
00580     case ibis::SHORT: sz = sizeof(int16_t); break;
00581     case ibis::USHORT: sz = sizeof(uint16_t); break;
00582     case ibis::CATEGORY: sz = 0; break; // no fixed size per element
00583     case ibis::TEXT: sz = 0; break; // no fixed size per element
00584     default: sz = -1; break;
00585     }
00586     return sz;
00587 } // ibis::column::elementSize
00588 
00590 inline bool ibis::column::isFloat() const {
00591     return(m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
00592 } // ibis::column::isFloat
00593 
00595 inline bool ibis::column::isInteger() const {
00596     return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
00597            m_type == ibis::SHORT || m_type == ibis::USHORT ||
00598            m_type == ibis::INT || m_type == ibis::UINT ||
00599            m_type == ibis::LONG || m_type == ibis::ULONG);
00600 } // ibis::column::isInteger
00601 
00603 inline bool ibis::column::isSignedInteger() const {
00604     return(m_type == ibis::BYTE || m_type == ibis::SHORT ||
00605            m_type == ibis::INT || m_type == ibis::LONG);
00606 } // ibis::column::isSignedInteger
00607 
00609 inline bool ibis::column::isUnsignedInteger() const {
00610     return(m_type == ibis::UBYTE || m_type == ibis::USHORT ||
00611            m_type == ibis::UINT || m_type == ibis::ULONG);
00612 } // ibis::column::isUnsignedInteger
00613 
00615 inline bool ibis::column::isNumeric() const {
00616     return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
00617            m_type == ibis::SHORT || m_type == ibis::USHORT ||
00618            m_type == ibis::INT || m_type == ibis::UINT ||
00619            m_type == ibis::LONG || m_type == ibis::ULONG ||
00620            m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
00621 } // ibis::column::isNumeric
00622 
00623 // the operator to print a column to an output stream
00624 inline std::ostream& operator<<(std::ostream& out, const ibis::column& prop) {
00625     prop.print(out);
00626     return out;
00627 }
00628 
00629 namespace ibis { // for template specialization
00630     template <> long column::selectToStrings<signed char>
00631     (const char*, const bitvector&, std::vector<std::string>&) const;
00632     template <> long column::selectToStrings<unsigned char>
00633     (const char*, const bitvector&, std::vector<std::string>&) const;
00634 }
00635 #endif // IBIS_COLUMN_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive