00001 //File: $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Copyright 2000-2011 the Regents of the University of California 00004 #ifndef IBIS_COLUMN_H 00005 #define IBIS_COLUMN_H 00006 00007 00008 00009 00010 00011 00012 00013 #include "table.h" // ibis::TYPE_T 00014 #include "qExpr.h" // ibis::qContinuousRange 00015 #include "bitvector.h" 00016 #include <string> 00017 00018 namespace ibis { // additional names to the namespace ibis 00019 // derived classes of ibis::column, implemented in category.cpp 00020 class category; // for categorical values (low-cardinality text fields) 00021 class text; // arbitrary cardinality text fields 00022 class blob; // text may contain null characters 00023 00024 // the following are used for storing selected values of different types 00025 // of columns (implemented in colValues.cpp) 00026 class colBytes; 00027 class colUBytes; 00028 class colShorts; 00029 class colUShorts; 00030 class colInts; 00031 class colUInts; 00032 class colLongs; 00033 class colULongs; 00034 class colFloats; 00035 class colDoubles; 00036 class colStrings; 00037 } // namespace 00038 00049 class FASTBIT_CXX_DLLSPEC ibis::column { 00050 public: 00051 00052 virtual ~column(); 00054 column(const part* tbl, FILE* file); 00056 column(const part* tbl, ibis::TYPE_T t, const char* name, 00057 const char* desc="", double low=DBL_MAX, double high=-DBL_MAX); 00058 column(const column& rhs); 00059 00062 ibis::TYPE_T type() const {return m_type;} 00064 const char* name() const {return m_name.c_str();} 00066 void name(const char* nm) {m_name = nm;} 00068 const char* description() const {return m_desc.c_str();} 00070 const double& lowerBound() const {return lower;} 00072 const double& upperBound() const {return upper;} 00073 00074 int elementSize() const; 00075 bool isFloat() const; 00076 bool isInteger() const; 00077 bool isSignedInteger() const; 00078 bool isUnsignedInteger() const; 00079 bool isNumeric() const; 00080 bool isSorted() const {return m_sorted;} 00081 void description(const char* d) {m_desc = d;} 00082 void lowerBound(double d) {lower = d;} 00083 void upperBound(double d) {upper = d;} 00084 const part* partition() const {return thePart;} 00085 void isSorted(bool); 00086 00087 // function related to index/bin 00088 const char* indexSpec() const; 00089 uint32_t numBins() const; 00090 00091 void indexSpec(const char* spec) {m_bins=spec;} 00093 void preferredBounds(std::vector<double>&) const; 00095 void binWeights(std::vector<uint32_t>&) const; 00096 00100 virtual void computeMinMax(); 00101 virtual void computeMinMax(const char *dir); 00105 virtual void computeMinMax(const char *dir, 00106 double& min, double &max) const; 00107 00108 virtual void loadIndex(const char* iopt=0, int ropt=0) const throw (); 00109 virtual void unloadIndex() const; 00110 virtual long indexSize() const; 00111 00112 uint32_t indexedRows() const; 00113 void indexSpeedTest() const; 00114 void purgeIndexFile(const char *dir=0) const; 00115 00116 const char* dataFileName(std::string& fname, const char *dir=0) const; 00117 const char* nullMaskName(std::string& fname) const; 00118 void getNullMask(bitvector& mask) const; 00119 int setNullMask(const bitvector&); 00120 00123 virtual void getString(uint32_t, std::string &) const {}; 00127 virtual const char* findString(const char*) const 00128 {return static_cast<const char*>(0);} 00129 00130 array_t<int32_t>* getIntArray() const; 00131 array_t<float>* getFloatArray() const; 00132 array_t<double>* getDoubleArray() const; 00133 virtual int getValuesArray(void* vals) const; 00134 virtual ibis::fileManager::storage* getRawData() const; 00135 00136 virtual array_t<signed char>* selectBytes(const bitvector& mask) const; 00137 virtual array_t<unsigned char>* selectUBytes(const bitvector& mask) const; 00138 virtual array_t<int16_t>* selectShorts(const bitvector& mask) const; 00139 virtual array_t<uint16_t>* selectUShorts(const bitvector& mask) const; 00140 virtual array_t<int32_t>* selectInts(const bitvector& mask) const; 00141 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const; 00142 virtual array_t<int64_t>* selectLongs(const bitvector& mask) const; 00143 virtual array_t<uint64_t>* selectULongs(const bitvector& mask) const; 00144 virtual array_t<float>* selectFloats(const bitvector& mask) const; 00145 virtual array_t<double>* selectDoubles(const bitvector& mask) const; 00146 virtual std::vector<std::string>* 00147 selectStrings(const bitvector& mask) const; 00148 00149 long selectValues(const bitvector& mask, void* vals) const; 00150 long selectValues(const bitvector& mask, 00151 void* vals, array_t<uint32_t>& inds) const; 00152 00154 virtual void write(FILE* file) const; 00156 virtual void print(std::ostream& out) const; 00158 void logMessage(const char* event, const char* fmt, ...) const; 00160 void logWarning(const char* event, const char* fmt, ...) const; 00161 00164 int expandRange(ibis::qContinuousRange& rng) const; 00167 int contractRange(ibis::qContinuousRange& rng) const; 00168 00178 virtual long estimateRange(const ibis::qContinuousRange& cmp, 00179 ibis::bitvector& low, 00180 ibis::bitvector& high) const; 00182 virtual long estimateRange(const ibis::qDiscreteRange& cmp, 00183 ibis::bitvector& low, 00184 ibis::bitvector& high) const; 00186 virtual long estimateRange(const ibis::qIntHod& cmp, 00187 ibis::bitvector& low, 00188 ibis::bitvector& high) const; 00190 virtual long estimateRange(const ibis::qUIntHod& cmp, 00191 ibis::bitvector& low, 00192 ibis::bitvector& high) const; 00193 00196 virtual long evaluateRange(const ibis::qContinuousRange& cmp, 00197 const ibis::bitvector& mask, 00198 ibis::bitvector& res) const; 00200 virtual long evaluateRange(const ibis::qDiscreteRange& cmp, 00201 const ibis::bitvector& mask, 00202 ibis::bitvector& res) const; 00204 virtual long evaluateRange(const ibis::qIntHod& cmp, 00205 const ibis::bitvector& mask, 00206 ibis::bitvector& res) const; 00208 virtual long evaluateRange(const ibis::qUIntHod& cmp, 00209 const ibis::bitvector& mask, 00210 ibis::bitvector& res) const; 00211 00212 virtual long stringSearch(const char*, ibis::bitvector&) const; 00213 virtual long stringSearch(const std::vector<std::string>&, 00214 ibis::bitvector&) const; 00215 virtual long stringSearch(const char*) const; 00216 virtual long stringSearch(const std::vector<std::string>&) const; 00217 virtual long keywordSearch(const char*, ibis::bitvector&) const; 00218 virtual long keywordSearch(const char*) const; 00219 virtual long patternSearch(const char*) const; 00220 virtual long patternSearch(const char*, ibis::bitvector &) const; 00221 00225 virtual long estimateRange(const ibis::qContinuousRange& cmp) const; 00227 virtual long estimateRange(const ibis::qDiscreteRange& cmp) const; 00229 virtual long estimateRange(const ibis::qIntHod& cmp) const; 00231 virtual long estimateRange(const ibis::qUIntHod& cmp) const; 00232 00234 virtual double estimateCost(const ibis::qContinuousRange& cmp) const; 00236 virtual double estimateCost(const ibis::qDiscreteRange& cmp) const; 00238 virtual double estimateCost(const ibis::qIntHod& cmp) const; 00240 virtual double estimateCost(const ibis::qUIntHod& cmp) const; 00242 virtual double estimateCost(const ibis::qString&) const { 00243 return 0;} 00245 virtual double estimateCost(const ibis::qMultiString&) const { 00246 return 0;} 00247 00248 virtual float getUndecidable(const ibis::qContinuousRange& cmp, 00249 ibis::bitvector& iffy) const; 00251 virtual float getUndecidable(const ibis::qDiscreteRange& cmp, 00252 ibis::bitvector& iffy) const; 00254 virtual float getUndecidable(const ibis::qIntHod& cmp, 00255 ibis::bitvector& iffy) const; 00257 virtual float getUndecidable(const ibis::qUIntHod& cmp, 00258 ibis::bitvector& iffy) const; 00259 00261 virtual long append(const char* dt, const char* df, const uint32_t nold, 00262 const uint32_t nnew, uint32_t nbuf, char* buf); 00263 00264 virtual long append(const void* vals, const ibis::bitvector& msk); 00265 virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew, 00266 ibis::bitvector& mask, const void *va1, 00267 void *va2=0); 00268 template <typename T> 00269 long castAndWrite(const array_t<double>& vals, ibis::bitvector& mask, 00270 const T special); 00271 virtual long saveSelected(const ibis::bitvector& sel, const char *dest, 00272 char *buf, uint32_t nbuf); 00273 virtual long truncateData(const char* dir, uint32_t nent, 00274 ibis::bitvector& mask) const; 00275 00281 virtual double getActualMin() const; 00284 virtual double getActualMax() const; 00286 virtual double getSum() const; 00293 long getCumulativeDistribution(std::vector<double>& bounds, 00294 std::vector<uint32_t>& counts) const; 00306 long getDistribution(std::vector<double>& bbs, 00307 std::vector<uint32_t>& counts) const; 00309 class info; 00310 class indexLock; 00311 class mutexLock; 00312 00313 protected: 00314 // protected member variables 00315 const part* thePart; 00316 ibis::bitvector mask_; 00317 ibis::TYPE_T m_type; 00318 std::string m_name; 00319 std::string m_desc; 00320 std::string m_bins; 00321 bool m_sorted; 00322 double lower; 00323 double upper; 00324 00325 mutable ibis::index* idx; 00327 mutable ibis::util::sharedInt32 idxcnt; 00328 00330 void logError(const char* event, const char* fmt, ...) const; 00333 long string2int(int fptr, dictionary& dic, uint32_t nbuf, char* buf, 00334 array_t<uint32_t>& out) const; 00336 double computeMin() const; 00338 double computeMax() const; 00340 double computeSum() const; 00343 void actualMinMax(const char *fname, const ibis::bitvector& mask, 00344 double &min, double &max) const; 00346 template <typename T> 00347 void actualMinMax(const array_t<T>& vals, const ibis::bitvector& mask, 00348 double& min, double& max) const; 00350 template <typename T> 00351 T computeMin(const array_t<T>& vals, 00352 const ibis::bitvector& mask) const; 00354 template <typename T> 00355 T computeMax(const array_t<T>& vals, 00356 const ibis::bitvector& mask) const; 00358 template <typename T> 00359 double computeSum(const array_t<T>& vals, 00360 const ibis::bitvector& mask) const; 00361 00363 virtual int searchSorted(const ibis::qContinuousRange&, 00364 ibis::bitvector&) const; 00366 virtual int searchSorted(const ibis::qDiscreteRange&, 00367 ibis::bitvector&) const; 00369 virtual int searchSorted(const ibis::qIntHod&, 00370 ibis::bitvector&) const; 00372 virtual int searchSorted(const ibis::qUIntHod&, 00373 ibis::bitvector&) const; 00375 template <typename T> int 00376 searchSortedICC(const array_t<T>& vals, 00377 const ibis::qContinuousRange& rng, 00378 ibis::bitvector& hits) const; 00380 template <typename T> int 00381 searchSortedICD(const array_t<T>& vals, 00382 const ibis::qDiscreteRange& rng, 00383 ibis::bitvector& hits) const; 00385 template <typename T> int 00386 searchSortedICD(const array_t<T>& vals, 00387 const ibis::qIntHod& rng, 00388 ibis::bitvector& hits) const; 00390 template <typename T> int 00391 searchSortedICD(const array_t<T>& vals, 00392 const ibis::qUIntHod& rng, 00393 ibis::bitvector& hits) const; 00395 template <typename T> int 00396 searchSortedOOCC(const char* fname, 00397 const ibis::qContinuousRange& rng, 00398 ibis::bitvector& hits) const; 00400 template <typename T> int 00401 searchSortedOOCD(const char* fname, 00402 const ibis::qDiscreteRange& rng, 00403 ibis::bitvector& hits) const; 00405 template <typename T> int 00406 searchSortedOOCD(const char* fname, 00407 const ibis::qIntHod& rng, 00408 ibis::bitvector& hits) const; 00410 template <typename T> int 00411 searchSortedOOCD(const char* fname, 00412 const ibis::qUIntHod& rng, 00413 ibis::bitvector& hits) const; 00414 00416 template <typename T> uint32_t 00417 findLower(int fdes, const uint32_t nr, const T tgt) const; 00419 template <typename T> uint32_t 00420 findUpper(int fdes, const uint32_t nr, const T tgt) const; 00421 template <typename T> 00422 long selectValuesT(const bitvector& mask, array_t<T>& vals) const; 00423 template <typename T> 00424 long selectValuesT(const bitvector& mask, 00425 array_t<T>& vals, array_t<uint32_t>& inds) const; 00426 template <typename T> 00427 long selectToStrings(const bitvector& mask, 00428 std::vector<std::string>& str) const; 00430 template <typename T> 00431 long appendValues(const array_t<T>&, const ibis::bitvector&); 00433 long appendStrings(const std::vector<std::string>&, const ibis::bitvector&); 00434 00435 class readLock; 00436 class writeLock; 00437 class softWriteLock; 00438 friend class readLock; 00439 friend class writeLock; 00440 friend class indexLock; 00441 friend class mutexLock; 00442 friend class softWriteLock; 00443 00444 private: 00447 mutable pthread_rwlock_t rwlock; 00449 mutable pthread_mutex_t mutex; 00450 00451 column& operator=(const column&); // no assignment 00452 }; // ibis::column 00453 00456 class FASTBIT_CXX_DLLSPEC ibis::column::info { 00457 public: 00458 const char* name; 00459 const char* description; 00460 const double expectedMin; 00461 const double expectedMax; 00462 const ibis::TYPE_T type; 00463 info(const ibis::column& col) 00464 : name(col.name()), description(col.description()), 00465 expectedMin(col.lowerBound()), 00466 expectedMax(col.upperBound()), type(col.type()) {}; 00467 info(const info& rhs) 00468 : name(rhs.name), description(rhs.description), 00469 expectedMin(rhs.expectedMin), 00470 expectedMax(rhs.expectedMax), 00471 type(rhs.type) {}; 00472 private: 00473 info(); 00474 info& operator=(const info&); 00475 }; // ibis::column::info 00476 00480 class ibis::column::indexLock { 00481 public: 00482 ~indexLock(); 00483 indexLock(const ibis::column* col, const char* m); 00484 const ibis::index* getIndex() const {return theColumn->idx;}; 00485 00486 private: 00487 const ibis::column* theColumn; 00488 const char* mesg; 00489 00490 indexLock(); 00491 indexLock(const indexLock&); 00492 indexLock& operator=(const indexLock&); 00493 }; // ibis::column::indexLock 00494 00496 class ibis::column::mutexLock { 00497 public: 00498 mutexLock(const ibis::column* col, const char* m) 00499 : theColumn(col), mesg(m) { 00500 if (ibis::gVerbose > 9) 00501 col->logMessage("gainExclusiveAccess", 00502 "pthread_mutex_lock for %s", m); 00503 int ierr = pthread_mutex_lock(&(col->mutex)); 00504 if (0 != ierr) 00505 col->logWarning("gainExclusiveAccess", "pthread_mutex_lock for %s " 00506 "returned %d (%s)", m, ierr, strerror(ierr)); 00507 } 00508 ~mutexLock() { 00509 if (ibis::gVerbose > 9) 00510 theColumn->logMessage("releaseExclusiveAccess", 00511 "pthread_mutex_unlock for %s", mesg); 00512 int ierr = pthread_mutex_unlock(&(theColumn->mutex)); 00513 if (0 != ierr) 00514 theColumn->logWarning("releaseExclusiveAccess", 00515 "pthread_mutex_unlock for %s returned %d " 00516 "(%s)", mesg, ierr, strerror(ierr)); 00517 } 00518 00519 private: 00520 const ibis::column* theColumn; 00521 const char* mesg; 00522 00523 mutexLock() {}; // no default constructor 00524 mutexLock(const mutexLock&) {}; // can not copy 00525 mutexLock& operator=(const mutexLock&); 00526 }; // ibis::column::mutexLock 00527 00529 class ibis::column::writeLock { 00530 public: 00531 writeLock(const ibis::column* col, const char* m); 00532 ~writeLock(); 00533 00534 private: 00535 const ibis::column* theColumn; 00536 const char* mesg; 00537 00538 writeLock(); 00539 writeLock(const writeLock&); 00540 writeLock& operator=(const writeLock&); 00541 }; // ibis::column::writeLock 00542 00544 class ibis::column::softWriteLock { 00545 public: 00546 softWriteLock(const ibis::column* col, const char* m); 00547 ~softWriteLock(); 00548 bool isLocked() const {return(locked==0);} 00549 00550 private: 00551 const ibis::column* theColumn; 00552 const char* mesg; 00553 const int locked; 00554 00555 softWriteLock(); 00556 softWriteLock(const softWriteLock&); 00557 softWriteLock& operator=(const softWriteLock&); 00558 }; // ibis::column::softWriteLock 00559 00561 class ibis::column::readLock { 00562 public: 00563 readLock(const ibis::column* col, const char* m); 00564 ~readLock(); 00565 00566 private: 00567 const ibis::column* theColumn; 00568 const char* mesg; 00569 00570 readLock(); 00571 readLock(const readLock&); 00572 readLock& operator=(const readLock&); 00573 }; // ibis::column::readLock 00574 00576 inline int ibis::column::elementSize() const { 00577 int sz = -1; 00578 switch (m_type) { 00579 case ibis::OID: sz = sizeof(rid_t); break; 00580 case ibis::INT: sz = sizeof(int32_t); break; 00581 case ibis::UINT: sz = sizeof(uint32_t); break; 00582 case ibis::LONG: sz = sizeof(int64_t); break; 00583 case ibis::ULONG: sz = sizeof(uint64_t); break; 00584 case ibis::FLOAT: sz = sizeof(float); break; 00585 case ibis::DOUBLE: sz = sizeof(double); break; 00586 case ibis::BYTE: sz = sizeof(char); break; 00587 case ibis::UBYTE: sz = sizeof(unsigned char); break; 00588 case ibis::SHORT: sz = sizeof(int16_t); break; 00589 case ibis::USHORT: sz = sizeof(uint16_t); break; 00590 case ibis::CATEGORY: sz = 0; break; // no fixed size per element 00591 case ibis::TEXT: sz = 0; break; // no fixed size per element 00592 default: sz = -1; break; 00593 } 00594 return sz; 00595 } // ibis::column::elementSize 00596 00598 inline bool ibis::column::isFloat() const { 00599 return(m_type == ibis::FLOAT || m_type == ibis::DOUBLE); 00600 } // ibis::column::isFloat 00601 00603 inline bool ibis::column::isInteger() const { 00604 return(m_type == ibis::BYTE || m_type == ibis::UBYTE || 00605 m_type == ibis::SHORT || m_type == ibis::USHORT || 00606 m_type == ibis::INT || m_type == ibis::UINT || 00607 m_type == ibis::LONG || m_type == ibis::ULONG); 00608 } // ibis::column::isInteger 00609 00611 inline bool ibis::column::isSignedInteger() const { 00612 return(m_type == ibis::BYTE || m_type == ibis::SHORT || 00613 m_type == ibis::INT || m_type == ibis::LONG); 00614 } // ibis::column::isSignedInteger 00615 00617 inline bool ibis::column::isUnsignedInteger() const { 00618 return(m_type == ibis::UBYTE || m_type == ibis::USHORT || 00619 m_type == ibis::UINT || m_type == ibis::ULONG); 00620 } // ibis::column::isUnsignedInteger 00621 00623 inline bool ibis::column::isNumeric() const { 00624 return(m_type == ibis::BYTE || m_type == ibis::UBYTE || 00625 m_type == ibis::SHORT || m_type == ibis::USHORT || 00626 m_type == ibis::INT || m_type == ibis::UINT || 00627 m_type == ibis::LONG || m_type == ibis::ULONG || 00628 m_type == ibis::FLOAT || m_type == ibis::DOUBLE); 00629 } // ibis::column::isNumeric 00630 00631 // the operator to print a column to an output stream 00632 inline std::ostream& operator<<(std::ostream& out, const ibis::column& prop) { 00633 prop.print(out); 00634 return out; 00635 } 00636 00637 namespace ibis { // for template specialization 00638 template <> long column::selectToStrings<signed char> 00639 (const bitvector& mask, std::vector<std::string>& str) const; 00640 template <> long column::selectToStrings<unsigned char> 00641 (const bitvector& mask, std::vector<std::string>& str) const; 00642 } 00643 #endif // IBIS_COLUMN_H
![]() |