00001 //File: $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Copyright 2000-2012 the Regents of the University of California 00004 #ifndef IBIS_COLUMN_H 00005 #define IBIS_COLUMN_H 00006 00007 00008 00009 00010 00011 00012 00013 #include "table.h" // ibis::TYPE_T 00014 #include "qExpr.h" // ibis::qContinuousRange 00015 #include "bitvector.h" 00016 #include <string> 00017 00018 namespace ibis { // additional names to the namespace ibis 00019 // derived classes of ibis::column, implemented in category.cpp 00020 class category; // for categorical values (low-cardinality text fields) 00021 class text; // arbitrary cardinality text fields 00022 class blob; // text may contain null characters 00023 00024 // the following are used for storing selected values of different types 00025 // of columns (implemented in colValues.cpp) 00026 class colBytes; 00027 class colUBytes; 00028 class colShorts; 00029 class colUShorts; 00030 class colInts; 00031 class colUInts; 00032 class colLongs; 00033 class colULongs; 00034 class colFloats; 00035 class colDoubles; 00036 class colStrings; 00037 } // namespace 00038 00049 class FASTBIT_CXX_DLLSPEC ibis::column { 00050 public: 00051 00052 virtual ~column(); 00054 column(const part* tbl, FILE* file); 00056 column(const part* tbl, ibis::TYPE_T t, const char* name, 00057 const char* desc="", double low=DBL_MAX, double high=-DBL_MAX); 00058 column(const column& rhs); 00059 00062 ibis::TYPE_T type() const {return m_type;} 00064 const char* name() const {return m_name.c_str();} 00066 void name(const char* nm) {m_name = nm;} 00068 const char* description() const {return m_desc.c_str();} 00070 const double& lowerBound() const {return lower;} 00072 const double& upperBound() const {return upper;} 00073 00074 int elementSize() const; 00075 bool isFloat() const; 00076 bool isInteger() const; 00077 bool isSignedInteger() const; 00078 bool isUnsignedInteger() const; 00079 bool isNumeric() const; 00080 bool isSorted() const {return m_sorted;} 00081 void description(const char* d) {m_desc = d;} 00082 void lowerBound(double d) {lower = d;} 00083 void upperBound(double d) {upper = d;} 00084 const part* partition() const {return thePart;} 00085 void isSorted(bool); 00086 00087 // function related to index/bin 00088 const char* indexSpec() const; 00089 uint32_t numBins() const; 00090 00091 void indexSpec(const char* spec) {m_bins=spec;} 00093 void preferredBounds(std::vector<double>&) const; 00095 void binWeights(std::vector<uint32_t>&) const; 00096 00097 virtual void computeMinMax(); 00098 virtual void computeMinMax(const char *dir); 00099 virtual void computeMinMax(const char *dir, 00100 double& min, double &max) const; 00101 00102 virtual void loadIndex(const char* iopt=0, int ropt=0) const throw (); 00103 virtual void unloadIndex() const; 00104 virtual long indexSize() const; 00105 00106 uint32_t indexedRows() const; 00107 void indexSpeedTest() const; 00108 void purgeIndexFile(const char *dir=0) const; 00109 00110 const char* dataFileName(std::string& fname, const char *dir=0) const; 00111 const char* nullMaskName(std::string& fname) const; 00112 void getNullMask(bitvector& mask) const; 00113 int setNullMask(const bitvector&); 00114 00117 virtual void getString(uint32_t, std::string &) const {}; 00121 virtual const char* findString(const char*) const 00122 {return static_cast<const char*>(0);} 00123 00124 array_t<int32_t>* getIntArray() const; 00125 array_t<float>* getFloatArray() const; 00126 array_t<double>* getDoubleArray() const; 00127 virtual int getValuesArray(void* vals) const; 00128 virtual ibis::fileManager::storage* getRawData() const; 00129 00130 virtual array_t<signed char>* selectBytes(const bitvector& mask) const; 00131 virtual array_t<unsigned char>* selectUBytes(const bitvector& mask) const; 00132 virtual array_t<int16_t>* selectShorts(const bitvector& mask) const; 00133 virtual array_t<uint16_t>* selectUShorts(const bitvector& mask) const; 00134 virtual array_t<int32_t>* selectInts(const bitvector& mask) const; 00135 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const; 00136 virtual array_t<int64_t>* selectLongs(const bitvector& mask) const; 00137 virtual array_t<uint64_t>* selectULongs(const bitvector& mask) const; 00138 virtual array_t<float>* selectFloats(const bitvector& mask) const; 00139 virtual array_t<double>* selectDoubles(const bitvector& mask) const; 00140 virtual std::vector<std::string>* 00141 selectStrings(const bitvector& mask) const; 00142 00143 long selectValues(const bitvector&, void*) const; 00144 long selectValues(const bitvector&, void*, array_t<uint32_t>&) const; 00145 long selectValues(const ibis::qContinuousRange&, void*) const; 00146 00148 virtual void write(FILE* file) const; 00150 virtual void print(std::ostream& out) const; 00152 void logMessage(const char* event, const char* fmt, ...) const; 00154 void logWarning(const char* event, const char* fmt, ...) const; 00155 00158 int expandRange(ibis::qContinuousRange& rng) const; 00161 int contractRange(ibis::qContinuousRange& rng) const; 00162 00163 virtual long evaluateRange(const ibis::qContinuousRange& cmp, 00164 const ibis::bitvector& mask, 00165 ibis::bitvector& res) const; 00167 virtual long evaluateRange(const ibis::qDiscreteRange& cmp, 00168 const ibis::bitvector& mask, 00169 ibis::bitvector& res) const; 00171 virtual long evaluateRange(const ibis::qIntHod& cmp, 00172 const ibis::bitvector& mask, 00173 ibis::bitvector& res) const; 00175 virtual long evaluateRange(const ibis::qUIntHod& cmp, 00176 const ibis::bitvector& mask, 00177 ibis::bitvector& res) const; 00178 00179 virtual long stringSearch(const char*, ibis::bitvector&) const; 00180 virtual long stringSearch(const std::vector<std::string>&, 00181 ibis::bitvector&) const; 00182 virtual long stringSearch(const char*) const; 00183 virtual long stringSearch(const std::vector<std::string>&) const; 00184 virtual long keywordSearch(const char*, ibis::bitvector&) const; 00185 virtual long keywordSearch(const char*) const; 00186 virtual long patternSearch(const char*) const; 00187 virtual long patternSearch(const char*, ibis::bitvector &) const; 00188 00189 virtual long evaluateAndSelect(const ibis::qContinuousRange&, 00190 const ibis::bitvector&, void*, 00191 ibis::bitvector&) const; 00192 00202 virtual long estimateRange(const ibis::qContinuousRange& cmp, 00203 ibis::bitvector& low, 00204 ibis::bitvector& high) const; 00206 virtual long estimateRange(const ibis::qDiscreteRange& cmp, 00207 ibis::bitvector& low, 00208 ibis::bitvector& high) const; 00210 virtual long estimateRange(const ibis::qIntHod& cmp, 00211 ibis::bitvector& low, 00212 ibis::bitvector& high) const; 00214 virtual long estimateRange(const ibis::qUIntHod& cmp, 00215 ibis::bitvector& low, 00216 ibis::bitvector& high) const; 00217 00218 virtual long estimateRange(const ibis::qContinuousRange& cmp) const; 00219 virtual long estimateRange(const ibis::qDiscreteRange& cmp) const; 00221 virtual long estimateRange(const ibis::qIntHod& cmp) const; 00223 virtual long estimateRange(const ibis::qUIntHod& cmp) const; 00224 00226 virtual double estimateCost(const ibis::qContinuousRange& cmp) const; 00228 virtual double estimateCost(const ibis::qDiscreteRange& cmp) const; 00230 virtual double estimateCost(const ibis::qIntHod& cmp) const; 00232 virtual double estimateCost(const ibis::qUIntHod& cmp) const; 00234 virtual double estimateCost(const ibis::qString&) const { 00235 return 0;} 00237 virtual double estimateCost(const ibis::qMultiString&) const { 00238 return 0;} 00239 00240 virtual float getUndecidable(const ibis::qContinuousRange& cmp, 00241 ibis::bitvector& iffy) const; 00243 virtual float getUndecidable(const ibis::qDiscreteRange& cmp, 00244 ibis::bitvector& iffy) const; 00246 virtual float getUndecidable(const ibis::qIntHod& cmp, 00247 ibis::bitvector& iffy) const; 00249 virtual float getUndecidable(const ibis::qUIntHod& cmp, 00250 ibis::bitvector& iffy) const; 00251 00253 virtual long append(const char* dt, const char* df, const uint32_t nold, 00254 const uint32_t nnew, uint32_t nbuf, char* buf); 00255 00256 virtual long append(const void* vals, const ibis::bitvector& msk); 00257 virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew, 00258 ibis::bitvector& mask, const void *va1, 00259 void *va2=0); 00260 template <typename T> 00261 long castAndWrite(const array_t<double>& vals, ibis::bitvector& mask, 00262 const T special); 00263 virtual long saveSelected(const ibis::bitvector& sel, const char *dest, 00264 char *buf, uint32_t nbuf); 00265 virtual long truncateData(const char* dir, uint32_t nent, 00266 ibis::bitvector& mask) const; 00267 00273 virtual double getActualMin() const; 00276 virtual double getActualMax() const; 00278 virtual double getSum() const; 00285 long getCumulativeDistribution(std::vector<double>& bounds, 00286 std::vector<uint32_t>& counts) const; 00298 long getDistribution(std::vector<double>& bbs, 00299 std::vector<uint32_t>& counts) const; 00301 class info; 00302 class indexLock; 00303 class mutexLock; 00304 00305 protected: 00306 // protected member variables 00307 const part* thePart; 00308 ibis::bitvector mask_; 00309 ibis::TYPE_T m_type; 00310 std::string m_name; 00311 std::string m_desc; 00312 std::string m_bins; 00313 bool m_sorted; 00314 double lower; 00315 double upper; 00316 00317 mutable ibis::index* idx; 00319 mutable ibis::util::sharedInt32 idxcnt; 00320 00322 void logError(const char* event, const char* fmt, ...) const; 00325 long string2int(int fptr, dictionary& dic, uint32_t nbuf, char* buf, 00326 array_t<uint32_t>& out) const; 00328 double computeMin() const; 00330 double computeMax() const; 00332 double computeSum() const; 00335 void actualMinMax(const char *fname, const ibis::bitvector& mask, 00336 double &min, double &max) const; 00338 template <typename T> 00339 void actualMinMax(const array_t<T>& vals, const ibis::bitvector& mask, 00340 double& min, double& max) const; 00342 template <typename T> 00343 T computeMin(const array_t<T>& vals, 00344 const ibis::bitvector& mask) const; 00346 template <typename T> 00347 T computeMax(const array_t<T>& vals, 00348 const ibis::bitvector& mask) const; 00350 template <typename T> 00351 double computeSum(const array_t<T>& vals, 00352 const ibis::bitvector& mask) const; 00353 00355 virtual int searchSorted(const ibis::qContinuousRange&, 00356 ibis::bitvector&) const; 00358 virtual int searchSorted(const ibis::qDiscreteRange&, 00359 ibis::bitvector&) const; 00361 virtual int searchSorted(const ibis::qIntHod&, 00362 ibis::bitvector&) const; 00364 virtual int searchSorted(const ibis::qUIntHod&, 00365 ibis::bitvector&) const; 00367 template <typename T> int 00368 searchSortedICC(const array_t<T>& vals, 00369 const ibis::qContinuousRange& rng, 00370 ibis::bitvector& hits) const; 00372 template <typename T> int 00373 searchSortedICD(const array_t<T>& vals, 00374 const ibis::qDiscreteRange& rng, 00375 ibis::bitvector& hits) const; 00377 template <typename T> int 00378 searchSortedICD(const array_t<T>& vals, 00379 const ibis::qIntHod& rng, 00380 ibis::bitvector& hits) const; 00382 template <typename T> int 00383 searchSortedICD(const array_t<T>& vals, 00384 const ibis::qUIntHod& rng, 00385 ibis::bitvector& hits) const; 00387 template <typename T> int 00388 searchSortedOOCC(const char* fname, 00389 const ibis::qContinuousRange& rng, 00390 ibis::bitvector& hits) const; 00392 template <typename T> int 00393 searchSortedOOCD(const char* fname, 00394 const ibis::qDiscreteRange& rng, 00395 ibis::bitvector& hits) const; 00397 template <typename T> int 00398 searchSortedOOCD(const char* fname, 00399 const ibis::qIntHod& rng, 00400 ibis::bitvector& hits) const; 00402 template <typename T> int 00403 searchSortedOOCD(const char* fname, 00404 const ibis::qUIntHod& rng, 00405 ibis::bitvector& hits) const; 00406 00408 template <typename T> uint32_t 00409 findLower(int fdes, const uint32_t nr, const T tgt) const; 00411 template <typename T> uint32_t 00412 findUpper(int fdes, const uint32_t nr, const T tgt) const; 00413 00414 template <typename T> 00415 long selectValuesT(const char*, const bitvector&, array_t<T>&) const; 00416 template <typename T> 00417 long selectValuesT(const char*, const bitvector& mask, 00418 array_t<T>& vals, array_t<uint32_t>& inds) const; 00419 template <typename T> 00420 long selectToStrings(const char*, const bitvector&, 00421 std::vector<std::string>&) const; 00422 00424 template <typename T> 00425 long appendValues(const array_t<T>&, const ibis::bitvector&); 00427 long appendStrings(const std::vector<std::string>&, const ibis::bitvector&); 00428 00429 class readLock; 00430 class writeLock; 00431 class softWriteLock; 00432 friend class readLock; 00433 friend class writeLock; 00434 friend class indexLock; 00435 friend class mutexLock; 00436 friend class softWriteLock; 00437 00438 private: 00441 mutable pthread_rwlock_t rwlock; 00443 mutable pthread_mutex_t mutex; 00444 00445 column& operator=(const column&); // no assignment 00446 }; // ibis::column 00447 00450 class FASTBIT_CXX_DLLSPEC ibis::column::info { 00451 public: 00452 const char* name; 00453 const char* description; 00454 const double expectedMin; 00455 const double expectedMax; 00456 const ibis::TYPE_T type; 00457 info(const ibis::column& col); 00458 info(const info& rhs) 00459 : name(rhs.name), description(rhs.description), 00460 expectedMin(rhs.expectedMin), 00461 expectedMax(rhs.expectedMax), 00462 type(rhs.type) {}; 00463 00464 private: 00465 info(); 00466 info& operator=(const info&); 00467 }; // ibis::column::info 00468 00472 class ibis::column::indexLock { 00473 public: 00474 ~indexLock(); 00475 indexLock(const ibis::column* col, const char* m); 00476 const ibis::index* getIndex() const {return theColumn->idx;}; 00477 00478 private: 00479 const ibis::column* theColumn; 00480 const char* mesg; 00481 00482 indexLock(); 00483 indexLock(const indexLock&); 00484 indexLock& operator=(const indexLock&); 00485 }; // ibis::column::indexLock 00486 00488 class ibis::column::mutexLock { 00489 public: 00490 mutexLock(const ibis::column* col, const char* m) 00491 : theColumn(col), mesg(m) { 00492 if (ibis::gVerbose > 9) 00493 col->logMessage("gainExclusiveAccess", 00494 "pthread_mutex_lock for %s", m); 00495 int ierr = pthread_mutex_lock(&(col->mutex)); 00496 if (0 != ierr) 00497 col->logWarning("gainExclusiveAccess", "pthread_mutex_lock for %s " 00498 "returned %d (%s)", m, ierr, strerror(ierr)); 00499 } 00500 ~mutexLock() { 00501 if (ibis::gVerbose > 9) 00502 theColumn->logMessage("releaseExclusiveAccess", 00503 "pthread_mutex_unlock for %s", mesg); 00504 int ierr = pthread_mutex_unlock(&(theColumn->mutex)); 00505 if (0 != ierr) 00506 theColumn->logWarning("releaseExclusiveAccess", 00507 "pthread_mutex_unlock for %s returned %d " 00508 "(%s)", mesg, ierr, strerror(ierr)); 00509 } 00510 00511 private: 00512 const ibis::column* theColumn; 00513 const char* mesg; 00514 00515 mutexLock() {}; // no default constructor 00516 mutexLock(const mutexLock&) {}; // can not copy 00517 mutexLock& operator=(const mutexLock&); 00518 }; // ibis::column::mutexLock 00519 00521 class ibis::column::writeLock { 00522 public: 00523 writeLock(const ibis::column* col, const char* m); 00524 ~writeLock(); 00525 00526 private: 00527 const ibis::column* theColumn; 00528 const char* mesg; 00529 00530 writeLock(); 00531 writeLock(const writeLock&); 00532 writeLock& operator=(const writeLock&); 00533 }; // ibis::column::writeLock 00534 00536 class ibis::column::softWriteLock { 00537 public: 00538 softWriteLock(const ibis::column* col, const char* m); 00539 ~softWriteLock(); 00540 bool isLocked() const {return(locked==0);} 00541 00542 private: 00543 const ibis::column* theColumn; 00544 const char* mesg; 00545 const int locked; 00546 00547 softWriteLock(); 00548 softWriteLock(const softWriteLock&); 00549 softWriteLock& operator=(const softWriteLock&); 00550 }; // ibis::column::softWriteLock 00551 00553 class ibis::column::readLock { 00554 public: 00555 readLock(const ibis::column* col, const char* m); 00556 ~readLock(); 00557 00558 private: 00559 const ibis::column* theColumn; 00560 const char* mesg; 00561 00562 readLock(); 00563 readLock(const readLock&); 00564 readLock& operator=(const readLock&); 00565 }; // ibis::column::readLock 00566 00568 inline int ibis::column::elementSize() const { 00569 int sz = -1; 00570 switch (m_type) { 00571 case ibis::OID: sz = sizeof(rid_t); break; 00572 case ibis::INT: sz = sizeof(int32_t); break; 00573 case ibis::UINT: sz = sizeof(uint32_t); break; 00574 case ibis::LONG: sz = sizeof(int64_t); break; 00575 case ibis::ULONG: sz = sizeof(uint64_t); break; 00576 case ibis::FLOAT: sz = sizeof(float); break; 00577 case ibis::DOUBLE: sz = sizeof(double); break; 00578 case ibis::BYTE: sz = sizeof(char); break; 00579 case ibis::UBYTE: sz = sizeof(unsigned char); break; 00580 case ibis::SHORT: sz = sizeof(int16_t); break; 00581 case ibis::USHORT: sz = sizeof(uint16_t); break; 00582 case ibis::CATEGORY: sz = 0; break; // no fixed size per element 00583 case ibis::TEXT: sz = 0; break; // no fixed size per element 00584 default: sz = -1; break; 00585 } 00586 return sz; 00587 } // ibis::column::elementSize 00588 00590 inline bool ibis::column::isFloat() const { 00591 return(m_type == ibis::FLOAT || m_type == ibis::DOUBLE); 00592 } // ibis::column::isFloat 00593 00595 inline bool ibis::column::isInteger() const { 00596 return(m_type == ibis::BYTE || m_type == ibis::UBYTE || 00597 m_type == ibis::SHORT || m_type == ibis::USHORT || 00598 m_type == ibis::INT || m_type == ibis::UINT || 00599 m_type == ibis::LONG || m_type == ibis::ULONG); 00600 } // ibis::column::isInteger 00601 00603 inline bool ibis::column::isSignedInteger() const { 00604 return(m_type == ibis::BYTE || m_type == ibis::SHORT || 00605 m_type == ibis::INT || m_type == ibis::LONG); 00606 } // ibis::column::isSignedInteger 00607 00609 inline bool ibis::column::isUnsignedInteger() const { 00610 return(m_type == ibis::UBYTE || m_type == ibis::USHORT || 00611 m_type == ibis::UINT || m_type == ibis::ULONG); 00612 } // ibis::column::isUnsignedInteger 00613 00615 inline bool ibis::column::isNumeric() const { 00616 return(m_type == ibis::BYTE || m_type == ibis::UBYTE || 00617 m_type == ibis::SHORT || m_type == ibis::USHORT || 00618 m_type == ibis::INT || m_type == ibis::UINT || 00619 m_type == ibis::LONG || m_type == ibis::ULONG || 00620 m_type == ibis::FLOAT || m_type == ibis::DOUBLE); 00621 } // ibis::column::isNumeric 00622 00623 // the operator to print a column to an output stream 00624 inline std::ostream& operator<<(std::ostream& out, const ibis::column& prop) { 00625 prop.print(out); 00626 return out; 00627 } 00628 00629 namespace ibis { // for template specialization 00630 template <> long column::selectToStrings<signed char> 00631 (const char*, const bitvector&, std::vector<std::string>&) const; 00632 template <> long column::selectToStrings<unsigned char> 00633 (const char*, const bitvector&, std::vector<std::string>&) const; 00634 } 00635 #endif // IBIS_COLUMN_H
![]() |