00001 //File: $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Lawrence Berkeley National Laboratory 00004 // Copyright 2000-2011 the Regents of the University of California 00005 #ifndef IBIS_INDEX_H 00006 #define IBIS_INDEX_H 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019 00020 00021 00022 00023 00024 #if defined(_WIN32) && defined(_MSC_VER) 00025 #pragma warning(disable:4786) // some identifier longer than 256 characters 00026 #endif 00027 #include "qExpr.h" 00028 #include "bitvector.h" 00029 00030 #include <string> 00031 #include <limits> // std::numeric_limits 00032 00033 namespace ibis { // the concrete classes of index hierarchy 00034 class bin; // equal size bins (equality encoded bitmap index) 00035 class range; // one-sided range encoded (cumulative) index 00036 class mesa; // interval encoded index (two-sided ranges) 00037 class ambit; // two-level, cumulative index (cumulate on all levels) 00038 class pale; // two-level, cumulative on the fine level only 00039 class pack; // two-level, cumulative on the coarse level only 00040 class zone; // two-level, do not cumulate on either levels 00041 class relic; // the basic bitmap index (one bitmap per distinct value) 00042 class slice; // the bit slice index (binary encoding of ibis::relic) 00043 class fade; // a more controlled slice (multicomponent range code) 00044 class sbiad; // Italian translation of fade (multicomponent interval code) 00045 class sapid; // closest word to sbiad in English (multicomponent equality 00046 // code) 00047 class egale; // French word for "equal" (multicomponent equality code on 00048 // bins) 00049 class moins; // French word for "less" (multicomponent range code on bins) 00050 class entre; // French word for "in between" (multicomponent interval code 00051 // on bins) 00052 class bak; // Dutch word for "bin" (simple equality encoding for 00053 // values mapped to reduced precision floating-point 00054 // values) 00055 class bak2; // a variation of bak that splits each bak bin in two 00056 // class waaier;// Dutch word for "range" (simple range encoding for values 00057 // // mapped to reduced precision floating-point values) 00058 // class tussen;// Dutch word for "in between" (simple interval encoding 00059 // // for values mapped to reduce precision floating-point 00060 // // values) 00061 // class uguale;// Italian word for "equal" (multicomponent version of bak) 00062 // class meno; // Italian word for "less" (multicomponent version of waaier) 00063 // class fra; // Italian word for "in between" (multicomponent version of 00064 // // tussen) 00065 class keywords;// A boolean version of term-document matrix. 00066 class mesh; // Composite index on 2-D regular mesh 00067 class band; // Composite index on 2-D bands. 00068 class direkte;// Directly use the integer values as bin numbers. 00069 class bylt; // Unbinned version of pack. 00070 class zona; // Unbinned version of zone. 00071 class fuzz; // Unbinned version of interval-equality encoding. 00072 class fuge; // Binned version of interval-equality encoding. 00073 } // namespace ibis 00074 00081 class ibis::index { 00082 public: 00086 enum INDEX_TYPE { 00087 BINNING=0, 00088 RANGE, 00089 MESA, 00090 AMBIT, 00091 PALE, 00092 PACK, 00093 ZONE, 00094 RELIC, 00095 ROSTER, 00096 SLICE, 00097 FADE, 00098 SBIAD, 00099 SAPID, 00100 EGALE, 00101 MOINS, 00102 ENTRE, 00103 BAK, 00104 BAK2, 00105 00106 00107 KEYWORDS, 00108 MESH, 00109 BAND, 00110 DIREKTE, 00111 GENERIC, 00112 BYLT, 00113 FUZZ, 00114 ZONA, 00115 FUGE, 00116 EXTERN 00117 }; 00118 00119 static index* create(const column* c, const char* name=0, 00120 const char* spec=0, int inEntirety=0); 00121 static bool isIndex(const char* f, INDEX_TYPE t); 00123 virtual ~index () {clear();}; 00124 00126 virtual INDEX_TYPE type() const = 0; 00129 virtual const char* name() const = 0; 00130 00133 virtual long evaluate(const ibis::qContinuousRange& expr, 00134 ibis::bitvector& hits) const = 0; 00137 virtual long evaluate(const ibis::qDiscreteRange&, 00138 ibis::bitvector&) const {return -1;} 00139 00150 virtual void estimate(const ibis::qContinuousRange&, 00151 ibis::bitvector& lower, 00152 ibis::bitvector& upper) const { 00153 lower.set(0, nrows); upper.set(1, nrows);} 00155 virtual uint32_t estimate(const ibis::qContinuousRange&) const { 00156 return nrows;} 00164 virtual float undecidable(const ibis::qContinuousRange&, 00165 ibis::bitvector&) const {return 0.5;} 00166 00169 virtual void estimate(const ibis::qDiscreteRange& expr, 00170 ibis::bitvector& lower, 00171 ibis::bitvector& upper) const; 00172 virtual uint32_t estimate(const ibis::qDiscreteRange& expr) const; 00173 virtual float undecidable(const ibis::qDiscreteRange& expr, 00174 ibis::bitvector& iffy) const; 00175 00177 virtual void estimate(const ibis::index& idx2, 00178 const ibis::deprecatedJoin& expr, 00179 ibis::bitvector64& lower, 00180 ibis::bitvector64& upper) const; 00183 virtual void estimate(const ibis::index& idx2, 00184 const ibis::deprecatedJoin& expr, 00185 const ibis::bitvector& mask, 00186 ibis::bitvector64& lower, 00187 ibis::bitvector64& upper) const; 00188 virtual void estimate(const ibis::index& idx2, 00189 const ibis::deprecatedJoin& expr, 00190 const ibis::bitvector& mask, 00191 const ibis::qRange* const range1, 00192 const ibis::qRange* const range2, 00193 ibis::bitvector64& lower, 00194 ibis::bitvector64& upper) const; 00196 virtual int64_t estimate(const ibis::index& idx2, 00197 const ibis::deprecatedJoin& expr) const; 00200 virtual int64_t estimate(const ibis::index& idx2, 00201 const ibis::deprecatedJoin& expr, 00202 const ibis::bitvector& mask) const; 00203 virtual int64_t estimate(const ibis::index& idx2, 00204 const ibis::deprecatedJoin& expr, 00205 const ibis::bitvector& mask, 00206 const ibis::qRange* const range1, 00207 const ibis::qRange* const range2) const; 00208 00210 virtual void estimate(const ibis::deprecatedJoin& expr, 00211 const ibis::bitvector& mask, 00212 const ibis::qRange* const range1, 00213 const ibis::qRange* const range2, 00214 ibis::bitvector64& lower, 00215 ibis::bitvector64& upper) const; 00216 virtual int64_t estimate(const ibis::deprecatedJoin& expr, 00217 const ibis::bitvector& mask, 00218 const ibis::qRange* const range1, 00219 const ibis::qRange* const range2) const; 00220 00222 virtual double estimateCost(const ibis::qContinuousRange&) const { 00223 return (offset32.empty() ? (nrows<<3) : offset32.back());} 00224 virtual double estimateCost(const ibis::qDiscreteRange&) const { 00225 return (offset32.empty() ? (nrows<<3) : offset32.back());} 00226 00229 virtual void print(std::ostream& out) const = 0; 00233 virtual int write(const char* name) const = 0; 00237 virtual int read(const char* name) = 0; 00240 virtual int read(ibis::fileManager::storage* st) = 0; 00241 00243 virtual long append(const char*, const char*, uint32_t) {return -1;} 00244 00246 virtual void speedTest(std::ostream&) const {}; 00248 virtual uint32_t numBitvectors() const {return bits.size();} 00250 virtual const ibis::bitvector* getBitvector(uint32_t i) const { 00251 if (i < bits.size()) { 00252 if (bits[i] == 0) 00253 activate(i); 00254 return bits[i]; 00255 } 00256 else { 00257 return 0; 00258 } 00259 } 00260 00263 virtual void binBoundaries(std::vector<double>&) const {return;} 00264 virtual void binWeights(std::vector<uint32_t>&) const {return;} 00265 00267 virtual long getCumulativeDistribution 00268 (std::vector<double>& bds, std::vector<uint32_t>& cts) const; 00270 virtual long getDistribution 00271 (std::vector<double>& bbs, std::vector<uint32_t>& cts) const; 00273 virtual double getMin() const { 00274 return std::numeric_limits<double>::quiet_NaN();} 00276 virtual double getMax() const { 00277 return std::numeric_limits<double>::quiet_NaN();} 00281 virtual double getSum() const { 00282 return std::numeric_limits<double>::quiet_NaN();} 00284 uint32_t getNRows() const {return nrows;} 00285 00291 virtual int expandRange(ibis::qContinuousRange&) const {return 0;} 00292 virtual int contractRange(ibis::qContinuousRange&) const {return 0;} 00293 00294 typedef std::map< double, ibis::bitvector* > VMap; 00295 typedef std::map< double, uint32_t > histogram; 00296 template <typename E> 00297 static void mapValues(const array_t<E>& val, VMap& bmap); 00298 template <typename E> 00299 static void mapValues(const array_t<E>& val, histogram& hist, 00300 uint32_t count=0); 00301 template <typename E> 00302 static void mapValues(const array_t<E>& val, array_t<E>& bounds, 00303 std::vector<uint32_t>& cnts); 00304 template <typename E1, typename E2> 00305 static void mapValues(const array_t<E1>& val1, const array_t<E2>& val2, 00306 array_t<E1>& bnd1, array_t<E2>& bnd2, 00307 std::vector<uint32_t>& cnts); 00310 static void divideCounts(array_t<uint32_t>& bounds, 00311 const array_t<uint32_t>& cnt); 00312 00313 // three static functions to perform the task of summing up bit sequences 00314 static void addBits(const std::vector<ibis::bitvector*>& bits, 00315 uint32_t ib, uint32_t ie, ibis::bitvector& res); 00316 static void sumBits(const std::vector<ibis::bitvector*>& bits, 00317 uint32_t ib, uint32_t ie, ibis::bitvector& res); 00318 static void sumBits(const std::vector<ibis::bitvector*>& bits, 00319 const ibis::bitvector& tot, uint32_t ib, uint32_t ie, 00320 ibis::bitvector& res); 00321 // a static function to assign bases for multicomponent schemes 00322 static void setBases(array_t<uint32_t>& bases, uint32_t card, 00323 uint32_t nbase = 2); 00324 00325 protected: 00326 // shared members for all indexes 00328 const ibis::column* col; 00331 mutable ibis::fileManager::storage* str; 00333 mutable const char* fname; 00335 mutable array_t<int32_t> offset32; 00339 mutable array_t<int64_t> offset64; 00341 mutable std::vector<ibis::bitvector*> bits; 00345 uint32_t nrows; 00346 00350 index(const ibis::column* c=0) : col(c), str(0), fname(0), nrows(0) {} 00351 index(const ibis::column* c, ibis::fileManager::storage* s); 00352 00354 void dataFileName(const char* f, std::string& name) const; 00356 void indexFileName(const char* f, std::string& name) const; 00357 static void indexFileName(std::string& name, const ibis::column* col1, 00358 const ibis::column* col2, const char* f=0); 00359 00361 virtual void activate() const; 00363 virtual void activate(uint32_t i) const; 00366 virtual void activate(uint32_t i, uint32_t j) const; 00368 virtual void clear(); 00369 00371 // both VMap and histogram assume that all supported data types can be 00372 // safely stored in double 00375 void mapValues(const char* f, VMap& bmap) const; 00377 void mapValues(const char* f, histogram& hist, uint32_t count=0) const; 00378 void computeMinMax(const char* f, double& min, double& max) const; 00380 void optionalUnpack(std::vector<ibis::bitvector*>& bits, 00381 const char* opt); 00384 void addBins(uint32_t ib, uint32_t ie, ibis::bitvector& res) const; 00387 void addBins(uint32_t ib, uint32_t ie, ibis::bitvector& res, 00388 const ibis::bitvector& tot) const; 00391 void sumBins(uint32_t ib, uint32_t ie, ibis::bitvector& res) const; 00394 void sumBins(uint32_t ib, uint32_t ie, ibis::bitvector& res, 00395 uint32_t ib0, uint32_t ie0) const; 00396 00397 int initOffsets(int fdes, const char offsize, size_t start, 00398 uint32_t nobs); 00399 int initOffsets(ibis::fileManager::storage* st, size_t start, 00400 uint32_t nobs); 00401 void initBitmaps(int fdes); 00402 void initBitmaps(ibis::fileManager::storage* st); 00403 00404 class barrel; 00405 00406 private: 00407 index(const index&); // no copy constructor 00408 index& operator=(const index&); // no assignment operator 00409 }; // ibis::index 00410 00411 00414 class ibis::index::barrel : public ibis::math::barrel { 00415 public: 00416 barrel(const ibis::math::term* t) : ibis::math::barrel(t) {} 00417 00418 void setValue(uint32_t i, double v) {varvalues[i] = v;} 00419 }; // ibis::index::barrel 00420 #endif // IBIS_INDEX_H
![]() |