index.h
Go to the documentation of this file.
00001 //File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 //         Lawrence Berkeley National Laboratory
00004 // Copyright 2000-2011 the Regents of the University of California
00005 #ifndef IBIS_INDEX_H
00006 #define IBIS_INDEX_H
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 #if defined(_WIN32) && defined(_MSC_VER)
00025 #pragma warning(disable:4786)   // some identifier longer than 256 characters
00026 #endif
00027 #include "qExpr.h"
00028 #include "bitvector.h"
00029 
00030 #include <string>
00031 #include <limits>       // std::numeric_limits
00032 
00033 namespace ibis { // the concrete classes of index hierarchy
00034     class bin;   // equal size bins (equality encoded bitmap index)
00035     class range; // one-sided range encoded (cumulative) index
00036     class mesa;  // interval encoded index (two-sided ranges)
00037     class ambit; // two-level, cumulative index (cumulate on all levels)
00038     class pale;  // two-level, cumulative on the fine level only
00039     class pack;  // two-level, cumulative on the coarse level only
00040     class zone;  // two-level, do not cumulate on either levels
00041     class relic; // the basic bitmap index (one bitmap per distinct value)
00042     class slice; // the bit slice index (binary encoding of ibis::relic)
00043     class fade;  // a more controlled slice (multicomponent range code)
00044     class sbiad; // Italian translation of fade (multicomponent interval code)
00045     class sapid; // closest word to sbiad in English (multicomponent equality
00046                  // code)
00047     class egale; // French word for "equal" (multicomponent equality code on
00048                  // bins)
00049     class moins; // French word for "less" (multicomponent range code on bins)
00050     class entre; // French word for "in between" (multicomponent interval code
00051                  // on bins)
00052     class bak;   // Dutch word for "bin" (simple equality encoding for
00053                  // values mapped to reduced precision floating-point
00054                  // values)
00055     class bak2;  // a variation of bak that splits each bak bin in two
00056 //     class waaier;// Dutch word for "range" (simple range encoding for values
00057 //               // mapped to reduced precision floating-point values)
00058 //     class tussen;// Dutch word for "in between" (simple interval encoding
00059 //               // for values mapped to reduce precision floating-point
00060 //               // values)
00061 //     class uguale;// Italian word for "equal" (multicomponent version of bak)
00062 //     class meno;  // Italian word for "less" (multicomponent version of waaier)
00063 //     class fra;   // Italian word for "in between" (multicomponent version of
00064 //               // tussen)
00065     class keywords;// A boolean version of term-document matrix.
00066     class mesh;  // Composite index on 2-D regular mesh
00067     class band;  // Composite index on 2-D bands.
00068     class direkte;// Directly use the integer values as bin numbers.
00069     class bylt;  // Unbinned version of pack.
00070     class zona;  // Unbinned version of zone.
00071     class fuzz;  // Unbinned version of interval-equality encoding.
00072     class fuge;  // Binned version of interval-equality encoding.
00073 } // namespace ibis
00074 
00081 class ibis::index {
00082 public:
00086     enum INDEX_TYPE {
00087         BINNING=0,      
00088         RANGE,  
00089         MESA,   
00090         AMBIT,  
00091         PALE,   
00092         PACK,   
00093         ZONE,   
00094         RELIC,  
00095         ROSTER, 
00096         SLICE,  
00097         FADE,   
00098         SBIAD,  
00099         SAPID,  
00100         EGALE,  
00101         MOINS,  
00102         ENTRE,  
00103         BAK,    
00104         BAK2,   
00105 
00106 
00107         KEYWORDS,       
00108         MESH,   
00109         BAND,   
00110         DIREKTE,
00111         GENERIC,
00112         BYLT,   
00113         FUZZ,   
00114         ZONA,   
00115         FUGE,   
00116         EXTERN  
00117     };
00118 
00119     static index* create(const column* c, const char* name=0,
00120                          const char* spec=0, int inEntirety=0);
00121     static bool isIndex(const char* f, INDEX_TYPE t);
00123     virtual ~index () {clear();};
00124 
00126     virtual INDEX_TYPE type() const = 0;
00129     virtual const char* name() const = 0;
00130 
00133     virtual long evaluate(const ibis::qContinuousRange& expr,
00134                           ibis::bitvector& hits) const = 0;
00137     virtual long evaluate(const ibis::qDiscreteRange&,
00138                           ibis::bitvector&) const {return -1;}
00139 
00150     virtual void estimate(const ibis::qContinuousRange&,
00151                           ibis::bitvector& lower,
00152                           ibis::bitvector& upper) const {
00153         lower.set(0, nrows); upper.set(1, nrows);}
00155     virtual uint32_t estimate(const ibis::qContinuousRange&) const {
00156         return nrows;}
00164     virtual float undecidable(const ibis::qContinuousRange&,
00165                               ibis::bitvector&) const {return 0.5;}
00166 
00169     virtual void estimate(const ibis::qDiscreteRange& expr,
00170                           ibis::bitvector& lower,
00171                           ibis::bitvector& upper) const;
00172     virtual uint32_t estimate(const ibis::qDiscreteRange& expr) const;
00173     virtual float undecidable(const ibis::qDiscreteRange& expr,
00174                               ibis::bitvector& iffy) const;
00175 
00177     virtual void estimate(const ibis::index& idx2,
00178                           const ibis::deprecatedJoin& expr,
00179                           ibis::bitvector64& lower,
00180                           ibis::bitvector64& upper) const;
00183     virtual void estimate(const ibis::index& idx2,
00184                           const ibis::deprecatedJoin& expr,
00185                           const ibis::bitvector& mask,
00186                           ibis::bitvector64& lower,
00187                           ibis::bitvector64& upper) const;
00188     virtual void estimate(const ibis::index& idx2,
00189                           const ibis::deprecatedJoin& expr,
00190                           const ibis::bitvector& mask,
00191                           const ibis::qRange* const range1,
00192                           const ibis::qRange* const range2,
00193                           ibis::bitvector64& lower,
00194                           ibis::bitvector64& upper) const;
00196     virtual int64_t estimate(const ibis::index& idx2,
00197                              const ibis::deprecatedJoin& expr) const;
00200     virtual int64_t estimate(const ibis::index& idx2,
00201                              const ibis::deprecatedJoin& expr,
00202                              const ibis::bitvector& mask) const;
00203     virtual int64_t estimate(const ibis::index& idx2,
00204                              const ibis::deprecatedJoin& expr,
00205                              const ibis::bitvector& mask,
00206                              const ibis::qRange* const range1,
00207                              const ibis::qRange* const range2) const;
00208 
00210     virtual void estimate(const ibis::deprecatedJoin& expr,
00211                           const ibis::bitvector& mask,
00212                           const ibis::qRange* const range1,
00213                           const ibis::qRange* const range2,
00214                           ibis::bitvector64& lower,
00215                           ibis::bitvector64& upper) const;
00216     virtual int64_t estimate(const ibis::deprecatedJoin& expr,
00217                              const ibis::bitvector& mask,
00218                              const ibis::qRange* const range1,
00219                              const ibis::qRange* const range2) const;
00220 
00222     virtual double estimateCost(const ibis::qContinuousRange&) const {
00223         return (offset32.empty() ? (nrows<<3) : offset32.back());}
00224     virtual double estimateCost(const ibis::qDiscreteRange&) const {
00225         return (offset32.empty() ? (nrows<<3) : offset32.back());}
00226 
00229     virtual void print(std::ostream& out) const = 0;
00233     virtual int write(const char* name) const = 0;
00237     virtual int read(const char* name) = 0;
00240     virtual int read(ibis::fileManager::storage* st) = 0;
00241 
00243     virtual long append(const char*, const char*, uint32_t) {return -1;}
00244 
00246     virtual void speedTest(std::ostream&) const {};
00248     virtual uint32_t numBitvectors() const {return bits.size();}
00250     virtual const ibis::bitvector* getBitvector(uint32_t i) const {
00251         if (i < bits.size()) {
00252             if (bits[i] == 0)
00253                 activate(i);
00254             return bits[i];
00255         }
00256         else {
00257             return 0;
00258         }
00259     }
00260 
00263     virtual void binBoundaries(std::vector<double>&) const {return;}
00264     virtual void binWeights(std::vector<uint32_t>&) const {return;}
00265 
00267     virtual long getCumulativeDistribution
00268     (std::vector<double>& bds, std::vector<uint32_t>& cts) const;
00270     virtual long getDistribution
00271     (std::vector<double>& bbs, std::vector<uint32_t>& cts) const;
00273     virtual double getMin() const {
00274         return std::numeric_limits<double>::quiet_NaN();}
00276     virtual double getMax() const {
00277         return std::numeric_limits<double>::quiet_NaN();}
00281     virtual double getSum() const {
00282         return std::numeric_limits<double>::quiet_NaN();}
00284     uint32_t getNRows() const {return nrows;}
00285 
00291     virtual int expandRange(ibis::qContinuousRange&) const {return 0;}
00292     virtual int contractRange(ibis::qContinuousRange&) const {return 0;}
00293 
00294     typedef std::map< double, ibis::bitvector* > VMap;
00295     typedef std::map< double, uint32_t > histogram;
00296     template <typename E>
00297     static void mapValues(const array_t<E>& val, VMap& bmap);
00298     template <typename E>
00299     static void mapValues(const array_t<E>& val, histogram& hist,
00300                           uint32_t count=0);
00301     template <typename E>
00302     static void mapValues(const array_t<E>& val, array_t<E>& bounds,
00303                           std::vector<uint32_t>& cnts);
00304     template <typename E1, typename E2>
00305     static void mapValues(const array_t<E1>& val1, const array_t<E2>& val2,
00306                           array_t<E1>& bnd1, array_t<E2>& bnd2,
00307                           std::vector<uint32_t>& cnts);
00310     static void divideCounts(array_t<uint32_t>& bounds,
00311                              const array_t<uint32_t>& cnt);
00312 
00313     // three static functions to perform the task of summing up bit sequences
00314     static void addBits(const std::vector<ibis::bitvector*>& bits,
00315                         uint32_t ib, uint32_t ie, ibis::bitvector& res);
00316     static void sumBits(const std::vector<ibis::bitvector*>& bits,
00317                         uint32_t ib, uint32_t ie, ibis::bitvector& res);
00318     static void sumBits(const std::vector<ibis::bitvector*>& bits,
00319                         const ibis::bitvector& tot, uint32_t ib, uint32_t ie,
00320                         ibis::bitvector& res);
00321     // a static function to assign bases for multicomponent schemes
00322     static void setBases(array_t<uint32_t>& bases, uint32_t card,
00323                          uint32_t nbase = 2);
00324 
00325 protected:
00326     // shared members for all indexes
00328     const ibis::column* col;
00331     mutable ibis::fileManager::storage* str;
00333     mutable const char* fname;
00335     mutable array_t<int32_t> offset32;
00339     mutable array_t<int64_t> offset64;
00341     mutable std::vector<ibis::bitvector*> bits;
00345     uint32_t nrows;
00346 
00350     index(const ibis::column* c=0) : col(c), str(0), fname(0), nrows(0) {}
00351     index(const ibis::column* c, ibis::fileManager::storage* s);
00352 
00354     void dataFileName(const char* f, std::string& name) const;
00356     void indexFileName(const char* f, std::string& name) const;
00357     static void indexFileName(std::string& name, const ibis::column* col1,
00358                               const ibis::column* col2, const char* f=0);
00359 
00361     virtual void activate() const;
00363     virtual void activate(uint32_t i) const;
00366     virtual void activate(uint32_t i, uint32_t j) const;
00368     virtual void clear();
00369 
00371     // both VMap and histogram assume that all supported data types can be
00372     // safely stored in double
00375     void mapValues(const char* f, VMap& bmap) const;
00377     void mapValues(const char* f, histogram& hist, uint32_t count=0) const;
00378     void computeMinMax(const char* f, double& min, double& max) const;
00380     void optionalUnpack(std::vector<ibis::bitvector*>& bits,
00381                         const char* opt);
00384     void addBins(uint32_t ib, uint32_t ie, ibis::bitvector& res) const;
00387     void addBins(uint32_t ib, uint32_t ie, ibis::bitvector& res,
00388                  const ibis::bitvector& tot) const;
00391     void sumBins(uint32_t ib, uint32_t ie, ibis::bitvector& res) const;
00394     void sumBins(uint32_t ib, uint32_t ie, ibis::bitvector& res,
00395                  uint32_t ib0, uint32_t ie0) const;
00396 
00397     int initOffsets(int fdes, const char offsize, size_t start,
00398                     uint32_t nobs);
00399     int initOffsets(ibis::fileManager::storage* st, size_t start,
00400                     uint32_t nobs);
00401     void initBitmaps(int fdes);
00402     void initBitmaps(ibis::fileManager::storage* st);
00403 
00404     class barrel;
00405 
00406 private:
00407     index(const index&); // no copy constructor
00408     index& operator=(const index&); // no assignment operator
00409 }; // ibis::index
00410 
00411 
00414 class ibis::index::barrel : public ibis::math::barrel {
00415 public:
00416     barrel(const ibis::math::term* t) : ibis::math::barrel(t) {}
00417 
00418     void setValue(uint32_t i, double v) {varvalues[i] = v;}
00419 }; // ibis::index::barrel
00420 #endif // IBIS_INDEX_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive