irelic.h
Go to the documentation of this file.
00001 //File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 //         Lawrence Berkeley National Laboratory
00004 // Copyright 2000-2011 the Regents of the University of California
00005 #ifndef IBIS_IRELIC_H
00006 #define IBIS_IRELIC_H
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 #if defined(_WIN32) && defined(_MSC_VER)
00015 #pragma warning(disable:4786)   // some identifier longer than 256 characters
00016 #endif
00017 #include "index.h"
00018 
// ibis::relic -- an index class derived from ibis::index.  It reports
// itself as the "basic" index (see name()) with type RELIC (see type()).
// Only the inline members are defined in this header; everything else is
// implemented elsewhere.
00021 class ibis::relic : public ibis::index {
00022 public:
          // The destructor releases resources through clear().
00023     virtual ~relic() {clear();};
          // Constructors.  The argument semantics are set by the
          // out-of-line definitions, which are not visible in this header.
00024     relic(const ibis::column* c, const char* f = 0);
00025     relic(const ibis::column* c, uint32_t popu, uint32_t ntpl=0);
00026     relic(const ibis::column* c, uint32_t card, array_t<uint32_t>& ints);
00027     relic(const ibis::column* c, ibis::fileManager::storage* st,
00028           size_t start = 8);
00029 
          // Printing, writing, reading and appending; all are virtual so
          // that derived index classes can replace them.
00030     virtual void print(std::ostream& out) const;
00031     virtual int  write(const char* dt) const;
00032     virtual int  read(const char* idxfile);
00033     virtual int  read(ibis::fileManager::storage* st);
00034     virtual long append(const char* dt, const char* df, uint32_t nnew);
00035 
          // Keep the base-class overloads of estimate and estimateCost
          // visible; the overloads declared below would otherwise hide them.
00036     using ibis::index::estimate;
00037     using ibis::index::estimateCost;
00038     virtual long evaluate(const ibis::qContinuousRange& expr,
00039                           ibis::bitvector& hits) const;
00040     virtual long evaluate(const ibis::qDiscreteRange& expr,
00041                           ibis::bitvector& hits) const;
00042 
          // Fills lower with the result of evaluate() and leaves upper
          // empty.  The return value of evaluate() is deliberately ignored.
          // NOTE(review): an empty upper presumably tells the caller that
          // lower is exact -- confirm against ibis::index.
00043     virtual void estimate(const ibis::qContinuousRange& expr,
00044                           ibis::bitvector& lower,
00045                           ibis::bitvector& upper) const {
00046         (void) evaluate(expr, lower);
00047         upper.clear();
00048     }
00049     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
          // Clears iffy and returns 0: this class reports no undecidable
          // portion for a continuous range.
00052     virtual float undecidable(const ibis::qContinuousRange&,
00053                               ibis::bitvector& iffy) const {
00054         iffy.clear();
00055         return 0.0;
00056     }
          // Same scheme as the continuous-range version above: lower
          // receives the result of evaluate(), upper is cleared.
00057     virtual void estimate(const ibis::qDiscreteRange& expr,
00058                           ibis::bitvector& lower,
00059                           ibis::bitvector& upper) const {
00060         evaluate(expr, lower);
00061         upper.clear();
00062     }
00063     virtual uint32_t estimate(const ibis::qDiscreteRange&) const;
          // Clears iffy and returns 0 for a discrete range as well.
00064     virtual float undecidable(const ibis::qDiscreteRange&,
00065                               ibis::bitvector& iffy) const {
00066         iffy.clear();
00067         return 0.0;
00068     }
00069 
00070     virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00071     virtual double estimateCost(const ibis::qDiscreteRange& expr) const;
00072 
          // Join estimation against a second relic index (idx2) under a
          // mask.  These two overloads report through the 64-bit bitvectors
          // lower and upper; the second one additionally accepts two qRange
          // pointers.
00075     virtual void estimate(const ibis::relic& idx2,
00076                           const ibis::deprecatedJoin& expr,
00077                           const ibis::bitvector& mask,
00078                           ibis::bitvector64& lower,
00079                           ibis::bitvector64& upper) const;
00080     virtual void estimate(const ibis::relic& idx2,
00081                           const ibis::deprecatedJoin& expr,
00082                           const ibis::bitvector& mask,
00083                           const ibis::qRange* const range1,
00084                           const ibis::qRange* const range2,
00085                           ibis::bitvector64& lower,
00086                           ibis::bitvector64& upper) const;
          // The same two argument lists without output bitvectors; these
          // overloads return a 64-bit integer instead.
00089     virtual int64_t estimate(const ibis::relic& idx2,
00090                              const ibis::deprecatedJoin& expr,
00091                              const ibis::bitvector& mask) const;
00092     virtual int64_t estimate(const ibis::relic& idx2,
00093                              const ibis::deprecatedJoin& expr,
00094                              const ibis::bitvector& mask,
00095                              const ibis::qRange* const range1,
00096                              const ibis::qRange* const range2) const;
00097 
          // Identification of this index class: type code and printable name.
00098     virtual INDEX_TYPE type() const {return RELIC;}
00099     virtual const char* name() const {return "basic";}
00100     // bin boundaries and counts of each bin
00101     virtual void binBoundaries(std::vector<double>& b) const;
00102     virtual void binWeights(std::vector<uint32_t>& b) const;
00103 
00104     virtual long getCumulativeDistribution(std::vector<double>& bds,
00105                                            std::vector<uint32_t>& cts) const;
00106     virtual long getDistribution(std::vector<double>& bds,
00107                                  std::vector<uint32_t>& cts) const;
          // getMin() returns the first entry of vals and getMax() the last.
          // When vals is empty they return DBL_MAX and -DBL_MAX respectively.
00108     virtual double getMin() const {return (vals.empty()?DBL_MAX:vals[0]);}
00109     virtual double getMax() const {return (vals.empty()?-DBL_MAX:vals.back());}
00110     virtual double getSum() const;
00111 
00112     virtual void speedTest(std::ostream& out) const;
          // Non-virtual helpers specific to this class.
00113     long append(const ibis::relic& tail);
00114     long append(const array_t<uint32_t>& ind);
00115     array_t<uint32_t>* keys(const ibis::bitvector& mask) const;
00116 
00117 protected:
00118     // protected member variables
          // NOTE(review): getMin()/getMax() read the first and last entries
          // of vals, so it is presumably kept in ascending order -- confirm.
00119     array_t<double> vals;
00120 
          // Writers taking an integer fdes.
          // NOTE(review): fdes is presumably an open file descriptor and
          // 32/64 the width of the stored offsets -- confirm in the
          // implementation.
00121     int write32(int fdes) const;
00122     int write64(int fdes) const;
00123     // protected member functions
00124     uint32_t locate(const double& val) const;
00125     void     locate(const ibis::qContinuousRange& expr,
00126                     uint32_t& hit0, uint32_t& hit1) const;
00127 
00128     // a dummy constructor
00129     relic() : ibis::index() {}
00130     // free current resources, re-initialized all member variables
00131     virtual void clear();
00132     virtual double computeSum() const;
00133     virtual size_t getSerialSize() const throw();
00134 
          // Two construct() overloads: a template taking an array_t<E>, and
          // one taking a C string that defaults to null.
00137     template <typename E>
00138     void construct(const array_t<E>& arr);
00140     void construct(const char* f = 0);
00141 
00142 private:
00143     // private member functions
          // The join helpers come in four groups that differ in whether
          // they take an output bitvector64 (hits) and whether they take the
          // two qRange pointers.  First group: hits, no ranges.
00144     int64_t equiJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00145                      ibis::bitvector64& hits) const;
00146     int64_t deprecatedJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00147                       const double& delta, ibis::bitvector64& hits) const;
00148     int64_t compJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00149                      const ibis::math::term& delta,
00150                      ibis::bitvector64& hits) const;
00151 
          // Second group: neither hits nor ranges.
00152     int64_t equiJoin(const ibis::relic& idx2,
00153                      const ibis::bitvector& mask) const;
00154     int64_t deprecatedJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00155                       const double& delta) const;
00156     int64_t compJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00157                      const ibis::math::term& delta) const;
00158 
          // Third group: ranges and hits.
00159     int64_t equiJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00160                      const ibis::qRange* const range1,
00161                      const ibis::qRange* const range2,
00162                      ibis::bitvector64& hits) const;
00163     int64_t deprecatedJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00164                       const ibis::qRange* const range1,
00165                       const ibis::qRange* const range2,
00166                       const double& delta, ibis::bitvector64& hits) const;
          // range1 and range2 are accepted but not used: the call is
          // forwarded to the overload without range arguments.
00169     int64_t compJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00170                      const ibis::qRange* const range1,
00171                      const ibis::qRange* const range2,
00172                      const ibis::math::term& delta,
00173                      ibis::bitvector64& hits) const {
00174         return compJoin(idx2, mask, delta, hits);
00175     }
00176 
          // Fourth group: ranges, no hits.
00177     int64_t equiJoin(const ibis::relic& idx2,
00178                      const ibis::bitvector& mask,
00179                      const ibis::qRange* const range1,
00180                      const ibis::qRange* const range2) const;
00181     int64_t deprecatedJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00182                       const ibis::qRange* const range1,
00183                       const ibis::qRange* const range2,
00184                       const double& delta) const;
          // As above, range1 and range2 are ignored and the call is
          // forwarded to the overload without range arguments.
00187     int64_t compJoin(const ibis::relic& idx2, const ibis::bitvector& mask,
00188                      const ibis::qRange* const range1,
00189                      const ibis::qRange* const range2,
00190                      const ibis::math::term& delta) const {
00191         return compJoin(idx2, mask, delta);
00192     }
00193 
          // Copy constructor and copy assignment are private and only
          // declared here.
          // NOTE(review): presumably left undefined to forbid copying.
00194     relic(const relic&);
00195     relic& operator=(const relic&);
00196 }; // ibis::relic
00197 
// ibis::slice -- an index class derived from ibis::relic.  It reports
// itself as the "bit-sliced" index (see name()) with type SLICE.
00199 class ibis::slice : public ibis::relic {
00200 public:
          // The destructor releases resources through clear().
00201     virtual ~slice() {clear();};
          // Both arguments of the first constructor default to null.
00202     slice(const ibis::column* c = 0, const char* f = 0);
00203     slice(const ibis::column* c, ibis::fileManager::storage* st,
00204           size_t start = 8);
00205 
00206     virtual int write(const char* dt) const;
00207     virtual void print(std::ostream& out) const;
00208     virtual int read(const char* idxfile);
00209     virtual int read(ibis::fileManager::storage* st);
00210 
00211     virtual long append(const char* dt, const char* df, uint32_t nnew);
00212 
          // Keep the estimate and estimateCost overloads of ibis::relic
          // visible next to the ones redeclared in this class.
00213     using ibis::relic::estimate;
00214     using ibis::relic::estimateCost;
00215     virtual long evaluate(const ibis::qContinuousRange& expr,
00216                           ibis::bitvector& hits) const;
00217     virtual long evaluate(const ibis::qDiscreteRange& expr,
00218                           ibis::bitvector& hits) const;
00219 
          // Unlike ibis::relic, the lower/upper form of estimate() is
          // defined out of line for this class.
00220     virtual void estimate(const ibis::qContinuousRange& expr,
00221                           ibis::bitvector& lower,
00222                           ibis::bitvector& upper) const;
00223     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00224     virtual INDEX_TYPE type() const {return SLICE;}
00225     virtual const char* name() const {return "bit-sliced";}
00226     // number of records in each bin
00227     virtual void binWeights(std::vector<uint32_t>& b) const;
00228     virtual double getSum() const;
00229 
00230     virtual void speedTest(std::ostream& out) const;
          // The cost does not depend on the query expression.  It is the
          // last entry of offset64 when offset64 has more entries than bits;
          // otherwise the last entry of offset32 under the same test;
          // otherwise 0.
          // NOTE(review): offset64, offset32 and bits are not declared in
          // this header -- presumably inherited from ibis::index; confirm.
00231     virtual double estimateCost(const ibis::qContinuousRange&) const {
00232         double ret;
00233         if (offset64.size() > bits.size())
00234             ret = offset64.back();
00235         else if (offset32.size() > bits.size())
00236             ret = offset32.back();
00237         else
00238             ret = 0.0;
00239         return ret;
00240     }
          // Same computation as the continuous-range overload above; the
          // named parameter expr is not used in the body.
00241     virtual double estimateCost(const ibis::qDiscreteRange& expr) const {
00242         double ret;
00243         if (offset64.size() > bits.size())
00244             ret = offset64.back();
00245         else if (offset32.size() > bits.size())
00246             ret = offset32.back();
00247         else
00248             ret = 0.0;
00249         return ret;
00250     }
00251 
00252 protected:
00253     virtual void clear();
00254     virtual size_t getSerialSize() const throw();
00255 
00256 private:
00257     // private member variables
00258     array_t<uint32_t> cnts; // the counts for each distinct value
00259 
00260     // private member functions
00261     void construct1(const char* f = 0); // uses more temporary storage
00262     void construct2(const char* f = 0); // passes through data twice
00263     void setBit(const uint32_t i, const double val);
00264 
          // These writers are not virtual; they hide the functions with the
          // same names in ibis::relic.
00265     int write32(int fdes) const;
00266     int write64(int fdes) const;
          // Helpers that write into res.
          // NOTE(review): the names suggest ">= b" and "== b" evaluations
          // -- confirm in the implementation.
00267     void evalGE(ibis::bitvector& res, uint32_t b) const;
00268     void evalEQ(ibis::bitvector& res, uint32_t b) const;
00269 
          // Copy constructor and copy assignment are private and only
          // declared here.
          // NOTE(review): presumably left undefined to forbid copying.
00270     slice(const slice&);
00271     slice& operator=(const slice&);
00272 }; // ibis::slice
00273 
// ibis::fade -- an index class derived from ibis::relic.  It reports
// itself as the "multi-level range" index (see name()) with type FADE.
// ibis::sbiad and ibis::sapid derive from it.
00276 class ibis::fade : public ibis::relic {
00277 public:
          // The destructor releases resources through clear().
00278     virtual ~fade() {clear();};
          // nbase defaults to 2.
          // NOTE(review): presumably the number of bases (see the member
          // bases) -- confirm in the implementation.
00279     fade(const ibis::column* c = 0, const char* f = 0,
00280          const uint32_t nbase = 2);
00281     fade(const ibis::column* c, ibis::fileManager::storage* st,
00282          size_t start = 8);
00283 
00284     virtual int write(const char* dt) const;
00285     virtual void print(std::ostream& out) const;
00286     virtual int read(const char* idxfile);
00287     virtual int read(ibis::fileManager::storage* st);
00288 
00289     virtual long append(const char* dt, const char* df, uint32_t nnew);
00290 
          // Keep the estimate and estimateCost overloads of ibis::relic
          // visible next to the ones redeclared in this class.
00291     using ibis::relic::estimate;
00292     using ibis::relic::estimateCost;
00293     virtual long evaluate(const ibis::qContinuousRange& expr,
00294                           ibis::bitvector& hits) const;
00295     virtual long evaluate(const ibis::qDiscreteRange& expr,
00296                           ibis::bitvector& hits) const;
00297 
00298     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00299     virtual INDEX_TYPE type() const {return FADE;}
00300     virtual const char* name() const {return "multi-level range";}
00301 
00302     virtual void speedTest(std::ostream& out) const;
00303     // number of records in each bin
00304     virtual void binWeights(std::vector<uint32_t>& b) const;
00305     virtual double getSum() const;
          // Only the continuous-range cost estimate is overridden; the
          // discrete-range declaration below is commented out, so that
          // overload comes from ibis::relic through the using-declaration
          // above.
00306     virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00307     //virtual double estimateCost(const ibis::qDiscreteRange& expr) const;
00308 
00309 protected:
00310     // protected member variables
00311     array_t<uint32_t> cnts; // the counts for each distinct value
00312     array_t<uint32_t> bases;// the values of the bases used
00313 
00314     // protected member functions to be used by derived classes
00315     int write32(int fdes) const;
00316     int write64(int fdes) const;
00317     virtual void clear();
00318     virtual size_t getSerialSize() const throw();
00319 
00320 private:
00321     // private member functions
00322     void setBit(const uint32_t i, const double val);
00323     void construct1(const char* f = 0, const uint32_t nbase = 2);
00324     void construct2(const char* f = 0, const uint32_t nbase = 2);
00325 
          // Helpers that write into res.
          // NOTE(review): the names suggest "== b", "<= b" and a
          // two-sided test on b0 and b1 -- confirm in the implementation.
00326     void evalEQ(ibis::bitvector& res, uint32_t b) const;
00327     void evalLE(ibis::bitvector& res, uint32_t b) const;
00328     void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const;
00329 
          // Copy constructor and copy assignment are private and only
          // declared here.
          // NOTE(review): presumably left undefined to forbid copying.
00330     fade(const fade&);
00331     fade& operator=(const fade&);
00332 }; // ibis::fade
00333 
// ibis::sbiad -- an index class derived from ibis::fade.  It reports
// itself as the "multi-level interval" index (see name()) with type SBIAD.
// read(), estimate(), clear() and getSerialSize() are not redeclared here,
// so the versions of ibis::fade apply.
00336 class ibis::sbiad : public ibis::fade {
00337 public:
          // The destructor calls clear(), which this class does not
          // override.
00338     virtual ~sbiad() {clear();};
          // Same argument lists and defaults as the ibis::fade constructors.
00339     sbiad(const ibis::column* c = 0, const char* f = 0,
00340           const uint32_t nbase = 2);
00341     sbiad(const ibis::column* c, ibis::fileManager::storage* st,
00342           size_t start = 8);
00343 
00344     virtual int write(const char* dt) const;
00345     virtual void print(std::ostream& out) const;
00346     virtual long append(const char* dt, const char* df, uint32_t nnew);
00347 
00348     virtual long evaluate(const ibis::qContinuousRange& expr,
00349                           ibis::bitvector& hits) const;
00350     virtual long evaluate(const ibis::qDiscreteRange& expr,
00351                           ibis::bitvector& hits) const;
00352 
00353     virtual INDEX_TYPE type() const {return SBIAD;}
00354     virtual const char* name() const {return "multi-level interval";}
00355 
00356     virtual void speedTest(std::ostream& out) const;
          // Both cost estimates are commented out, so this class adds no
          // estimateCost override of its own.
00357     //virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00358     //virtual double estimateCost(const ibis::qDiscreteRange& expr) const;
00359 
00360 private:
00361     // private member functions
          // These have the same names and signatures as the private,
          // non-virtual helpers of ibis::fade; they are separate functions.
00362     void setBit(const uint32_t i, const double val);
00363     void construct1(const char* f = 0, const uint32_t nbase = 2);
00364     void construct2(const char* f = 0, const uint32_t nbase = 2);
00365 
00366     void evalEQ(ibis::bitvector& res, uint32_t b) const;
00367     void evalLE(ibis::bitvector& res, uint32_t b) const;
00368     void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const;
00369 
          // Copy constructor and copy assignment are private and only
          // declared here.
          // NOTE(review): presumably left undefined to forbid copying.
00370     sbiad(const sbiad&);
00371     sbiad& operator=(const sbiad&);
00372 }; // ibis::sbiad
00373 
// ibis::sapid -- an index class derived from ibis::fade.  It reports
// itself as the "multi-level equality" index (see name()) with type SAPID.
// read(), estimate(), clear() and getSerialSize() are not redeclared here,
// so the versions of ibis::fade apply.
00375 class ibis::sapid : public ibis::fade {
00376 public:
          // The destructor calls clear(), which this class does not
          // override.
00377     virtual ~sapid() {clear();};
          // Same argument lists and defaults as the ibis::fade constructors.
00378     sapid(const ibis::column* c = 0, const char* f = 0,
00379           const uint32_t nbase = 2);
00380     sapid(const ibis::column* c, ibis::fileManager::storage* st,
00381           size_t start = 8);
00382 
00383     virtual int write(const char* dt) const;
00384     virtual void print(std::ostream& out) const;
00385     virtual long append(const char* dt, const char* df, uint32_t nnew);
00386 
00387     virtual long evaluate(const ibis::qContinuousRange& expr,
00388                           ibis::bitvector& hits) const;
00389     virtual long evaluate(const ibis::qDiscreteRange& expr,
00390                           ibis::bitvector& hits) const;
00391 
00392     virtual INDEX_TYPE type() const {return SAPID;}
00393     virtual const char* name() const {return "multi-level equality";}
00394 
00395     virtual void speedTest(std::ostream& out) const;
          // Both cost estimates are commented out, so this class adds no
          // estimateCost override of its own.
00396     //virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00397     //virtual double estimateCost(const ibis::qDiscreteRange& expr) const;
00398 
00399 private:
00400     // private member functions
          // These have the same names and signatures as the private,
          // non-virtual helpers of ibis::fade; they are separate functions.
00401     void setBit(const uint32_t i, const double val);
00402     void construct1(const char* f = 0, const uint32_t nbase = 2);
00403     void construct2(const char* f = 0, const uint32_t nbase = 2);
00404 
          // addBits_ has no counterpart in ibis::fade or ibis::sbiad.
          // NOTE(review): presumably combines the entries numbered ib up to
          // ie into res -- confirm in the implementation.
00405     void addBits_(uint32_t ib, uint32_t ie, ibis::bitvector& res) const;
00406     void evalEQ(ibis::bitvector& res, uint32_t b) const;
00407     void evalLE(ibis::bitvector& res, uint32_t b) const;
00408     void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const;
00409 
          // Copy constructor and copy assignment are private and only
          // declared here.
          // NOTE(review): presumably left undefined to forbid copying.
00410     sapid(const sapid&);
00411     sapid& operator=(const sapid&);
00412 }; // ibis::sapid
00413 
// ibis::fuzz -- an index class derived from ibis::relic.  It reports
// itself as the "interval-equality" index (see name()) with type FUZZ.  On
// top of what it inherits, it keeps a set of "coarse" members (cbits,
// cbounds, coffset32, coffset64) and the functions that maintain them.
00418 class ibis::fuzz : public ibis::relic {
00419 public:
          // The destructor releases resources through clear().
00420     virtual ~fuzz() {clear();};
00421     fuzz(const ibis::column* c = 0, const char* f = 0);
00422     fuzz(const ibis::column* c, ibis::fileManager::storage* st,
00423          size_t start = 8);
00424 
00425     virtual int write(const char* dt) const;
00426     virtual void print(std::ostream& out) const;
00427     virtual int read(const char* idxfile);
00428     virtual int read(ibis::fileManager::storage* st);
00429 
00430     virtual long append(const char* dt, const char* df, uint32_t nnew);
00431 
          // Only the continuous-range overloads are redeclared below.  The
          // using-declarations keep the remaining ibis::relic overloads of
          // evaluate, estimate and estimateCost visible.
00432     using ibis::relic::evaluate;
00433     using ibis::relic::estimate;
00434     using ibis::relic::estimateCost;
00435     virtual long evaluate(const ibis::qContinuousRange& expr,
00436                           ibis::bitvector& hits) const;
00437     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00438     virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00439 
00440     virtual INDEX_TYPE type() const {return FUZZ;}
00441     virtual const char* name() const {return "interval-equality";}
00442 
00443 protected:
00444     virtual void clear();
00445     virtual size_t getSerialSize() const throw();
00446 
00447 private:
          // Coarse-level data.  cbits, coffset32 and coffset64 are mutable,
          // so the const activateCoarse() functions below are able to
          // modify them.
          // NOTE(review): ibis::bylt and ibis::zona describe members of the
          // same names as coarse bins whose integer boundaries index into
          // vals and bits -- presumably the same holds here; confirm.
00451     mutable std::vector<ibis::bitvector*> cbits;
00452     array_t<uint32_t> cbounds;
00453     mutable array_t<int32_t> coffset32;
00454     mutable array_t<int64_t> coffset64;
00455 
00456     void coarsen(); // given fine level, add coarse level
00457     void activateCoarse() const; // activate all coarse level bitmaps
00458     void activateCoarse(uint32_t i) const; // activate one bitmap
          // Two-index form.
          // NOTE(review): presumably activates the bitmaps from i up to j
          // -- confirm the exact bounds.
00459     void activateCoarse(uint32_t i, uint32_t j) const;
00460 
          // Reading, writing and clearing of the coarse-level data.
00461     int writeCoarse32(int fdes) const;
00462     int writeCoarse64(int fdes) const;
00463     int readCoarse(const char *fn);
00464     void clearCoarse();
00465 
          // Estimate and evaluate over the coarse level between lo and hi;
          // coarseEvaluate writes its result into res.
          // NOTE(review): whether hi is inclusive is not visible here.
00467     long coarseEstimate(uint32_t lo, uint32_t hi) const;
00469     long coarseEvaluate(uint32_t lo, uint32_t hi, ibis::bitvector& res) const;
00470 
          // Copy constructor and copy assignment are private and only
          // declared here.
          // NOTE(review): presumably left undefined to forbid copying.
00471     fuzz(const fuzz&);
00472     fuzz& operator=(const fuzz&);
00473 }; // ibis::fuzz
00474 
// ibis::bylt -- an index class derived from ibis::relic.  It reports
// itself as the "range-equality" index (see name()) with type BYLT.  The
// fine level lives in the parent class; this class adds the coarse level.
00479 class ibis::bylt : public ibis::relic {
00480 public:
          // The destructor releases resources through clear().
00481     virtual ~bylt() {clear();};
00482     bylt(const ibis::column* c = 0, const char* f = 0);
00483     bylt(const ibis::column* c, ibis::fileManager::storage* st,
00484          size_t start = 8);
00485 
00486     virtual int write(const char* dt) const;
00487     virtual void print(std::ostream& out) const;
00488     virtual int read(const char* idxfile);
00489     virtual int read(ibis::fileManager::storage* st);
00490 
00491     virtual long append(const char* dt, const char* df, uint32_t nnew);
00492 
          // Only the continuous-range overloads are redeclared below.  The
          // using-declarations keep the remaining ibis::relic overloads of
          // evaluate, estimate and estimateCost visible.
00493     using ibis::relic::evaluate;
00494     using ibis::relic::estimate;
00495     using ibis::relic::estimateCost;
00496     virtual long evaluate(const ibis::qContinuousRange& expr,
00497                           ibis::bitvector& hits) const;
00498     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00499     virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00500 
00501     virtual INDEX_TYPE type() const {return BYLT;}
00502     virtual const char* name() const {return "range-equality";}
00503 
00504 protected:
00505     virtual void clear();
00506     virtual size_t getSerialSize() const throw();
00507 
00508 private:
00509     // the fine level is stored in ibis::relic, the parent class, only the
00510     // coarse bins are stored here.  The coarse bins use integer bin
00511     // boundaries; these integers are indices to the array vals and bits.
          // cbits, coffset32 and coffset64 are mutable, so the const
          // activateCoarse() functions below are able to modify them.
00512     mutable std::vector<ibis::bitvector*> cbits;
00513     array_t<uint32_t> cbounds;
00514     mutable array_t<int32_t> coffset32;
00515     mutable array_t<int64_t> coffset64;
00516 
00517     void coarsen(); // given fine level, add coarse level
00518     void activateCoarse() const; // activate all coarse level bitmaps
00519     void activateCoarse(uint32_t i) const; // activate one bitmap
          // Two-index form.
          // NOTE(review): presumably activates the bitmaps from i up to j
          // -- confirm the exact bounds.
00520     void activateCoarse(uint32_t i, uint32_t j) const;
00521 
          // Reading and writing of the coarse-level data.  Unlike
          // ibis::fuzz, this class declares no clearCoarse().
00522     int writeCoarse32(int fdes) const;
00523     int writeCoarse64(int fdes) const;
00524     int readCoarse(const char *fn);
00525 
          // Copy constructor and copy assignment are private and only
          // declared here.
          // NOTE(review): presumably left undefined to forbid copying.
00526     bylt(const bylt&);
00527     bylt& operator=(const bylt&);
00528 }; // ibis::bylt
00529 
// ibis::zona -- an index class derived from ibis::relic.  It reports
// itself as the "equality-equality" index (see name()) with type ZONA.  The
// fine level lives in the parent class; this class adds the coarse level.
00534 class ibis::zona : public ibis::relic {
00535 public:
          // The destructor releases resources through clear().
00536     virtual ~zona() {clear();};
00537     zona(const ibis::column* c = 0, const char* f = 0);
00538     zona(const ibis::column* c, ibis::fileManager::storage* st,
00539          size_t start = 8);
00540 
00541     virtual int write(const char* dt) const;
00542     virtual void print(std::ostream& out) const;
00543     virtual int read(const char* idxfile);
00544     virtual int read(ibis::fileManager::storage* st);
00545 
00546     virtual long append(const char* dt, const char* df, uint32_t nnew);
00547 
          // Only the continuous-range overloads are redeclared below.  The
          // using-declarations keep the remaining ibis::relic overloads of
          // evaluate, estimate and estimateCost visible.
00548     using ibis::relic::evaluate;
00549     using ibis::relic::estimate;
00550     using ibis::relic::estimateCost;
00551     virtual long evaluate(const ibis::qContinuousRange& expr,
00552                           ibis::bitvector& hits) const;
00553     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00554     virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00555 
00556     virtual INDEX_TYPE type() const {return ZONA;}
00557     virtual const char* name() const {return "equality-equality";}
00558 
00559 protected:
00560     virtual void clear();
00561     virtual size_t getSerialSize() const throw();
00562 
00563 private:
00564     // the fine level is stored in ibis::relic, the parent class, only the
00565     // coarse bins are stored here.  The coarse bins use integer bin
00566     // boundaries; these integers are indices to the array vals and bits.
          // cbits, coffset32 and coffset64 are mutable, so the const
          // activateCoarse() functions below are able to modify them.
00567     mutable std::vector<ibis::bitvector*> cbits;
00568     array_t<uint32_t> cbounds;
00569     mutable array_t<int32_t> coffset32;
00570     mutable array_t<int64_t> coffset64;
00571 
00572     void coarsen(); // given fine level, add coarse level
00573     void activateCoarse() const; // activate all coarse level bitmaps
00574     void activateCoarse(uint32_t i) const; // activate one bitmap
          // Two-index form.
          // NOTE(review): presumably activates the bitmaps from i up to j
          // -- confirm the exact bounds.
00575     void activateCoarse(uint32_t i, uint32_t j) const;
00576 
          // Reading and writing of the coarse-level data.  Unlike
          // ibis::fuzz, this class declares no clearCoarse().
00577     int writeCoarse32(int fdes) const;
00578     int writeCoarse64(int fdes) const;
00579     int readCoarse(const char *fn);
00580 
          // Copy constructor and copy assignment are private and only
          // declared here.
          // NOTE(review): presumably left undefined to forbid copying.
00581     zona(const zona&);
00582     zona& operator=(const zona&);
00583 }; // ibis::zona
00584 #endif // IBIS_IRELIC_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive