ibin.h
Go to the documentation of this file.
00001 //File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 //         Lawrence Berkeley National Laboratory
00004 // Copyright 2000-2011 the Regents of the University of California
00005 #ifndef IBIS_IBIN_H
00006 #define IBIS_IBIN_H
00007 
00008 
00009 
00010 
00011 
00012 
00013 #include "index.h"
00014 #if defined(_WIN32) && defined(_MSC_VER)
00015 #pragma warning(disable:4786)   // some identifier longer than 256 characters
00016 #undef min
00017 #undef max
00018 #endif
00019 
// Index class derived from ibis::index.  type() reports BINNING and name()
// reports "bin".  Apart from a few inline one-liners and swap(), only
// declarations are visible here; the definitions live elsewhere.
// NOTE(review): the five-digit number at the start of each line looks like a
// line number from a generated source listing, not C++ -- confirm and strip
// it before feeding this text to a compiler.
00027 class ibis::bin : public ibis::index {
00028 public:
00029 
          // The destructor calls clear().  Inside a destructor the call
          // resolves to ibis::bin::clear(), never to a derived override.
00030     virtual ~bin() {clear();};
00031     bin(const ibis::bin& rhs);
00032     bin(const ibis::column* c=0, const char* f=0);
          // Construct from a storage object; offset defaults to 8.
00033     bin(const ibis::column* c, ibis::fileManager::storage* st,
00034         size_t offset = 8);
          // Two overloads that differ only in the container type of bd
          // (presumably user-supplied bin boundaries -- TODO confirm).
00035     bin(const ibis::column* c, const char* f, const array_t<double>& bd);
00036     bin(const ibis::column* c, const char* f, const std::vector<double>& bd);
00037 
00038     virtual void print(std::ostream& out) const;
00039     virtual int write(const char* dt) const; // write to the named file
00040     virtual int read(const char* idxfile);
00041     virtual int read(ibis::fileManager::storage* st);
00042     virtual long append(const char* dt, const char* df, uint32_t nnew);
00043 
          // The using-declarations keep the ibis::index overloads visible;
          // without them the same-named members declared below would hide
          // every base-class overload.
00044     using ibis::index::evaluate;
00045     using ibis::index::estimate;
00046     using ibis::index::estimateCost;
00047     virtual long evaluate(const ibis::qContinuousRange& expr,
00048                           ibis::bitvector& hits) const;
          // Discrete ranges get no bin-specific treatment: the call is
          // forwarded unchanged to ibis::index::evaluate.
00049     virtual long evaluate(const ibis::qDiscreteRange& expr,
00050                           ibis::bitvector& hits) const {
00051         return ibis::index::evaluate(expr, hits);
00052     }
00053 
00054     virtual void estimate(const ibis::qContinuousRange& expr,
00055                           ibis::bitvector& lower,
00056                           ibis::bitvector& upper) const;
00057     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00058     using ibis::index::undecidable;
00059     virtual float undecidable(const ibis::qContinuousRange& expr,
00060                               ibis::bitvector& iffy) const;
00061 
          // estimate() overloads taking an ibis::deprecatedJoin and no second
          // index.  The void forms write into 64-bit bitvectors lower/upper;
          // the int64_t form returns a number instead.
00063     virtual void estimate(const ibis::deprecatedJoin& expr,
00064                           ibis::bitvector64& lower,
00065                           ibis::bitvector64& upper) const;
00066     virtual void estimate(const ibis::deprecatedJoin& expr,
00067                           const ibis::bitvector& mask,
00068                           ibis::bitvector64& lower,
00069                           ibis::bitvector64& upper) const;
00070     virtual void estimate(const ibis::deprecatedJoin& expr,
00071                           const ibis::bitvector& mask,
00072                           const ibis::qRange* const range1,
00073                           const ibis::qRange* const range2,
00074                           ibis::bitvector64& lower,
00075                           ibis::bitvector64& upper) const;
00076     virtual int64_t estimate(const ibis::deprecatedJoin& expr,
00077                              const ibis::bitvector& mask,
00078                              const ibis::qRange* const range1,
00079                              const ibis::qRange* const range2) const;
00080 
          // The same family of estimate() overloads, with an additional
          // leading ibis::bin argument (idx2) for a second index.
00082     virtual void estimate(const ibis::bin& idx2,
00083                           const ibis::deprecatedJoin& expr,
00084                           ibis::bitvector64& lower,
00085                           ibis::bitvector64& upper) const;
00086     virtual void estimate(const ibis::bin& idx2,
00087                           const ibis::deprecatedJoin& expr,
00088                           const ibis::bitvector& mask,
00089                           ibis::bitvector64& lower,
00090                           ibis::bitvector64& upper) const;
00091     virtual void estimate(const ibis::bin& idx2,
00092                           const ibis::deprecatedJoin& expr,
00093                           const ibis::bitvector& mask,
00094                           const ibis::qRange* const range1,
00095                           const ibis::qRange* const range2,
00096                           ibis::bitvector64& lower,
00097                           ibis::bitvector64& upper) const;
00098     virtual int64_t estimate(const ibis::bin& idx2,
00099                              const ibis::deprecatedJoin& expr) const;
00100     virtual int64_t estimate(const ibis::bin& idx2,
00101                              const ibis::deprecatedJoin& expr,
00102                              const ibis::bitvector& mask) const;
00103     virtual int64_t estimate(const ibis::bin& idx2,
00104                              const ibis::deprecatedJoin& expr,
00105                              const ibis::bitvector& mask,
00106                              const ibis::qRange* const range1,
00107                              const ibis::qRange* const range2) const;
00108 
00109     virtual INDEX_TYPE type() const {return BINNING;}
00110     virtual const char* name() const {return "bin";}
          // Reports nobs-2 when nobs is larger than 2, and 0 otherwise.
00111     virtual uint32_t numBins() const {return (nobs>2?nobs-2:0);}
00112     // bin boundaries and counts of each bin
00113     virtual void binBoundaries(std::vector<double>&) const;
00114     virtual void binWeights(std::vector<uint32_t>&) const;
00115     // expand/contract the boundaries of a range condition
00116     virtual int  expandRange(ibis::qContinuousRange& rng) const;
00117     virtual int  contractRange(ibis::qContinuousRange& rng) const;
00118     virtual void speedTest(std::ostream& out) const;
00119     virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00120     virtual double estimateCost(const ibis::qDiscreteRange& expr) const;
00121 
00122     virtual long getCumulativeDistribution(std::vector<double>& bds,
00123                                            std::vector<uint32_t>& cts) const;
00124     virtual long getDistribution(std::vector<double>& bbs,
00125                                  std::vector<uint32_t>& cts) const;
00126     virtual double getMin() const;
00127     virtual double getMax() const;
00128     virtual double getSum() const;
00129 
          // Non-virtual public helpers.  These read()/append() overloads take
          // different arguments from the virtual ones declared above.
00131     int read(int fdes, size_t offset, const char *fname, const char *header);
00133     long append(const ibis::bin& tail);
00135     long append(const array_t<uint32_t>& ind);
00136     array_t<uint32_t>* indices(const ibis::bitvector& mask) const;
          // checkBin comes in two forms: without and with a mask bitvector.
00139     long checkBin(const ibis::qRange& cmp, uint32_t jbin,
00140                   ibis::bitvector& res) const;
00144     long checkBin(const ibis::qRange& cmp, uint32_t jbin,
00145                   const ibis::bitvector& mask, ibis::bitvector& res) const;
00146 
          // Record stored (by pointer) as the mapped value of granuleMap.
          // It holds four extreme values and three bitvector pointers; the
          // destructor deletes all three pointers.  The copy constructor and
          // copy assignment are declared private, and no definition is
          // visible here.
00163     struct granule {
00164         double minm, maxm; // min and max of values less than the target
00165         double minp, maxp; // min and max of values greater than the target
00166         ibis::bitvector* loce; // presumably rows equal to the target -- TODO confirm
00167         ibis::bitvector* locm; // presumably rows below the target -- TODO confirm
00168         ibis::bitvector* locp; // presumably rows above the target -- TODO confirm
00169 
              // Minima start at DBL_MAX and maxima at -DBL_MAX; all three
              // pointers start out null.
00171         granule() : minm(DBL_MAX), maxm(-DBL_MAX), minp(DBL_MAX),
00172                     maxp(-DBL_MAX), loce(0), locm(0), locp(0) {};
              // delete on a null pointer is a no-op, so unset members are fine.
00174         ~granule() {delete loce; delete locm; delete locp;};
00175     private:
00176         granule(const granule&); // no copy constructor
00177         granule& operator=(const granule&); // no assignment
00178     };
00179     // key = target value
00180     typedef std::map< double, granule* > granuleMap;
00181 
00182 protected:
00183     // member variables shared by all derived classes -- the derived classes
00184     // are allowed to interpret the actual content differently.
00185     uint32_t nobs;              // presumably the number of bins stored; numBins() derives its answer from it
00186     array_t<double> bounds;     // presumably the bin boundaries -- TODO confirm
00187     array_t<double> maxval;     // presumably the largest value seen in each bin -- TODO confirm
00188     array_t<double> minval;     // presumably the smallest value seen in each bin -- TODO confirm
00189 
          // Protected constructor taking an extra nbits argument ahead of the
          // storage object; offset again defaults to 8.
00191     bin(const ibis::column* c, const uint32_t nbits,
00192         ibis::fileManager::storage* st, size_t offset = 8);
00193 
00195     void construct(const char*);
          // binning() overloads: a file name plus boundaries in either
          // container type, or a file name alone.
00197     void binning(const char* f, const std::vector<double>& bd);
00198     void binning(const char* f, const array_t<double>& bd);
00201     void binning(const char* f);
          // Member templates parameterized on a type E.
          // NOTE(review): E is presumably the element type of the column
          // data -- confirm against the definitions.
00204     template <typename E>
00205     void binningT(const char* fname);
00206     template <typename E>
00207     long checkBin0(const ibis::qRange& cmp, uint32_t jbin,
00208                    ibis::bitvector& res) const;
00209     template <typename E>
00210     long checkBin1(const ibis::qRange& cmp, uint32_t jbin,
00211                    const ibis::bitvector& mask, ibis::bitvector& res) const;
00213     template <typename E>
00214     long binOrderT(const char* fname) const;
00215     long binOrder(const char* fname) const;
00216 
00218     void setBoundaries(const char* f);
00219     void setBoundaries(array_t<double>& bnds,
00220                        const ibis::bin& bin0) const;
          // cnt1 and cnt0 are taken by value (const array_t<uint32_t>), not
          // by reference.
00221     void setBoundaries(array_t<double>& bnds,
00222                        const ibis::bin& idx1,
00223                        const array_t<uint32_t> cnt1,
00224                        const array_t<uint32_t> cnt0) const;
00225     // functions to deal with in-memory arrays
00226     template <typename E>
00227     void construct(const array_t<E>& varr);
00228     template <typename E>
00229     void binning(const array_t<E>& varr);
00230     template <typename E>
00231     void binning(const array_t<E>& varr, const array_t<double>& bd);
00232     template <typename E>
00233     void setBoundaries(const array_t<E>& varr);
00234     template <typename E>
00235     void scanAndPartition(const array_t<E>&, unsigned);
00236     template <typename E>
00237     void mapGranules(const array_t<E>&, granuleMap& gmap) const;
00238     void printGranules(std::ostream& out, const granuleMap& gmap) const;
00239     void convertGranules(granuleMap& gmap);
00240 
00242     void readBinBoundaries(const char* name, uint32_t nb);
00244     void scanAndPartition(const char*, unsigned, uint32_t nbins=0);
00246     void addBounds(double lbd, double rbd, uint32_t nbins, uint32_t eqw);
00248     uint32_t parseNbins() const;
00250     unsigned parseScale() const;
00252     unsigned parsePrec() const;
00253 
00254     virtual size_t getSerialSize() const throw();
00257     void divideBitmaps(const std::vector<ibis::bitvector*>& bms,
00258                        std::vector<unsigned>& parts) const;
00259 
00261     virtual double computeSum() const;
00263     virtual void adjustLength(uint32_t nrows);
          // locate() overloads: one maps a single value to an index; the
          // other two fill in candidate (and hit) index pairs for a range.
00265     virtual uint32_t locate(const double& val) const;
00267     virtual void locate(const ibis::qContinuousRange& expr,
00268                         uint32_t& cand0, uint32_t& cand1) const;
00270     virtual void locate(const ibis::qContinuousRange& expr,
00271                         uint32_t& cand0, uint32_t& cand1,
00272                         uint32_t& hit0, uint32_t& hit1) const;
          // Exchange col, nobs, nrows, bounds, maxval, minval and bits with
          // rhs.  Nothing else is exchanged.  The scalar members go through
          // temporaries; the array members use their own swap().
00274     void swap(bin& rhs) {
00275         const ibis::column* c = col;
00276         col = rhs.col;
00277         rhs.col = c;
00278         uint32_t tmp = nobs;
00279         nobs = rhs.nobs;
00280         rhs.nobs = tmp;
              // tmp is reused for the nrows exchange
00281         tmp = nrows;
00282         nrows = rhs.nrows;
00283         rhs.nrows = tmp;
00284         bounds.swap(rhs.bounds);
00285         maxval.swap(rhs.maxval);
00286         minval.swap(rhs.minval);
00287         bits.swap(rhs.bits);
00288     } // swap
00289 
00290     virtual void clear();
00291     int write32(int fptr) const;
00292     int write64(int fptr) const;
00293 
00294 private:
00295     // private member functions
          // Copy assignment is declared private; no definition is visible here.
00296     bin& operator=(const bin&);
00297 
00298     unsigned parseScale(const char*) const;
00299 
00300     void print(std::ostream& out, const uint32_t tot,
00301                const double& lbound, const double& rbound) const;
00302 
          // Private join helpers.  Three names (equiJoin, deprecatedJoin,
          // compJoin) are overloaded on: an optional second index idx2, an
          // optional mask, an optional pair of qRange pointers, and whether
          // results go into two bitvector64 outputs (void) or come back as an
          // int64_t.  The member function named deprecatedJoin is distinct
          // from the ibis::deprecatedJoin type used by estimate() above.
00304     void equiJoin(ibis::bitvector64& lower,
00305                   ibis::bitvector64& iffy) const;
00306     void equiJoin(const ibis::bin& idx2,
00307                   ibis::bitvector64& lower,
00308                   ibis::bitvector64& iffy) const;
00309     void deprecatedJoin(const double& delta,
00310                    ibis::bitvector64& lower,
00311                    ibis::bitvector64& iffy) const;
00312     void deprecatedJoin(const ibis::bin& idx2,
00313                    const double& delta,
00314                    ibis::bitvector64& lower,
00315                    ibis::bitvector64& iffy) const;
00316     void compJoin(const ibis::math::term *expr,
00317                   ibis::bitvector64& lower,
00318                   ibis::bitvector64& iffy) const;
00319     void compJoin(const ibis::bin& idx2,
00320                   const ibis::math::term *expr,
00321                   ibis::bitvector64& lower,
00322                   ibis::bitvector64& iffy) const;
00323     void equiJoin(const ibis::bitvector& mask,
00324                   ibis::bitvector64& lower,
00325                   ibis::bitvector64& iffy) const;
00326     void equiJoin(const ibis::bin& idx2,
00327                   const ibis::bitvector& mask,
00328                   ibis::bitvector64& lower,
00329                   ibis::bitvector64& iffy) const;
00330     void deprecatedJoin(const double& delta,
00331                    const ibis::bitvector& mask,
00332                    ibis::bitvector64& lower,
00333                    ibis::bitvector64& iffy) const;
00334     void deprecatedJoin(const ibis::bin& idx2,
00335                    const double& delta,
00336                    const ibis::bitvector& mask,
00337                    ibis::bitvector64& lower,
00338                    ibis::bitvector64& iffy) const;
00339     void compJoin(const ibis::math::term *expr,
00340                   const ibis::bitvector& mask,
00341                   ibis::bitvector64& lower,
00342                   ibis::bitvector64& iffy) const;
00343     void compJoin(const ibis::bin& idx2,
00344                   const ibis::math::term *expr,
00345                   const ibis::bitvector& mask,
00346                   ibis::bitvector64& lower,
00347                   ibis::bitvector64& iffy) const;
00348 
00349     void equiJoin(const ibis::bitvector& mask,
00350                   const ibis::qRange* const range1,
00351                   const ibis::qRange* const range2,
00352                   ibis::bitvector64& sure,
00353                   ibis::bitvector64& iffy) const;
00354     void deprecatedJoin(const double& delta,
00355                    const ibis::bitvector& mask,
00356                    const ibis::qRange* const range1,
00357                    const ibis::qRange* const range2,
00358                    ibis::bitvector64& sure,
00359                    ibis::bitvector64& iffy) const;
00360     void compJoin(const ibis::math::term *delta,
00361                   const ibis::bitvector& mask,
00362                   const ibis::qRange* const range1,
00363                   const ibis::qRange* const range2,
00364                   ibis::bitvector64& sure,
00365                   ibis::bitvector64& iffy) const;
00366 
00367     int64_t equiJoin(const ibis::bitvector& mask,
00368                      const ibis::qRange* const range1,
00369                      const ibis::qRange* const range2) const;
00370     int64_t deprecatedJoin(const double& delta,
00371                       const ibis::bitvector& mask,
00372                       const ibis::qRange* const range1,
00373                       const ibis::qRange* const range2) const;
00374     int64_t compJoin(const ibis::math::term *delta,
00375                      const ibis::bitvector& mask,
00376                      const ibis::qRange* const range1,
00377                      const ibis::qRange* const range2) const;
00378 
00379     void equiJoin(const ibis::bin& idx2,
00380                   const ibis::bitvector& mask,
00381                   const ibis::qRange* const range1,
00382                   const ibis::qRange* const range2,
00383                   ibis::bitvector64& sure,
00384                   ibis::bitvector64& iffy) const;
00385     void deprecatedJoin(const ibis::bin& idx2,
00386                    const double& delta,
00387                    const ibis::bitvector& mask,
00388                    const ibis::qRange* const range1,
00389                    const ibis::qRange* const range2,
00390                    ibis::bitvector64& sure,
00391                    ibis::bitvector64& iffy) const;
00392     void compJoin(const ibis::bin& idx2,
00393                   const ibis::math::term *delta,
00394                   const ibis::bitvector& mask,
00395                   const ibis::qRange* const range1,
00396                   const ibis::qRange* const range2,
00397                   ibis::bitvector64& sure,
00398                   ibis::bitvector64& iffy) const;
00399 
00400     int64_t equiJoin(const ibis::bin& idx2,
00401                      const ibis::bitvector& mask,
00402                      const ibis::qRange* const range1,
00403                      const ibis::qRange* const range2) const;
00404     int64_t deprecatedJoin(const ibis::bin& idx2,
00405                       const double& delta,
00406                       const ibis::bitvector& mask,
00407                       const ibis::qRange* const range1,
00408                       const ibis::qRange* const range2) const;
00409     int64_t compJoin(const ibis::bin& idx2,
00410                      const ibis::math::term *delta,
00411                      const ibis::bitvector& mask,
00412                      const ibis::qRange* const range1,
00413                      const ibis::qRange* const range2) const;
00414 
00415     // need these friendships to access the protected member variables
00416     friend class ibis::mesa;
00417     friend class ibis::range;
00418     friend class ibis::ambit;
00419     friend class ibis::pack;
00420     friend class ibis::pale;
00421     friend class ibis::zone;
00422     friend class ibis::mesh;
00423     friend class ibis::band;
00424 }; // ibis::bin
00425 
// Index class derived from ibis::bin.  type() reports RANGE and name()
// reports "range".  It adds two doubles (max1, min1) to the state inherited
// from ibis::bin and can be built from an existing ibis::bin through an
// explicit converting constructor.
00429 class ibis::range : public ibis::bin {
00430 public:
00431 
00432     virtual ~range() {};
00433     range(const ibis::column* c=0, const char* f=0);
          // Construct from a storage object; offset defaults to 8.
00434     range(const ibis::column* c, ibis::fileManager::storage* st,
00435           size_t offset = 8);
00436     explicit range(const ibis::bin& rhs); // convert a bin to a range
00437 
00438     virtual int read(const char* idxfile);
00439     virtual int read(ibis::fileManager::storage* st);
00440     virtual int write(const char* dt) const; // write to the named file
00441     virtual void print(std::ostream& out) const;
00442     virtual long append(const char* dt, const char* df, uint32_t nnew);
00443 
00444     virtual long evaluate(const ibis::qContinuousRange& expr,
00445                           ibis::bitvector& hits) const;
          // Discrete ranges are forwarded unchanged to ibis::index::evaluate.
00446     virtual long evaluate(const ibis::qDiscreteRange& expr,
00447                           ibis::bitvector& hits) const {
00448         return ibis::index::evaluate(expr, hits);
00449     }
00450 
          // Keep the ibis::bin overloads visible next to the ones redeclared
          // in this class.
00451     using ibis::bin::estimate;
00452     using ibis::bin::estimateCost;
00453     virtual void estimate(const ibis::qContinuousRange& expr,
00454                           ibis::bitvector& lower,
00455                           ibis::bitvector& upper) const;
00456     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00457     using ibis::bin::undecidable;
00458     virtual float undecidable(const ibis::qContinuousRange& expr,
00459                               ibis::bitvector& iffy) const;
00460 
00461     virtual INDEX_TYPE type() const {return RANGE;}
00462     virtual const char* name() const {return "range";}
          // Reports nobs-1 when nobs is larger than 1, and 0 otherwise
          // (ibis::bin::numBins uses nobs-2 with a threshold of 2).
00463     virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
00464     // bin boundaries and counts of each bin
00465     virtual void binBoundaries(std::vector<double>&) const;
00466     virtual void binWeights(std::vector<uint32_t>&) const;
00467     // expand/contract the boundaries of a range condition
00468     virtual int  expandRange(ibis::qContinuousRange& range) const;
00469     virtual int  contractRange(ibis::qContinuousRange& range) const;
00470     virtual double getMax() const;
00471     virtual double getSum() const;
00472 
          // Non-virtual overloads; the last parameter of read() is unnamed.
00473     int read(int fdes, size_t offset, const char *fname, const char *);
00474     long append(const ibis::range& tail);
00475     virtual void speedTest(std::ostream& out) const;
00476 
00477 protected:
00478     // protected member variables
00479     double max1, min1; // the min and max of the bin not explicitly tracked
00480 
00481     // have to have its own locate functions because a bin is not explicitly
00482     // stored
          // The single-value form simply forwards to ibis::bin::locate; only
          // the two range forms below have definitions outside this header.
00483     virtual uint32_t locate(const double& val) const {
00484         return ibis::bin::locate(val);
00485     }
00486     virtual void locate(const ibis::qContinuousRange& expr,
00487                         uint32_t& cand0, uint32_t& cand1) const;
00488     virtual void locate(const ibis::qContinuousRange& expr,
00489                         uint32_t& cand0, uint32_t& cand1,
00490                         uint32_t& hit0, uint32_t& hit1) const;
00491     virtual double computeSum() const;
          // construct(): from a file name alone, or from a file name plus an
          // array of doubles (presumably bin boundaries -- TODO confirm).
00493     void construct(const char*);
00495     void construct(const char* f, const array_t<double>& bd);
00496     virtual size_t getSerialSize() const throw();
00497 
00498 private:
00499     // private member functions
00500     int write32(int fptr) const; // write to the given stream
00501     int write64(int fptr) const; // write to the given stream
00502     void print(std::ostream& out, const uint32_t tot, const double& lbound,
00503                const double& rbound) const;
00504 
00505     friend class ibis::pale; // pale uses ibis::range
00506 }; // ibis::range
00507 
// Index class derived from ibis::bin.  type() reports MESA and name()
// reports "interval".  It declares no data members of its own.  The copy
// constructor and copy assignment are declared private, and no definition is
// visible here.
00510 class ibis::mesa : public ibis::bin {
00511 public:
00512     virtual ~mesa() {};
00513     mesa(const ibis::column* c=0, const char* f=0);
          // Construct from a storage object; offset defaults to 8.
00514     mesa(const ibis::column* c, ibis::fileManager::storage* st,
00515          size_t offset = 8);
00516     explicit mesa(const ibis::bin& rhs); // convert a bin to a mesa
00517 
00518     virtual void print(std::ostream& out) const;
00519     virtual int write(const char* dt) const; // write to the named file
00520     virtual long append(const char* dt, const char* df, uint32_t nnew);
00521 
00522     virtual long evaluate(const ibis::qContinuousRange& expr,
00523                           ibis::bitvector& hits) const;
          // Discrete ranges are forwarded unchanged to ibis::index::evaluate.
00524     virtual long evaluate(const ibis::qDiscreteRange& expr,
00525                           ibis::bitvector& hits) const {
00526         return ibis::index::evaluate(expr, hits);
00527     }
00528 
          // Keep the ibis::bin overloads visible next to the ones redeclared
          // in this class.
00529     using ibis::bin::estimate;
00530     using ibis::bin::estimateCost;
00531     virtual void estimate(const ibis::qContinuousRange& expr,
00532                           ibis::bitvector& lower,
00533                           ibis::bitvector& upper) const;
00534     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00535     using ibis::bin::undecidable;
00536     virtual float undecidable(const ibis::qContinuousRange& expr,
00537                               ibis::bitvector& iffy) const;
00538 
00539     virtual INDEX_TYPE type() const {return MESA;}
00540     virtual const char* name() const {return "interval";}
          // Reports nobs-2 when nobs is larger than 2, and 0 otherwise.
00541     virtual uint32_t numBins() const {return (nobs>2?nobs-2:0);}
00542     // bin boundaries and counts of each bin
00543     virtual void binBoundaries(std::vector<double>&) const;
00544     virtual void binWeights(std::vector<uint32_t>&) const;
00545     virtual double getSum() const;
00546 
00547     virtual void speedTest(std::ostream& out) const;
          // Non-virtual overload taking another mesa.
00548     long append(const ibis::mesa& tail);
00549 
00550 protected:
00551     virtual double computeSum() const;
00553     void construct(const char*);
00554     virtual size_t getSerialSize() const throw();
00555 
00556 private:
00557     // private member functions
00558 
          // Copying is not offered to users of the class.
00559     mesa(const mesa&);
00560     mesa& operator=(const mesa&);
00561 }; // ibis::mesa
00562 
// Index class derived from ibis::bin.  type() reports AMBIT and name()
// reports "range-range".  Besides the inherited state it holds two doubles
// (max1, min1) and a vector of pointers to further ibis::ambit objects (sub).
// The copy constructor and copy assignment are declared private.
00565 class ibis::ambit : public ibis::bin {
00566 public:
          // The destructor calls clear(), which here resolves to
          // ibis::ambit::clear().
00567     virtual ~ambit() {clear();};
00568     ambit(const ibis::column* c=0, const char* f=0);
          // Construct from a storage object; offset defaults to 8.
00569     ambit(const ibis::column* c, ibis::fileManager::storage* st,
00570           size_t offset = 8);
00571     explicit ambit(const ibis::bin& rhs); // convert from a ibis::bin
00572 
00573     virtual int read(const char* idxfile);
00574     virtual int read(ibis::fileManager::storage* st);
00575     virtual int write(const char* dt) const;
00576     virtual void print(std::ostream& out) const;
00577     virtual long append(const char* dt, const char* df, uint32_t nnew);
00578 
00579     virtual long evaluate(const ibis::qContinuousRange& expr,
00580                           ibis::bitvector& hits) const;
          // Discrete ranges are forwarded unchanged to ibis::index::evaluate.
00581     virtual long evaluate(const ibis::qDiscreteRange& expr,
00582                           ibis::bitvector& hits) const {
00583         return ibis::index::evaluate(expr, hits);
00584     }
00585 
          // Keep the ibis::bin overloads visible.  Only the two-bitvector
          // form of estimate() is redeclared in this class.
00586     using ibis::bin::estimate;
00587     using ibis::bin::estimateCost;
00588     virtual void estimate(const ibis::qContinuousRange& expr,
00589                           ibis::bitvector& lower,
00590                           ibis::bitvector& upper) const;
00591     using ibis::bin::undecidable;
00592     virtual float undecidable(const ibis::qContinuousRange& expr,
00593                               ibis::bitvector& iffy) const;
00594 
00595     virtual INDEX_TYPE type() const {return AMBIT;}
00596     virtual const char* name() const {return "range-range";}
          // Reports nobs-1 when nobs is larger than 1, and 0 otherwise.
00597     virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
00598     // bin boundaries and counts of each bin
00599     virtual void binBoundaries(std::vector<double>&) const;
00600     virtual void binWeights(std::vector<uint32_t>&) const;
00601     virtual void adjustLength(uint32_t nrows);
00602     virtual double getSum() const;
00603 
00604     virtual void speedTest(std::ostream& out) const;
          // Non-virtual overload taking another ambit.
00605     long append(const ibis::ambit& tail);
00606 
00607 protected:
00608     virtual double computeSum() const;
00609     virtual void clear();
00611     void construct(const char* f, const array_t<double>& bd);
00612     virtual size_t getSerialSize() const throw();
00613 
00614 private:
00615     // min and max of range nobs (the one that is not explicitly recorded)
00616     double max1, min1;
          // NOTE(review): ownership of the pointed-to objects is not visible
          // in this header; presumably clear() releases them -- confirm.
00617     std::vector<ibis::ambit*> sub;
00618 
00619     // private member functions
00620     int write32(int fptr) const;
00621     int write64(int fptr) const;
00622     int read(int fdes, size_t offset, const char *fn, const char *header);
00623     void print(std::ostream& out, const uint32_t tot, const double& lbound,
00624                const double& rbound) const;
00625 
          // Copying is not offered to users of the class.
00626     ambit(const ambit&);
00627     ambit& operator=(const ambit&);
00628 }; // ibis::ambit
00629 
// Index class derived from ibis::bin.  type() reports PALE and name()
// reports "equality-range".  It holds a vector of pointers to ibis::range
// objects (sub).  No constructor taking a column and a file name is declared:
// an object is built from a storage object or from an existing ibis::bin.
// The copy constructor and copy assignment are declared private.
00632 class ibis::pale : public ibis::bin {
00633 public:
          // The destructor calls clear(), which here resolves to
          // ibis::pale::clear().
00634     virtual ~pale() {clear();};
          // Construct from a storage object; offset defaults to 8.
00635     pale(const ibis::column* c, ibis::fileManager::storage* st,
00636          size_t offset = 8);
00637     explicit pale(const ibis::bin& rhs); // convert from a ibis::bin
00638 
00639     virtual int read(const char* idxfile);
00640     virtual int read(ibis::fileManager::storage* st);
00641     virtual int write(const char* dt) const;
00642     virtual void print(std::ostream& out) const;
00643     virtual long append(const char* dt, const char* df, uint32_t nnew);
00644 
00645     virtual long evaluate(const ibis::qContinuousRange& expr,
00646                           ibis::bitvector& hits) const;
          // Discrete ranges are forwarded unchanged to ibis::index::evaluate.
00647     virtual long evaluate(const ibis::qDiscreteRange& expr,
00648                           ibis::bitvector& hits) const {
00649         return ibis::index::evaluate(expr, hits);
00650     }
00651 
          // Keep the ibis::bin overloads visible.  Only the two-bitvector
          // form of estimate() is redeclared in this class.
00652     using ibis::bin::estimate;
00653     virtual void estimate(const ibis::qContinuousRange& expr,
00654                           ibis::bitvector& lower,
00655                           ibis::bitvector& upper) const;
00656     using ibis::bin::undecidable;
00657     virtual float undecidable(const ibis::qContinuousRange& expr,
00658                               ibis::bitvector& iffy) const;
00659 
00660     virtual INDEX_TYPE type() const {return PALE;}
00661     virtual const char* name() const {return "equality-range";}
          // Reports nobs-1 when nobs is larger than 1, and 0 otherwise.
00662     virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
00663     // bin boundaries and counts of each bin
00664     virtual void binBoundaries(std::vector<double>&) const;
00665     virtual void binWeights(std::vector<uint32_t>&) const;
00666     virtual void adjustLength(uint32_t nrows);
00667 
00668     virtual void speedTest(std::ostream& out) const;
          // Non-virtual overload taking another pale.
00669     long append(const ibis::pale& tail);
00670 
00671 protected:
00672     virtual void clear();
00673     virtual size_t getSerialSize() const throw();
00674 
00675 private:
00676     // private member variables
          // NOTE(review): ownership of the pointed-to objects is not visible
          // in this header; presumably clear() releases them -- confirm.
00677     std::vector<ibis::range*> sub;
00678 
00679     // private member functions
00680     int write32(int fptr) const;
00681     int write64(int fptr) const;
00682 
          // Copying is not offered to users of the class.
00683     pale(const pale&);
00684     pale& operator=(const pale&);
00685 }; // ibis::pale
00686 
// Index class derived from ibis::bin.  type() reports PACK and name()
// reports "range-equality".  It holds two doubles (max1, min1) and a vector
// of pointers to ibis::bin objects (sub).  No constructor taking a column and
// a file name is declared: an object is built from a storage object or from
// an existing ibis::bin.  The copy constructor and copy assignment are
// declared private.
00689 class ibis::pack : public ibis::bin {
00690 public:
          // The destructor calls clear(), which here resolves to
          // ibis::pack::clear().
00691     virtual ~pack() {clear();};
          // Construct from a storage object; offset defaults to 8.
00692     pack(const ibis::column* c, ibis::fileManager::storage* st,
00693           size_t offset = 8);
00694     explicit pack(const ibis::bin& rhs); // convert from a ibis::bin
00695 
00696     virtual int read(const char* idxfile);
00697     virtual int read(ibis::fileManager::storage* st);
00698     virtual int write(const char* dt) const;
00699     virtual void print(std::ostream& out) const;
00700     virtual long append(const char* dt, const char* df, uint32_t nnew);
00701 
00702     virtual long evaluate(const ibis::qContinuousRange& expr,
00703                           ibis::bitvector& hits) const;
          // Discrete ranges are forwarded unchanged to ibis::index::evaluate.
00704     virtual long evaluate(const ibis::qDiscreteRange& expr,
00705                           ibis::bitvector& hits) const {
00706         return ibis::index::evaluate(expr, hits);
00707     }
00708 
          // Keep the ibis::bin overloads visible.  Only the two-bitvector
          // form of estimate() is redeclared in this class.
00709     using ibis::bin::estimate;
00710     virtual void estimate(const ibis::qContinuousRange& expr,
00711                           ibis::bitvector& lower,
00712                           ibis::bitvector& upper) const;
00713     using ibis::bin::undecidable;
00714     virtual float undecidable(const ibis::qContinuousRange& expr,
00715                               ibis::bitvector& iffy) const;
00716 
00717     virtual INDEX_TYPE type() const {return PACK;}
00718     virtual const char* name() const {return "range-equality";}
          // Reports nobs-1 when nobs is larger than 1, and 0 otherwise.
00719     virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
00720     // bin boundaries and counts of each bin
00721     virtual void binBoundaries(std::vector<double>&) const;
00722     virtual void binWeights(std::vector<uint32_t>&) const;
00723     virtual void adjustLength(uint32_t nrows);
00724     virtual double getSum() const;
00725 
00726     virtual void speedTest(std::ostream& out) const;
          // Non-virtual overload taking another pack.
00727     long append(const ibis::pack& tail);
00728 
00729 protected:
00730     virtual double computeSum() const;
00731     virtual void clear();
00732     virtual size_t getSerialSize() const throw();
00733 
00734 private:
00735     // private member variables
00736     // min and max of range nobs (the one that is not explicitly recorded)
00737     double max1, min1;
          // NOTE(review): ownership of the pointed-to objects is not visible
          // in this header; presumably clear() releases them -- confirm.
00738     std::vector<ibis::bin*> sub;
00739 
00740     // private member functions
00741     int write32(int fptr) const;
00742     int write64(int fptr) const;
00743 
          // Copying is not offered to users of the class.
00744     pack(const pack&);
00745     pack& operator=(const pack&);
00746 }; // ibis::pack
00747 
00750 class ibis::zone : public ibis::bin {
      // A binned index derived from ibis::bin.  It reports its type as ZONE
      // and its name as "equality-equality", and keeps an additional vector
      // of pointers to ibis::bin objects (sub).
      // NOTE(review): sub presumably holds the finer-level bins nested
      // inside the bins of this index -- confirm against the implementation.
      // The copy constructor and the assignment operator are declared
      // private at the bottom of the class.
00751 public:
          // The destructor calls clear().
00752     virtual ~zone() {clear();};
          // Construct from a storage object; offset defaults to 8.
00753     zone(const ibis::column* c, ibis::fileManager::storage* st,
00754          size_t offset = 8);
00755     explicit zone(const ibis::bin& rhs); // convert from a ibis::bin
00756 
          // Reading, writing, printing and appending.
00757     virtual int read(const char* idxfile);
00758     virtual int read(ibis::fileManager::storage* st);
00759     virtual int write(const char* dt) const;
00760     virtual void print(std::ostream& out) const;
00761     virtual long append(const char* dt, const char* df, uint32_t nnew);
00762 
          // Evaluate a continuous range condition; the result is placed in hits.
00763     virtual long evaluate(const ibis::qContinuousRange& expr,
00764                           ibis::bitvector& hits) const;
          // Discrete range conditions are forwarded to ibis::index::evaluate.
00765     virtual long evaluate(const ibis::qDiscreteRange& expr,
00766                           ibis::bitvector& hits) const {
00767         return ibis::index::evaluate(expr, hits);
00768     }
00769 
          // The using-declarations keep the overloads inherited from ibis::bin
          // visible; without them the declarations of the same name below
          // would hide them.
00770     using ibis::bin::estimate;
00771     virtual void estimate(const ibis::qContinuousRange& expr,
00772                           ibis::bitvector& lower,
00773                           ibis::bitvector& upper) const;
00774     using ibis::bin::undecidable;
00775     virtual float undecidable(const ibis::qContinuousRange& expr,
00776                               ibis::bitvector& iffy) const;
00777 
00778     virtual INDEX_TYPE type() const {return ZONE;}
00779     virtual const char* name() const {return "equality-equality";}
          // Returns nobs-1 when nobs is larger than 1, and 0 otherwise.  The
          // variable nobs is not declared in this class.
00780     virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
00781     // bin boundaries and counts of each bin
00782     virtual void binBoundaries(std::vector<double>&) const;
00783     virtual void binWeights(std::vector<uint32_t>&) const;
00784     virtual void adjustLength(uint32_t nrows);
00785 
00786     virtual void speedTest(std::ostream& out) const;
          // Non-virtual overload of append taking another ibis::zone object.
00787     long append(const ibis::zone& tail);
00788 
00789 protected:
00790     virtual void clear();
00791     virtual size_t getSerialSize() const throw();
00792 
00793 private:
00794     // private member variable
00795     std::vector<ibis::bin*> sub;
00796 
00797     // private member functions
          // NOTE(review): presumably the 32-bit and 64-bit offset variants of
          // write, with fptr being an open file descriptor -- confirm.
00798     int write32(int fptr) const;
00799     int write64(int fptr) const;
00800 
          // Declared private: copy construction and assignment are not
          // available to users of this class.
00801     zone(const zone&);
00802     zone& operator=(const zone&);
00803 }; // ibis::zone
00804 
00810 class ibis::fuge : public ibis::bin {
      // A binned index derived from ibis::bin.  It reports its type as FUGE
      // and its name as "interval-equality".  On top of the inherited
      // (fine level) bins it maintains a coarse level described by the
      // private members cbits, cbounds, coffset32 and coffset64.
      // The copy constructor and the assignment operator are declared
      // private at the bottom of the class.
00811 public:
          // The destructor calls clear().
00812     virtual ~fuge() {clear();};
          // Construct from a storage object; offset defaults to 8.
00813     fuge(const ibis::column* c, ibis::fileManager::storage* st,
00814          size_t offset = 8);
00815     fuge(const ibis::column*, const char*);
00816     explicit fuge(const ibis::bin& rhs); // convert from a ibis::bin
00817 
          // Reading, printing, writing and appending.
00818     virtual int read(const char* idxfile);
00819     virtual int read(ibis::fileManager::storage* st);
00820     virtual void print(std::ostream& out) const;
00821     virtual int write(const char* dt) const;
00822     virtual long append(const char* dt, const char* df, uint32_t nnew);
00823 
          // Evaluate a continuous range condition; the result is placed in hits.
00824     virtual long evaluate(const ibis::qContinuousRange& expr,
00825                           ibis::bitvector& hits) const;
          // Discrete range conditions are forwarded to ibis::index::evaluate.
00826     virtual long evaluate(const ibis::qDiscreteRange& expr,
00827                           ibis::bitvector& hits) const {
00828         return ibis::index::evaluate(expr, hits);
00829     }
00830 
          // Keep the estimate overloads inherited from ibis::bin visible next
          // to the override declared below.
00831     using ibis::bin::estimate;
00832     virtual void estimate(const ibis::qContinuousRange& expr,
00833                           ibis::bitvector& lower,
00834                           ibis::bitvector& upper) const;
00835 
00836     virtual INDEX_TYPE type() const {return FUGE;}
00837     virtual const char* name() const {return "interval-equality";}
00838     virtual void adjustLength(uint32_t nrows);
00839 
          // Non-virtual overload of append taking another ibis::fuge object.
00840     long append(const ibis::fuge& tail);
00841 
00842 protected:
          // Releases the coarse level first, then lets ibis::bin::clear handle
          // the rest.
00843     virtual void clear() {clearCoarse(); ibis::bin::clear();}
00844     virtual size_t getSerialSize() const throw();
00845 
00846 private:
00847     // private member variable
          // cbits, coffset32 and coffset64 are mutable, so the const
          // activateCoarse functions are able to modify them.
          // NOTE(review): presumably cbits holds the coarse level bitmaps,
          // cbounds the fine bin numbers delimiting each coarse bin, and
          // coffset32/coffset64 the positions of the serialized coarse
          // bitmaps -- confirm against the implementation.
00848     mutable std::vector<ibis::bitvector*> cbits;
00849     array_t<uint32_t> cbounds;
00850     mutable array_t<int32_t> coffset32;
00851     mutable array_t<int64_t> coffset64;
00852 
00853     void coarsen(); // given fine level, add coarse level
00854     void activateCoarse() const; // activate all coarse level bitmaps
00855     void activateCoarse(uint32_t i) const; // activate one bitmap
          // NOTE(review): presumably activates a range of coarse bitmaps
          // delimited by i and j -- confirm which ends are inclusive.
00856     void activateCoarse(uint32_t i, uint32_t j) const;
00857 
          // Input and output of the coarse level.
          // NOTE(review): fdes is presumably an open file descriptor and the
          // 32/64 suffix the width of the offsets written -- confirm.
00858     int writeCoarse32(int fdes) const;
00859     int writeCoarse64(int fdes) const;
00860     int readCoarse(const char *fn);
00861     void clearCoarse();
00862 
          // NOTE(review): lo and hi presumably designate a range of fine
          // level bins; the meaning of the return values is not visible
          // here -- confirm.
00864     long coarseEstimate(uint32_t lo, uint32_t hi) const;
00866     long coarseEvaluate(uint32_t lo, uint32_t hi, ibis::bitvector& res) const;
00867 
          // Declared private: copy construction and assignment are not
          // available to users of this class.
00868     fuge(const fuge&);
00869     fuge& operator=(const fuge&);
00870 }; // ibis::fuge
00871 
00875 class ibis::egale : public ibis::bin {
      // A binned index derived from ibis::bin.  It reports its type as EGALE
      // and its name as "MCBin".  Beyond what it inherits, it records the
      // number of bitvectors (nbits), the number of bases (nbases), the
      // number of records in each bin (cnts) and the sizes of the bases
      // (bases).  It also serves as the base class of ibis::moins and
      // ibis::entre.
      // The copy constructor and the assignment operator are declared
      // private at the bottom of the class.
00876 public:
          // The destructor calls clear().
00877     virtual ~egale() {clear();};
          // All three arguments have default values, which makes this the
          // default constructor as well; nbase defaults to 2.
00878     egale(const ibis::column* c = 0, const char* f = 0,
00879           const uint32_t nbase = 2);
          // Construct from a storage object; offset defaults to 8.
00880     egale(const ibis::column* c, ibis::fileManager::storage* st,
00881           size_t offset = 8);
          // Construct from an ibis::bin object; nbase defaults to 2.  This
          // constructor is not marked explicit.
00882     egale(const ibis::bin& rhs, const uint32_t nbase = 2);
00883 
          // Reading, writing, printing and appending.
00884     virtual int read(const char* idxfile);
00885     virtual int read(ibis::fileManager::storage* st);
00886     virtual int write(const char* dt) const;
00887     virtual void print(std::ostream& out) const;
00888     virtual long append(const char* dt, const char* df, uint32_t nnew);
00889 
          // Evaluate a continuous range condition; the result is placed in hits.
00890     virtual long evaluate(const ibis::qContinuousRange& expr,
00891                           ibis::bitvector& hits) const;
          // Discrete range conditions are forwarded to ibis::index::evaluate.
00892     virtual long evaluate(const ibis::qDiscreteRange& expr,
00893                           ibis::bitvector& hits) const {
00894         return ibis::index::evaluate(expr, hits);
00895     }
00896 
          // The using-declarations keep the overloads inherited from ibis::bin
          // visible; without them the declarations of the same name below
          // would hide them.
00897     using ibis::bin::estimate;
00898     virtual void estimate(const ibis::qContinuousRange& expr,
00899                           ibis::bitvector& lower,
00900                           ibis::bitvector& upper) const;
00901     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00902     using ibis::bin::undecidable;
00903     virtual float undecidable(const ibis::qContinuousRange& expr,
00904                               ibis::bitvector& iffy) const;
00905 
00906     virtual INDEX_TYPE type() const {return EGALE;}
00907     virtual const char* name() const {return "MCBin";}
00908     // bin boundaries and counts of each bin
00909     virtual void binBoundaries(std::vector<double>& b) const;
00910     virtual void binWeights(std::vector<uint32_t>& b) const;
00911     virtual double getSum() const;
00912 
00913     virtual void speedTest(std::ostream& out) const;
          // Non-virtual overloads of append.
          // NOTE(review): ind presumably carries the bin number of each new
          // row -- confirm against the implementation.
00914     long append(const ibis::egale& tail);
00915     long append(const array_t<uint32_t>& ind);
00916 
00917 protected:
00918     // protected member variables
00919     uint32_t nbits;             // number of bitvectors, (size of bits)
00920     uint32_t nbases;            // size of array bases
00921     array_t<uint32_t> cnts;     // number of records in each bin
00922     array_t<uint32_t> bases;    // the size of the bases used
00923 
00924     // protected member functions
          // Constructor available to derived classes only.  Note that bs is
          // taken by const value while bd is taken by const reference.
00925     egale(const ibis::column* c, const char* f, const array_t<double>& bd,
00926           const array_t<uint32_t> bs);
          // NOTE(review): presumably combines the bitmaps of bins ib up to ie
          // into res -- confirm whether ie is inclusive.
00927     void addBits_(uint32_t ib, uint32_t ie, ibis::bitvector& res) const;
00928     virtual double computeSum() const;
          // Empties cnts and bases, then lets ibis::bin::clear handle the rest.
          // nbits and nbases are left untouched here.
00929     virtual void clear() {
00930         cnts.clear(); bases.clear();
00931         ibis::bin::clear();
00932     }
00933 
          // NOTE(review): presumably the 32-bit and 64-bit offset variants of
          // write, with fdes being an open file descriptor -- confirm.
00934     int write32(int fdes) const;
00935     int write64(int fdes) const;
00936     void construct(const char* f);
00937     virtual size_t getSerialSize() const throw();
00938 
00939 private:
00940     // private member functions
00941     void setBit(const uint32_t i, const double val);
00942     void convert();
00943 
          // NOTE(review): judging by the names, these compute the rows in bin
          // b (EQ), in bins up to b (LE), and in a range of bins delimited by
          // b0 and b1 (LL) -- confirm the exact boundaries.
00944     void evalEQ(ibis::bitvector& res, uint32_t b) const;
00945     void evalLE(ibis::bitvector& res, uint32_t b) const;
00946     void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const;
00947 
          // Declared private: copy construction and assignment are not
          // available to users of this class.
00948     egale(const egale&);
00949     egale& operator=(const egale&);
00950 }; // ibis::egale
00951 
00955 class ibis::moins : public ibis::egale {
      // An index derived from ibis::egale.  It reports its type as MOINS and
      // its name as "MCBRange".  It declares no member variables of its own;
      // read is not redeclared here, so the versions from ibis::egale apply.
      // The copy constructor and the assignment operator are declared
      // private at the bottom of the class.
00956 public:
          // Writing, printing and appending.
00957     virtual int write(const char* dt) const;
00958     virtual void print(std::ostream& out) const;
00959     virtual long append(const char* dt, const char* df, uint32_t nnew);
00960 
          // Evaluate a continuous range condition; the result is placed in hits.
00961     virtual long evaluate(const ibis::qContinuousRange& expr,
00962                           ibis::bitvector& hits) const;
          // Discrete range conditions are forwarded to ibis::index::evaluate.
00963     virtual long evaluate(const ibis::qDiscreteRange& expr,
00964                           ibis::bitvector& hits) const {
00965         return ibis::index::evaluate(expr, hits);
00966     }
00967 
          // Keep the estimate overloads visible in ibis::egale accessible next
          // to the two overrides declared below.
00968     using ibis::egale::estimate;
00969     virtual void estimate(const ibis::qContinuousRange& expr,
00970                           ibis::bitvector& lower,
00971                           ibis::bitvector& upper) const;
00972     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00973     virtual INDEX_TYPE type() const {return MOINS;}
00974     virtual const char* name() const {return "MCBRange";}
00975 
          // The destructor calls clear().
00976     virtual ~moins() {clear();};
          // All three arguments have default values, which makes this the
          // default constructor as well; nbase defaults to 2.
00977     moins(const ibis::column* c = 0, const char* f = 0,
00978           const uint32_t nbase = 2);
          // Construct from a storage object; offset defaults to 8.
00979     moins(const ibis::column* c, ibis::fileManager::storage* st,
00980           size_t offset = 8);
          // Construct from an ibis::bin object; nbase defaults to 2.  This
          // constructor is not marked explicit.
00981     moins(const ibis::bin& rhs, const uint32_t nbase = 2);
00982 
00983     virtual void speedTest(std::ostream& out) const;
00984     virtual double getSum() const;
00985 
          // Non-virtual overloads of append.
00986     long append(const ibis::moins& tail);
00987     long append(const array_t<uint32_t>& ind);
00988 
00989 protected:
00990     virtual double computeSum() const;
00991 
00992 private:
00993     // private member functions
          // Note that bs is taken by const value while bd is taken by const
          // reference.
00994     moins(const ibis::column* c, const char* f, const array_t<double>& bd,
00995           const array_t<uint32_t> bs);
00996     void convert();
00997 
          // These non-virtual functions carry the same names and signatures
          // as private members of ibis::egale; they are separate functions
          // belonging to this class.
00998     void evalEQ(ibis::bitvector& res, uint32_t b) const;
00999     void evalLE(ibis::bitvector& res, uint32_t b) const;
01000     void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const;
01001 
          // Declared private: copy construction and assignment are not
          // available to users of this class.
01002     moins(const moins&);
01003     moins& operator=(const moins&);
01004 }; // ibis::moins
01005 
01009 class ibis::entre : public ibis::egale {
      // An index derived from ibis::egale.  It reports its type as ENTRE and
      // its name as "MCBInterval".  It declares no member variables of its
      // own; read is not redeclared here, so the versions from ibis::egale
      // apply.
      // The copy constructor and the assignment operator are declared
      // private at the bottom of the class.
01010 public:
          // The destructor calls clear().
01011     virtual ~entre() {clear();};
          // All three arguments have default values, which makes this the
          // default constructor as well; nbase defaults to 2.
01012     entre(const ibis::column* c = 0, const char* f = 0,
01013           const uint32_t nbase = 2);
          // Construct from a storage object; offset defaults to 8.
01014     entre(const ibis::column* c, ibis::fileManager::storage* st,
01015           size_t offset = 8);
          // Construct from an ibis::bin object; nbase defaults to 2.  This
          // constructor is not marked explicit.
01016     entre(const ibis::bin& rhs, const uint32_t nbase = 2);
01017 
          // Writing, printing and appending.
01018     virtual int write(const char* dt) const;
01019     virtual void print(std::ostream& out) const;
01020     virtual long append(const char* dt, const char* df, uint32_t nnew);
01021 
          // Evaluate a continuous range condition; the result is placed in hits.
01022     virtual long evaluate(const ibis::qContinuousRange& expr,
01023                           ibis::bitvector& hits) const;
          // Discrete range conditions are forwarded to ibis::index::evaluate.
01024     virtual long evaluate(const ibis::qDiscreteRange& expr,
01025                           ibis::bitvector& hits) const {
01026         return ibis::index::evaluate(expr, hits);
01027     }
01028 
          // Keep the estimate overloads visible in ibis::egale accessible next
          // to the two overrides declared below.
01029     using ibis::egale::estimate;
01030     virtual void estimate(const ibis::qContinuousRange& expr,
01031                           ibis::bitvector& lower,
01032                           ibis::bitvector& upper) const;
01033     virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
01034     virtual INDEX_TYPE type() const {return ENTRE;}
01035     virtual const char* name() const {return "MCBInterval";}
01036 
01037     virtual void speedTest(std::ostream& out) const;
01038     virtual double getSum() const;
01039 
          // Non-virtual overloads of append.
01040     long append(const ibis::entre& tail);
01041     long append(const array_t<uint32_t>& ind);
01042 
01043 protected:
01044     virtual double computeSum() const;
01045 
01046 private:
01047     // private member functions
          // Note that bs is taken by const value while bd is taken by const
          // reference.
01048     entre(const ibis::column* c, const char* f, const array_t<double>& bd,
01049           const array_t<uint32_t> bs);
01050     void convert();
01051 
          // These non-virtual functions carry the same names and signatures
          // as private members of ibis::egale; they are separate functions
          // belonging to this class.
01052     void evalEQ(ibis::bitvector& res, uint32_t b) const;
01053     void evalLE(ibis::bitvector& res, uint32_t b) const;
01054     void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const;
01055 
          // Declared private: copy construction and assignment are not
          // available to users of this class.
01056     entre(const entre&);
01057     entre& operator=(const entre&);
01058 }; // ibis::entre
01059 
01066 class ibis::bak : public ibis::bin {
      // A binned index derived from ibis::bin.  It reports its type as BAK
      // and its name as "equality code on mapped values".  While building,
      // the values are collected in a bakMap (a std::map from double to
      // grain) that is later turned into the structure defined in ibis::bin.
      // The copy constructor and the copy assignment operator are declared
      // private at the bottom of the class.
01067 public:
          // The destructor calls clear().
01068     virtual ~bak() {clear();};
01069     bak(const ibis::column* c=0, const char* f=0);
          // Construct from a storage object; all the work is delegated to the
          // corresponding constructor of ibis::bin.  offset defaults to 8.
01070     bak(const ibis::column* c, ibis::fileManager::storage* st,
01071         size_t offset = 8) : ibis::bin(c, st, offset) {};
01072 
01073     virtual void print(std::ostream& out) const;
01074     virtual int write(const char* dt) const; // write to the named file
          // Keep the read overloads inherited from ibis::bin visible next to
          // the override declared below.
01075     using ibis::bin::read;
01076     virtual int read(const char* idxfile);
01077     virtual long append(const char* dt, const char* df, uint32_t nnew);
01078     virtual INDEX_TYPE type() const {return BAK;}
01079     virtual const char* name() const
01080     {return "equality code on mapped values";}
01081     // bin boundaries and counts of each bin
01082     virtual void binBoundaries(std::vector<double>&) const;
01083     virtual void binWeights(std::vector<uint32_t>&) const;
01084     // expand/contract the boundaries of a range condition
01085     virtual int  expandRange(ibis::qContinuousRange& rng) const;
01086     virtual int  contractRange(ibis::qContinuousRange& rng) const;
01087 
          // Non-virtual overload of append taking an ibis::bin object.
01088     long append(const ibis::bin& tail);
01089 
01090     // A simple structure to record the positions of the values mapped to
01091     // the same value.  The bitvector marks the locations of the values, and
01092     // min and max record the actual minimum and maximum values encountered.
01093     struct grain {
01094         double min, max;
01095         ibis::bitvector* loc;
01096 
01097         // The default constructor.  It starts with an empty range (min
              // above max) and a null loc; the user is expected to allocate
              // the bitvector explicitly.
01098         grain() : min(DBL_MAX), max(-DBL_MAX), loc(0) {}
              // The destructor deletes loc.  No copy operations are declared,
              // so a copy of a grain would share the same loc pointer.
01099         ~grain() {delete loc;}
01100     };
01101 
01102     typedef std::map< double, grain > bakMap;
01103 
01104 
01105 protected:
01106 
01107     // reads all values and records positions in bmap
01108     void mapValues(const char* f, bakMap& bmap) const;
01109     void printMap(std::ostream& out, const bakMap& bmap) const;
01110 
01111     virtual uint32_t locate(const double& val) const;
          // The two range versions of locate forward to ibis::bin::locate.
01112     virtual void locate(const ibis::qContinuousRange& expr,
01113                         uint32_t& cand0, uint32_t& cand1) const {
01114         ibis::bin::locate(expr, cand0, cand1);
01115     }
01116     virtual void locate(const ibis::qContinuousRange& expr,
01117                         uint32_t& cand0, uint32_t& cand1,
01118                         uint32_t& hit0, uint32_t& hit1) const {
01119         ibis::bin::locate(expr, cand0, cand1, hit0, hit1);
01120     }
01121 
01122 private:
01123     // converts the std::map structure into the structure defined in ibis::bin
01124     void construct(bakMap& bmap);
01125 
          // Declared private: copy construction and copy assignment are not
          // available to users of this class.  The assignment operator was
          // previously spelled operator&=, which left copy assignment
          // unguarded.
01126     bak(const bak&);
01127     bak& operator=(const bak&);
01128 }; // ibis::bak
01129 
01135 class ibis::bak2 : public ibis::bin {
      // A binned index derived from ibis::bin.  Its declarations parallel
      // those of ibis::bak, with a grain structure that keeps three
      // bitvectors and two min/max pairs per mapped value.
      // The copy constructor and the assignment operator are declared
      // private at the bottom of the class.
01136 public:
          // The destructor calls clear().
01137     virtual ~bak2() {clear();};
01138     bak2(const ibis::column* c=0, const char* f=0);
          // Construct from a storage object; all the work is delegated to the
          // corresponding constructor of ibis::bin.  offset defaults to 8.
01139     bak2(const ibis::column* c, ibis::fileManager::storage* st,
01140          size_t offset = 8) : ibis::bin(c, st, offset) {};
01141 
01142     virtual void print(std::ostream& out) const;
01143     virtual int write(const char* dt) const; // write to the named file
          // Keep the read overloads inherited from ibis::bin visible next to
          // the override declared below.
01144     using ibis::bin::read;
01145     virtual int read(const char* idxfile);
01146     virtual long append(const char* dt, const char* df, uint32_t nnew);
          // NOTE(review): type() returns BAK and name() returns the same
          // string as ibis::bak.  This looks like a copy-and-paste leftover;
          // confirm whether a separate INDEX_TYPE value and name are intended
          // for this class.
01147     virtual INDEX_TYPE type() const {return BAK;}
01148     virtual const char* name() const
01149     {return "equality code on mapped values";}
01150     // bin boundaries and counts of each bin
01151     virtual void binBoundaries(std::vector<double>&) const;
01152     virtual void binWeights(std::vector<uint32_t>&) const;
01153     // expand/contract the boundaries of a range condition
01154     virtual int  expandRange(ibis::qContinuousRange& rng) const;
01155     virtual int  contractRange(ibis::qContinuousRange& rng) const;
01156 
          // Non-virtual overload of append taking an ibis::bin object.
01157     long append(const ibis::bin& tail);
01158 
          // A structure recording two min/max pairs and three bitvectors for
          // the values mapped to the same value.
          // NOTE(review): the suffixes presumably stand for values less than
          // (m), equal to (e) and greater than (p) the mapped value --
          // confirm against the implementation.
01163     struct grain {
01164         double minm, maxm, minp, maxp;
01165         ibis::bitvector* locm; 
01166         ibis::bitvector* loce; 
01167         ibis::bitvector* locp; 
01168 
01169         // The default constructor.  Both ranges start out empty (min above
              // max) and the three pointers are null; the user is expected to
              // allocate the bitvectors explicitly.
01170         grain() : minm(DBL_MAX), maxm(-DBL_MAX), minp(DBL_MAX), maxp(-DBL_MAX),
01171                   locm(0), loce(0), locp(0) {}
              // The destructor deletes all three bitvectors.  No copy
              // operations are declared, so a copy of a grain would share the
              // same pointers.
01172         ~grain() {delete locm; delete loce; delete locp;}
01173     };
01174 
01175     typedef std::map< double, grain > bakMap;
01176 
01177 
01178 protected:
01179 
          // NOTE(review): presumably reads the values from the named file and
          // records their positions in bmap, like the function of the same
          // name in ibis::bak -- confirm.
01181     void mapValues(const char* f, bakMap& bmap) const;
01182     void printMap(std::ostream& out, const bakMap& bmap) const;
01183 
01184     virtual uint32_t locate(const double& val) const;
          // The two range versions of locate forward to ibis::bin::locate.
01185     virtual void locate(const ibis::qContinuousRange& expr,
01186                         uint32_t& cand0, uint32_t& cand1) const {
01187         ibis::bin::locate(expr, cand0, cand1);
01188     }
01189     virtual void locate(const ibis::qContinuousRange& expr,
01190                         uint32_t& cand0, uint32_t& cand1,
01191                         uint32_t& hit0, uint32_t& hit1) const {
01192         ibis::bin::locate(expr, cand0, cand1, hit0, hit1);
01193     }
01194 
01195 private:
          // NOTE(review): presumably converts the content of bmap into the
          // structure defined in ibis::bin -- confirm.
01198     void construct(bakMap& bmap);
01199 
          // Declared private: copy construction and assignment are not
          // available to users of this class.
01200     bak2(const bak2&);
01201     bak2& operator=(const bak2&);
01202 }; // ibis::bak2
01203 
01204 #endif // IBIS_IBIN_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive