00001 //File: $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Lawrence Berkeley National Laboratory 00004 // Copyright 2000-2011 the Regents of the University of California 00005 #ifndef IBIS_IBIN_H 00006 #define IBIS_IBIN_H 00007 00008 00009 00010 00011 00012 00013 #include "index.h" 00014 #if defined(_WIN32) && defined(_MSC_VER) 00015 #pragma warning(disable:4786) // some identifier longer than 256 characters 00016 #undef min 00017 #undef max 00018 #endif 00019 00027 class ibis::bin : public ibis::index { 00028 public: 00029 00030 virtual ~bin() {clear();}; 00031 bin(const ibis::bin& rhs); 00032 bin(const ibis::column* c=0, const char* f=0); 00033 bin(const ibis::column* c, ibis::fileManager::storage* st, 00034 size_t offset = 8); 00035 bin(const ibis::column* c, const char* f, const array_t<double>& bd); 00036 bin(const ibis::column* c, const char* f, const std::vector<double>& bd); 00037 00038 virtual void print(std::ostream& out) const; 00039 virtual int write(const char* dt) const; // write to the named file 00040 virtual int read(const char* idxfile); 00041 virtual int read(ibis::fileManager::storage* st); 00042 virtual long append(const char* dt, const char* df, uint32_t nnew); 00043 00044 using ibis::index::evaluate; 00045 using ibis::index::estimate; 00046 using ibis::index::estimateCost; 00047 virtual long evaluate(const ibis::qContinuousRange& expr, 00048 ibis::bitvector& hits) const; 00049 virtual long evaluate(const ibis::qDiscreteRange& expr, 00050 ibis::bitvector& hits) const { 00051 return ibis::index::evaluate(expr, hits); 00052 } 00053 00054 virtual void estimate(const ibis::qContinuousRange& expr, 00055 ibis::bitvector& lower, 00056 ibis::bitvector& upper) const; 00057 virtual uint32_t estimate(const ibis::qContinuousRange& expr) const; 00058 using ibis::index::undecidable; 00059 virtual float undecidable(const ibis::qContinuousRange& expr, 00060 ibis::bitvector& iffy) const; 00061 00063 virtual void estimate(const ibis::deprecatedJoin& expr, 00064 ibis::bitvector64& lower, 00065 ibis::bitvector64& upper) const; 00066 virtual void estimate(const ibis::deprecatedJoin& expr, 00067 const ibis::bitvector& mask, 00068 ibis::bitvector64& lower, 00069 ibis::bitvector64& upper) const; 00070 virtual void estimate(const ibis::deprecatedJoin& expr, 00071 const ibis::bitvector& mask, 00072 const ibis::qRange* const range1, 00073 const ibis::qRange* const range2, 00074 ibis::bitvector64& lower, 00075 ibis::bitvector64& upper) const; 00076 virtual int64_t estimate(const ibis::deprecatedJoin& expr, 00077 const ibis::bitvector& mask, 00078 const ibis::qRange* const range1, 00079 const ibis::qRange* const range2) const; 00080 00082 virtual void estimate(const ibis::bin& idx2, 00083 const ibis::deprecatedJoin& expr, 00084 ibis::bitvector64& lower, 00085 ibis::bitvector64& upper) const; 00086 virtual void estimate(const ibis::bin& idx2, 00087 const ibis::deprecatedJoin& expr, 00088 const ibis::bitvector& mask, 00089 ibis::bitvector64& lower, 00090 ibis::bitvector64& upper) const; 00091 virtual void estimate(const ibis::bin& idx2, 00092 const ibis::deprecatedJoin& expr, 00093 const ibis::bitvector& mask, 00094 const ibis::qRange* const range1, 00095 const ibis::qRange* const range2, 00096 ibis::bitvector64& lower, 00097 ibis::bitvector64& upper) const; 00098 virtual int64_t estimate(const ibis::bin& idx2, 00099 const ibis::deprecatedJoin& expr) const; 00100 virtual int64_t estimate(const ibis::bin& idx2, 00101 const ibis::deprecatedJoin& expr, 00102 const ibis::bitvector& mask) const; 00103 virtual int64_t estimate(const ibis::bin& idx2, 00104 const ibis::deprecatedJoin& expr, 00105 const ibis::bitvector& mask, 00106 const ibis::qRange* const range1, 00107 const ibis::qRange* const range2) const; 00108 00109 virtual INDEX_TYPE type() const {return BINNING;} 00110 virtual const char* name() const {return "bin";} 00111 virtual uint32_t numBins() const {return (nobs>2?nobs-2:0);} 00112 // bin boundaries and counts of each bin 00113 virtual void binBoundaries(std::vector<double>&) const; 00114 virtual void binWeights(std::vector<uint32_t>&) const; 00115 // expand/contract the boundaries of a range condition 00116 virtual int expandRange(ibis::qContinuousRange& rng) const; 00117 virtual int contractRange(ibis::qContinuousRange& rng) const; 00118 virtual void speedTest(std::ostream& out) const; 00119 virtual double estimateCost(const ibis::qContinuousRange& expr) const; 00120 virtual double estimateCost(const ibis::qDiscreteRange& expr) const; 00121 00122 virtual long getCumulativeDistribution(std::vector<double>& bds, 00123 std::vector<uint32_t>& cts) const; 00124 virtual long getDistribution(std::vector<double>& bbs, 00125 std::vector<uint32_t>& cts) const; 00126 virtual double getMin() const; 00127 virtual double getMax() const; 00128 virtual double getSum() const; 00129 00131 int read(int fdes, size_t offset, const char *fname, const char *header); 00133 long append(const ibis::bin& tail); 00135 long append(const array_t<uint32_t>& ind); 00136 array_t<uint32_t>* indices(const ibis::bitvector& mask) const; 00139 long checkBin(const ibis::qRange& cmp, uint32_t jbin, 00140 ibis::bitvector& res) const; 00144 long checkBin(const ibis::qRange& cmp, uint32_t jbin, 00145 const ibis::bitvector& mask, ibis::bitvector& res) const; 00146 00163 struct granule { 00164 double minm, maxm; // min and max of values less than the target 00165 double minp, maxp; // min and max of values greater than the target 00166 ibis::bitvector* loce; 00167 ibis::bitvector* locm; 00168 ibis::bitvector* locp; 00169 00171 granule() : minm(DBL_MAX), maxm(-DBL_MAX), minp(DBL_MAX), 00172 maxp(-DBL_MAX), loce(0), locm(0), locp(0) {}; 00174 ~granule() {delete loce; delete locm; delete locp;}; 00175 private: 00176 granule(const granule&); // no copy constructor 00177 granule& operator=(const granule&); // no assignment 00178 }; 00179 // key = target value 00180 typedef std::map< double, granule* > granuleMap; 00181 00182 protected: 00183 // member variables shared by all derived classes -- the derived classes 00184 // are allowed to interpret the actual content differently. 00185 uint32_t nobs; 00186 array_t<double> bounds; 00187 array_t<double> maxval; 00188 array_t<double> minval; 00189 00191 bin(const ibis::column* c, const uint32_t nbits, 00192 ibis::fileManager::storage* st, size_t offset = 8); 00193 00195 void construct(const char*); 00197 void binning(const char* f, const std::vector<double>& bd); 00198 void binning(const char* f, const array_t<double>& bd); 00201 void binning(const char* f); 00204 template <typename E> 00205 void binningT(const char* fname); 00206 template <typename E> 00207 long checkBin0(const ibis::qRange& cmp, uint32_t jbin, 00208 ibis::bitvector& res) const; 00209 template <typename E> 00210 long checkBin1(const ibis::qRange& cmp, uint32_t jbin, 00211 const ibis::bitvector& mask, ibis::bitvector& res) const; 00213 template <typename E> 00214 long binOrderT(const char* fname) const; 00215 long binOrder(const char* fname) const; 00216 00218 void setBoundaries(const char* f); 00219 void setBoundaries(array_t<double>& bnds, 00220 const ibis::bin& bin0) const; 00221 void setBoundaries(array_t<double>& bnds, 00222 const ibis::bin& idx1, 00223 const array_t<uint32_t> cnt1, 00224 const array_t<uint32_t> cnt0) const; 00225 // functions to deal with in-memory arrays 00226 template <typename E> 00227 void construct(const array_t<E>& varr); 00228 template <typename E> 00229 void binning(const array_t<E>& varr); 00230 template <typename E> 00231 void binning(const array_t<E>& varr, const array_t<double>& bd); 00232 template <typename E> 00233 void setBoundaries(const array_t<E>& varr); 00234 template <typename E> 00235 void scanAndPartition(const array_t<E>&, unsigned); 00236 template <typename E> 00237 void mapGranules(const array_t<E>&, granuleMap& gmap) const; 00238 void printGranules(std::ostream& out, const granuleMap& gmap) const; 00239 void convertGranules(granuleMap& gmap); 00240 00242 void readBinBoundaries(const char* name, uint32_t nb); 00244 void scanAndPartition(const char*, unsigned, uint32_t nbins=0); 00246 void addBounds(double lbd, double rbd, uint32_t nbins, uint32_t eqw); 00248 uint32_t parseNbins() const; 00250 unsigned parseScale() const; 00252 unsigned parsePrec() const; 00253 00254 virtual size_t getSerialSize() const throw(); 00257 void divideBitmaps(const std::vector<ibis::bitvector*>& bms, 00258 std::vector<unsigned>& parts) const; 00259 00261 virtual double computeSum() const; 00263 virtual void adjustLength(uint32_t nrows); 00265 virtual uint32_t locate(const double& val) const; 00267 virtual void locate(const ibis::qContinuousRange& expr, 00268 uint32_t& cand0, uint32_t& cand1) const; 00270 virtual void locate(const ibis::qContinuousRange& expr, 00271 uint32_t& cand0, uint32_t& cand1, 00272 uint32_t& hit0, uint32_t& hit1) const; 00274 void swap(bin& rhs) { 00275 const ibis::column* c = col; 00276 col = rhs.col; 00277 rhs.col = c; 00278 uint32_t tmp = nobs; 00279 nobs = rhs.nobs; 00280 rhs.nobs = tmp; 00281 tmp = nrows; 00282 nrows = rhs.nrows; 00283 rhs.nrows = tmp; 00284 bounds.swap(rhs.bounds); 00285 maxval.swap(rhs.maxval); 00286 minval.swap(rhs.minval); 00287 bits.swap(rhs.bits); 00288 } // swap 00289 00290 virtual void clear(); 00291 int write32(int fptr) const; 00292 int write64(int fptr) const; 00293 00294 private: 00295 // private member functions 00296 bin& operator=(const bin&); 00297 00298 unsigned parseScale(const char*) const; 00299 00300 void print(std::ostream& out, const uint32_t tot, 00301 const double& lbound, const double& rbound) const; 00302 00304 void equiJoin(ibis::bitvector64& lower, 00305 ibis::bitvector64& iffy) const; 00306 void equiJoin(const ibis::bin& idx2, 00307 ibis::bitvector64& lower, 00308 ibis::bitvector64& iffy) const; 00309 void deprecatedJoin(const double& delta, 00310 ibis::bitvector64& lower, 00311 ibis::bitvector64& iffy) const; 00312 void deprecatedJoin(const ibis::bin& idx2, 00313 const double& delta, 00314 ibis::bitvector64& lower, 00315 ibis::bitvector64& iffy) const; 00316 void compJoin(const ibis::math::term *expr, 00317 ibis::bitvector64& lower, 00318 ibis::bitvector64& iffy) const; 00319 void compJoin(const ibis::bin& idx2, 00320 const ibis::math::term *expr, 00321 ibis::bitvector64& lower, 00322 ibis::bitvector64& iffy) const; 00323 void equiJoin(const ibis::bitvector& mask, 00324 ibis::bitvector64& lower, 00325 ibis::bitvector64& iffy) const; 00326 void equiJoin(const ibis::bin& idx2, 00327 const ibis::bitvector& mask, 00328 ibis::bitvector64& lower, 00329 ibis::bitvector64& iffy) const; 00330 void deprecatedJoin(const double& delta, 00331 const ibis::bitvector& mask, 00332 ibis::bitvector64& lower, 00333 ibis::bitvector64& iffy) const; 00334 void deprecatedJoin(const ibis::bin& idx2, 00335 const double& delta, 00336 const ibis::bitvector& mask, 00337 ibis::bitvector64& lower, 00338 ibis::bitvector64& iffy) const; 00339 void compJoin(const ibis::math::term *expr, 00340 const ibis::bitvector& mask, 00341 ibis::bitvector64& lower, 00342 ibis::bitvector64& iffy) const; 00343 void compJoin(const ibis::bin& idx2, 00344 const ibis::math::term *expr, 00345 const ibis::bitvector& mask, 00346 ibis::bitvector64& lower, 00347 ibis::bitvector64& iffy) const; 00348 00349 void equiJoin(const ibis::bitvector& mask, 00350 const ibis::qRange* const range1, 00351 const ibis::qRange* const range2, 00352 ibis::bitvector64& sure, 00353 ibis::bitvector64& iffy) const; 00354 void deprecatedJoin(const double& delta, 00355 const ibis::bitvector& mask, 00356 const ibis::qRange* const range1, 00357 const ibis::qRange* const range2, 00358 ibis::bitvector64& sure, 00359 ibis::bitvector64& iffy) const; 00360 void compJoin(const ibis::math::term *delta, 00361 const ibis::bitvector& mask, 00362 const ibis::qRange* const range1, 00363 const ibis::qRange* const range2, 00364 ibis::bitvector64& sure, 00365 ibis::bitvector64& iffy) const; 00366 00367 int64_t equiJoin(const ibis::bitvector& mask, 00368 const ibis::qRange* const range1, 00369 const ibis::qRange* const range2) const; 00370 int64_t deprecatedJoin(const double& delta, 00371 const ibis::bitvector& mask, 00372 const ibis::qRange* const range1, 00373 const ibis::qRange* const range2) const; 00374 int64_t compJoin(const ibis::math::term *delta, 00375 const ibis::bitvector& mask, 00376 const ibis::qRange* const range1, 00377 const ibis::qRange* const range2) const; 00378 00379 void equiJoin(const ibis::bin& idx2, 00380 const ibis::bitvector& mask, 00381 const ibis::qRange* const range1, 00382 const ibis::qRange* const range2, 00383 ibis::bitvector64& sure, 00384 ibis::bitvector64& iffy) const; 00385 void deprecatedJoin(const ibis::bin& idx2, 00386 const double& delta, 00387 const ibis::bitvector& mask, 00388 const ibis::qRange* const range1, 00389 const ibis::qRange* const range2, 00390 ibis::bitvector64& sure, 00391 ibis::bitvector64& iffy) const; 00392 void compJoin(const ibis::bin& idx2, 00393 const ibis::math::term *delta, 00394 const ibis::bitvector& mask, 00395 const ibis::qRange* const range1, 00396 const ibis::qRange* const range2, 00397 ibis::bitvector64& sure, 00398 ibis::bitvector64& iffy) const; 00399 00400 int64_t equiJoin(const ibis::bin& idx2, 00401 const ibis::bitvector& mask, 00402 const ibis::qRange* const range1, 00403 const ibis::qRange* const range2) const; 00404 int64_t deprecatedJoin(const ibis::bin& idx2, 00405 const double& delta, 00406 const ibis::bitvector& mask, 00407 const ibis::qRange* const range1, 00408 const ibis::qRange* const range2) const; 00409 int64_t compJoin(const ibis::bin& idx2, 00410 const ibis::math::term *delta, 00411 const ibis::bitvector& mask, 00412 const ibis::qRange* const range1, 00413 const ibis::qRange* const range2) const; 00414 00415 // need these friendships to access the protected member variables 00416 friend class ibis::mesa; 00417 friend class ibis::range; 00418 friend class ibis::ambit; 00419 friend class ibis::pack; 00420 friend class ibis::pale; 00421 friend class ibis::zone; 00422 friend class ibis::mesh; 00423 friend class ibis::band; 00424 }; // ibis::bin 00425 00429 class ibis::range : public ibis::bin { 00430 public: 00431 00432 virtual ~range() {}; 00433 range(const ibis::column* c=0, const char* f=0); 00434 range(const ibis::column* c, ibis::fileManager::storage* st, 00435 size_t offset = 8); 00436 explicit range(const ibis::bin& rhs); // convert a bin to a range 00437 00438 virtual int read(const char* idxfile); 00439 virtual int read(ibis::fileManager::storage* st); 00440 virtual int write(const char* dt) const; // write to the named file 00441 virtual void print(std::ostream& out) const; 00442 virtual long append(const char* dt, const char* df, uint32_t nnew); 00443 00444 virtual long evaluate(const ibis::qContinuousRange& expr, 00445 ibis::bitvector& hits) const; 00446 virtual long evaluate(const ibis::qDiscreteRange& expr, 00447 ibis::bitvector& hits) const { 00448 return ibis::index::evaluate(expr, hits); 00449 } 00450 00451 using ibis::bin::estimate; 00452 using ibis::bin::estimateCost; 00453 virtual void estimate(const ibis::qContinuousRange& expr, 00454 ibis::bitvector& lower, 00455 ibis::bitvector& upper) const; 00456 virtual uint32_t estimate(const ibis::qContinuousRange& expr) const; 00457 using ibis::bin::undecidable; 00458 virtual float undecidable(const ibis::qContinuousRange& expr, 00459 ibis::bitvector& iffy) const; 00460 00461 virtual INDEX_TYPE type() const {return RANGE;} 00462 virtual const char* name() const {return "range";} 00463 virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);} 00464 // bin boundaries and counts of each bin 00465 virtual void binBoundaries(std::vector<double>&) const; 00466 virtual void binWeights(std::vector<uint32_t>&) const; 00467 // expand/contract the boundaries of a range condition 00468 virtual int expandRange(ibis::qContinuousRange& range) const; 00469 virtual int contractRange(ibis::qContinuousRange& range) const; 00470 virtual double getMax() const; 00471 virtual double getSum() const; 00472 00473 int read(int fdes, size_t offset, const char *fname, const char *); 00474 long append(const ibis::range& tail); 00475 virtual void speedTest(std::ostream& out) const; 00476 00477 protected: 00478 // protected member variables 00479 double max1, min1; // the min and max of the bin not explicitly tracked 00480 00481 // have to have its own locate functions because a bin is not explicitly 00482 // stored 00483 virtual uint32_t locate(const double& val) const { 00484 return ibis::bin::locate(val); 00485 } 00486 virtual void locate(const ibis::qContinuousRange& expr, 00487 uint32_t& cand0, uint32_t& cand1) const; 00488 virtual void locate(const ibis::qContinuousRange& expr, 00489 uint32_t& cand0, uint32_t& cand1, 00490 uint32_t& hit0, uint32_t& hit1) const; 00491 virtual double computeSum() const; 00493 void construct(const char*); 00495 void construct(const char* f, const array_t<double>& bd); 00496 virtual size_t getSerialSize() const throw(); 00497 00498 private: 00499 // private member functions 00500 int write32(int fptr) const; // write to the given stream 00501 int write64(int fptr) const; // write to the given stream 00502 void print(std::ostream& out, const uint32_t tot, const double& lbound, 00503 const double& rbound) const; 00504 00505 friend class ibis::pale; // pale uses ibis::range 00506 }; // ibis::range 00507 00510 class ibis::mesa : public ibis::bin { 00511 public: 00512 virtual ~mesa() {}; 00513 mesa(const ibis::column* c=0, const char* f=0); 00514 mesa(const ibis::column* c, ibis::fileManager::storage* st, 00515 size_t offset = 8); 00516 explicit mesa(const ibis::bin& rhs); // convert a bin to a mesa 00517 00518 virtual void print(std::ostream& out) const; 00519 virtual int write(const char* dt) const; // write to the named file 00520 virtual long append(const char* dt, const char* df, uint32_t nnew); 00521 00522 virtual long evaluate(const ibis::qContinuousRange& expr, 00523 ibis::bitvector& hits) const; 00524 virtual long evaluate(const ibis::qDiscreteRange& expr, 00525 ibis::bitvector& hits) const { 00526 return ibis::index::evaluate(expr, hits); 00527 } 00528 00529 using ibis::bin::estimate; 00530 using ibis::bin::estimateCost; 00531 virtual void estimate(const ibis::qContinuousRange& expr, 00532 ibis::bitvector& lower, 00533 ibis::bitvector& upper) const; 00534 virtual uint32_t estimate(const ibis::qContinuousRange& expr) const; 00535 using ibis::bin::undecidable; 00536 virtual float undecidable(const ibis::qContinuousRange& expr, 00537 ibis::bitvector& iffy) const; 00538 00539 virtual INDEX_TYPE type() const {return MESA;} 00540 virtual const char* name() const {return "interval";} 00541 virtual uint32_t numBins() const {return (nobs>2?nobs-2:0);} 00542 // bin boundaries and counts of each bin 00543 virtual void binBoundaries(std::vector<double>&) const; 00544 virtual void binWeights(std::vector<uint32_t>&) const; 00545 virtual double getSum() const; 00546 00547 virtual void speedTest(std::ostream& out) const; 00548 long append(const ibis::mesa& tail); 00549 00550 protected: 00551 virtual double computeSum() const; 00553 void construct(const char*); 00554 virtual size_t getSerialSize() const throw(); 00555 00556 private: 00557 // private member functions 00558 00559 mesa(const mesa&); 00560 mesa& operator=(const mesa&); 00561 }; // ibis::mesa 00562 00565 class ibis::ambit : public ibis::bin { 00566 public: 00567 virtual ~ambit() {clear();}; 00568 ambit(const ibis::column* c=0, const char* f=0); 00569 ambit(const ibis::column* c, ibis::fileManager::storage* st, 00570 size_t offset = 8); 00571 explicit ambit(const ibis::bin& rhs); // convert from a ibis::bin 00572 00573 virtual int read(const char* idxfile); 00574 virtual int read(ibis::fileManager::storage* st); 00575 virtual int write(const char* dt) const; 00576 virtual void print(std::ostream& out) const; 00577 virtual long append(const char* dt, const char* df, uint32_t nnew); 00578 00579 virtual long evaluate(const ibis::qContinuousRange& expr, 00580 ibis::bitvector& hits) const; 00581 virtual long evaluate(const ibis::qDiscreteRange& expr, 00582 ibis::bitvector& hits) const { 00583 return ibis::index::evaluate(expr, hits); 00584 } 00585 00586 using ibis::bin::estimate; 00587 using ibis::bin::estimateCost; 00588 virtual void estimate(const ibis::qContinuousRange& expr, 00589 ibis::bitvector& lower, 00590 ibis::bitvector& upper) const; 00591 using ibis::bin::undecidable; 00592 virtual float undecidable(const ibis::qContinuousRange& expr, 00593 ibis::bitvector& iffy) const; 00594 00595 virtual INDEX_TYPE type() const {return AMBIT;} 00596 virtual const char* name() const {return "range-range";} 00597 virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);} 00598 // bin boundaries and counts of each bin 00599 virtual void binBoundaries(std::vector<double>&) const; 00600 virtual void binWeights(std::vector<uint32_t>&) const; 00601 virtual void adjustLength(uint32_t nrows); 00602 virtual double getSum() const; 00603 00604 virtual void speedTest(std::ostream& out) const; 00605 long append(const ibis::ambit& tail); 00606 00607 protected: 00608 virtual double computeSum() const; 00609 virtual void clear(); 00611 void construct(const char* f, const array_t<double>& bd); 00612 virtual size_t getSerialSize() const throw(); 00613 00614 private: 00615 // min and max of range nobs (the one that is not explicitly recorded) 00616 double max1, min1; 00617 std::vector<ibis::ambit*> sub; 00618 00619 // private member functions 00620 int write32(int fptr) const; 00621 int write64(int fptr) const; 00622 int read(int fdes, size_t offset, const char *fn, const char *header); 00623 void print(std::ostream& out, const uint32_t tot, const double& lbound, 00624 const double& rbound) const; 00625 00626 ambit(const ambit&); 00627 ambit& operator=(const ambit&); 00628 }; // ibis::ambit 00629 00632 class ibis::pale : public ibis::bin { 00633 public: 00634 virtual ~pale() {clear();}; 00635 pale(const ibis::column* c, ibis::fileManager::storage* st, 00636 size_t offset = 8); 00637 explicit pale(const ibis::bin& rhs); // convert from a ibis::bin 00638 00639 virtual int read(const char* idxfile); 00640 virtual int read(ibis::fileManager::storage* st); 00641 virtual int write(const char* dt) const; 00642 virtual void print(std::ostream& out) const; 00643 virtual long append(const char* dt, const char* df, uint32_t nnew); 00644 00645 virtual long evaluate(const ibis::qContinuousRange& expr, 00646 ibis::bitvector& hits) const; 00647 virtual long evaluate(const ibis::qDiscreteRange& expr, 00648 ibis::bitvector& hits) const { 00649 return ibis::index::evaluate(expr, hits); 00650 } 00651 00652 using ibis::bin::estimate; 00653 virtual void estimate(const ibis::qContinuousRange& expr, 00654 ibis::bitvector& lower, 00655 ibis::bitvector& upper) const; 00656 using ibis::bin::undecidable; 00657 virtual float undecidable(const ibis::qContinuousRange& expr, 00658 ibis::bitvector& iffy) const; 00659 00660 virtual INDEX_TYPE type() const {return PALE;} 00661 virtual const char* name() const {return "equality-range";} 00662 virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);} 00663 // bin boundaries and counts of each bin 00664 virtual void binBoundaries(std::vector<double>&) const; 00665 virtual void binWeights(std::vector<uint32_t>&) const; 00666 virtual void adjustLength(uint32_t nrows); 00667 00668 virtual void speedTest(std::ostream& out) const; 00669 long append(const ibis::pale& tail); 00670 00671 protected: 00672 virtual void clear(); 00673 virtual size_t getSerialSize() const throw(); 00674 00675 private: 00676 // private member variables 00677 std::vector<ibis::range*> sub; 00678 00679 // private member functions 00680 int write32(int fptr) const; 00681 int write64(int fptr) const; 00682 00683 pale(const pale&); 00684 pale& operator=(const pale&); 00685 }; // ibis::pale 00686 00689 class ibis::pack : public ibis::bin { 00690 public: 00691 virtual ~pack() {clear();}; 00692 pack(const ibis::column* c, ibis::fileManager::storage* st, 00693 size_t offset = 8); 00694 explicit pack(const ibis::bin& rhs); // convert from a ibis::bin 00695 00696 virtual int read(const char* idxfile); 00697 virtual int read(ibis::fileManager::storage* st); 00698 virtual int write(const char* dt) const; 00699 virtual void print(std::ostream& out) const; 00700 virtual long append(const char* dt, const char* df, uint32_t nnew); 00701 00702 virtual long evaluate(const ibis::qContinuousRange& expr, 00703 ibis::bitvector& hits) const; 00704 virtual long evaluate(const ibis::qDiscreteRange& expr, 00705 ibis::bitvector& hits) const { 00706 return ibis::index::evaluate(expr, hits); 00707 } 00708 00709 using ibis::bin::estimate; 00710 virtual void estimate(const ibis::qContinuousRange& expr, 00711 ibis::bitvector& lower, 00712 ibis::bitvector& upper) const; 00713 using ibis::bin::undecidable; 00714 virtual float undecidable(const ibis::qContinuousRange& expr, 00715 ibis::bitvector& iffy) const; 00716 00717 virtual INDEX_TYPE type() const {return PACK;} 00718 virtual const char* name() const {return "range-equality";} 00719 virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);} 00720 // bin boundaries and counts of each bin 00721 virtual void binBoundaries(std::vector<double>&) const; 00722 virtual void binWeights(std::vector<uint32_t>&) const; 00723 virtual void adjustLength(uint32_t nrows); 00724 virtual double getSum() const; 00725 00726 virtual void speedTest(std::ostream& out) const; 00727 long append(const ibis::pack& tail); 00728 00729 protected: 00730 virtual double computeSum() const; 00731 virtual void clear(); 00732 virtual size_t getSerialSize() const throw(); 00733 00734 private: 00735 // private member variables 00736 // min and max of range nobs (the one that is not explicitly recorded) 00737 double max1, min1; 00738 std::vector<ibis::bin*> sub; 00739 00740 // private member functions 00741 int write32(int fptr) const; 00742 int write64(int fptr) const; 00743 00744 pack(const pack&); 00745 pack& operator=(const pack&); 00746 }; // ibis::pack 00747 00750 class ibis::zone : public ibis::bin { 00751 public: 00752 virtual ~zone() {clear();}; 00753 zone(const ibis::column* c, ibis::fileManager::storage* st, 00754 size_t offset = 8); 00755 explicit zone(const ibis::bin& rhs); // convert from a ibis::bin 00756 00757 virtual int read(const char* idxfile); 00758 virtual int read(ibis::fileManager::storage* st); 00759 virtual int write(const char* dt) const; 00760 virtual void print(std::ostream& out) const; 00761 virtual long append(const char* dt, const char* df, uint32_t nnew); 00762 00763 virtual long evaluate(const ibis::qContinuousRange& expr, 00764 ibis::bitvector& hits) const; 00765 virtual long evaluate(const ibis::qDiscreteRange& expr, 00766 ibis::bitvector& hits) const { 00767 return ibis::index::evaluate(expr, hits); 00768 } 00769 00770 using ibis::bin::estimate; 00771 virtual void estimate(const ibis::qContinuousRange& expr, 00772 ibis::bitvector& lower, 00773 ibis::bitvector& upper) const; 00774 using ibis::bin::undecidable; 00775 virtual float undecidable(const ibis::qContinuousRange& expr, 00776 ibis::bitvector& iffy) const; 00777 00778 virtual INDEX_TYPE type() const {return ZONE;} 00779 virtual const char* name() const {return "equality-equality";} 00780 virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);} 00781 // bin boundaries and counts of each bin 00782 virtual void binBoundaries(std::vector<double>&) const; 00783 virtual void binWeights(std::vector<uint32_t>&) const; 00784 virtual void adjustLength(uint32_t nrows); 00785 00786 virtual void speedTest(std::ostream& out) const; 00787 long append(const ibis::zone& tail); 00788 00789 protected: 00790 virtual void clear(); 00791 virtual size_t getSerialSize() const throw(); 00792 00793 private: 00794 // private member variable 00795 std::vector<ibis::bin*> sub; 00796 00797 // private member functions 00798 int write32(int fptr) const; 00799 int write64(int fptr) const; 00800 00801 zone(const zone&); 00802 zone& operator=(const zone&); 00803 }; // ibis::zone 00804 00810 class ibis::fuge : public ibis::bin { 00811 public: 00812 virtual ~fuge() {clear();}; 00813 fuge(const ibis::column* c, ibis::fileManager::storage* st, 00814 size_t offset = 8); 00815 fuge(const ibis::column*, const char*); 00816 explicit fuge(const ibis::bin& rhs); // convert from a ibis::bin 00817 00818 virtual int read(const char* idxfile); 00819 virtual int read(ibis::fileManager::storage* st); 00820 virtual void print(std::ostream& out) const; 00821 virtual int write(const char* dt) const; 00822 virtual long append(const char* dt, const char* df, uint32_t nnew); 00823 00824 virtual long evaluate(const ibis::qContinuousRange& expr, 00825 ibis::bitvector& hits) const; 00826 virtual long evaluate(const ibis::qDiscreteRange& expr, 00827 ibis::bitvector& hits) const { 00828 return ibis::index::evaluate(expr, hits); 00829 } 00830 00831 using ibis::bin::estimate; 00832 virtual void estimate(const ibis::qContinuousRange& expr, 00833 ibis::bitvector& lower, 00834 ibis::bitvector& upper) const; 00835 00836 virtual INDEX_TYPE type() const {return FUGE;} 00837 virtual const char* name() const {return "interval-equality";} 00838 virtual void adjustLength(uint32_t nrows); 00839 00840 long append(const ibis::fuge& tail); 00841 00842 protected: 00843 virtual void clear() {clearCoarse(); ibis::bin::clear();} 00844 virtual size_t getSerialSize() const throw(); 00845 00846 private: 00847 // private member variable 00848 mutable std::vector<ibis::bitvector*> cbits; 00849 array_t<uint32_t> cbounds; 00850 mutable array_t<int32_t> coffset32; 00851 mutable array_t<int64_t> coffset64; 00852 00853 void coarsen(); // given fine level, add coarse level 00854 void activateCoarse() const; // activate all coarse level bitmaps 00855 void activateCoarse(uint32_t i) const; // activate one bitmap 00856 void activateCoarse(uint32_t i, uint32_t j) const; 00857 00858 int writeCoarse32(int fdes) const; 00859 int writeCoarse64(int fdes) const; 00860 int readCoarse(const char *fn); 00861 void clearCoarse(); 00862 00864 long coarseEstimate(uint32_t lo, uint32_t hi) const; 00866 long coarseEvaluate(uint32_t lo, uint32_t hi, ibis::bitvector& res) const; 00867 00868 fuge(const fuge&); 00869 fuge& operator=(const fuge&); 00870 }; // ibis::fuge 00871 00875 class ibis::egale : public ibis::bin { 00876 public: 00877 virtual ~egale() {clear();}; 00878 egale(const ibis::column* c = 0, const char* f = 0, 00879 const uint32_t nbase = 2); 00880 egale(const ibis::column* c, ibis::fileManager::storage* st, 00881 size_t offset = 8); 00882 egale(const ibis::bin& rhs, const uint32_t nbase = 2); 00883 00884 virtual int read(const char* idxfile); 00885 virtual int read(ibis::fileManager::storage* st); 00886 virtual int write(const char* dt) const; 00887 virtual void print(std::ostream& out) const; 00888 virtual long append(const char* dt, const char* df, uint32_t nnew); 00889 00890 virtual long evaluate(const ibis::qContinuousRange& expr, 00891 ibis::bitvector& hits) const; 00892 virtual long evaluate(const ibis::qDiscreteRange& expr, 00893 ibis::bitvector& hits) const { 00894 return ibis::index::evaluate(expr, hits); 00895 } 00896 00897 using ibis::bin::estimate; 00898 virtual void estimate(const ibis::qContinuousRange& expr, 00899 ibis::bitvector& lower, 00900 ibis::bitvector& upper) const; 00901 virtual uint32_t estimate(const ibis::qContinuousRange& expr) const; 00902 using ibis::bin::undecidable; 00903 virtual float undecidable(const ibis::qContinuousRange& expr, 00904 ibis::bitvector& iffy) const; 00905 00906 virtual INDEX_TYPE type() const {return EGALE;} 00907 virtual const char* name() const {return "MCBin";} 00908 // bin boundaries and counts of each bin 00909 virtual void binBoundaries(std::vector<double>& b) const; 00910 virtual void binWeights(std::vector<uint32_t>& b) const; 00911 virtual double getSum() const; 00912 00913 virtual void speedTest(std::ostream& out) const; 00914 long append(const ibis::egale& tail); 00915 long append(const array_t<uint32_t>& ind); 00916 00917 protected: 00918 // protected member variables 00919 uint32_t nbits; // number of bitvectors, (size of bits) 00920 uint32_t nbases; // size of array bases 00921 array_t<uint32_t> cnts; // number of records in each bin 00922 array_t<uint32_t> bases; // the size of the bases used 00923 00924 // protected member functions 00925 egale(const ibis::column* c, const char* f, const array_t<double>& bd, 00926 const array_t<uint32_t> bs); 00927 void addBits_(uint32_t ib, uint32_t ie, ibis::bitvector& res) const; 00928 virtual double computeSum() const; 00929 virtual void clear() { 00930 cnts.clear(); bases.clear(); 00931 ibis::bin::clear(); 00932 } 00933 00934 int write32(int fdes) const; 00935 int write64(int fdes) const; 00936 void construct(const char* f); 00937 virtual size_t getSerialSize() const throw(); 00938 00939 private: 00940 // private member functions 00941 void setBit(const uint32_t i, const double val); 00942 void convert(); 00943 00944 void evalEQ(ibis::bitvector& res, uint32_t b) const; 00945 void evalLE(ibis::bitvector& res, uint32_t b) const; 00946 void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const; 00947 00948 egale(const egale&); 00949 egale& operator=(const egale&); 00950 }; // ibis::egale 00951 00955 class ibis::moins : public ibis::egale { 00956 public: 00957 virtual int write(const char* dt) const; 00958 virtual void print(std::ostream& out) const; 00959 virtual long append(const char* dt, const char* df, uint32_t nnew); 00960 00961 virtual long evaluate(const ibis::qContinuousRange& expr, 00962 ibis::bitvector& hits) const; 00963 virtual long evaluate(const ibis::qDiscreteRange& expr, 00964 ibis::bitvector& hits) const { 00965 return ibis::index::evaluate(expr, hits); 00966 } 00967 00968 using ibis::egale::estimate; 00969 virtual void estimate(const ibis::qContinuousRange& expr, 00970 ibis::bitvector& lower, 00971 ibis::bitvector& upper) const; 00972 virtual uint32_t estimate(const ibis::qContinuousRange& expr) const; 00973 virtual INDEX_TYPE type() const {return MOINS;} 00974 virtual const char* name() const {return "MCBRange";} 00975 00976 virtual ~moins() {clear();}; 00977 moins(const ibis::column* c = 0, const char* f = 0, 00978 const uint32_t nbase = 2); 00979 moins(const ibis::column* c, ibis::fileManager::storage* st, 00980 size_t offset = 8); 00981 moins(const ibis::bin& rhs, const uint32_t nbase = 2); 00982 00983 virtual void speedTest(std::ostream& out) const; 00984 virtual double getSum() const; 00985 00986 long append(const ibis::moins& tail); 00987 long append(const array_t<uint32_t>& ind); 00988 00989 protected: 00990 virtual double computeSum() const; 00991 00992 private: 00993 // private member functions 00994 moins(const ibis::column* c, const char* f, const array_t<double>& bd, 00995 const array_t<uint32_t> bs); 00996 void convert(); 00997 00998 void evalEQ(ibis::bitvector& res, uint32_t b) const; 00999 void evalLE(ibis::bitvector& res, uint32_t b) const; 01000 void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const; 01001 01002 moins(const moins&); 01003 moins& operator=(const moins&); 01004 }; // ibis::moins 01005 01009 class ibis::entre : public ibis::egale { 01010 public: 01011 virtual ~entre() {clear();}; 01012 entre(const ibis::column* c = 0, const char* f = 0, 01013 const uint32_t nbase = 2); 01014 entre(const ibis::column* c, ibis::fileManager::storage* st, 01015 size_t offset = 8); 01016 entre(const ibis::bin& rhs, const uint32_t nbase = 2); 01017 01018 virtual int write(const char* dt) const; 01019 virtual void print(std::ostream& out) const; 01020 virtual long append(const char* dt, const char* df, uint32_t nnew); 01021 01022 virtual long evaluate(const ibis::qContinuousRange& expr, 01023 ibis::bitvector& hits) const; 01024 virtual long evaluate(const ibis::qDiscreteRange& expr, 01025 ibis::bitvector& hits) const { 01026 return ibis::index::evaluate(expr, hits); 01027 } 01028 01029 using ibis::egale::estimate; 01030 virtual void estimate(const ibis::qContinuousRange& expr, 01031 ibis::bitvector& lower, 01032 ibis::bitvector& upper) const; 01033 virtual uint32_t estimate(const ibis::qContinuousRange& expr) const; 01034 virtual INDEX_TYPE type() const {return ENTRE;} 01035 virtual const char* name() const {return "MCBInterval";} 01036 01037 virtual void speedTest(std::ostream& out) const; 01038 virtual double getSum() const; 01039 01040 long append(const ibis::entre& tail); 01041 long append(const array_t<uint32_t>& ind); 01042 01043 protected: 01044 virtual double computeSum() const; 01045 01046 private: 01047 // private member functions 01048 entre(const ibis::column* c, const char* f, const array_t<double>& bd, 01049 const array_t<uint32_t> bs); 01050 void convert(); 01051 01052 void evalEQ(ibis::bitvector& res, uint32_t b) const; 01053 void evalLE(ibis::bitvector& res, uint32_t b) const; 01054 void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const; 01055 01056 entre(const entre&); 01057 entre& operator=(const entre&); 01058 }; // ibis::entre 01059 01066 class ibis::bak : public ibis::bin { 01067 public: 01068 virtual ~bak() {clear();}; 01069 bak(const ibis::column* c=0, const char* f=0); 01070 bak(const ibis::column* c, ibis::fileManager::storage* st, 01071 size_t offset = 8) : ibis::bin(c, st, offset) {}; 01072 01073 virtual void print(std::ostream& out) const; 01074 virtual int write(const char* dt) const; // write to the named file 01075 using ibis::bin::read; 01076 virtual int read(const char* idxfile); 01077 virtual long append(const char* dt, const char* df, uint32_t nnew); 01078 virtual INDEX_TYPE type() const {return BAK;} 01079 virtual const char* name() const 01080 {return "equality code on mapped values";} 01081 // bin boundaries and counts of each bin 01082 virtual void binBoundaries(std::vector<double>&) const; 01083 virtual void binWeights(std::vector<uint32_t>&) const; 01084 // expand/contract the boundaries of a range condition 01085 virtual int expandRange(ibis::qContinuousRange& rng) const; 01086 virtual int contractRange(ibis::qContinuousRange& rng) const; 01087 01088 long append(const ibis::bin& tail); 01089 01090 // a simple structure to record the position of the values mapped to the 01091 // same value. The bitvector marked the locations of the values and the 01092 // min and max record the actual minimum and maximum value encountered. 01093 struct grain { 01094 double min, max; 01095 ibis::bitvector* loc; 01096 01097 // the default construct, user to explicitly allocated the bitvector 01098 grain() : min(DBL_MAX), max(-DBL_MAX), loc(0) {} 01099 ~grain() {delete loc;} 01100 }; 01101 01102 typedef std::map< double, grain > bakMap; 01103 01104 01105 protected: 01106 01107 // reads all values and records positions in bmap 01108 void mapValues(const char* f, bakMap& bmap) const; 01109 void printMap(std::ostream& out, const bakMap& bmap) const; 01110 01111 virtual uint32_t locate(const double& val) const; 01112 virtual void locate(const ibis::qContinuousRange& expr, 01113 uint32_t& cand0, uint32_t& cand1) const { 01114 ibis::bin::locate(expr, cand0, cand1); 01115 } 01116 virtual void locate(const ibis::qContinuousRange& expr, 01117 uint32_t& cand0, uint32_t& cand1, 01118 uint32_t& hit0, uint32_t& hit1) const { 01119 ibis::bin::locate(expr, cand0, cand1, hit0, hit1); 01120 } 01121 01122 private: 01123 // coverts the std::map structure into the structure defined in ibis::bin 01124 void construct(bakMap& bmap); 01125 01126 bak(const bak&); 01127 const bak& operator&=(const bak&); 01128 }; // ibis::bak 01129 01135 class ibis::bak2 : public ibis::bin { 01136 public: 01137 virtual ~bak2() {clear();}; 01138 bak2(const ibis::column* c=0, const char* f=0); 01139 bak2(const ibis::column* c, ibis::fileManager::storage* st, 01140 size_t offset = 8) : ibis::bin(c, st, offset) {}; 01141 01142 virtual void print(std::ostream& out) const; 01143 virtual int write(const char* dt) const; // write to the named file 01144 using ibis::bin::read; 01145 virtual int read(const char* idxfile); 01146 virtual long append(const char* dt, const char* df, uint32_t nnew); 01147 virtual INDEX_TYPE type() const {return BAK;} 01148 virtual const char* name() const 01149 {return "equality code on mapped values";} 01150 // bin boundaries and counts of each bin 01151 virtual void binBoundaries(std::vector<double>&) const; 01152 virtual void binWeights(std::vector<uint32_t>&) const; 01153 // expand/contract the boundaries of a range condition 01154 virtual int expandRange(ibis::qContinuousRange& rng) const; 01155 virtual int contractRange(ibis::qContinuousRange& rng) const; 01156 01157 long append(const ibis::bin& tail); 01158 01163 struct grain { 01164 double minm, maxm, minp, maxp; 01165 ibis::bitvector* locm; 01166 ibis::bitvector* loce; 01167 ibis::bitvector* locp; 01168 01169 // the default construct, user to explicitly allocated the bitvector 01170 grain() : minm(DBL_MAX), maxm(-DBL_MAX), minp(DBL_MAX), maxp(-DBL_MAX), 01171 locm(0), loce(0), locp(0) {} 01172 ~grain() {delete locm; delete loce; delete locp;} 01173 }; 01174 01175 typedef std::map< double, grain > bakMap; 01176 01177 01178 protected: 01179 01181 void mapValues(const char* f, bakMap& bmap) const; 01182 void printMap(std::ostream& out, const bakMap& bmap) const; 01183 01184 virtual uint32_t locate(const double& val) const; 01185 virtual void locate(const ibis::qContinuousRange& expr, 01186 uint32_t& cand0, uint32_t& cand1) const { 01187 ibis::bin::locate(expr, cand0, cand1); 01188 } 01189 virtual void locate(const ibis::qContinuousRange& expr, 01190 uint32_t& cand0, uint32_t& cand1, 01191 uint32_t& hit0, uint32_t& hit1) const { 01192 ibis::bin::locate(expr, cand0, cand1, hit0, hit1); 01193 } 01194 01195 private: 01198 void construct(bakMap& bmap); 01199 01200 bak2(const bak2&); 01201 bak2& operator=(const bak2&); 01202 }; // ibis::bak2 01203 01204 #endif // IBIS_IBIN_H
![]() |