00001 // $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Lawrence Berkeley National Laboratory 00004 // Copyright 2000-2012 the Regents of the University of California 00005 #ifndef BITVECTOR_H 00006 #define BITVECTOR_H 00007 00008 00009 00010 #include "array_t.h" // alternative to std::vector 00011 00012 #if defined(_MSC_VER) && defined(_WIN32) 00013 //disable warnings on extern before template instantiation 00014 #pragma warning (disable : 4231) 00015 #endif 00016 #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)) && defined(CXX_USE_DLL) && !defined(DLL_EXPORT) 00017 extern template class FASTBIT_CXX_DLLSPEC ibis::array_t<uint32_t>; 00018 #endif 00019 00020 00062 class FASTBIT_CXX_DLLSPEC ibis::bitvector { 00063 public: 00064 typedef uint32_t word_t; 00065 00067 ~bitvector() {clear();}; 00068 // constructors of bitvector class 00069 bitvector(); 00070 bitvector(const bitvector& bv); 00071 explicit bitvector(const array_t<word_t>& arr); 00072 explicit bitvector(const char* file); 00073 00074 inline const bitvector& operator=(const bitvector& bv); 00075 inline bitvector& copy(const bitvector& bv); 00076 inline bitvector& swap(bitvector& bv); 00077 00078 // use bv to replace part of the existing value, match the ith bit with 00079 // the first of bv, return reference to self 00080 //bitvector& copy(const word_t i, const bitvector& bv); 00081 void setBit(const word_t i, int val); 00082 inline void turnOnRawBit(const word_t i); 00084 void erase(word_t i, word_t j); 00085 00086 void set(int val, word_t n); 00087 void clear(); 00088 00089 bitvector& operator+=(const bitvector& bv); 00090 inline bitvector& operator+=(int b); 00091 void appendWord(word_t w); 00092 inline void appendFill(int val, word_t n); 00093 00095 int operator==(const bitvector& rhs) const; 00096 00098 void flip(); 00100 void operator&=(const bitvector& rhs); 00103 bitvector* operator&(const bitvector&) const; 00105 void operator|=(const bitvector& rhs); 00107 bitvector* operator|(const bitvector&) const; 00109 void operator^=(const bitvector& rhs); 00111 bitvector* operator^(const bitvector&) const; 00113 void operator-=(const bitvector& rhs); 00116 bitvector* operator-(const bitvector&) const; 00117 00118 void subset(const bitvector& mask, bitvector& res) const; 00119 word_t count(const bitvector& mask) const; 00120 00121 // I/O functions. 00124 void read(const char *fn); 00126 void write(const char *fn) const; 00128 void write(int fdes) const; 00132 void write(array_t<word_t>& arr) const; 00133 00134 void compress(); 00135 void decompress(); 00136 word_t compressible() const; 00139 bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);} 00140 00141 inline word_t size() const throw(); 00142 inline void sloppySize(word_t n) const; 00143 inline word_t cnt() const; 00144 inline word_t count() const {return cnt();} 00145 inline word_t sloppyCount() const; 00146 inline word_t numFillWords() const; 00148 uint32_t bytes() const throw() { 00149 return (m_vec.size()*sizeof(word_t) + sizeof(bitvector)); 00150 }; 00154 uint32_t getSerialSize() const throw() { 00155 return (m_vec.size() + 1 + (active.nbits>0)) * sizeof(word_t); 00156 }; 00158 static word_t bitsPerLiteral() {return MAXBITS;} 00159 inline static double randomSize(word_t nb, word_t nc); 00160 inline static double markovSize(word_t nb, word_t nc, double f); 00161 static double clusteringFactor(word_t nb, word_t nc, word_t sz); 00162 00163 void adjustSize(word_t nv, word_t nt); 00164 void reserve(unsigned nb, unsigned nc, double cf=0.0); 00165 00168 bool empty() const {return all0s() && active.val == 0;} 00169 00170 std::ostream& print(std::ostream &) const; 00171 00173 class iterator; 00174 inline iterator end(); 00175 inline iterator begin(); 00176 00178 class const_iterator; 00179 inline const_iterator end() const; 00180 inline const_iterator begin() const; 00181 00183 class indexSet; 00184 inline indexSet firstIndexSet() const; 00185 00186 // give accesses to some friends 00187 friend class indexSet; 00188 friend class iterator; 00189 friend class const_iterator; 00190 00191 protected: 00192 inline bool all0s() const; 00193 inline bool all1s() const; 00194 00195 private: 00196 // static members, i.e., constants to be used internally 00197 static const unsigned MAXBITS; 00198 static const unsigned SECONDBIT; 00199 static const word_t FILLBIT; 00200 static const word_t HEADER0; 00201 static const word_t HEADER1; 00202 static const word_t ALLONES; 00203 static const word_t MAXCNT; 00204 00211 struct active_word { 00212 word_t val; // the value 00213 word_t nbits; // total number of bits 00214 00215 active_word() : val(0), nbits(0) {}; 00216 void reset() {val = 0; nbits = 0;}; 00217 int is_full() const {return (nbits >= MAXBITS);}; 00218 void append(int b) { // append a single bit 00219 val <<= 1; nbits ++; val += b; 00220 }; 00221 }; // struct active_word 00222 00225 struct run { 00226 int isFill; 00227 int fillBit; 00228 word_t nWords; 00229 array_t<word_t>::const_iterator it; 00230 run() : isFill(0), fillBit(0), nWords(0), it(0) {}; 00231 void decode() { 00232 fillBit = (*it > HEADER1); 00233 if (*it > ALLONES) { 00234 nWords = (*it & MAXCNT); 00235 isFill = 1; 00236 } 00237 else { 00238 nWords = 1; 00239 isFill = 0; 00240 } 00241 }; 00244 void operator--() { 00245 if (nWords > 1) {--nWords;} 00246 else {++ it; nWords = 0;} 00247 }; 00250 void operator-=(word_t len) { 00251 while (len > 0) { 00252 if (nWords == 0) decode(); 00253 if (isFill != 0) { 00254 if (nWords > len) {nWords -= len; len = 0;} 00255 else if (nWords == len) {nWords = 0; len = 0; ++ it;} 00256 else {len -= nWords; ++ it; nWords = 0;} 00257 } 00258 else {-- len; ++ it; nWords = 0;} 00259 } 00260 }; 00261 }; 00262 friend struct run; 00263 friend struct active_word; 00264 00265 // member variables of bitvector class 00266 mutable word_t nbits; 00267 mutable word_t nset; 00268 active_word active; 00269 array_t<word_t> m_vec; 00270 00271 // private functions of bitvector class 00272 word_t count_c1(const bitvector& mask) const; 00273 word_t count_c2(const bitvector& mask) const; 00274 // The following three functions all performs or operation, _c2 and _c1 00275 // generate compressed solutions, but _c0, _d1, and _d2 generate 00276 // uncompressed solutions. 00277 // or_c2 assumes there are compressed word in both operands 00278 // or_c1 *this may contain compressed word, but not rhs 00279 // or_c0 assumes both operands are not compressed 00280 // or_d1 *this contains no compressed word and is overwritten with the 00281 // result 00282 // or_d2 both *this and rhs are compressed, but res is not compressed 00283 void or_c2(const bitvector& rhs, bitvector& res) const; 00284 void or_c1(const bitvector& rhs, bitvector& res) const; 00285 void or_c0(const bitvector& rhs); 00286 void or_d1(const bitvector& rhs); 00287 void or_d2(const bitvector& rhs, bitvector& res) const; 00288 void and_c2(const bitvector& rhs, bitvector& res) const; 00289 void and_c1(const bitvector& rhs, bitvector& res) const; 00290 void and_c0(const bitvector& rhs); 00291 void and_d1(const bitvector& rhs); 00292 void and_d2(const bitvector& rhs, bitvector& res) const; 00293 void xor_c2(const bitvector& rhs, bitvector& res) const; 00294 void xor_c1(const bitvector& rhs, bitvector& res) const; 00295 void xor_c0(const bitvector& rhs); 00296 void xor_d1(const bitvector& rhs); 00297 void xor_d2(const bitvector& rhs, bitvector& res) const; 00298 void minus_c2(const bitvector& rhs, bitvector& res) const; 00299 void minus_c1(const bitvector& rhs, bitvector& res) const; 00300 void minus_c1x(const bitvector& rhs, bitvector& res) const; 00301 void minus_c0(const bitvector& rhs); 00302 void minus_d1(const bitvector& rhs); 00303 void minus_d2(const bitvector& rhs, bitvector& res) const; 00304 inline void copy_runs(run& it, word_t& nw); // copy nw words 00305 inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate 00306 inline void copy_fill(array_t<word_t>::iterator& jt, run& it); 00307 inline void copy_runs(array_t<word_t>::iterator& jt, run& it, 00308 word_t& nw); 00309 inline void copy_runsn(array_t<word_t>::iterator& jt, run& it, 00310 word_t& nw); 00311 void decompress(array_t<word_t>& tmp) const; 00312 void copy_comp(array_t<word_t>& tmp) const; 00313 inline void append_active(); 00314 inline void append_counter(int val, word_t cnt); 00315 inline word_t cnt_ones(word_t) const; // number of ones in a word 00316 inline word_t cnt_bits(word_t) const; // number of bits in a word 00317 word_t do_cnt() const throw (); 00318 }; // class bitvector 00319 00321 class ibis::bitvector::const_iterator { 00322 public: 00323 const_iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), 00324 fillbit(0), active(0) {} 00325 00326 const_iterator(const const_iterator& r) 00327 : compressed(r.compressed), ind(r.ind), nbits(r.nbits), 00328 literalvalue(r.literalvalue), fillbit(r.fillbit), active(r.active), 00329 end(r.end), begin(r.begin), it(r.it) {}; 00330 const_iterator& operator=(const const_iterator& r) { 00331 compressed = r.compressed; ind = r.ind; nbits = r.nbits; 00332 literalvalue = r.literalvalue; fillbit = r.fillbit; active = r.active; 00333 end = r.end; begin = r.begin; it = r.it; 00334 return *this;} 00335 00336 inline bool operator*() const; 00337 inline int operator!=(const const_iterator& rhs) const throw (); 00338 inline int operator==(const const_iterator& rhs) const throw (); 00339 00340 inline const_iterator& operator++(); 00341 inline const_iterator& operator--(); 00342 const_iterator& operator+=(int incr); 00343 00344 private: 00345 ibis::bitvector::word_t compressed; 00346 ibis::bitvector::word_t ind; 00347 ibis::bitvector::word_t nbits; 00348 ibis::bitvector::word_t literalvalue; 00349 int fillbit; 00350 const active_word* active; 00351 array_t<word_t>::const_iterator end; 00352 array_t<word_t>::const_iterator begin; 00353 array_t<word_t>::const_iterator it; 00354 00355 void decodeWord(); 00356 00357 // give three functions of bitvector access to private variables 00358 friend void ibis::bitvector::erase(word_t i, word_t j); 00359 friend const_iterator ibis::bitvector::begin() const; 00360 friend const_iterator ibis::bitvector::end() const; 00361 friend class ibis::bitvector::iterator; 00362 }; // class ibis::bitvector::const_iterator 00363 00371 class ibis::bitvector::iterator { 00372 public: 00373 iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), fillbit(0), 00374 bitv(0), active(0), vec(0) {} 00375 iterator(const iterator& r) 00376 : compressed(r.compressed), ind(r.ind), nbits(r.nbits), 00377 literalvalue(r.literalvalue), fillbit(r.fillbit), bitv(r.bitv), 00378 active(r.active), vec(r.vec), it(r.it) {}; 00379 const iterator& operator=(const iterator& r) { 00380 compressed = r.compressed; ind = r.ind; nbits = r.nbits; 00381 literalvalue = r.literalvalue; fillbit = r.fillbit; bitv = r.bitv; 00382 active = r.active; vec = r.vec; it = r.it; return *this;} 00383 00384 inline bool operator*() const; // still can not modify this 00385 inline int operator!=(const const_iterator& rhs) const throw (); 00386 inline int operator==(const const_iterator& rhs) const throw (); 00387 inline int operator!=(const iterator& rhs) const throw (); 00388 inline int operator==(const iterator& rhs) const throw (); 00389 00390 inline iterator& operator++(); 00391 inline iterator& operator--(); 00392 iterator& operator+=(int incr); 00393 const iterator& operator=(int val); 00394 00395 private: 00396 ibis::bitvector::word_t compressed; 00397 ibis::bitvector::word_t ind; 00398 ibis::bitvector::word_t nbits; 00399 ibis::bitvector::word_t literalvalue; 00400 int fillbit; 00401 ibis::bitvector* bitv; 00402 active_word* active; 00403 array_t<word_t>* vec; 00404 array_t<word_t>::iterator it; 00405 00406 void decodeWord(); 00407 00408 friend iterator ibis::bitvector::begin(); 00409 friend iterator ibis::bitvector::end(); 00410 }; // class ibis::bitvector::iterator 00411 00421 class FASTBIT_CXX_DLLSPEC ibis::bitvector::indexSet { 00422 public: 00424 indexSet() : it(0), end(0), active(0), nind(0) {} 00426 indexSet(const indexSet& rhs) 00427 : it(rhs.it), end(rhs.end), active(rhs.active), nind(rhs.nind) { 00428 ind[0] = rhs.ind[0]; 00429 ind[1] = rhs.ind[1]; 00430 ind[2] = rhs.ind[2]; 00431 ind[3] = rhs.ind[3]; 00432 ind[4] = rhs.ind[4]; 00433 ind[5] = rhs.ind[5]; 00434 ind[6] = rhs.ind[6]; 00435 ind[7] = rhs.ind[7]; 00436 ind[8] = rhs.ind[8]; 00437 ind[9] = rhs.ind[9]; 00438 ind[10] = rhs.ind[10]; 00439 ind[11] = rhs.ind[11]; 00440 ind[12] = rhs.ind[12]; 00441 ind[13] = rhs.ind[13]; 00442 ind[14] = rhs.ind[14]; 00443 ind[15] = rhs.ind[15]; 00444 ind[16] = rhs.ind[16]; 00445 ind[17] = rhs.ind[17]; 00446 ind[18] = rhs.ind[18]; 00447 ind[19] = rhs.ind[19]; 00448 ind[20] = rhs.ind[20]; 00449 ind[21] = rhs.ind[21]; 00450 ind[22] = rhs.ind[22]; 00451 ind[23] = rhs.ind[23]; 00452 ind[24] = rhs.ind[24]; 00453 ind[25] = rhs.ind[25]; 00454 ind[26] = rhs.ind[26]; 00455 ind[27] = rhs.ind[27]; 00456 ind[28] = rhs.ind[28]; 00457 ind[29] = rhs.ind[29]; 00458 ind[30] = rhs.ind[30]; 00459 ind[31] = rhs.ind[31]; 00460 } 00462 indexSet& operator=(const indexSet& rhs) { 00463 it = rhs.it; 00464 end = rhs.end; 00465 active = rhs.active; 00466 nind = rhs.nind; 00467 ind[0] = rhs.ind[0]; 00468 ind[1] = rhs.ind[1]; 00469 ind[2] = rhs.ind[2]; 00470 ind[3] = rhs.ind[3]; 00471 ind[4] = rhs.ind[4]; 00472 ind[5] = rhs.ind[5]; 00473 ind[6] = rhs.ind[6]; 00474 ind[7] = rhs.ind[7]; 00475 ind[8] = rhs.ind[8]; 00476 ind[9] = rhs.ind[9]; 00477 ind[10] = rhs.ind[10]; 00478 ind[11] = rhs.ind[11]; 00479 ind[12] = rhs.ind[12]; 00480 ind[13] = rhs.ind[13]; 00481 ind[14] = rhs.ind[14]; 00482 ind[15] = rhs.ind[15]; 00483 ind[16] = rhs.ind[16]; 00484 ind[17] = rhs.ind[17]; 00485 ind[18] = rhs.ind[18]; 00486 ind[19] = rhs.ind[19]; 00487 ind[20] = rhs.ind[20]; 00488 ind[21] = rhs.ind[21]; 00489 ind[22] = rhs.ind[22]; 00490 ind[23] = rhs.ind[23]; 00491 ind[24] = rhs.ind[24]; 00492 ind[25] = rhs.ind[25]; 00493 ind[26] = rhs.ind[26]; 00494 ind[27] = rhs.ind[27]; 00495 ind[28] = rhs.ind[28]; 00496 ind[29] = rhs.ind[29]; 00497 ind[30] = rhs.ind[30]; 00498 ind[31] = rhs.ind[31]; 00499 return *this; 00500 } 00501 00502 //int operator!=(const indexSet& rhs) const {return (it != rhs.it);}; 00504 bool isRange() const {return (nind>=ibis::bitvector::MAXBITS);} 00506 const word_t* indices() const {return ind;}; 00508 word_t nIndices() const {return nind;} 00510 const word_t& currentWord() const {return *it;} 00511 indexSet& operator++(); 00512 00513 // allow bitvector::firstIndexSet() to access private member variables 00514 friend indexSet ibis::bitvector::firstIndexSet() const; 00515 00516 private: 00517 array_t<word_t>::const_iterator it; 00518 array_t<word_t>::const_iterator end; 00519 const active_word* active; // points back to the active word 00520 word_t nind; // number of indices 00521 word_t ind[32]; 00522 }; // class ibis::bitvector::indexSet 00523 00530 inline void ibis::bitvector::sloppySize(word_t n) const { 00531 nbits = n-active.nbits; 00532 #if defined(WAH_CHECK_SIZE) 00533 word_t nb = do_cnt(); 00534 if (nb != nbits) { 00535 const_cast<ibis::bitvector*>(this)->adjustSize(0, nbits); 00536 LOGGER(ibis::gVerbose >= 0) 00537 << "bitvector::sloppySize -- adjust the number of bits to " 00538 << n; 00539 } 00540 #endif 00541 } // ibis::bitvector::sloppySize 00542 00552 inline ibis::bitvector::word_t ibis::bitvector::sloppyCount() const { 00553 if (nset > 0) { 00554 return nset; 00555 } 00556 else if (active.nbits == 0 || active.val == 0) { 00557 if (m_vec.empty() || 00558 (m_vec.size() == 1 && 00559 (m_vec[0] == 0 || 00560 (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1)))) 00561 return 0; 00562 else 00563 return 2; 00564 } 00565 else { 00566 return 1; 00567 } 00568 } // ibis::bitvector::sloppyCount 00569 00572 inline bool ibis::bitvector::all0s() const { 00573 if (m_vec.empty()) { 00574 return true; 00575 } 00576 else if (m_vec.size() == 1) { 00577 return (m_vec[0] == 0 || (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1)); 00578 } 00579 else { 00580 return false; 00581 } 00582 } // ibis::bitvector::all0s 00583 00586 inline bool ibis::bitvector::all1s() const { 00587 if (m_vec.size() == 1) { 00588 return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1)); 00589 } 00590 else { 00591 return false; 00592 } 00593 } // ibis::bitvector::all1s 00594 00596 inline ibis::bitvector::word_t 00597 ibis::bitvector::cnt_bits(ibis::bitvector::word_t val) const { 00598 return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS); 00599 } // ibis::bitvector::cnt_bits 00600 00602 inline ibis::bitvector::word_t 00603 ibis::bitvector::cnt_ones(ibis::bitvector::word_t val) const { 00604 // number of 1 bits in a value between 0 and 255 00605 static const word_t table[256] = { 00606 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 00607 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00608 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00609 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00610 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00611 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00612 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00613 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00614 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00615 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00616 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00617 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00618 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00619 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00620 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00621 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; 00622 return table[val&0xFFU] + table[(val>>8)&0xFFU] + 00623 table[(val>>16)&0xFFU] + table[(val>>24)&0xFFU]; 00624 } // ibis::bitvector::cnt_ones 00625 00627 inline ibis::bitvector::word_t ibis::bitvector::size() const throw() { 00628 return ((nbits?nbits:(nbits=do_cnt()))+active.nbits); 00629 } // ibis::bitvector::size 00630 00632 inline ibis::bitvector::word_t ibis::bitvector::cnt() const { 00633 if (nset==0 && !m_vec.empty()) 00634 nbits = do_cnt(); 00635 return (nset+cnt_ones(active.val)); 00636 } // ibis::bitvector::cnt 00637 00639 inline ibis::bitvector::word_t ibis::bitvector::numFillWords() const { 00640 word_t cnt=0; 00641 array_t<ibis::bitvector::word_t>::const_iterator it = m_vec.begin(); 00642 while (it != m_vec.end()) { 00643 cnt += (*it >> ibis::bitvector::MAXBITS); 00644 it++; 00645 } 00646 return cnt; 00647 } // inline word_t ibis::bitvector::numFillWords() const { 00648 00652 inline const ibis::bitvector& 00653 ibis::bitvector::operator=(const ibis::bitvector& bv) { 00654 nbits = bv.nbits; nset = bv.nset; active = bv.active; 00655 m_vec.deepCopy(bv.m_vec); 00656 return *this; 00657 } 00658 00660 inline ibis::bitvector& ibis::bitvector::copy(const ibis::bitvector& bv) { 00661 nbits = bv.nbits; nset = bv.nset; active = bv.active; 00662 m_vec.deepCopy(bv.m_vec); 00663 return *this; 00664 } 00665 00666 inline ibis::bitvector& ibis::bitvector::swap(bitvector& bv) { 00667 word_t tmp; 00668 tmp = bv.nbits; bv.nbits = nbits; nbits = tmp; 00669 tmp = bv.nset; bv.nset = nset; nset = tmp; 00670 active_word atmp = bv.active; 00671 bv.active = active; active = atmp; 00672 m_vec.swap(bv.m_vec); 00673 return *this; 00674 } 00675 00679 inline void ibis::bitvector::append_active() { 00680 // std::cout << "before: active.val = " << std::hex << active.val; 00681 // if (m_vec.size()) 00682 // std::cout << ", m_vec.back() = " << m_vec.back(); 00683 // std::cout << std::dec << std::endl; 00684 if (m_vec.empty()) { 00685 m_vec.push_back(active.val); 00686 } 00687 else if (active.val == 0) {// incoming word is zero 00688 if (m_vec.back() == 0) { 00689 m_vec.back() = (HEADER0 + 2); 00690 } 00691 else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) { 00692 ++ m_vec.back(); 00693 } 00694 else { 00695 m_vec.push_back(active.val); 00696 } 00697 } 00698 else if (active.val == ALLONES) {// incoming word is allones 00699 if (m_vec.back() == ALLONES) { 00700 m_vec.back() = (HEADER1 | 2); 00701 } 00702 else if (m_vec.back() >= HEADER1) { 00703 ++m_vec.back(); 00704 } 00705 else { 00706 m_vec.push_back(active.val); 00707 } 00708 } 00709 else { // incoming word contains a mixture of bits 00710 m_vec.push_back(active.val); 00711 } 00712 nbits += MAXBITS; 00713 active.reset(); 00714 nset = 0; 00715 // std::cout << "after: m_vec.back() = " << std::hex << m_vec.back() 00716 // << std::dec << std::endl; 00717 } // ibis::bitvector::append_active 00718 00722 inline void ibis::bitvector::append_counter(int val, word_t cnt) { 00723 word_t head = 2 + val; 00724 word_t w = (head << SECONDBIT) + cnt; 00725 nbits += cnt*MAXBITS; 00726 if (m_vec.empty()) { 00727 m_vec.push_back(w); 00728 } 00729 else if ((m_vec.back()>>SECONDBIT) == head) { 00730 m_vec.back() += cnt; 00731 } 00732 else if ((m_vec.back()==ALLONES) && head==3) { 00733 m_vec.back() = w + 1; 00734 } 00735 else if ((m_vec.back() == 0) && head==2) { 00736 m_vec.back() = w + 1; 00737 } 00738 else { 00739 m_vec.push_back(w); 00740 } 00741 } // ibis::bitvector::append_counter 00742 00743 // append a single bit 00744 inline ibis::bitvector& ibis::bitvector::operator+=(int b) { 00745 active.append(b); 00746 if (active.is_full()) 00747 append_active(); 00748 return *this; 00749 } // ibis::bitvector::operator+= 00750 00755 inline void ibis::bitvector::appendFill(int val, word_t n) { 00756 if (n == 0) return; 00757 if (active.nbits > 0) { 00758 word_t tmp = (MAXBITS - active.nbits); 00759 if (tmp > n) tmp = n; 00760 active.nbits += tmp; 00761 active.val <<= tmp; 00762 n -= tmp; 00763 if (val != 0) 00764 active.val |= (1U<<tmp) - 1; 00765 if (active.nbits >= MAXBITS) 00766 append_active(); 00767 } 00768 if (n >= MAXBITS) { 00769 word_t cnt = n / MAXBITS; 00770 if (cnt > 1) 00771 append_counter(val, cnt); 00772 else if (val != 0) { 00773 active.val = ALLONES; 00774 append_active(); 00775 } 00776 else { 00777 active.val = 0; 00778 append_active(); 00779 } 00780 n -= cnt * MAXBITS; 00781 } 00782 if (n > 0) { 00783 active.nbits = n; 00784 active.val = val*((1U<<n)-1); 00785 } 00786 } // ibis::bitvector::appendFill 00787 00793 inline void ibis::bitvector::copy_runs(run& it, word_t& nw) { 00794 // deal with the first word -- attach it to the last word in m_vec 00795 if (it.isFill != 0) { 00796 append_counter(it.fillBit, it.nWords); 00797 nw -= it.nWords; 00798 } 00799 else { 00800 active.val = *(it.it); 00801 append_active(); 00802 -- nw; 00803 } 00804 ++ it.it; 00805 it.nWords = 0; 00806 nset = 0; 00807 nbits += MAXBITS * nw; 00808 00809 while (nw > 0) { // copy the words 00810 it.decode(); 00811 if (nw >= it.nWords) { 00812 m_vec.push_back(*(it.it)); 00813 nw -= it.nWords; 00814 it.nWords = 0; 00815 ++ it.it; 00816 } 00817 else { 00818 break; 00819 } 00820 } 00821 nbits -= MAXBITS * nw; 00822 } // ibis::bitvector::copy_runs 00823 00826 inline void ibis::bitvector::copy_runsn(run& it, word_t& nw) { 00827 // deal with the first word -- need to attach it to the last word in m_vec 00828 if (it.isFill != 0) { 00829 append_counter(!it.fillBit, it.nWords); 00830 nw -= it.nWords; 00831 } 00832 else { 00833 active.val = ALLONES ^ *(it.it); 00834 append_active(); 00835 -- nw; 00836 } 00837 ++ it.it; // advance to the next word 00838 it.nWords = 0; 00839 nset = 0; 00840 nbits += MAXBITS * nw; 00841 00842 while (nw > 0) { // copy the words 00843 it.decode(); 00844 if (nw >= it.nWords) { 00845 m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it)); 00846 nw -= it.nWords; 00847 it.nWords = 0; 00848 ++ it.it; 00849 } 00850 else { 00851 break; 00852 } 00853 } 00854 nbits -= MAXBITS * nw; 00855 } // ibis::bitvector::copy_runsn 00856 00860 inline void ibis::bitvector::copy_fill 00861 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it) { 00862 if (it.fillBit == 0) { 00863 while (it.nWords > 3) { 00864 *jt = 0; 00865 jt[1] = 0; 00866 jt[2] = 0; 00867 jt[3] = 0; 00868 jt += 4; 00869 it.nWords -= 4; 00870 } 00871 if (it.nWords == 1) { 00872 *jt = 0; ++jt; 00873 } 00874 else if (it.nWords == 2) { 00875 *jt = 0; jt[1] = 0; jt += 2; 00876 } 00877 else if (it.nWords == 3) { 00878 *jt = 0; jt[1] = 0; jt[2] = 0; jt += 3; 00879 } 00880 } 00881 else { 00882 while (it.nWords > 3) { 00883 *jt = ALLONES; 00884 jt[1] = ALLONES; 00885 jt[2] = ALLONES; 00886 jt[3] = ALLONES; 00887 jt += 4; 00888 it.nWords -= 4; 00889 } 00890 if (it.nWords == 1) { 00891 *jt = ALLONES; ++jt; 00892 } 00893 else if (it.nWords == 2) { 00894 *jt = ALLONES; jt[1] = ALLONES; jt += 2; 00895 } 00896 else if (it.nWords == 3) { 00897 *jt = ALLONES; jt[1] = ALLONES; jt[2] = ALLONES; jt += 3; 00898 } 00899 } 00900 it.nWords = 0; 00901 ++ it.it; 00902 } // ibis::bitvector::copy_fill 00903 00908 inline void ibis::bitvector::copy_runs 00909 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) { 00910 while (nw >= it.nWords && nw > 0) { 00911 if (it.isFill != 0) { // copy a fill 00912 const array_t<word_t>::iterator iend = jt + it.nWords; 00913 if (it.fillBit == 0) { 00914 while (jt < iend) { 00915 *jt = 0; 00916 ++ jt; 00917 } 00918 } 00919 else { 00920 while (jt < iend) { 00921 *jt = ALLONES; 00922 ++ jt; 00923 } 00924 } 00925 nw -= it.nWords; 00926 } 00927 else { // copy a single word 00928 *jt = *(it.it); 00929 ++ jt; 00930 -- nw; 00931 } 00932 ++ it.it; // advance to the next word 00933 if (nw > 0) { 00934 it.decode(); 00935 } 00936 else { 00937 it.nWords = 0; 00938 return; 00939 } 00940 } 00941 } // ibis::bitvector::copy_runs 00942 00945 inline void ibis::bitvector::copy_runsn 00946 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) { 00947 while (nw >= it.nWords) { 00948 if (it.isFill != 0) { // a fill 00949 const array_t<word_t>::iterator iend = jt + it.nWords; 00950 if (it.fillBit == 0) { 00951 while (jt < iend) { 00952 *jt = ALLONES; 00953 ++ jt; 00954 } 00955 } 00956 else { 00957 while (jt < iend) { 00958 *jt = 0; 00959 ++ jt; 00960 } 00961 } 00962 nw -= it.nWords; 00963 } 00964 else { // a literal word 00965 *jt = *(it.it) ^ ALLONES; 00966 ++ jt; 00967 -- nw; 00968 } 00969 ++ it.it; // advance to the next word 00970 if (nw > 0) { 00971 it.decode(); 00972 } 00973 else { 00974 it.nWords = 0; 00975 return; 00976 } 00977 } 00978 } // ibis::bitvector::copy_runsn 00979 00980 inline ibis::bitvector::iterator ibis::bitvector::begin() { 00981 iterator it; 00982 it.it = m_vec.begin(); 00983 it.vec = &m_vec; 00984 it.bitv = this; 00985 it.active = &active; 00986 it.decodeWord(); 00987 return it; 00988 } // ibis::bitvector::begin 00989 00990 inline ibis::bitvector::iterator ibis::bitvector::end() { 00991 iterator it; 00992 it.ind = 0; 00993 it.compressed = 0; 00994 it.nbits = 0; 00995 it.literalvalue = 0; 00996 it.fillbit = 0; 00997 it.it = m_vec.end() + 1; 00998 it.vec = &m_vec; 00999 it.bitv = this; 01000 it.active = &active; 01001 return it; 01002 } // ibis::bitvector::end 01003 01004 inline ibis::bitvector::const_iterator ibis::bitvector::begin() const { 01005 const_iterator it; 01006 it.it = m_vec.begin(); 01007 it.begin = m_vec.begin(); 01008 it.end = m_vec.end(); 01009 it.active = &active; 01010 it.decodeWord(); 01011 return it; 01012 } // ibis::bitvector::begin 01013 01014 // dereference -- no error checking 01015 inline bool ibis::bitvector::iterator::operator*() const { 01016 #if defined(DEBUG) && DEBUG + 0 > 1 01017 if (vec==0 || it<vec->begin() || it>vec->end()) 01018 throw "bitvector::const_iterator not initialized correctly."; 01019 #endif 01020 if (compressed != 0) 01021 return (fillbit != 0); 01022 else 01023 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind))); 01024 } // ibis::bitvector::iterator::operator* 01025 01026 // comparison only based on the iterator 01027 inline int ibis::bitvector::iterator::operator!= 01028 (const ibis::bitvector::const_iterator& rhs) const throw () { 01029 return (it != rhs.it); 01030 } 01031 inline int ibis::bitvector::iterator::operator== 01032 (const ibis::bitvector::const_iterator& rhs) const throw () { 01033 return (it == rhs.it); 01034 } 01035 inline int ibis::bitvector::iterator::operator!= 01036 (const ibis::bitvector::iterator& rhs) const throw () { 01037 return (it != rhs.it); 01038 } 01039 inline int ibis::bitvector::iterator::operator== 01040 (const ibis::bitvector::iterator& rhs) const throw () { 01041 return (it == rhs.it); 01042 } 01043 01044 // increment by one 01045 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator++() { 01046 #if defined(DEBUG) && DEBUG + 0 > 1 01047 if (vec==0 || it<vec->begin() || it>vec->end()) 01048 throw "bitvector::iterator not formed correctly."; 01049 #endif 01050 if (ind+1<nbits) {++ind;} 01051 else {++ it; decodeWord();} 01052 return *this; 01053 } 01054 01055 // decrement by one 01056 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator--() { 01057 #if defined(DEBUG) && DEBUG + 0 > 1 01058 if (vec==0 || it<vec->begin() || it>vec->end()+1) 01059 throw "bitvector::iterator not formed correctly."; 01060 #endif 01061 if (ind) -- ind; 01062 else { 01063 if (it <= vec->end()) -- it; 01064 else if (active->nbits) it = vec->end(); 01065 else it = vec->end() - 1; 01066 decodeWord(); 01067 if (nbits) ind = nbits - 1; 01068 } 01069 return *this; 01070 } 01071 01072 inline ibis::bitvector::const_iterator ibis::bitvector::end() const { 01073 const_iterator it; 01074 it.ind = 0; 01075 it.compressed = 0; 01076 it.nbits = 0; 01077 it.literalvalue = 0; 01078 it.fillbit = 0; 01079 it.it = m_vec.end() + 1; 01080 it.begin = m_vec.begin(); 01081 it.end = m_vec.end(); 01082 it.active = &active; 01083 return it; 01084 } // ibis::bitvector::end() 01085 01086 // dereference -- no error checking 01087 inline bool ibis::bitvector::const_iterator::operator*() const { 01088 #if defined(DEBUG) && DEBUG + 0 > 1 01089 if (it==0 || end==0 || it>end || nbits<=ind) 01090 throw "bitvector::const_iterator not initialized correctly."; 01091 #endif 01092 if (compressed != 0) 01093 return (fillbit != 0); 01094 else 01095 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind))); 01096 } 01097 01098 // comparison only based on the iterator 01099 inline int ibis::bitvector::const_iterator::operator!= 01100 (const ibis::bitvector::const_iterator& rhs) const throw (){ 01101 return (it != rhs.it); 01102 } 01103 inline int ibis::bitvector::const_iterator::operator== 01104 (const ibis::bitvector::const_iterator& rhs) const throw () { 01105 return (it == rhs.it); 01106 } 01107 01108 // increment by one 01109 inline ibis::bitvector::const_iterator& 01110 ibis::bitvector::const_iterator::operator++() { 01111 #if defined(DEBUG) && DEBUG + 0 > 1 01112 if (it==0 || end==0 || it>end) 01113 throw "bitvector::const_iterator not formed correctly."; 01114 #endif 01115 if (ind+1<nbits) {++ind;} 01116 else {++ it; decodeWord();} 01117 return *this; 01118 } 01119 01120 // decrement by one 01121 inline ibis::bitvector::const_iterator& 01122 ibis::bitvector::const_iterator::operator--() { 01123 #if defined(DEBUG) && DEBUG + 0 > 1 01124 if (it==0 || end==0 || it>end) 01125 throw "bitvector::const_iterator not formed correctly."; 01126 #endif 01127 if (ind) -- ind; 01128 else { 01129 if (it <= end) -- it; 01130 else if (active->nbits) it = end; 01131 else it = end - 1; 01132 decodeWord(); 01133 if (nbits) ind = nbits - 1; 01134 } 01135 return *this; 01136 } 01137 01138 inline ibis::bitvector::indexSet ibis::bitvector::firstIndexSet() const { 01139 indexSet is; 01140 if (m_vec.end() > m_vec.begin()) { 01141 is.it = m_vec.begin() - 1; 01142 is.end = m_vec.end(); 01143 } 01144 else { 01145 is.it = 0; 01146 is.end = 0; 01147 } 01148 is.active = &active; 01149 is.ind[0] = static_cast<word_t>(-1); 01150 is.nind = 0; 01151 ++is; 01152 return is; 01153 } // ibis::bitvector::firstIndexSet; 01154 01158 inline double ibis::bitvector::randomSize(word_t nb, word_t nc) { 01159 double sz = 0.0; 01160 if (nb > 0 && nb >= nc && nb > MAXBITS) { 01161 const double den = static_cast<double>(nc) / 01162 static_cast<double>(nb); 01163 const word_t nw = nb / MAXBITS; 01164 sz = 3.0 + nw - nw * (pow(1.0-den, static_cast<int>(2*MAXBITS)) 01165 + pow(den, static_cast<int>(2*MAXBITS))); 01166 } 01167 return sz*sizeof(word_t); 01168 } // ibis::bitvector::randomSize 01169 01174 inline double ibis::bitvector::markovSize(word_t nb, word_t nc, double f) { 01175 double sz = 0.0; 01176 if (nb > 0 && nb >= nc && nb > MAXBITS) { 01177 const double den = static_cast<double>(nc) / 01178 static_cast<double>(nb); 01179 const word_t nw = nb / MAXBITS; 01180 if ((den <= 0.5 && f > 1.00001) || (den > 0.5 && (1.0-den)*f > den)) 01181 sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f), 01182 static_cast<int>(2*MAXBITS-3)) + 01183 den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3))); 01184 else 01185 sz = (pow(1.0-den, static_cast<int>(2*MAXBITS)) + 01186 pow(den, static_cast<int>(2*MAXBITS))); 01187 sz = 3.0 + nw * (1.0 - sz); 01188 } 01189 return sz*sizeof(word_t); 01190 } // ibis::bitvector::markovSize 01191 01193 inline void ibis::bitvector::turnOnRawBit(const word_t ind) { 01194 if (ind < nbits) { // in regular words 01195 m_vec[ind / MAXBITS] |= (1 << (SECONDBIT - (ind % MAXBITS))); 01196 nset = 0; // don't track nset 01197 } 01198 else { // assume to be in the active word 01199 active.val |= (1 << (active.nbits - (ind - nbits) - 1)); 01200 #if defined(DEBUG) 01201 if (ind >= nbits + active.nbits || 01202 active.val >= (1U << active.nbits)) { 01203 LOGGER(ibis::gVerbose >= 0) 01204 << "bitvector::turnOnRawBit(" << ind 01205 << ") found a bad active word"; 01206 } 01207 #endif 01208 } 01209 } // ibis::bitvector::turnOnRawBit 01210 01211 std::ostream& operator<<(std::ostream&, const ibis::bitvector&); 01212 #endif // __BITVECTOR_H
![]() |