00001 // $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Lawrence Berkeley National Laboratory 00004 // Copyright 2000-2011 the Regents of the University of California 00005 #ifndef BITVECTOR_H 00006 #define BITVECTOR_H 00007 00008 00009 00010 #include "array_t.h" // alternative to std::vector 00011 00012 #include <iomanip> // setw 00013 00014 #if defined(_MSC_VER) && defined(_WIN32) 00015 //disable warnings on extern before template instantiation 00016 #pragma warning (disable : 4231) 00017 #endif 00018 #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)) && defined(CXX_USE_DLL) && !defined(DLL_EXPORT) 00019 extern template class FASTBIT_CXX_DLLSPEC ibis::array_t<uint32_t>; 00020 #endif 00021 00022 00064 class FASTBIT_CXX_DLLSPEC ibis::bitvector { 00065 public: 00066 typedef uint32_t word_t; 00067 00069 ~bitvector() {clear();}; 00070 // constructors of bitvector class 00071 bitvector(); 00072 bitvector(const bitvector& bv); 00073 explicit bitvector(const array_t<word_t>& arr); 00074 explicit bitvector(const char* file); 00075 00076 inline const bitvector& operator=(const bitvector& bv); 00077 inline bitvector& copy(const bitvector& bv); 00078 inline bitvector& swap(bitvector& bv); 00079 00080 // use bv to replace part of the existing value, match the ith bit with 00081 // the first of bv, return reference to self 00082 //bitvector& copy(const word_t i, const bitvector& bv); 00083 void setBit(const word_t i, int val); 00084 inline void turnOnRawBit(const word_t i); 00086 void erase(word_t i, word_t j); 00087 00088 void set(int val, word_t n); 00089 void clear(); 00090 00091 bitvector& operator+=(const bitvector& bv); 00092 inline bitvector& operator+=(int b); 00093 void appendWord(word_t w); 00094 inline void appendFill(int val, word_t n); 00095 00097 int operator==(const bitvector& rhs) const; 00098 00100 void flip(); 00102 void operator&=(const bitvector& rhs); 00105 bitvector* operator&(const bitvector&) const; 00107 void operator|=(const bitvector& rhs); 00109 bitvector* operator|(const bitvector&) const; 00111 void operator^=(const bitvector& rhs); 00113 bitvector* operator^(const bitvector&) const; 00115 void operator-=(const bitvector& rhs); 00118 bitvector* operator-(const bitvector&) const; 00119 00120 void subset(const bitvector& mask, bitvector& res) const; 00121 word_t count(const bitvector& mask) const; 00122 00123 // I/O functions. 00126 void read(const char *fn); 00128 void write(const char *fn) const; 00130 void write(int fdes) const; 00134 void write(array_t<word_t>& arr) const; 00135 00136 void compress(); 00137 void decompress(); 00138 word_t compressible() const; 00139 00140 inline word_t size() const throw(); 00141 inline void sloppySize(word_t n) const; 00142 inline word_t cnt() const; 00143 inline word_t numFillWords() const; 00145 uint32_t bytes() const throw() { 00146 return (m_vec.size()*sizeof(word_t) + sizeof(bitvector)); 00147 }; 00151 uint32_t getSerialSize() const throw() { 00152 return (m_vec.size() + 1 + (active.nbits>0)) * sizeof(word_t); 00153 }; 00155 static word_t bitsPerLiteral() {return MAXBITS;} 00156 inline static double randomSize(word_t nb, word_t nc); 00157 inline static double markovSize(word_t nb, word_t nc, double f); 00158 static double clusteringFactor(word_t nb, word_t nc, word_t sz); 00159 00160 void adjustSize(word_t nv, word_t nt); 00161 void reserve(unsigned nb, unsigned nc, double cf=0.0); 00162 00165 bool empty() const {return all0s() && active.val == 0;} 00166 00167 std::ostream& print(std::ostream &) const; 00168 00170 class iterator; 00171 inline iterator end(); 00172 inline iterator begin(); 00173 00175 class const_iterator; 00176 inline const_iterator end() const; 00177 inline const_iterator begin() const; 00178 00180 class indexSet; 00181 inline indexSet firstIndexSet() const; 00182 00183 // give accesses to some friends 00184 friend class indexSet; 00185 friend class iterator; 00186 friend class const_iterator; 00187 00188 protected: 00189 bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);} 00190 inline bool all0s() const; 00191 inline bool all1s() const; 00192 00193 private: 00194 // static members, i.e., constants to be used internally 00195 static const unsigned MAXBITS; 00196 static const unsigned SECONDBIT; 00197 static const word_t FILLBIT; 00198 static const word_t HEADER0; 00199 static const word_t HEADER1; 00200 static const word_t ALLONES; 00201 static const word_t MAXCNT; 00202 00209 struct active_word { 00210 word_t val; // the value 00211 word_t nbits; // total number of bits 00212 00213 active_word() : val(0), nbits(0) {}; 00214 void reset() {val = 0; nbits = 0;}; 00215 int is_full() const {return (nbits >= MAXBITS);}; 00216 void append(int b) { // append a single bit 00217 val <<= 1; nbits ++; val += b; 00218 }; 00219 }; // struct active_word 00220 00223 struct run { 00224 int isFill; 00225 int fillBit; 00226 word_t nWords; 00227 array_t<word_t>::const_iterator it; 00228 run() : isFill(0), fillBit(0), nWords(0), it(0) {}; 00229 void decode() { 00230 fillBit = (*it > HEADER1); 00231 if (*it > ALLONES) { 00232 nWords = (*it & MAXCNT); 00233 isFill = 1; 00234 } 00235 else { 00236 nWords = 1; 00237 isFill = 0; 00238 } 00239 }; 00242 void operator--() { 00243 if (nWords > 1) {--nWords;} 00244 else {++ it; nWords = 0;} 00245 }; 00248 void operator-=(word_t len) { 00249 while (len > 0) { 00250 if (nWords == 0) decode(); 00251 if (isFill != 0) { 00252 if (nWords > len) {nWords -= len; len = 0;} 00253 else if (nWords == len) {nWords = 0; len = 0; ++ it;} 00254 else {len -= nWords; ++ it; nWords = 0;} 00255 } 00256 else {-- len; ++ it; nWords = 0;} 00257 } 00258 }; 00259 }; 00260 friend struct run; 00261 friend struct active_word; 00262 00263 // member variables of bitvector class 00264 mutable word_t nbits; 00265 mutable word_t nset; 00266 active_word active; 00267 array_t<word_t> m_vec; 00268 00269 // private functions of bitvector class 00270 word_t count_c1(const bitvector& mask) const; 00271 word_t count_c2(const bitvector& mask) const; 00272 // The following three functions all performs or operation, _c2 and _c1 00273 // generate compressed solutions, but _c0, _d1, and _d2 generate 00274 // uncompressed solutions. 00275 // or_c2 assumes there are compressed word in both operands 00276 // or_c1 *this may contain compressed word, but not rhs 00277 // or_c0 assumes both operands are not compressed 00278 // or_d1 *this contains no compressed word and is overwritten with the 00279 // result 00280 // or_d2 both *this and rhs are compressed, but res is not compressed 00281 void or_c2(const bitvector& rhs, bitvector& res) const; 00282 void or_c1(const bitvector& rhs, bitvector& res) const; 00283 void or_c0(const bitvector& rhs); 00284 void or_d1(const bitvector& rhs); 00285 void or_d2(const bitvector& rhs, bitvector& res) const; 00286 void and_c2(const bitvector& rhs, bitvector& res) const; 00287 void and_c1(const bitvector& rhs, bitvector& res) const; 00288 void and_c0(const bitvector& rhs); 00289 void and_d1(const bitvector& rhs); 00290 void and_d2(const bitvector& rhs, bitvector& res) const; 00291 void xor_c2(const bitvector& rhs, bitvector& res) const; 00292 void xor_c1(const bitvector& rhs, bitvector& res) const; 00293 void xor_c0(const bitvector& rhs); 00294 void xor_d1(const bitvector& rhs); 00295 void xor_d2(const bitvector& rhs, bitvector& res) const; 00296 void minus_c2(const bitvector& rhs, bitvector& res) const; 00297 void minus_c1(const bitvector& rhs, bitvector& res) const; 00298 void minus_c1x(const bitvector& rhs, bitvector& res) const; 00299 void minus_c0(const bitvector& rhs); 00300 void minus_d1(const bitvector& rhs); 00301 void minus_d2(const bitvector& rhs, bitvector& res) const; 00302 inline void copy_runs(run& it, word_t& nw); // copy nw words 00303 inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate 00304 inline void copy_fill(array_t<word_t>::iterator& jt, run& it); 00305 inline void copy_runs(array_t<word_t>::iterator& jt, run& it, 00306 word_t& nw); 00307 inline void copy_runsn(array_t<word_t>::iterator& jt, run& it, 00308 word_t& nw); 00309 void decompress(array_t<word_t>& tmp) const; 00310 void copy_comp(array_t<word_t>& tmp) const; 00311 inline void append_active(); 00312 inline void append_counter(int val, word_t cnt); 00313 inline word_t cnt_ones(word_t) const; // number of ones in a word 00314 inline word_t cnt_bits(word_t) const; // number of bits in a word 00317 word_t do_cnt() const throw (); 00318 }; // end class bitvector 00319 00329 class FASTBIT_CXX_DLLSPEC ibis::bitvector::indexSet { 00330 public: 00331 // let the compiler define most of the canonical functions 00332 00333 // allow bitvector::firstIndexSet() to access private member variables 00334 friend indexSet ibis::bitvector::firstIndexSet() const; 00335 00336 //int operator!=(const indexSet& rhs) const {return (it != rhs.it);}; 00337 bool isRange() const {return (nind>=ibis::bitvector::MAXBITS);} 00338 const word_t* indices() const {return ind;}; 00339 word_t nIndices() const {return nind;} 00340 const word_t& currentWord() const {return *it;} 00341 indexSet& operator++(); 00342 00343 private: 00344 array_t<word_t>::const_iterator it; 00345 array_t<word_t>::const_iterator end; 00346 const active_word* active; // points back to the active word 00347 word_t nind; // number of indices 00348 word_t ind[32]; 00349 }; // class ibis::bitvector::indexSet 00350 00352 class ibis::bitvector::const_iterator { 00353 public: 00354 const_iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), 00355 fillbit(0), active(0) {} 00356 00357 const_iterator(const const_iterator& r) 00358 : compressed(r.compressed), ind(r.ind), nbits(r.nbits), 00359 literalvalue(r.literalvalue), fillbit(r.fillbit), active(r.active), 00360 end(r.end), begin(r.begin), it(r.it) {}; 00361 const_iterator& operator=(const const_iterator& r) { 00362 compressed = r.compressed; ind = r.ind; nbits = r.nbits; 00363 literalvalue = r.literalvalue; fillbit = r.fillbit; active = r.active; 00364 end = r.end; begin = r.begin; it = r.it; 00365 return *this;} 00366 00367 inline bool operator*() const; 00368 inline int operator!=(const const_iterator& rhs) const throw (); 00369 inline int operator==(const const_iterator& rhs) const throw (); 00370 00371 inline const_iterator& operator++(); 00372 inline const_iterator& operator--(); 00373 const_iterator& operator+=(int incr); 00374 00375 private: 00376 ibis::bitvector::word_t compressed; 00377 ibis::bitvector::word_t ind; 00378 ibis::bitvector::word_t nbits; 00379 ibis::bitvector::word_t literalvalue; 00380 int fillbit; 00381 const active_word* active; 00382 array_t<word_t>::const_iterator end; 00383 array_t<word_t>::const_iterator begin; 00384 array_t<word_t>::const_iterator it; 00385 00386 void decodeWord(); 00387 00388 // give three functions of bitvector access to private variables 00389 friend void ibis::bitvector::erase(word_t i, word_t j); 00390 friend const_iterator ibis::bitvector::begin() const; 00391 friend const_iterator ibis::bitvector::end() const; 00392 friend class ibis::bitvector::iterator; 00393 }; // end class ibis::bitvector::const_iterator 00394 00402 class ibis::bitvector::iterator { 00403 public: 00404 iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), fillbit(0), 00405 bitv(0), active(0), vec(0) {} 00406 iterator(const iterator& r) 00407 : compressed(r.compressed), ind(r.ind), nbits(r.nbits), 00408 literalvalue(r.literalvalue), fillbit(r.fillbit), bitv(r.bitv), 00409 active(r.active), vec(r.vec), it(r.it) {}; 00410 const iterator& operator=(const iterator& r) { 00411 compressed = r.compressed; ind = r.ind; nbits = r.nbits; 00412 literalvalue = r.literalvalue; fillbit = r.fillbit; bitv = r.bitv; 00413 active = r.active; vec = r.vec; it = r.it; return *this;} 00414 00415 inline bool operator*() const; // still can not modify this 00416 inline int operator!=(const const_iterator& rhs) const throw (); 00417 inline int operator==(const const_iterator& rhs) const throw (); 00418 inline int operator!=(const iterator& rhs) const throw (); 00419 inline int operator==(const iterator& rhs) const throw (); 00420 00421 inline iterator& operator++(); 00422 inline iterator& operator--(); 00423 iterator& operator+=(int incr); 00424 const iterator& operator=(int val); 00425 00426 private: 00427 ibis::bitvector::word_t compressed; 00428 ibis::bitvector::word_t ind; 00429 ibis::bitvector::word_t nbits; 00430 ibis::bitvector::word_t literalvalue; 00431 int fillbit; 00432 ibis::bitvector* bitv; 00433 active_word* active; 00434 array_t<word_t>* vec; 00435 array_t<word_t>::iterator it; 00436 00437 void decodeWord(); 00438 00439 friend iterator ibis::bitvector::begin(); 00440 friend iterator ibis::bitvector::end(); 00441 }; // end class ibis::bitvector::iterator 00442 00449 inline void ibis::bitvector::sloppySize(word_t n) const { 00450 nbits = n-active.nbits; 00451 #if defined(WAH_CHECK_SIZE) 00452 word_t nb = do_cnt(); 00453 if (nb != nbits) { 00454 const_cast<ibis::bitvector*>(this)->adjustSize(0, nbits); 00455 LOGGER(ibis::gVerbose >= 0) 00456 << "bitvector::sloppySize -- adjust the number of bits to " 00457 << n; 00458 } 00459 #endif 00460 } // ibis::bitvector::sloppySize 00461 00463 inline bool ibis::bitvector::all0s() const { 00464 if (m_vec.empty()) { 00465 return true; 00466 } 00467 else if (m_vec.size() == 1) { 00468 return (m_vec[0] == 0 || (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1)); 00469 } 00470 else { 00471 return false; 00472 } 00473 } // ibis::bitvector::all0s 00474 00476 inline bool ibis::bitvector::all1s() const { 00477 if (m_vec.size() == 1) { 00478 return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1)); 00479 } 00480 else { 00481 return false; 00482 } 00483 } // ibis::bitvector::all1s 00484 00486 inline ibis::bitvector::word_t 00487 ibis::bitvector::cnt_bits(ibis::bitvector::word_t val) const { 00488 return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS); 00489 } // ibis::bitvector::cnt_bits 00490 00492 inline ibis::bitvector::word_t 00493 ibis::bitvector::cnt_ones(ibis::bitvector::word_t val) const { 00494 // number of 1 bits in a value between 0 and 255 00495 static const word_t table[256] = { 00496 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 00497 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00498 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00499 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00500 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00501 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00502 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00503 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00504 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00505 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00506 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00507 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00508 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00509 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00510 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00511 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; 00512 return table[val&0xFFU] + table[(val>>8)&0xFFU] + 00513 table[(val>>16)&0xFFU] + table[(val>>24)&0xFFU]; 00514 } // ibis::bitvector::cnt_ones 00515 00517 inline ibis::bitvector::word_t ibis::bitvector::size() const throw() { 00518 return ((nbits?nbits:(nbits=do_cnt()))+active.nbits); 00519 } // ibis::bitvector::size 00520 00522 inline ibis::bitvector::word_t ibis::bitvector::cnt() const { 00523 if (nset==0 && !m_vec.empty()) 00524 nbits = do_cnt(); 00525 return (nset+cnt_ones(active.val)); 00526 } // ibis::bitvector::cnt 00527 00529 inline ibis::bitvector::word_t ibis::bitvector::numFillWords() const { 00530 word_t cnt=0; 00531 array_t<ibis::bitvector::word_t>::const_iterator it = m_vec.begin(); 00532 while (it != m_vec.end()) { 00533 cnt += (*it >> ibis::bitvector::MAXBITS); 00534 it++; 00535 } 00536 return cnt; 00537 } // inline word_t ibis::bitvector::numFillWords() const { 00538 00542 inline const ibis::bitvector& 00543 ibis::bitvector::operator=(const ibis::bitvector& bv) { 00544 nbits = bv.nbits; nset = bv.nset; active = bv.active; 00545 m_vec.deepCopy(bv.m_vec); 00546 return *this; 00547 } 00548 00550 inline ibis::bitvector& ibis::bitvector::copy(const ibis::bitvector& bv) { 00551 nbits = bv.nbits; nset = bv.nset; active = bv.active; 00552 m_vec.deepCopy(bv.m_vec); 00553 return *this; 00554 } 00555 00556 inline ibis::bitvector& ibis::bitvector::swap(bitvector& bv) { 00557 word_t tmp; 00558 tmp = bv.nbits; bv.nbits = nbits; nbits = tmp; 00559 tmp = bv.nset; bv.nset = nset; nset = tmp; 00560 active_word atmp = bv.active; 00561 bv.active = active; active = atmp; 00562 m_vec.swap(bv.m_vec); 00563 return *this; 00564 } 00565 00569 inline void ibis::bitvector::append_active() { 00570 // std::cout << "before: active.val = " << std::hex << active.val; 00571 // if (m_vec.size()) 00572 // std::cout << ", m_vec.back() = " << m_vec.back(); 00573 // std::cout << std::dec << std::endl; 00574 if (m_vec.empty()) { 00575 m_vec.push_back(active.val); 00576 } 00577 else if (active.val == 0) {// incoming word is zero 00578 if (m_vec.back() == 0) { 00579 m_vec.back() = (HEADER0 + 2); 00580 } 00581 else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) { 00582 ++ m_vec.back(); 00583 } 00584 else { 00585 m_vec.push_back(active.val); 00586 } 00587 } 00588 else if (active.val == ALLONES) {// incoming word is allones 00589 if (m_vec.back() == ALLONES) { 00590 m_vec.back() = (HEADER1 | 2); 00591 } 00592 else if (m_vec.back() >= HEADER1) { 00593 ++m_vec.back(); 00594 } 00595 else { 00596 m_vec.push_back(active.val); 00597 } 00598 } 00599 else { // incoming word contains a mixture of bits 00600 m_vec.push_back(active.val); 00601 } 00602 nbits += MAXBITS; 00603 active.reset(); 00604 nset = 0; 00605 // std::cout << "after: m_vec.back() = " << std::hex << m_vec.back() 00606 // << std::dec << std::endl; 00607 } // ibis::bitvector::append_active 00608 00612 inline void ibis::bitvector::append_counter(int val, word_t cnt) { 00613 word_t head = 2 + val; 00614 word_t w = (head << SECONDBIT) + cnt; 00615 nbits += cnt*MAXBITS; 00616 if (m_vec.empty()) { 00617 m_vec.push_back(w); 00618 } 00619 else if ((m_vec.back()>>SECONDBIT) == head) { 00620 m_vec.back() += cnt; 00621 } 00622 else if ((m_vec.back()==ALLONES) && head==3) { 00623 m_vec.back() = w + 1; 00624 } 00625 else if ((m_vec.back() == 0) && head==2) { 00626 m_vec.back() = w + 1; 00627 } 00628 else { 00629 m_vec.push_back(w); 00630 } 00631 } // ibis::bitvector::append_counter 00632 00633 // append a single bit 00634 inline ibis::bitvector& ibis::bitvector::operator+=(int b) { 00635 active.append(b); 00636 if (active.is_full()) 00637 append_active(); 00638 return *this; 00639 } // ibis::bitvector::operator+= 00640 00645 inline void ibis::bitvector::appendFill(int val, word_t n) { 00646 if (n == 0) return; 00647 if (active.nbits > 0) { 00648 word_t tmp = (MAXBITS - active.nbits); 00649 if (tmp > n) tmp = n; 00650 active.nbits += tmp; 00651 active.val <<= tmp; 00652 n -= tmp; 00653 if (val != 0) 00654 active.val |= (1U<<tmp) - 1; 00655 if (active.nbits >= MAXBITS) 00656 append_active(); 00657 } 00658 if (n >= MAXBITS) { 00659 word_t cnt = n / MAXBITS; 00660 if (cnt > 1) 00661 append_counter(val, cnt); 00662 else if (val != 0) { 00663 active.val = ALLONES; 00664 append_active(); 00665 } 00666 else { 00667 active.val = 0; 00668 append_active(); 00669 } 00670 n -= cnt * MAXBITS; 00671 } 00672 if (n > 0) { 00673 active.nbits = n; 00674 active.val = val*((1U<<n)-1); 00675 } 00676 } // ibis::bitvector::appendFill 00677 00683 inline void ibis::bitvector::copy_runs(run& it, word_t& nw) { 00684 #if defined(DEBUG) && DEBUG + 0 > 1 00685 LOGGER(ibis::gVerbose >= 0) 00686 << "bitvector::copy_runs(0x" 00687 << std::hex << std::setw(8) << std::setfill('0') 00688 << *(it.it) << ", " << std::dec << nw 00689 << ") ... it.nWords = " << it.nWords; 00690 #endif 00691 // deal with the first word -- attach it to the last word in m_vec 00692 if (it.isFill != 0) { 00693 append_counter(it.fillBit, it.nWords); 00694 nw -= it.nWords; 00695 } 00696 else { 00697 active.val = *(it.it); 00698 append_active(); 00699 -- nw; 00700 } 00701 ++ it.it; 00702 it.nWords = 0; 00703 nset = 0; 00704 nbits += MAXBITS * nw; 00705 00706 while (nw > 0) { // copy the words 00707 it.decode(); 00708 if (nw >= it.nWords) { 00709 m_vec.push_back(*(it.it)); 00710 nw -= it.nWords; 00711 it.nWords = 0; 00712 ++ it.it; 00713 } 00714 else { 00715 break; 00716 } 00717 } 00718 nbits -= MAXBITS * nw; 00719 } // ibis::bitvector::copy_runs 00720 00723 inline void ibis::bitvector::copy_runsn(run& it, word_t& nw) { 00724 #if defined(DEBUG) && DEBUG + 0 > 1 00725 LOGGER(ibis::gVerbose >= 0) 00726 << "bitvector::copy_runsn(0x" 00727 << std::hex << std::setw(8) << std::setfill('0') 00728 << *(it.it) << ", " << std::dec << nw 00729 << ") ... it.nWords = " << it.nWords; 00730 #endif 00731 // deal with the first word -- need to attach it to the last word in m_vec 00732 if (it.isFill != 0) { 00733 append_counter(!it.fillBit, it.nWords); 00734 nw -= it.nWords; 00735 } 00736 else { 00737 active.val = ALLONES ^ *(it.it); 00738 append_active(); 00739 -- nw; 00740 } 00741 ++ it.it; // advance to the next word 00742 it.nWords = 0; 00743 nset = 0; 00744 nbits += MAXBITS * nw; 00745 00746 while (nw > 0) { // copy the words 00747 it.decode(); 00748 if (nw >= it.nWords) { 00749 m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it)); 00750 nw -= it.nWords; 00751 it.nWords = 0; 00752 ++ it.it; 00753 } 00754 else { 00755 break; 00756 } 00757 } 00758 nbits -= MAXBITS * nw; 00759 } // ibis::bitvector::copy_runsn 00760 00764 inline void ibis::bitvector::copy_fill 00765 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it) { 00766 if (it.fillBit == 0) { 00767 while (it.nWords > 3) { 00768 *jt = 0; 00769 jt[1] = 0; 00770 jt[2] = 0; 00771 jt[3] = 0; 00772 jt += 4; 00773 it.nWords -= 4; 00774 } 00775 if (it.nWords == 1) { 00776 *jt = 0; ++jt; 00777 } 00778 else if (it.nWords == 2) { 00779 *jt = 0; jt[1] = 0; jt += 2; 00780 } 00781 else if (it.nWords == 3) { 00782 *jt = 0; jt[1] = 0; jt[2] = 0; jt += 3; 00783 } 00784 } 00785 else { 00786 while (it.nWords > 3) { 00787 *jt = ALLONES; 00788 jt[1] = ALLONES; 00789 jt[2] = ALLONES; 00790 jt[3] = ALLONES; 00791 jt += 4; 00792 it.nWords -= 4; 00793 } 00794 if (it.nWords == 1) { 00795 *jt = ALLONES; ++jt; 00796 } 00797 else if (it.nWords == 2) { 00798 *jt = ALLONES; jt[1] = ALLONES; jt += 2; 00799 } 00800 else if (it.nWords == 3) { 00801 *jt = ALLONES; jt[1] = ALLONES; jt[2] = ALLONES; jt += 3; 00802 } 00803 } 00804 it.nWords = 0; 00805 ++ it.it; 00806 } // ibis::bitvector::copy_fill 00807 00812 inline void ibis::bitvector::copy_runs 00813 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) { 00814 while (nw >= it.nWords && nw > 0) { 00815 if (it.isFill != 0) { // copy a fill 00816 const array_t<word_t>::iterator iend = jt + it.nWords; 00817 if (it.fillBit == 0) { 00818 while (jt < iend) { 00819 *jt = 0; 00820 ++ jt; 00821 } 00822 } 00823 else { 00824 while (jt < iend) { 00825 *jt = ALLONES; 00826 ++ jt; 00827 } 00828 } 00829 nw -= it.nWords; 00830 } 00831 else { // copy a single word 00832 *jt = *(it.it); 00833 ++ jt; 00834 -- nw; 00835 } 00836 ++ it.it; // advance to the next word 00837 if (nw > 0) { 00838 it.decode(); 00839 } 00840 else { 00841 it.nWords = 0; 00842 return; 00843 } 00844 } 00845 } // ibis::bitvector::copy_runs 00846 00849 inline void ibis::bitvector::copy_runsn 00850 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) { 00851 while (nw >= it.nWords) { 00852 if (it.isFill != 0) { // a fill 00853 const array_t<word_t>::iterator iend = jt + it.nWords; 00854 if (it.fillBit == 0) { 00855 while (jt < iend) { 00856 *jt = ALLONES; 00857 ++ jt; 00858 } 00859 } 00860 else { 00861 while (jt < iend) { 00862 *jt = 0; 00863 ++ jt; 00864 } 00865 } 00866 nw -= it.nWords; 00867 } 00868 else { // a literal word 00869 *jt = *(it.it) ^ ALLONES; 00870 ++ jt; 00871 -- nw; 00872 } 00873 ++ it.it; // advance to the next word 00874 if (nw > 0) { 00875 it.decode(); 00876 } 00877 else { 00878 it.nWords = 0; 00879 return; 00880 } 00881 } 00882 } // ibis::bitvector::copy_runsn 00883 00884 inline ibis::bitvector::iterator ibis::bitvector::begin() { 00885 iterator it; 00886 it.it = m_vec.begin(); 00887 it.vec = &m_vec; 00888 it.bitv = this; 00889 it.active = &active; 00890 it.decodeWord(); 00891 return it; 00892 } // ibis::bitvector::begin 00893 00894 inline ibis::bitvector::iterator ibis::bitvector::end() { 00895 iterator it; 00896 it.ind = 0; 00897 it.compressed = 0; 00898 it.nbits = 0; 00899 it.literalvalue = 0; 00900 it.fillbit = 0; 00901 it.it = m_vec.end() + 1; 00902 it.vec = &m_vec; 00903 it.bitv = this; 00904 it.active = &active; 00905 return it; 00906 } // ibis::bitvector::end 00907 00908 inline ibis::bitvector::const_iterator ibis::bitvector::begin() const { 00909 const_iterator it; 00910 it.it = m_vec.begin(); 00911 it.begin = m_vec.begin(); 00912 it.end = m_vec.end(); 00913 it.active = &active; 00914 it.decodeWord(); 00915 return it; 00916 } // ibis::bitvector::begin 00917 00918 // dereference -- no error checking 00919 inline bool ibis::bitvector::iterator::operator*() const { 00920 #if defined(DEBUG) && DEBUG + 0 > 1 00921 if (vec==0 || it<vec->begin() || it>vec->end()) 00922 throw "bitvector::const_iterator not initialized correctly."; 00923 #endif 00924 if (compressed != 0) 00925 return (fillbit != 0); 00926 else 00927 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind))); 00928 } // ibis::bitvector::iterator::operator* 00929 00930 // comparison only based on the iterator 00931 inline int ibis::bitvector::iterator::operator!= 00932 (const ibis::bitvector::const_iterator& rhs) const throw () { 00933 return (it != rhs.it); 00934 } 00935 inline int ibis::bitvector::iterator::operator== 00936 (const ibis::bitvector::const_iterator& rhs) const throw () { 00937 return (it == rhs.it); 00938 } 00939 inline int ibis::bitvector::iterator::operator!= 00940 (const ibis::bitvector::iterator& rhs) const throw () { 00941 return (it != rhs.it); 00942 } 00943 inline int ibis::bitvector::iterator::operator== 00944 (const ibis::bitvector::iterator& rhs) const throw () { 00945 return (it == rhs.it); 00946 } 00947 00948 // increment by one 00949 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator++() { 00950 #if defined(DEBUG) && DEBUG + 0 > 1 00951 if (vec==0 || it<vec->begin() || it>vec->end()) 00952 throw "bitvector::iterator not formed correctly."; 00953 #endif 00954 if (ind+1<nbits) {++ind;} 00955 else {++ it; decodeWord();} 00956 return *this; 00957 } 00958 00959 // decrement by one 00960 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator--() { 00961 #if defined(DEBUG) && DEBUG + 0 > 1 00962 if (vec==0 || it<vec->begin() || it>vec->end()+1) 00963 throw "bitvector::iterator not formed correctly."; 00964 #endif 00965 if (ind) -- ind; 00966 else { 00967 if (it <= vec->end()) -- it; 00968 else if (active->nbits) it = vec->end(); 00969 else it = vec->end() - 1; 00970 decodeWord(); 00971 if (nbits) ind = nbits - 1; 00972 } 00973 return *this; 00974 } 00975 00976 inline ibis::bitvector::const_iterator ibis::bitvector::end() const { 00977 const_iterator it; 00978 it.ind = 0; 00979 it.compressed = 0; 00980 it.nbits = 0; 00981 it.literalvalue = 0; 00982 it.fillbit = 0; 00983 it.it = m_vec.end() + 1; 00984 it.begin = m_vec.begin(); 00985 it.end = m_vec.end(); 00986 it.active = &active; 00987 return it; 00988 } // ibis::bitvector::end() 00989 00990 // dereference -- no error checking 00991 inline bool ibis::bitvector::const_iterator::operator*() const { 00992 #if defined(DEBUG) && DEBUG + 0 > 1 00993 if (it==0 || end==0 || it>end || nbits<=ind) 00994 throw "bitvector::const_iterator not initialized correctly."; 00995 #endif 00996 if (compressed != 0) 00997 return (fillbit != 0); 00998 else 00999 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind))); 01000 } 01001 01002 // comparison only based on the iterator 01003 inline int ibis::bitvector::const_iterator::operator!= 01004 (const ibis::bitvector::const_iterator& rhs) const throw (){ 01005 return (it != rhs.it); 01006 } 01007 inline int ibis::bitvector::const_iterator::operator== 01008 (const ibis::bitvector::const_iterator& rhs) const throw () { 01009 return (it == rhs.it); 01010 } 01011 01012 // increment by one 01013 inline ibis::bitvector::const_iterator& 01014 ibis::bitvector::const_iterator::operator++() { 01015 #if defined(DEBUG) && DEBUG + 0 > 1 01016 if (it==0 || end==0 || it>end) 01017 throw "bitvector::const_iterator not formed correctly."; 01018 #endif 01019 if (ind+1<nbits) {++ind;} 01020 else {++ it; decodeWord();} 01021 return *this; 01022 } 01023 01024 // decrement by one 01025 inline ibis::bitvector::const_iterator& 01026 ibis::bitvector::const_iterator::operator--() { 01027 #if defined(DEBUG) && DEBUG + 0 > 1 01028 if (it==0 || end==0 || it>end) 01029 throw "bitvector::const_iterator not formed correctly."; 01030 #endif 01031 if (ind) -- ind; 01032 else { 01033 if (it <= end) -- it; 01034 else if (active->nbits) it = end; 01035 else it = end - 1; 01036 decodeWord(); 01037 if (nbits) ind = nbits - 1; 01038 } 01039 return *this; 01040 } 01041 01042 inline ibis::bitvector::indexSet ibis::bitvector::firstIndexSet() const { 01043 indexSet is; 01044 if (m_vec.end() > m_vec.begin()) { 01045 is.it = m_vec.begin() - 1; 01046 is.end = m_vec.end(); 01047 } 01048 else { 01049 is.it = 0; 01050 is.end = 0; 01051 } 01052 is.active = &active; 01053 is.ind[0] = static_cast<word_t>(-1); 01054 is.nind = 0; 01055 ++is; 01056 return is; 01057 } // end ibis::bitvector::firstIndexSet; 01058 01062 inline double ibis::bitvector::randomSize(word_t nb, word_t nc) { 01063 double sz = 0.0; 01064 if (nb > 0 && nb >= nc && nb > MAXBITS) { 01065 const double den = static_cast<double>(nc) / 01066 static_cast<double>(nb); 01067 const word_t nw = nb / MAXBITS; 01068 sz = 3.0 + nw - nw * (pow(1.0-den, static_cast<int>(2*MAXBITS)) 01069 + pow(den, static_cast<int>(2*MAXBITS))); 01070 } 01071 return sz*sizeof(word_t); 01072 } // end ibis::bitvector::randomSize 01073 01078 inline double ibis::bitvector::markovSize(word_t nb, word_t nc, double f) { 01079 double sz = 0.0; 01080 if (nb > 0 && nb >= nc && nb > MAXBITS) { 01081 const double den = static_cast<double>(nc) / 01082 static_cast<double>(nb); 01083 const word_t nw = nb / MAXBITS; 01084 if ((den <= 0.5 && f > 1.00001) || (den > 0.5 && (1.0-den)*f > den)) 01085 sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f), 01086 static_cast<int>(2*MAXBITS-3)) + 01087 den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3))); 01088 else 01089 sz = (pow(1.0-den, static_cast<int>(2*MAXBITS)) + 01090 pow(den, static_cast<int>(2*MAXBITS))); 01091 sz = 3.0 + nw * (1.0 - sz); 01092 } 01093 return sz*sizeof(word_t); 01094 } // end ibis::bitvector::markovSize 01095 01097 inline void ibis::bitvector::turnOnRawBit(const word_t ind) { 01098 if (ind < nbits) { // in regular words 01099 m_vec[ind / MAXBITS] |= (1 << (SECONDBIT - (ind % MAXBITS))); 01100 nset = 0; // don't track nset 01101 } 01102 else { // assume to be in the active word 01103 active.val |= (1 << (active.nbits - (ind - nbits) - 1)); 01104 #if defined(DEBUG) 01105 if (ind >= nbits + active.nbits || 01106 active.val >= (1U << active.nbits)) { 01107 LOGGER(ibis::gVerbose >= 0) 01108 << "bitvector::turnOnRawBit(" << ind 01109 << ") found a bad active word"; 01110 } 01111 #endif 01112 } 01113 } // ibis::bitvector::turnOnRawBit 01114 01115 std::ostream& operator<<(std::ostream&, const ibis::bitvector&); 01116 #endif // __BITVECTOR_H
![]() |