00001 // $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Lawrence Berkeley National Laboratory 00004 // Copyright 2000-2011 the Regents of the University of California 00005 #ifndef BITVECTOR64_H 00006 #define BITVECTOR64_H 00007 00008 00009 00010 #include "array_t.h" // alternative to std::vector 00011 00012 #include <stdio.h> // sprintf 00013 #include <limits.h> // INT_MAX 00014 #include <iomanip> // setw() 00015 00016 00056 class ibis::bitvector64 { 00057 public: 00058 typedef uint64_t word_t; 00059 00060 // constructors of bitvector64 class 00061 bitvector64() : nbits(0), nset(0), active(), m_vec() {}; 00062 ~bitvector64() {clear();}; 00063 bitvector64(const bitvector64& bv) : nbits(bv.nbits), nset(bv.nset), 00064 active(bv.active), m_vec(bv.m_vec) {}; 00065 bitvector64(const array_t<word_t>& arr); 00066 bitvector64(const char* file); 00067 inline bitvector64& operator=(const bitvector64& bv); 00068 inline bitvector64& copy(const bitvector64& bv); 00069 inline bitvector64& swap(bitvector64& bv); 00070 // use bv to replace part of the existing value, match the ith bit with 00071 // the first of bv, return reference to self 00072 //bitvector64& copy(const word_t i, const bitvector64& bv); 00075 void setBit(const word_t i, int val); 00077 void erase(word_t i, word_t j); 00078 00081 void set(int val, word_t n); 00083 void clear() {nbits = 0; nset = 0; active.reset(); m_vec.clear();} 00084 00085 bitvector64& operator+=(const bitvector64& bv); 00086 inline bitvector64& operator+=(int b); 00087 void appendWord(word_t w); 00088 00089 inline void appendFill(int val, word_t n); 00090 00092 int operator==(const bitvector64& rhs) const; 00093 00095 void flip(); 00097 void operator&=(const bitvector64& rhs); 00099 bitvector64* operator&(const bitvector64&) const; 00101 void operator|=(const bitvector64& rhs); 00103 bitvector64* operator|(const bitvector64&) const; 00105 void operator^=(const bitvector64& rhs); 00107 bitvector64* operator^(const bitvector64&) const; 00109 void operator-=(const bitvector64& rhs); 00112 bitvector64* operator-(const bitvector64&) const; 00113 00114 // I/O functions. 00117 void read(const char *fn); 00119 void write(const char *fn) const; 00120 void write(FILE* fptr) const; 00122 void write(array_t<word_t>& arr) const; 00123 00124 void compress(); 00125 void decompress(); 00126 00127 word_t compressible() const; 00128 00130 word_t cnt() const { 00131 if (nset==0) do_cnt(); return (nset+cnt_ones(active.val)); 00132 }; 00133 00135 word_t size() const throw() {return (nbits+active.nbits);}; 00137 inline word_t numFillWords() const; 00139 word_t bytes() const throw() { 00140 return (m_vec.size()*sizeof(word_t) + sizeof(bitvector64)); 00141 }; 00144 word_t getSerialSize() const throw() { 00145 return (m_vec.size() + 1 + (active.nbits>0)); 00146 }; 00148 static unsigned bitsPerLiteral() {return MAXBITS;} 00152 inline static double randomSize(word_t nb, word_t nc); 00157 inline static double markovSize(word_t nb, word_t nc, double f); 00160 static double clusteringFactor(word_t nb, word_t nc, word_t nw); 00161 00167 void adjustSize(word_t nv, word_t nt); 00168 std::ostream& print(std::ostream &) const; 00169 00171 class iterator; 00172 inline iterator end(); 00173 inline iterator begin(); 00174 00176 class const_iterator; 00177 inline const_iterator end() const; 00178 inline const_iterator begin() const; 00179 00181 class indexSet; 00182 inline indexSet firstIndexSet() const; 00183 00184 // give accesses to some friends 00185 friend class indexSet; 00186 friend class iterator; 00187 friend class const_iterator; 00188 00189 protected: 00190 bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);} 00191 inline bool all0s() const; 00192 inline bool all1s() const; 00193 00194 private: 00195 // static members, i.e., constants to be used internally 00196 static const unsigned MAXBITS; 00197 static const unsigned SECONDBIT; 00198 static const word_t FILLBIT; 00199 static const word_t HEADER0; 00200 static const word_t HEADER1; 00201 static const word_t ALLONES; 00202 static const word_t MAXCNT; 00203 00210 struct active_word { 00211 word_t val; // the value 00212 word_t nbits; // total number of bits 00213 00214 active_word() : val(0), nbits(0) {}; 00215 void reset() {val = 0; nbits = 0;}; 00216 int is_full() const {return (nbits >= MAXBITS);}; 00217 void append(int b) { // append a single bit 00218 val <<= 1; nbits ++; val += b; 00219 }; 00220 }; // struct active_word 00221 00224 struct run { 00225 int isFill; 00226 int fillBit; 00227 word_t nWords; 00228 array_t<word_t>::const_iterator it; 00229 run() : isFill(0), fillBit(0), nWords(0), it(0) {}; 00230 void decode() { 00231 fillBit = (*it > HEADER1); 00232 if (*it > ALLONES) { 00233 nWords = (*it & MAXCNT); 00234 isFill = 1; 00235 } 00236 else { 00237 nWords = 1; 00238 isFill = 0; 00239 } 00240 }; 00243 void operator--() { 00244 if (nWords > 1) {--nWords;} 00245 else {++ it; nWords = 0;} 00246 }; 00249 void operator-=(word_t len) { 00250 while (len > 0) { 00251 if (nWords == 0) decode(); 00252 if (isFill) { 00253 if (nWords > len) {nWords -= len; len = 0;} 00254 else if (nWords == len) {nWords = 0; len = 0; ++ it;} 00255 else {len -= nWords; ++ it; nWords = 0;} 00256 } 00257 else {-- len; ++ it; nWords = 0;} 00258 } 00259 }; 00260 }; 00261 friend struct run; 00262 friend struct active_word; 00263 00264 // member variables of bitvector64 class 00265 word_t nbits; 00266 mutable word_t nset; 00267 active_word active; 00268 array_t<word_t> m_vec; 00269 00270 // private functions of bitvector64 class 00271 // The following three functions all performs or operation, _c2 and _c1 00272 // generate compressed solutions, but _c0, _d1, and _d2 generate 00273 // uncompressed solutions. 00274 // or_c2 assumes there are compressed word in both operands 00275 // or_c1 *this may contain compressed word, but not rhs 00276 // or_c0 assumes both operands are not compressed 00277 // or_d1 *this contains no compressed word and is overwritten with the 00278 // result 00279 // or_d2 both *this and rhs are compressed, but res will not be compressed 00280 void or_c2(const bitvector64& rhs, bitvector64& res) const; 00281 void or_c1(const bitvector64& rhs, bitvector64& res) const; 00282 void or_c0(const bitvector64& rhs); 00283 void or_d1(const bitvector64& rhs); 00284 void or_d2(const bitvector64& rhs, bitvector64& res) const; 00285 void and_c2(const bitvector64& rhs, bitvector64& res) const; 00286 void and_c1(const bitvector64& rhs, bitvector64& res) const; 00287 void and_c0(const bitvector64& rhs); 00288 void and_d1(const bitvector64& rhs); 00289 void and_d2(const bitvector64& rhs, bitvector64& res) const; 00290 void xor_c2(const bitvector64& rhs, bitvector64& res) const; 00291 void xor_c1(const bitvector64& rhs, bitvector64& res) const; 00292 void xor_c0(const bitvector64& rhs); 00293 void xor_d1(const bitvector64& rhs); 00294 void xor_d2(const bitvector64& rhs, bitvector64& res) const; 00295 void minus_c2(const bitvector64& rhs, bitvector64& res) const; 00296 void minus_c1(const bitvector64& rhs, bitvector64& res) const; 00297 void minus_c1x(const bitvector64& rhs, bitvector64& res) const; 00298 void minus_c0(const bitvector64& rhs); 00299 void minus_d1(const bitvector64& rhs); 00300 void minus_d2(const bitvector64& rhs, bitvector64& res) const; 00301 inline void copy_runs(run& it, word_t& nw); // copy nw words 00302 inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate 00303 inline void copy_fill(array_t<word_t>::iterator& jt, run& it); 00304 inline void copy_runs(array_t<word_t>::iterator& jt, run& it, 00305 word_t& nw); 00306 inline void copy_runsn(array_t<word_t>::iterator& jt, run& it, 00307 word_t& nw); 00308 void decompress(array_t<word_t>& tmp) const; 00309 void copy_comp(array_t<word_t>& tmp) const; 00310 inline void append_active(); 00311 inline void append_counter(int val, word_t cnt); 00312 inline unsigned cnt_ones(word_t) const; // number of 1s in a literal word 00313 inline word_t cnt_bits(word_t) const; // number of bits in a word 00314 word_t do_cnt() const; // count the number of bits and number of ones 00315 }; // end class bitvector64 00316 00326 class ibis::bitvector64::indexSet { 00327 public: 00328 // let the compiler define most of the canonical functions 00329 00330 // allow bitvector64::firstIndexSet() to access private variables of this 00331 // class 00332 friend indexSet ibis::bitvector64::firstIndexSet() const; 00333 00334 //int operator!=(const indexSet& rhs) const {return (it != rhs.it);}; 00335 bool isRange() const {return (nind>=ibis::bitvector64::MAXBITS);} 00336 const word_t* indices() const {return ind;}; 00337 word_t nIndices() const {return nind;} 00338 indexSet& operator++(); 00339 00340 private: 00341 array_t<word_t>::const_iterator it; 00342 array_t<word_t>::const_iterator end; 00343 const active_word* active; // points back to the active word 00344 word_t nind; // number of indices 00345 word_t ind[64]; 00346 }; // class ibis::bitvector64::indexSet 00347 00349 class ibis::bitvector64::const_iterator { 00350 public: 00351 inline bool operator*() const; 00352 inline int operator!=(const const_iterator& rhs) const throw (); 00353 inline int operator==(const const_iterator& rhs) const throw (); 00354 00355 inline const_iterator& operator++(); 00356 inline const_iterator& operator--(); 00357 const_iterator& operator+=(int64_t incr); 00358 00359 private: 00360 ibis::bitvector64::word_t compressed; 00361 ibis::bitvector64::word_t ind; 00362 ibis::bitvector64::word_t nbits; 00363 ibis::bitvector64::word_t literalvalue; 00364 int fillbit; 00365 const active_word* active; 00366 array_t<word_t>::const_iterator end; 00367 array_t<word_t>::const_iterator begin; 00368 array_t<word_t>::const_iterator it; 00369 00370 void decodeWord(); 00371 00372 // give three functions of bitvector64 access to private variables 00373 friend void ibis::bitvector64::erase(word_t i, word_t j); 00374 friend const_iterator ibis::bitvector64::begin() const; 00375 friend const_iterator ibis::bitvector64::end() const; 00376 friend class ibis::bitvector64::iterator; 00377 }; // end class ibis::bitvector64::const_iterator 00378 00386 class ibis::bitvector64::iterator { 00387 public: 00388 inline bool operator*() const; // still can not modify this 00389 inline int operator!=(const const_iterator& rhs) const throw (); 00390 inline int operator==(const const_iterator& rhs) const throw (); 00391 inline int operator!=(const iterator& rhs) const throw (); 00392 inline int operator==(const iterator& rhs) const throw (); 00393 00394 inline iterator& operator++(); 00395 inline iterator& operator--(); 00396 iterator& operator+=(int64_t incr); 00397 iterator& operator=(int val); 00398 00399 private: 00400 ibis::bitvector64::word_t compressed; 00401 ibis::bitvector64::word_t ind; 00402 ibis::bitvector64::word_t nbits; 00403 ibis::bitvector64::word_t literalvalue; 00404 int fillbit; 00405 ibis::bitvector64* bitv; 00406 active_word* active; 00407 array_t<word_t>* vec; 00408 array_t<word_t>::iterator it; 00409 00410 void decodeWord(); 00411 00412 friend iterator ibis::bitvector64::begin(); 00413 friend iterator ibis::bitvector64::end(); 00414 }; // end class ibis::bitvector64::iterator 00415 00417 inline bool ibis::bitvector64::all0s() const { 00418 if (m_vec.empty()) { 00419 return true; 00420 } 00421 else if (m_vec.size() == 1) { 00422 return (m_vec[0] == 0 || (m_vec[0] >= HEADER0 && m_vec[0] < HEADER1)); 00423 } 00424 else { 00425 return false; 00426 } 00427 } // ibis::bitvector64::all0s 00428 00430 inline bool ibis::bitvector64::all1s() const { 00431 if (m_vec.size() == 1) { 00432 return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1)); 00433 } 00434 else { 00435 return false; 00436 } 00437 } // ibis::bitvector64::all1s() 00438 00440 inline ibis::bitvector64::word_t 00441 ibis::bitvector64::cnt_bits(ibis::bitvector64::word_t val) const { 00442 return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS); 00443 } 00444 00446 inline unsigned 00447 ibis::bitvector64::cnt_ones(ibis::bitvector64::word_t val) const { 00448 // number of 1 bits in a value between 0 and 255 00449 static const unsigned table[256] = { 00450 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 00451 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00452 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00453 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00454 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00455 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00456 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00457 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00458 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00459 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00460 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00461 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00462 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00463 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00464 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00465 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; 00466 //assert(8 == sizeof(word_t)); 00467 return table[val&0xFF] + table[(val>>8)&0xFF] + 00468 table[(val>>16)&0xFF] + table[(val>>24)&0xFF] + 00469 table[(val>>32)&0xFF] + table[(val>>40)&0xFF] + 00470 table[(val>>48)&0xFF] + table[val>>56]; 00471 } // inline unsigned ibis::bitvector64::cnt_ones(word_t) const 00472 00473 inline ibis::bitvector64::word_t 00474 ibis::bitvector64::numFillWords() const { 00475 word_t cnt=0; 00476 array_t<word_t>::const_iterator it = m_vec.begin(); 00477 while (it != m_vec.end()) { 00478 cnt += (*it >> ibis::bitvector64::MAXBITS); 00479 it++; 00480 } 00481 return cnt; 00482 } // ibis::bitvector64::numFillWords 00483 00484 // The assignment operator. Wanted to use shallow copy for efficiency 00485 // consideration, but SHALLOW copy causes unexpected problem in test 00486 // program bitty.cpp change to deep copy 00487 inline ibis::bitvector64& 00488 ibis::bitvector64::operator=(const ibis::bitvector64& bv) { 00489 nbits = bv.nbits; nset = bv.nset; active = bv.active; 00490 m_vec.deepCopy(bv.m_vec); 00491 return *this; 00492 } 00493 00494 // deep copy 00495 inline ibis::bitvector64& 00496 ibis::bitvector64::copy(const ibis::bitvector64& bv) { 00497 nbits = bv.nbits; nset = bv.nset; active = bv.active; 00498 m_vec.deepCopy(bv.m_vec); 00499 return *this; 00500 } 00501 00502 inline ibis::bitvector64& 00503 ibis::bitvector64::swap(bitvector64& bv) { 00504 word_t tmp; 00505 tmp = bv.nbits; bv.nbits = nbits; nbits = tmp; 00506 tmp = bv.nset; bv.nset = nset; nset = tmp; 00507 active_word atmp = bv.active; 00508 bv.active = active; active = atmp; 00509 m_vec.swap(bv.m_vec); 00510 return *this; 00511 } 00512 00513 // append_active assumes that nbits == MAXBITS and refers to MAXBITS instead 00514 // of nbits 00515 inline void ibis::bitvector64::append_active() { 00516 if (m_vec.empty()) { 00517 m_vec.push_back(active.val); 00518 } 00519 else if (active.val == 0) {// incoming word is zero 00520 if (m_vec.back() == 0) { 00521 m_vec.back() = (HEADER0 + 2); 00522 } 00523 else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) { 00524 ++ m_vec.back(); 00525 } 00526 else { 00527 m_vec.push_back(active.val); 00528 } 00529 } 00530 else if (active.val == ALLONES) {// incoming word is allones 00531 if (m_vec.back() == ALLONES) { 00532 m_vec.back() = (HEADER1 | 2); 00533 } 00534 else if (m_vec.back() >= HEADER1) { 00535 ++m_vec.back(); 00536 } 00537 else { 00538 m_vec.push_back(active.val); 00539 } 00540 } 00541 else { // incoming word contains a mixture of bits 00542 m_vec.push_back(active.val); 00543 } 00544 nbits += MAXBITS; 00545 active.reset(); 00546 nset = 0; 00547 } // void ibis::bitvector64::append_active() 00548 00549 // a private function to append a single counter when the active word is 00550 // empty cnt is assumed to be multiples of MAXBITS (more than MAXBITS) 00551 inline void ibis::bitvector64::append_counter(int val, word_t cnt) { 00552 word_t head = 2 + val; 00553 word_t w = (head << SECONDBIT) + cnt; 00554 nbits += cnt*MAXBITS; 00555 if (m_vec.empty()) { 00556 m_vec.push_back(w); 00557 } 00558 else if ((m_vec.back()>>SECONDBIT) == head) { 00559 m_vec.back() += cnt; 00560 } 00561 else if ((m_vec.back()==ALLONES) && head==3) { 00562 m_vec.back() = w + 1; 00563 } 00564 else if ((m_vec.back() == 0) && head==2) { 00565 m_vec.back() = w + 1; 00566 } 00567 else { 00568 m_vec.push_back(w); 00569 } 00570 } // void append_counter() 00571 00572 // append a single bit 00573 inline ibis::bitvector64& ibis::bitvector64::operator+=(int b) { 00574 active.append(b); 00575 if (active.is_full()) append_active(); 00576 return *this; 00577 } // ibis::bitvector64& ibis::bitvector64::operator+=(int b) 00578 00579 // append n bits of val (n may be arbitrary integer, value must be either 0 00580 // or 1) 00581 inline void ibis::bitvector64::appendFill(int val, 00582 ibis::bitvector64::word_t n) { 00583 if (active.nbits > 0) { 00584 word_t tmp = (MAXBITS - active.nbits); 00585 if (tmp > n) tmp = n; 00586 active.nbits += tmp; 00587 active.val <<= tmp; 00588 n -= tmp; 00589 if (val) 00590 active.val |= (static_cast<word_t>(1)<<tmp) - 1; 00591 if (active.nbits >= MAXBITS) append_active(); 00592 } 00593 if (n >= MAXBITS) { 00594 word_t cnt = n / MAXBITS; 00595 if (cnt > 1) 00596 append_counter(val, cnt); 00597 else if (val) { 00598 active.val = ALLONES; 00599 append_active(); 00600 } 00601 else { 00602 active.val = 0; 00603 append_active(); 00604 } 00605 n -= cnt * MAXBITS; 00606 } 00607 if (n > 0) { 00608 active.nbits = n; 00609 active.val = val*((static_cast<word_t>(1)<<n)-1); 00610 } 00611 } // ibis::bitvector64::appendFill(val, n) 00612 00613 // append nw words starting from 'it' to the current bit vector -- assume 00614 // active is empty 00615 inline void ibis::bitvector64::copy_runs(run& it, word_t& nw) { 00616 #if defined(DEBUG) && DEBUG + 0 > 1 00617 LOGGER(ibis::gVerbose >= 0) 00618 << "bitvector64::copy_runs(0x" 00619 << std::hex << std::setw(8) << std::setfill('0') 00620 << *(it.it) << ", " << std::dec << nw 00621 << ") ... it.nWords = " << it.nWords; 00622 #endif 00623 // deal with the first word -- need to attach it to the last word in m_vec 00624 if (it.isFill) { 00625 append_counter(it.fillBit, it.nWords); 00626 nw -= it.nWords; 00627 } 00628 else { 00629 active.val = *(it.it); 00630 append_active(); 00631 -- nw; 00632 } 00633 ++ it.it; // advance to the next word 00634 //if (nw == 0) {it.nWords = 0; return;} 00635 it.decode(); 00636 nset = 0; 00637 nbits += MAXBITS * nw; 00638 while (nw >= it.nWords && nw > 0) { // only need to copy the word 00639 m_vec.push_back(*(it.it)); 00640 nw -= it.nWords; 00641 ++ it.it; // advance to the next word 00642 it.decode(); 00643 } 00644 nbits -= MAXBITS * nw; 00645 } // ibis::bitvector64::copy_runs 00646 00647 // append nw words starting from it to the current bit vector -- assume 00648 // active is empty 00649 inline void ibis::bitvector64::copy_runsn(run& it, word_t& nw) { 00650 #if defined(DEBUG) && DEBUG + 0 > 1 00651 LOGGER(ibis::gVerbose >= 0) 00652 << "bitvector64::copy_runsn(0x" 00653 << std::hex << std::setw(8) << std::setfill('0') 00654 << *(it.it) << ", " << std::dec << nw 00655 << ") ... it.nWords = " << it.nWords; 00656 #endif 00657 // deal with the first word -- need to attach it to the last word in m_vec 00658 if (it.isFill) { 00659 append_counter(!it.fillBit, it.nWords); 00660 nw -= it.nWords; 00661 } 00662 else { 00663 active.val = ALLONES ^ *(it.it); 00664 append_active(); 00665 -- nw; 00666 } 00667 ++ it.it; // advance to the next word 00668 //if (nw == 0) {it.nWords = 0; return;} 00669 it.decode(); 00670 nset = 0; 00671 nbits += MAXBITS * nw; 00672 while (nw >= it.nWords && nw > 0) { // only need to copy the word 00673 m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it)); 00674 nw -= it.nWords; 00675 ++ it.it; // advance to the next word 00676 it.decode(); 00677 } 00678 nbits -= MAXBITS * nw; 00679 } // ibis::bitvector64::copy_runsn 00680 00681 // copy the fill in "run it" as literal words 00682 inline void ibis::bitvector64::copy_fill 00683 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it) { 00684 const array_t<word_t>::iterator iend = jt + it.nWords; 00685 if (it.fillBit == 0) { 00686 switch (it.nWords) { 00687 case 0: break; 00688 case 1: 00689 *jt = 0; ++jt; break; 00690 case 2: 00691 *jt = 0; ++jt; *jt = 0; ++jt; break; 00692 case 3: 00693 *jt = 0; ++jt; *jt = 0; ++jt; *jt = 0; ++jt; break; 00694 default: 00695 while (jt < iend) { 00696 *jt = 0; 00697 ++ jt; 00698 } 00699 break; 00700 } 00701 } 00702 else { 00703 switch (it.nWords) { 00704 case 0: break; 00705 case 1: 00706 *jt = ALLONES; ++jt; break; 00707 case 2: 00708 *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; break; 00709 case 3: 00710 *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; 00711 break; 00712 default: 00713 while (jt < iend) { 00714 *jt = ALLONES; 00715 ++ jt; 00716 } 00717 break; 00718 } 00719 } 00720 it.nWords = 0; 00721 ++ it.it; 00722 } // ibis::bitvector64::copy_fill 00723 00724 // copy the next nw words (nw X MAXBITS bits) starting with run it 00725 // to an array_t as uncompressed words 00726 inline void ibis::bitvector64::copy_runs 00727 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) { 00728 while (nw >= it.nWords && nw > 0) { // only need to copy the word 00729 if (it.isFill) { 00730 const array_t<word_t>::iterator iend = jt + it.nWords; 00731 if (it.fillBit == 0) { 00732 while (jt < iend) { 00733 *jt = 0; 00734 ++ jt; 00735 } 00736 } 00737 else { 00738 while (jt < iend) { 00739 *jt = ALLONES; 00740 ++ jt; 00741 } 00742 } 00743 nw -= it.nWords; 00744 } 00745 else { 00746 *jt = *(it.it); 00747 ++ jt; 00748 -- nw; 00749 } 00750 ++ it.it; // advance to the next word 00751 it.decode(); 00752 } 00753 } // ibis::bitvector64::copy_runs 00754 00755 // copy the complements of the next nw words (nw X MAXBITS bits) 00756 // starting with "run it" as uncompressed words 00757 inline void ibis::bitvector64::copy_runsn 00758 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) { 00759 while (nw >= it.nWords && nw > 0) { // only need to copy the word 00760 if (it.isFill) { 00761 const array_t<word_t>::iterator iend = jt + it.nWords; 00762 if (it.fillBit == 0) { 00763 while (jt < iend) { 00764 *jt = ALLONES; 00765 ++ jt; 00766 } 00767 } 00768 else { 00769 while (jt < iend) { 00770 *jt = 0; 00771 ++ jt; 00772 } 00773 } 00774 nw -= it.nWords; 00775 } 00776 else { 00777 *jt = *(it.it) ^ ALLONES; 00778 ++ jt; 00779 -- nw; 00780 } 00781 ++ it.it; // advance to the next word 00782 it.decode(); 00783 } 00784 } // ibis::bitvector64::copy_runsn 00785 00786 inline ibis::bitvector64::iterator ibis::bitvector64::begin() { 00787 iterator it; 00788 it.it = m_vec.begin(); 00789 it.vec = &m_vec; 00790 it.bitv = this; 00791 it.active = &active; 00792 it.decodeWord(); 00793 return it; 00794 } // ibis::bitvector64::begin() 00795 00796 inline ibis::bitvector64::iterator ibis::bitvector64::end() { 00797 iterator it; 00798 it.ind = 0; 00799 it.compressed = 0; 00800 it.nbits = 0; 00801 it.literalvalue = 0; 00802 it.fillbit = 0; 00803 it.it = m_vec.end() + 1; 00804 it.vec = &m_vec; 00805 it.bitv = this; 00806 it.active = &active; 00807 return it; 00808 } // ibis::bitvector64::end() 00809 00810 inline ibis::bitvector64::const_iterator ibis::bitvector64::begin() const { 00811 const_iterator it; 00812 it.it = m_vec.begin(); 00813 it.begin = m_vec.begin(); 00814 it.end = m_vec.end(); 00815 it.active = &active; 00816 it.decodeWord(); 00817 return it; 00818 } // ibis::bitvector64::begin() 00819 00820 // dereference -- no error checking 00821 inline bool ibis::bitvector64::iterator::operator*() const { 00822 #if defined(DEBUG) && DEBUG + 0 > 1 00823 if (vec==0 || it<vec->begin() || it>vec->end()) 00824 throw "bitvector64::const_iterator not initialized correctly."; 00825 #endif 00826 if (compressed != 0) 00827 return (fillbit != 0); 00828 else 00829 return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind))); 00830 } 00831 00832 // comparison only based on the iterator 00833 inline int ibis::bitvector64::iterator::operator!= 00834 (const ibis::bitvector64::const_iterator& rhs) const throw () { 00835 return (it != rhs.it); 00836 } 00837 inline int ibis::bitvector64::iterator::operator== 00838 (const ibis::bitvector64::const_iterator& rhs) const throw () { 00839 return (it == rhs.it); 00840 } 00841 inline int ibis::bitvector64::iterator::operator!= 00842 (const ibis::bitvector64::iterator& rhs) const throw () { 00843 return (it != rhs.it); 00844 } 00845 inline int ibis::bitvector64::iterator::operator== 00846 (const ibis::bitvector64::iterator& rhs) const throw () { 00847 return (it == rhs.it); 00848 } 00849 00850 // increment by one 00851 inline ibis::bitvector64::iterator& ibis::bitvector64::iterator::operator++() { 00852 #if defined(DEBUG) && DEBUG + 0 > 1 00853 if (vec==0 || it<vec->begin() || it>vec->end()) 00854 throw "bitvector64::iterator not formed correctly."; 00855 #endif 00856 if (ind+1<nbits) {++ind;} 00857 else {++ it; decodeWord();} 00858 return *this; 00859 } 00860 00861 // decrement by one 00862 inline ibis::bitvector64::iterator& ibis::bitvector64::iterator::operator--() { 00863 #if defined(DEBUG) && DEBUG + 0 > 1 00864 if (vec==0 || it<vec->begin() || it>vec->end()+1) 00865 throw "bitvector64::iterator not formed correctly."; 00866 #endif 00867 if (ind) -- ind; 00868 else { 00869 if (it <= vec->end()) -- it; 00870 else if (active->nbits) it = vec->end(); 00871 else it = vec->end() - 1; 00872 decodeWord(); 00873 if (nbits) ind = nbits - 1; 00874 } 00875 return *this; 00876 } 00877 00878 inline ibis::bitvector64::const_iterator ibis::bitvector64::end() const { 00879 const_iterator it; 00880 it.ind = 0; 00881 it.compressed = 0; 00882 it.nbits = 0; 00883 it.literalvalue = 0; 00884 it.fillbit = 0; 00885 it.it = m_vec.end() + 1; 00886 it.begin = m_vec.begin(); 00887 it.end = m_vec.end(); 00888 it.active = &active; 00889 return it; 00890 } // ibis::bitvector64::end() 00891 00892 // dereference -- no error checking 00893 inline bool ibis::bitvector64::const_iterator::operator*() const { 00894 #if defined(DEBUG) && DEBUG + 0 > 1 00895 if (it==0 || end==0 || it>end || nbits<=ind) 00896 throw "bitvector64::const_iterator not initialized correctly."; 00897 #endif 00898 if (compressed != 0) 00899 return (fillbit != 0); 00900 else 00901 return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind))); 00902 } 00903 00904 // comparison only based on the iterator 00905 inline int ibis::bitvector64::const_iterator::operator!= 00906 (const ibis::bitvector64::const_iterator& rhs) const throw (){ 00907 return (it != rhs.it); 00908 } 00909 inline int ibis::bitvector64::const_iterator::operator== 00910 (const ibis::bitvector64::const_iterator& rhs) const throw () { 00911 return (it == rhs.it); 00912 } 00913 00914 // increment by one 00915 inline ibis::bitvector64::const_iterator& 00916 ibis::bitvector64::const_iterator::operator++() { 00917 #if defined(DEBUG) && DEBUG + 0 > 1 00918 if (it==0 || end==0 || it>end) 00919 throw "bitvector64::const_iterator not formed correctly."; 00920 #endif 00921 if (ind+1<nbits) {++ind;} 00922 else {++ it; decodeWord();} 00923 return *this; 00924 } 00925 00926 // decrement by one 00927 inline ibis::bitvector64::const_iterator& 00928 ibis::bitvector64::const_iterator::operator--() { 00929 #if defined(DEBUG) && DEBUG + 0 > 1 00930 if (it==0 || end==0 || it>end) 00931 throw "bitvector64::const_iterator not formed correctly."; 00932 #endif 00933 if (ind) -- ind; 00934 else { 00935 if (it <= end) -- it; 00936 else if (active->nbits) it = end; 00937 else it = end - 1; 00938 decodeWord(); 00939 if (nbits) ind = nbits - 1; 00940 } 00941 return *this; 00942 } 00943 00944 inline ibis::bitvector64::indexSet ibis::bitvector64::firstIndexSet() const { 00945 indexSet is; 00946 if (m_vec.end() > m_vec.begin()) { 00947 is.it = m_vec.begin() - 1; 00948 is.end = m_vec.end(); 00949 } 00950 else { 00951 is.it = 0; 00952 is.end = 0; 00953 } 00954 is.active = &active; 00955 is.ind[0] = static_cast<word_t>(-1); 00956 is.nind = 0; 00957 ++is; 00958 return is; 00959 } // end ibis::bitvector64::firstIndexSet; 00960 00961 inline double ibis::bitvector64::randomSize(word_t nb, word_t nc) { 00962 double sz = 0.0; 00963 if (nb > 0 && nb >= nc) { 00964 const double den = static_cast<double>(nc) / 00965 static_cast<double>(nb); 00966 const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0); 00967 sz = 3.0 + nw - nw * 00968 (pow(1.0-den, static_cast<int>(2*SECONDBIT)) + 00969 pow(den, static_cast<int>(2*SECONDBIT))); 00970 } 00971 return sz*sizeof(word_t); 00972 } // end ibis::bitvector64::randomSize 00973 00974 inline double 00975 ibis::bitvector64::markovSize(word_t nb, word_t nc, double f) { 00976 double sz = 0.0; 00977 if (nb > 0 && nb >= nc) { 00978 const double den = static_cast<double>(nc) / 00979 static_cast<double>(nb); 00980 const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0); 00981 if ((den <= 0.5 && f > 1.0) || (den > 0.5 && (1.0-den)*f > den)) 00982 sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f), 00983 static_cast<int>(2*MAXBITS-3)) + 00984 den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3))); 00985 else 00986 sz = (pow(1.0-den, static_cast<int>(2*SECONDBIT)) + 00987 pow(den, static_cast<int>(2*SECONDBIT))); 00988 sz = 3.0 + nw * (1.0 - sz); 00989 } 00990 return sz*sizeof(word_t); 00991 } // end ibis::bitvector64::markovSize 00992 00993 std::ostream& operator<<(std::ostream&, const ibis::bitvector64&); 00994 #endif // __BITVECTOR64_H
![]() |