bitvector.h
Go to the documentation of this file.
00001 // $Id$
00002 //      Author: John Wu <John.Wu at ACM.org>
00003 //              Lawrence Berkeley National Laboratory
00004 //      Copyright 2000-2011 the Regents of the University of California
00005 #ifndef BITVECTOR_H
00006 #define BITVECTOR_H
00007 
00008 
00009 
00010 #include "array_t.h"    // alternative to std::vector
00011 
00012 #include <iomanip>      // setw
00013 
00014 #if defined(_MSC_VER) && defined(_WIN32)
00015 //disable warnings on extern before template instantiation
00016 #pragma warning (disable : 4231)
00017 #endif
00018 #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)) && defined(CXX_USE_DLL) && !defined(DLL_EXPORT)
00019 extern template class FASTBIT_CXX_DLLSPEC ibis::array_t<uint32_t>;
00020 #endif
00021 
00022 
00064 class FASTBIT_CXX_DLLSPEC ibis::bitvector {
00065 public:
00066     typedef uint32_t word_t;
00067 
00069     ~bitvector() {clear();};
00070     // constructors of bitvector class
00071     bitvector();
00072     bitvector(const bitvector& bv);
00073     explicit bitvector(const array_t<word_t>& arr);
00074     explicit bitvector(const char* file);
00075 
00076     inline const bitvector& operator=(const bitvector& bv);
00077     inline bitvector& copy(const bitvector& bv);
00078     inline bitvector& swap(bitvector& bv);
00079 
00080     // use bv to replace part of the existing value, match the ith bit with
00081     // the first of bv, return reference to self
00082     //bitvector& copy(const word_t i, const bitvector& bv);
00083     void setBit(const word_t i, int val);
00084     inline void turnOnRawBit(const word_t i);
00086     void erase(word_t i, word_t j);
00087 
00088     void set(int val, word_t n);
00089     void clear();
00090 
00091     bitvector& operator+=(const bitvector& bv); 
00092     inline bitvector& operator+=(int b);        
00093     void appendWord(word_t w);          
00094     inline void appendFill(int val, word_t n);
00095 
00097     int operator==(const bitvector& rhs) const;
00098 
00100     void flip();
00102     void operator&=(const bitvector& rhs);
00105     bitvector* operator&(const bitvector&) const;
00107     void operator|=(const bitvector& rhs);
00109     bitvector* operator|(const bitvector&) const;
00111     void operator^=(const bitvector& rhs);
00113     bitvector* operator^(const bitvector&) const;
00115     void operator-=(const bitvector& rhs);
00118     bitvector* operator-(const bitvector&) const;
00119 
00120     void subset(const bitvector& mask, bitvector& res) const;
00121     word_t count(const bitvector& mask) const;
00122 
00123     // I/O functions.
00126     void read(const char *fn);
00128     void write(const char *fn) const;
00130     void write(int fdes) const;
00134     void write(array_t<word_t>& arr) const;
00135 
00136     void compress();    
00137     void decompress();  
00138     word_t compressible() const;
00139 
00140     inline word_t size() const throw();
00141     inline void sloppySize(word_t n) const;
00142     inline word_t cnt() const;
00143     inline word_t numFillWords() const;
00145     uint32_t bytes() const throw() {
00146         return (m_vec.size()*sizeof(word_t) + sizeof(bitvector));
00147     };
00151     uint32_t getSerialSize() const throw() {
00152         return (m_vec.size() + 1 + (active.nbits>0)) * sizeof(word_t);
00153     };
00155     static word_t bitsPerLiteral() {return MAXBITS;}
00156     inline static double randomSize(word_t nb, word_t nc);
00157     inline static double markovSize(word_t nb, word_t nc, double f);
00158     static double clusteringFactor(word_t nb, word_t nc, word_t sz);
00159 
00160     void adjustSize(word_t nv, word_t nt);
00161     void reserve(unsigned nb, unsigned nc, double cf=0.0);
00162 
00165     bool empty() const {return all0s() && active.val == 0;}
00166 
00167     std::ostream& print(std::ostream &) const; 
00168 
00170     class iterator;
00171     inline iterator end();
00172     inline iterator begin();
00173 
00175     class const_iterator;
00176     inline const_iterator end() const;
00177     inline const_iterator begin() const;
00178 
00180     class indexSet;
00181     inline indexSet firstIndexSet() const;
00182 
00183     // give accesses to some friends
00184     friend class indexSet;
00185     friend class iterator;
00186     friend class const_iterator;
00187 
00188 protected:
00189     bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);}
00190     inline bool all0s() const;
00191     inline bool all1s() const;
00192 
00193 private:
00194     // static members, i.e., constants to be used internally
00195     static const unsigned MAXBITS;
00196     static const unsigned SECONDBIT;
00197     static const word_t FILLBIT;
00198     static const word_t HEADER0;
00199     static const word_t HEADER1;
00200     static const word_t ALLONES;
00201     static const word_t MAXCNT;
00202 
00209     struct active_word {
00210         word_t val;     // the value
00211         word_t nbits;   // total number of bits
00212 
00213         active_word() : val(0), nbits(0) {};
00214         void reset() {val = 0; nbits = 0;};
00215         int is_full() const {return (nbits >= MAXBITS);};
00216         void append(int b) { // append a single bit
00217             val <<= 1; nbits ++; val += b;
00218         };
00219     }; // struct active_word
00220 
00223     struct run {
00224         int isFill;
00225         int fillBit;
00226         word_t nWords;
00227         array_t<word_t>::const_iterator it;
00228         run() : isFill(0), fillBit(0), nWords(0), it(0) {};
00229         void decode() { 
00230             fillBit = (*it > HEADER1);
00231             if (*it > ALLONES) {
00232                 nWords = (*it & MAXCNT);
00233                 isFill = 1;
00234             }
00235             else {
00236                 nWords = 1;
00237                 isFill = 0;
00238             }
00239         };
00242         void operator--() {
00243             if (nWords > 1) {--nWords;}
00244             else {++ it; nWords = 0;}
00245         };
00248         void operator-=(word_t len) {
00249             while (len > 0) {
00250                 if (nWords == 0) decode();
00251                 if (isFill != 0) {
00252                     if (nWords > len) {nWords -= len; len = 0;}
00253                     else if (nWords == len) {nWords = 0; len = 0; ++ it;}
00254                     else {len -= nWords; ++ it; nWords = 0;}
00255                 }
00256                 else {-- len; ++ it; nWords = 0;}
00257             }
00258         };
00259     };
00260     friend struct run;
00261     friend struct active_word;
00262 
00263     // member variables of bitvector class
00264     mutable word_t nbits;       
00265     mutable word_t nset;        
00266     active_word active;         
00267     array_t<word_t> m_vec;      
00268 
00269     // private functions of bitvector class
00270     word_t count_c1(const bitvector& mask) const;
00271     word_t count_c2(const bitvector& mask) const;
00272     // The following three functions all performs or operation, _c2 and _c1
00273     // generate compressed solutions, but _c0, _d1, and _d2 generate
00274     // uncompressed solutions.
00275     // or_c2 assumes there are compressed word in both operands
00276     // or_c1 *this may contain compressed word, but not rhs
00277     // or_c0 assumes both operands are not compressed
00278     // or_d1 *this contains no compressed word and is overwritten with the
00279     //       result
00280     // or_d2 both *this and rhs are compressed, but res is not compressed
00281     void or_c2(const bitvector& rhs, bitvector& res) const;
00282     void or_c1(const bitvector& rhs, bitvector& res) const;
00283     void or_c0(const bitvector& rhs);
00284     void or_d1(const bitvector& rhs);
00285     void or_d2(const bitvector& rhs, bitvector& res) const;
00286     void and_c2(const bitvector& rhs, bitvector& res) const;
00287     void and_c1(const bitvector& rhs, bitvector& res) const;
00288     void and_c0(const bitvector& rhs);
00289     void and_d1(const bitvector& rhs);
00290     void and_d2(const bitvector& rhs, bitvector& res) const;
00291     void xor_c2(const bitvector& rhs, bitvector& res) const;
00292     void xor_c1(const bitvector& rhs, bitvector& res) const;
00293     void xor_c0(const bitvector& rhs);
00294     void xor_d1(const bitvector& rhs);
00295     void xor_d2(const bitvector& rhs, bitvector& res) const;
00296     void minus_c2(const bitvector& rhs, bitvector& res) const;
00297     void minus_c1(const bitvector& rhs, bitvector& res) const;
00298     void minus_c1x(const bitvector& rhs, bitvector& res) const;
00299     void minus_c0(const bitvector& rhs);
00300     void minus_d1(const bitvector& rhs);
00301     void minus_d2(const bitvector& rhs, bitvector& res) const;
00302     inline void copy_runs(run& it, word_t& nw); // copy nw words
00303     inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate
00304     inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
00305     inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
00306                           word_t& nw);
00307     inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
00308                            word_t& nw);
00309     void decompress(array_t<word_t>& tmp) const;
00310     void copy_comp(array_t<word_t>& tmp) const;
00311     inline void append_active();
00312     inline void append_counter(int val, word_t cnt);
00313     inline word_t cnt_ones(word_t) const; // number of ones in a word
00314     inline word_t cnt_bits(word_t) const; // number of bits in a word
00317     word_t do_cnt() const throw ();
00318 }; // end class bitvector
00319 
00329 class FASTBIT_CXX_DLLSPEC ibis::bitvector::indexSet {
00330 public:
00331     // let the compiler define most of the canonical functions
00332 
00333     // allow bitvector::firstIndexSet() to access private member variables
00334     friend indexSet ibis::bitvector::firstIndexSet() const;
00335 
00336     //int operator!=(const indexSet& rhs) const {return (it != rhs.it);};
00337     bool isRange() const {return (nind>=ibis::bitvector::MAXBITS);}
00338     const word_t* indices() const {return ind;};
00339     word_t nIndices() const {return nind;}
00340     const word_t& currentWord() const {return *it;}
00341     indexSet& operator++();
00342 
00343 private:
00344     array_t<word_t>::const_iterator it;
00345     array_t<word_t>::const_iterator end;
00346     const active_word* active; // points back to the active word
00347     word_t nind; // number of indices
00348     word_t ind[32];
00349 }; // class ibis::bitvector::indexSet
00350 
00352 class ibis::bitvector::const_iterator {
00353 public:
00354     const_iterator() : compressed(0), ind(0), nbits(0), literalvalue(0),
00355                        fillbit(0), active(0) {}
00356 
00357     const_iterator(const const_iterator& r)
00358         : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
00359           literalvalue(r.literalvalue), fillbit(r.fillbit), active(r.active),
00360           end(r.end), begin(r.begin), it(r.it) {};
00361     const_iterator& operator=(const const_iterator& r) {
00362         compressed = r.compressed; ind = r.ind; nbits = r.nbits;
00363         literalvalue = r.literalvalue; fillbit = r.fillbit; active = r.active;
00364         end = r.end; begin = r.begin; it = r.it;
00365         return *this;}
00366 
00367     inline bool operator*() const;
00368     inline int operator!=(const const_iterator& rhs) const throw ();
00369     inline int operator==(const const_iterator& rhs) const throw ();
00370 
00371     inline const_iterator& operator++();
00372     inline const_iterator& operator--();
00373     const_iterator& operator+=(int incr);
00374 
00375 private:
00376     ibis::bitvector::word_t     compressed;
00377     ibis::bitvector::word_t     ind;
00378     ibis::bitvector::word_t     nbits;
00379     ibis::bitvector::word_t     literalvalue;
00380     int                         fillbit;
00381     const active_word*          active;
00382     array_t<word_t>::const_iterator end;
00383     array_t<word_t>::const_iterator begin;
00384     array_t<word_t>::const_iterator it;
00385 
00386     void decodeWord();
00387 
00388     // give three functions of bitvector access to private variables
00389     friend void ibis::bitvector::erase(word_t i, word_t j);
00390     friend const_iterator ibis::bitvector::begin() const;
00391     friend const_iterator ibis::bitvector::end() const;
00392     friend class ibis::bitvector::iterator;
00393 }; // end class ibis::bitvector::const_iterator
00394 
00402 class ibis::bitvector::iterator {
00403 public:
00404     iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), fillbit(0),
00405                  bitv(0), active(0), vec(0) {}
00406     iterator(const iterator& r)
00407         : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
00408           literalvalue(r.literalvalue), fillbit(r.fillbit), bitv(r.bitv),
00409           active(r.active), vec(r.vec), it(r.it) {};
00410     const iterator& operator=(const iterator& r) {
00411         compressed = r.compressed; ind = r.ind; nbits = r.nbits;
00412         literalvalue = r.literalvalue; fillbit = r.fillbit; bitv = r.bitv;
00413         active = r.active; vec = r.vec; it = r.it; return *this;}
00414 
00415     inline bool operator*() const; // still can not modify this
00416     inline int operator!=(const const_iterator& rhs) const throw ();
00417     inline int operator==(const const_iterator& rhs) const throw ();
00418     inline int operator!=(const iterator& rhs) const throw ();
00419     inline int operator==(const iterator& rhs) const throw ();
00420 
00421     inline iterator& operator++();
00422     inline iterator& operator--();
00423     iterator& operator+=(int incr);
00424     const iterator& operator=(int val);
00425 
00426 private:
00427     ibis::bitvector::word_t     compressed;
00428     ibis::bitvector::word_t     ind;
00429     ibis::bitvector::word_t     nbits;
00430     ibis::bitvector::word_t     literalvalue;
00431     int                         fillbit;
00432     ibis::bitvector*            bitv;
00433     active_word*                active;
00434     array_t<word_t>*            vec;
00435     array_t<word_t>::iterator   it;
00436 
00437     void decodeWord();
00438 
00439     friend iterator ibis::bitvector::begin();
00440     friend iterator ibis::bitvector::end();
00441 }; // end class ibis::bitvector::iterator
00442 
00449 inline void ibis::bitvector::sloppySize(word_t n) const {
00450     nbits = n-active.nbits;
00451 #if defined(WAH_CHECK_SIZE)
00452     word_t nb = do_cnt();
00453     if (nb != nbits) {
00454         const_cast<ibis::bitvector*>(this)->adjustSize(0, nbits);
00455         LOGGER(ibis::gVerbose >= 0)
00456             << "bitvector::sloppySize -- adjust the number of bits to "
00457             << n;
00458     }
00459 #endif
00460 } // ibis::bitvector::sloppySize
00461 
00463 inline bool ibis::bitvector::all0s() const {
00464     if (m_vec.empty()) {
00465         return true;
00466     }
00467     else if (m_vec.size() == 1) {
00468         return (m_vec[0] == 0 || (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1));
00469     }
00470     else {
00471         return false;
00472     }
00473 } // ibis::bitvector::all0s
00474 
00476 inline bool ibis::bitvector::all1s() const {
00477     if (m_vec.size() == 1) {
00478         return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
00479     }
00480     else {
00481         return false;
00482     }
00483 } // ibis::bitvector::all1s
00484 
00486 inline ibis::bitvector::word_t
00487 ibis::bitvector::cnt_bits(ibis::bitvector::word_t val) const {
00488     return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
00489 } // ibis::bitvector::cnt_bits
00490 
00492 inline ibis::bitvector::word_t
00493 ibis::bitvector::cnt_ones(ibis::bitvector::word_t val) const {
00494     // number of 1 bits in a value between 0 and 255
00495     static const word_t table[256] = {
00496         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00497         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00498         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00499         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00500         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00501         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00502         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00503         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00504         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00505         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00506         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00507         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00508         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00509         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00510         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00511         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
00512     return table[val&0xFFU] + table[(val>>8)&0xFFU] +
00513         table[(val>>16)&0xFFU] + table[(val>>24)&0xFFU];
00514 } // ibis::bitvector::cnt_ones
00515 
00517 inline ibis::bitvector::word_t ibis::bitvector::size() const throw() {
00518     return ((nbits?nbits:(nbits=do_cnt()))+active.nbits);
00519 } // ibis::bitvector::size
00520 
00522 inline ibis::bitvector::word_t ibis::bitvector::cnt() const {
00523     if (nset==0 && !m_vec.empty())
00524         nbits = do_cnt();
00525     return (nset+cnt_ones(active.val));
00526 } // ibis::bitvector::cnt
00527 
00529 inline ibis::bitvector::word_t ibis::bitvector::numFillWords() const {
00530     word_t cnt=0;
00531     array_t<ibis::bitvector::word_t>::const_iterator it = m_vec.begin();
00532     while (it != m_vec.end()) {
00533         cnt += (*it >> ibis::bitvector::MAXBITS);
00534         it++;
00535     }
00536     return cnt;
00537 } // inline word_t ibis::bitvector::numFillWords() const {
00538 
00542 inline const ibis::bitvector&
00543 ibis::bitvector::operator=(const ibis::bitvector& bv) {
00544     nbits = bv.nbits; nset = bv.nset; active = bv.active;
00545     m_vec.deepCopy(bv.m_vec);
00546     return *this;
00547 }
00548 
00550 inline ibis::bitvector& ibis::bitvector::copy(const ibis::bitvector& bv) {
00551     nbits = bv.nbits; nset = bv.nset; active = bv.active;
00552     m_vec.deepCopy(bv.m_vec);
00553     return *this;
00554 }
00555 
00556 inline ibis::bitvector& ibis::bitvector::swap(bitvector& bv) {
00557     word_t tmp;
00558     tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
00559     tmp = bv.nset; bv.nset = nset; nset = tmp;
00560     active_word atmp = bv.active;
00561     bv.active = active; active = atmp;
00562     m_vec.swap(bv.m_vec);
00563     return *this;
00564 }
00565 
00569 inline void ibis::bitvector::append_active() {
00570 //      std::cout << "before: active.val = " << std::hex << active.val;
00571 //      if (m_vec.size())
00572 //      std::cout << ", m_vec.back() = " << m_vec.back();
00573 //      std::cout << std::dec << std::endl;
00574     if (m_vec.empty()) {
00575         m_vec.push_back(active.val);
00576     }
00577     else if (active.val == 0) {// incoming word is zero
00578         if (m_vec.back() == 0) {
00579             m_vec.back() = (HEADER0 + 2);
00580         }
00581         else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
00582             ++ m_vec.back();
00583         }
00584         else {
00585             m_vec.push_back(active.val);
00586         }
00587     }
00588     else if (active.val == ALLONES) {// incoming word is allones
00589         if (m_vec.back() == ALLONES) {
00590             m_vec.back() = (HEADER1 | 2);
00591         }
00592         else if (m_vec.back() >= HEADER1) {
00593             ++m_vec.back();
00594         }
00595         else {
00596             m_vec.push_back(active.val);
00597         }
00598     }
00599     else { // incoming word contains a mixture of bits
00600         m_vec.push_back(active.val);
00601     }
00602     nbits += MAXBITS;
00603     active.reset();
00604     nset = 0;
00605 //      std::cout << "after: m_vec.back() = " << std::hex << m_vec.back()
00606 //            << std::dec << std::endl;
00607 } // ibis::bitvector::append_active
00608 
00612 inline void ibis::bitvector::append_counter(int val, word_t cnt) {
00613     word_t head = 2 + val;
00614     word_t w = (head << SECONDBIT) + cnt;
00615     nbits += cnt*MAXBITS;
00616     if (m_vec.empty()) {
00617         m_vec.push_back(w);
00618     }
00619     else if ((m_vec.back()>>SECONDBIT) == head) {
00620         m_vec.back() += cnt;
00621     }
00622     else if ((m_vec.back()==ALLONES) && head==3) {
00623         m_vec.back() = w + 1;
00624     }
00625     else if ((m_vec.back() == 0) && head==2) {
00626         m_vec.back() = w + 1;
00627     }
00628     else {
00629         m_vec.push_back(w);
00630     }
00631 } // ibis::bitvector::append_counter
00632 
00633 // append a single bit
00634 inline ibis::bitvector& ibis::bitvector::operator+=(int b) {
00635     active.append(b);
00636     if (active.is_full())
00637         append_active();
00638     return *this;
00639 } // ibis::bitvector::operator+=
00640 
00645 inline void ibis::bitvector::appendFill(int val, word_t n) {
00646     if (n == 0) return;
00647     if (active.nbits > 0) {
00648         word_t tmp = (MAXBITS - active.nbits);
00649         if (tmp > n) tmp = n;
00650         active.nbits += tmp;
00651         active.val <<= tmp;
00652         n -= tmp;
00653         if (val != 0)
00654             active.val |= (1U<<tmp) - 1;
00655         if (active.nbits >= MAXBITS)
00656             append_active();
00657     }
00658     if (n >= MAXBITS) {
00659         word_t cnt = n / MAXBITS;
00660         if (cnt > 1)
00661             append_counter(val, cnt);
00662         else if (val != 0) {
00663             active.val = ALLONES;
00664             append_active();
00665         }
00666         else {
00667             active.val = 0;
00668             append_active();
00669         }
00670         n -= cnt * MAXBITS;
00671     }
00672     if (n > 0) {
00673         active.nbits = n;
00674         active.val = val*((1U<<n)-1);
00675     }
00676 } // ibis::bitvector::appendFill
00677 
00683 inline void ibis::bitvector::copy_runs(run& it, word_t& nw) {
00684 #if defined(DEBUG) && DEBUG + 0 > 1
00685         LOGGER(ibis::gVerbose >= 0)
00686             << "bitvector::copy_runs(0x"
00687             << std::hex << std::setw(8) << std::setfill('0')
00688             << *(it.it) << ", " << std::dec << nw
00689             << ") ... it.nWords = " << it.nWords;
00690 #endif
00691     // deal with the first word -- attach it to the last word in m_vec
00692     if (it.isFill != 0) {
00693         append_counter(it.fillBit, it.nWords);
00694         nw -= it.nWords;
00695     }
00696     else {
00697         active.val = *(it.it);
00698         append_active();
00699         -- nw;
00700     }
00701     ++ it.it;
00702     it.nWords = 0;
00703     nset = 0;
00704     nbits += MAXBITS * nw;
00705 
00706     while (nw > 0) { // copy the words
00707         it.decode();
00708         if (nw >= it.nWords) {
00709             m_vec.push_back(*(it.it));
00710             nw -= it.nWords;
00711             it.nWords = 0;
00712             ++ it.it;
00713         }
00714         else {
00715             break;
00716         }
00717     }
00718     nbits -= MAXBITS * nw;
00719 } // ibis::bitvector::copy_runs
00720 
00723 inline void ibis::bitvector::copy_runsn(run& it, word_t& nw) {
00724 #if defined(DEBUG) && DEBUG + 0 > 1
00725         LOGGER(ibis::gVerbose >= 0)
00726             << "bitvector::copy_runsn(0x"
00727             << std::hex << std::setw(8) << std::setfill('0')
00728             << *(it.it) << ", " << std::dec << nw
00729             << ") ... it.nWords = " << it.nWords;
00730 #endif
00731     // deal with the first word -- need to attach it to the last word in m_vec
00732     if (it.isFill != 0) {
00733         append_counter(!it.fillBit, it.nWords);
00734         nw -= it.nWords;
00735     }
00736     else {
00737         active.val = ALLONES ^ *(it.it);
00738         append_active();
00739         -- nw;
00740     }
00741     ++ it.it; // advance to the next word
00742     it.nWords = 0;
00743     nset = 0;
00744     nbits += MAXBITS * nw;
00745 
00746     while (nw > 0) { // copy the words
00747         it.decode();
00748         if (nw >= it.nWords) {
00749             m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
00750             nw -= it.nWords;
00751             it.nWords = 0;
00752             ++ it.it;
00753         }
00754         else {
00755             break;
00756         }
00757     }
00758     nbits -= MAXBITS * nw;
00759 } // ibis::bitvector::copy_runsn
00760 
00764 inline void ibis::bitvector::copy_fill
00765 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it) {
00766     if (it.fillBit == 0) {
00767         while (it.nWords > 3) {
00768             *jt = 0;
00769             jt[1] = 0;
00770             jt[2] = 0;
00771             jt[3] = 0;
00772             jt += 4;
00773             it.nWords -= 4;
00774         }
00775         if (it.nWords == 1) {
00776             *jt = 0; ++jt;
00777         }
00778         else if (it.nWords == 2) {
00779             *jt = 0; jt[1] = 0; jt += 2;
00780         }
00781         else if (it.nWords == 3) {
00782             *jt = 0; jt[1] = 0; jt[2] = 0; jt += 3;
00783         }
00784     }
00785     else {
00786         while (it.nWords > 3) {
00787             *jt = ALLONES;
00788             jt[1] = ALLONES;
00789             jt[2] = ALLONES;
00790             jt[3] = ALLONES;
00791             jt += 4;
00792             it.nWords -= 4;
00793         }
00794         if (it.nWords == 1) {
00795             *jt = ALLONES; ++jt;
00796         }
00797         else if (it.nWords == 2) {
00798             *jt = ALLONES; jt[1] = ALLONES; jt += 2;
00799         }
00800         else if (it.nWords == 3) {
00801             *jt = ALLONES; jt[1] = ALLONES; jt[2] = ALLONES; jt += 3;
00802         }
00803     }
00804     it.nWords = 0;
00805     ++ it.it;
00806 } // ibis::bitvector::copy_fill
00807 
00812 inline void ibis::bitvector::copy_runs
00813 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
00814     while (nw >= it.nWords && nw > 0) {
00815         if (it.isFill != 0) { // copy a fill
00816             const array_t<word_t>::iterator iend = jt + it.nWords;
00817             if (it.fillBit == 0) {
00818                 while (jt < iend) {
00819                     *jt = 0;
00820                     ++ jt;
00821                 }
00822             }
00823             else {
00824                 while (jt < iend) {
00825                     *jt = ALLONES;
00826                     ++ jt;
00827                 }
00828             }
00829             nw -= it.nWords;
00830         }
00831         else { // copy a single word
00832             *jt = *(it.it);
00833             ++ jt;
00834             -- nw;
00835         }
00836         ++ it.it; // advance to the next word
00837         if (nw > 0) {
00838             it.decode();
00839         }
00840         else {
00841             it.nWords = 0;
00842             return;
00843         }
00844     }
00845 } // ibis::bitvector::copy_runs
00846 
00849 inline void ibis::bitvector::copy_runsn
00850 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
00851     while (nw >= it.nWords) {
00852         if (it.isFill != 0) { // a fill
00853             const array_t<word_t>::iterator iend = jt + it.nWords;
00854             if (it.fillBit == 0) {
00855                 while (jt < iend) {
00856                     *jt = ALLONES;
00857                     ++ jt;
00858                 }
00859             }
00860             else {
00861                 while (jt < iend) {
00862                     *jt = 0;
00863                     ++ jt;
00864                 }
00865             }
00866             nw -= it.nWords;
00867         }
00868         else { // a literal word
00869             *jt = *(it.it) ^ ALLONES;
00870             ++ jt;
00871             -- nw;
00872         }
00873         ++ it.it; // advance to the next word
00874         if (nw > 0) {
00875             it.decode();
00876         }
00877         else {
00878             it.nWords = 0;
00879             return;
00880         }
00881     }
00882 } // ibis::bitvector::copy_runsn
00883 
00884 inline ibis::bitvector::iterator ibis::bitvector::begin() {
00885     iterator it;
00886     it.it     = m_vec.begin();
00887     it.vec    = &m_vec;
00888     it.bitv   = this;
00889     it.active = &active;
00890     it.decodeWord();
00891     return it;
00892 } // ibis::bitvector::begin
00893 
00894 inline ibis::bitvector::iterator ibis::bitvector::end() {
00895     iterator it;
00896     it.ind              = 0;
00897     it.compressed       = 0;
00898     it.nbits            = 0;
00899     it.literalvalue     = 0;
00900     it.fillbit          = 0;
00901     it.it     = m_vec.end() + 1;
00902     it.vec    = &m_vec;
00903     it.bitv   = this;
00904     it.active = &active;
00905     return it;
00906 } // ibis::bitvector::end
00907 
00908 inline ibis::bitvector::const_iterator ibis::bitvector::begin() const {
00909     const_iterator it;
00910     it.it     = m_vec.begin();
00911     it.begin  = m_vec.begin();
00912     it.end    = m_vec.end();
00913     it.active = &active;
00914     it.decodeWord();
00915     return it;
00916 } // ibis::bitvector::begin
00917 
00918 // dereference -- no error checking
00919 inline bool ibis::bitvector::iterator::operator*() const {
00920 #if defined(DEBUG) && DEBUG + 0 > 1
00921     if (vec==0 || it<vec->begin() ||  it>vec->end())
00922         throw "bitvector::const_iterator not initialized correctly.";
00923 #endif
00924     if (compressed != 0)
00925         return (fillbit != 0);
00926     else
00927         return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
00928 } // ibis::bitvector::iterator::operator*
00929 
00930 // comparison only based on the iterator
00931 inline int ibis::bitvector::iterator::operator!=
00932 (const ibis::bitvector::const_iterator& rhs) const throw () {
00933     return (it != rhs.it);
00934 }
00935 inline int ibis::bitvector::iterator::operator==
00936 (const ibis::bitvector::const_iterator& rhs) const throw () {
00937     return (it == rhs.it);
00938 }
00939 inline int ibis::bitvector::iterator::operator!=
00940 (const ibis::bitvector::iterator& rhs) const throw () {
00941     return (it != rhs.it);
00942 }
00943 inline int ibis::bitvector::iterator::operator==
00944 (const ibis::bitvector::iterator& rhs) const throw () {
00945     return (it == rhs.it);
00946 }
00947 
00948 // increment by one
00949 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator++() {
00950 #if defined(DEBUG) && DEBUG + 0 > 1
00951     if (vec==0 || it<vec->begin() || it>vec->end())
00952         throw "bitvector::iterator not formed correctly.";
00953 #endif
00954     if (ind+1<nbits) {++ind;}
00955     else {++ it; decodeWord();}
00956     return *this;
00957 }
00958 
00959 // decrement by one
00960 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator--() {
00961 #if defined(DEBUG) && DEBUG + 0 > 1
00962     if (vec==0 || it<vec->begin() || it>vec->end()+1)
00963         throw "bitvector::iterator not formed correctly.";
00964 #endif
00965     if (ind) -- ind;
00966     else {
00967         if (it <= vec->end()) -- it;
00968         else if (active->nbits) it = vec->end();
00969         else it = vec->end() - 1;
00970         decodeWord();
00971         if (nbits) ind = nbits - 1;
00972     }
00973     return *this;
00974 }
00975 
00976 inline ibis::bitvector::const_iterator ibis::bitvector::end() const {
00977     const_iterator it;
00978     it.ind              = 0;
00979     it.compressed       = 0;
00980     it.nbits            = 0;
00981     it.literalvalue     = 0;
00982     it.fillbit          = 0;
00983     it.it    = m_vec.end() + 1;
00984     it.begin = m_vec.begin();
00985     it.end   = m_vec.end();
00986     it.active = &active;
00987     return it;
00988 } // ibis::bitvector::end()
00989 
00990 // dereference -- no error checking
00991 inline bool ibis::bitvector::const_iterator::operator*() const {
00992 #if defined(DEBUG) && DEBUG + 0 > 1
00993     if (it==0 || end==0 || it>end || nbits<=ind)
00994         throw "bitvector::const_iterator not initialized correctly.";
00995 #endif
00996     if (compressed != 0)
00997         return (fillbit != 0);
00998     else
00999         return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
01000 }
01001 
01002 // comparison only based on the iterator
01003 inline int ibis::bitvector::const_iterator::operator!=
01004 (const ibis::bitvector::const_iterator& rhs) const throw (){
01005     return (it != rhs.it);
01006 }
01007 inline int ibis::bitvector::const_iterator::operator==
01008 (const ibis::bitvector::const_iterator& rhs) const throw () {
01009     return (it == rhs.it);
01010 }
01011 
01012 // increment by one
01013 inline ibis::bitvector::const_iterator&
01014 ibis::bitvector::const_iterator::operator++() {
01015 #if defined(DEBUG) && DEBUG + 0 > 1
01016     if (it==0 || end==0 || it>end)
01017         throw "bitvector::const_iterator not formed correctly.";
01018 #endif
01019     if (ind+1<nbits) {++ind;}
01020     else {++ it; decodeWord();}
01021     return *this;
01022 }
01023 
01024 // decrement by one
01025 inline ibis::bitvector::const_iterator&
01026 ibis::bitvector::const_iterator::operator--() {
01027 #if defined(DEBUG) && DEBUG + 0 > 1
01028     if (it==0 || end==0 || it>end)
01029         throw "bitvector::const_iterator not formed correctly.";
01030 #endif
01031     if (ind) -- ind;
01032     else {
01033         if (it <= end) -- it;
01034         else if (active->nbits) it = end;
01035         else it = end - 1;
01036         decodeWord();
01037         if (nbits) ind = nbits - 1;
01038     }
01039     return *this;
01040 }
01041 
01042 inline ibis::bitvector::indexSet ibis::bitvector::firstIndexSet() const {
01043     indexSet is;
01044     if (m_vec.end() > m_vec.begin()) {
01045         is.it = m_vec.begin() - 1;
01046         is.end = m_vec.end();
01047     }
01048     else {
01049         is.it = 0;
01050         is.end = 0;
01051     }
01052     is.active = &active;
01053     is.ind[0] = static_cast<word_t>(-1);
01054     is.nind = 0;
01055     ++is;
01056     return is;
01057 } // end ibis::bitvector::firstIndexSet;
01058 
01062 inline double ibis::bitvector::randomSize(word_t nb, word_t nc) {
01063     double sz = 0.0;
01064     if (nb > 0 && nb >= nc && nb > MAXBITS) {
01065         const double den = static_cast<double>(nc) /
01066             static_cast<double>(nb);
01067         const word_t nw = nb / MAXBITS;
01068         sz = 3.0 + nw - nw * (pow(1.0-den, static_cast<int>(2*MAXBITS))
01069                               + pow(den, static_cast<int>(2*MAXBITS)));
01070     }
01071     return sz*sizeof(word_t);
01072 } // end ibis::bitvector::randomSize
01073 
01078 inline double ibis::bitvector::markovSize(word_t nb, word_t nc, double f) {
01079     double sz = 0.0;
01080     if (nb > 0 && nb >= nc && nb > MAXBITS) {
01081         const double den = static_cast<double>(nc) /
01082             static_cast<double>(nb);
01083         const word_t nw = nb / MAXBITS;
01084         if ((den <= 0.5 && f > 1.00001) || (den > 0.5 && (1.0-den)*f > den))
01085             sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f),
01086                                   static_cast<int>(2*MAXBITS-3)) +
01087                   den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3)));
01088         else
01089             sz = (pow(1.0-den, static_cast<int>(2*MAXBITS)) +
01090                   pow(den, static_cast<int>(2*MAXBITS)));
01091         sz = 3.0 + nw * (1.0 - sz);
01092     }
01093     return sz*sizeof(word_t);
01094 } // end ibis::bitvector::markovSize
01095 
01097 inline void ibis::bitvector::turnOnRawBit(const word_t ind) {
01098     if (ind < nbits) { // in regular words
01099         m_vec[ind / MAXBITS] |= (1 << (SECONDBIT - (ind % MAXBITS)));
01100         nset = 0;  // don't track nset
01101     }
01102     else { // assume to be in the active word
01103         active.val |= (1 << (active.nbits - (ind - nbits) - 1));
01104 #if defined(DEBUG)
01105         if (ind >= nbits + active.nbits ||
01106             active.val >= (1U << active.nbits)) {
01107             LOGGER(ibis::gVerbose >= 0)
01108                 << "bitvector::turnOnRawBit(" << ind
01109                 << ") found a bad active word";
01110         }
01111 #endif
01112     }
01113 } // ibis::bitvector::turnOnRawBit
01114 
01115 std::ostream& operator<<(std::ostream&, const ibis::bitvector&);
01116 #endif // __BITVECTOR_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive