bitvector.h
Go to the documentation of this file.
00001 // $Id$
00002 //      Author: John Wu <John.Wu at ACM.org>
00003 //              Lawrence Berkeley National Laboratory
00004 //      Copyright 2000-2012 the Regents of the University of California
00005 #ifndef BITVECTOR_H
00006 #define BITVECTOR_H
00007 
00008 
00009 
00010 #include "array_t.h"    // alternative to std::vector
00011 
00012 #if defined(_MSC_VER) && defined(_WIN32)
00013 //disable warnings on extern before template instantiation
00014 #pragma warning (disable : 4231)
00015 #endif
00016 #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)) && defined(CXX_USE_DLL) && !defined(DLL_EXPORT)
00017 extern template class FASTBIT_CXX_DLLSPEC ibis::array_t<uint32_t>;
00018 #endif
00019 
00020 
00062 class FASTBIT_CXX_DLLSPEC ibis::bitvector {
00063 public:
00064     typedef uint32_t word_t;
00065 
00067     ~bitvector() {clear();};
00068     // constructors of bitvector class
00069     bitvector();
00070     bitvector(const bitvector& bv);
00071     explicit bitvector(const array_t<word_t>& arr);
00072     explicit bitvector(const char* file);
00073 
00074     inline const bitvector& operator=(const bitvector& bv);
00075     inline bitvector& copy(const bitvector& bv);
00076     inline bitvector& swap(bitvector& bv);
00077 
00078     // use bv to replace part of the existing value, match the ith bit with
00079     // the first of bv, return reference to self
00080     //bitvector& copy(const word_t i, const bitvector& bv);
00081     void setBit(const word_t i, int val);
00082     inline void turnOnRawBit(const word_t i);
00084     void erase(word_t i, word_t j);
00085 
00086     void set(int val, word_t n);
00087     void clear();
00088 
00089     bitvector& operator+=(const bitvector& bv); 
00090     inline bitvector& operator+=(int b);        
00091     void appendWord(word_t w);          
00092     inline void appendFill(int val, word_t n);
00093 
00095     int operator==(const bitvector& rhs) const;
00096 
00098     void flip();
00100     void operator&=(const bitvector& rhs);
00103     bitvector* operator&(const bitvector&) const;
00105     void operator|=(const bitvector& rhs);
00107     bitvector* operator|(const bitvector&) const;
00109     void operator^=(const bitvector& rhs);
00111     bitvector* operator^(const bitvector&) const;
00113     void operator-=(const bitvector& rhs);
00116     bitvector* operator-(const bitvector&) const;
00117 
00118     void subset(const bitvector& mask, bitvector& res) const;
00119     word_t count(const bitvector& mask) const;
00120 
00121     // I/O functions.
00124     void read(const char *fn);
00126     void write(const char *fn) const;
00128     void write(int fdes) const;
00132     void write(array_t<word_t>& arr) const;
00133 
00134     void compress();    
00135     void decompress();  
00136     word_t compressible() const;
00139     bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);}
00140 
00141     inline word_t size() const throw();
00142     inline void sloppySize(word_t n) const;
00143     inline word_t cnt() const;
00144     inline word_t count() const {return cnt();}
00145     inline word_t sloppyCount() const;
00146     inline word_t numFillWords() const;
00148     uint32_t bytes() const throw() {
00149         return (m_vec.size()*sizeof(word_t) + sizeof(bitvector));
00150     };
00154     uint32_t getSerialSize() const throw() {
00155         return (m_vec.size() + 1 + (active.nbits>0)) * sizeof(word_t);
00156     };
00158     static word_t bitsPerLiteral() {return MAXBITS;}
00159     inline static double randomSize(word_t nb, word_t nc);
00160     inline static double markovSize(word_t nb, word_t nc, double f);
00161     static double clusteringFactor(word_t nb, word_t nc, word_t sz);
00162 
00163     void adjustSize(word_t nv, word_t nt);
00164     void reserve(unsigned nb, unsigned nc, double cf=0.0);
00165 
00168     bool empty() const {return all0s() && active.val == 0;}
00169 
00170     std::ostream& print(std::ostream &) const; 
00171 
00173     class iterator;
00174     inline iterator end();
00175     inline iterator begin();
00176 
00178     class const_iterator;
00179     inline const_iterator end() const;
00180     inline const_iterator begin() const;
00181 
00183     class indexSet;
00184     inline indexSet firstIndexSet() const;
00185 
00186     // give accesses to some friends
00187     friend class indexSet;
00188     friend class iterator;
00189     friend class const_iterator;
00190 
00191 protected:
00192     inline bool all0s() const;
00193     inline bool all1s() const;
00194 
00195 private:
00196     // static members, i.e., constants to be used internally
00197     static const unsigned MAXBITS;
00198     static const unsigned SECONDBIT;
00199     static const word_t FILLBIT;
00200     static const word_t HEADER0;
00201     static const word_t HEADER1;
00202     static const word_t ALLONES;
00203     static const word_t MAXCNT;
00204 
00211     struct active_word {
00212         word_t val;     // the value
00213         word_t nbits;   // total number of bits
00214 
00215         active_word() : val(0), nbits(0) {};
00216         void reset() {val = 0; nbits = 0;};
00217         int is_full() const {return (nbits >= MAXBITS);};
00218         void append(int b) { // append a single bit
00219             val <<= 1; nbits ++; val += b;
00220         };
00221     }; // struct active_word
00222 
00225     struct run {
00226         int isFill;
00227         int fillBit;
00228         word_t nWords;
00229         array_t<word_t>::const_iterator it;
00230         run() : isFill(0), fillBit(0), nWords(0), it(0) {};
00231         void decode() { 
00232             fillBit = (*it > HEADER1);
00233             if (*it > ALLONES) {
00234                 nWords = (*it & MAXCNT);
00235                 isFill = 1;
00236             }
00237             else {
00238                 nWords = 1;
00239                 isFill = 0;
00240             }
00241         };
00244         void operator--() {
00245             if (nWords > 1) {--nWords;}
00246             else {++ it; nWords = 0;}
00247         };
00250         void operator-=(word_t len) {
00251             while (len > 0) {
00252                 if (nWords == 0) decode();
00253                 if (isFill != 0) {
00254                     if (nWords > len) {nWords -= len; len = 0;}
00255                     else if (nWords == len) {nWords = 0; len = 0; ++ it;}
00256                     else {len -= nWords; ++ it; nWords = 0;}
00257                 }
00258                 else {-- len; ++ it; nWords = 0;}
00259             }
00260         };
00261     };
00262     friend struct run;
00263     friend struct active_word;
00264 
00265     // member variables of bitvector class
00266     mutable word_t nbits;       
00267     mutable word_t nset;        
00268     active_word active;         
00269     array_t<word_t> m_vec;      
00270 
00271     // private functions of bitvector class
00272     word_t count_c1(const bitvector& mask) const;
00273     word_t count_c2(const bitvector& mask) const;
00274     // The following three functions all performs or operation, _c2 and _c1
00275     // generate compressed solutions, but _c0, _d1, and _d2 generate
00276     // uncompressed solutions.
00277     // or_c2 assumes there are compressed word in both operands
00278     // or_c1 *this may contain compressed word, but not rhs
00279     // or_c0 assumes both operands are not compressed
00280     // or_d1 *this contains no compressed word and is overwritten with the
00281     //       result
00282     // or_d2 both *this and rhs are compressed, but res is not compressed
00283     void or_c2(const bitvector& rhs, bitvector& res) const;
00284     void or_c1(const bitvector& rhs, bitvector& res) const;
00285     void or_c0(const bitvector& rhs);
00286     void or_d1(const bitvector& rhs);
00287     void or_d2(const bitvector& rhs, bitvector& res) const;
00288     void and_c2(const bitvector& rhs, bitvector& res) const;
00289     void and_c1(const bitvector& rhs, bitvector& res) const;
00290     void and_c0(const bitvector& rhs);
00291     void and_d1(const bitvector& rhs);
00292     void and_d2(const bitvector& rhs, bitvector& res) const;
00293     void xor_c2(const bitvector& rhs, bitvector& res) const;
00294     void xor_c1(const bitvector& rhs, bitvector& res) const;
00295     void xor_c0(const bitvector& rhs);
00296     void xor_d1(const bitvector& rhs);
00297     void xor_d2(const bitvector& rhs, bitvector& res) const;
00298     void minus_c2(const bitvector& rhs, bitvector& res) const;
00299     void minus_c1(const bitvector& rhs, bitvector& res) const;
00300     void minus_c1x(const bitvector& rhs, bitvector& res) const;
00301     void minus_c0(const bitvector& rhs);
00302     void minus_d1(const bitvector& rhs);
00303     void minus_d2(const bitvector& rhs, bitvector& res) const;
00304     inline void copy_runs(run& it, word_t& nw); // copy nw words
00305     inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate
00306     inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
00307     inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
00308                           word_t& nw);
00309     inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
00310                            word_t& nw);
00311     void decompress(array_t<word_t>& tmp) const;
00312     void copy_comp(array_t<word_t>& tmp) const;
00313     inline void append_active();
00314     inline void append_counter(int val, word_t cnt);
00315     inline word_t cnt_ones(word_t) const; // number of ones in a word
00316     inline word_t cnt_bits(word_t) const; // number of bits in a word
00317     word_t do_cnt() const throw ();
00318 }; // class bitvector
00319 
00321 class ibis::bitvector::const_iterator {
00322 public:
00323     const_iterator() : compressed(0), ind(0), nbits(0), literalvalue(0),
00324                        fillbit(0), active(0) {}
00325 
00326     const_iterator(const const_iterator& r)
00327         : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
00328           literalvalue(r.literalvalue), fillbit(r.fillbit), active(r.active),
00329           end(r.end), begin(r.begin), it(r.it) {};
00330     const_iterator& operator=(const const_iterator& r) {
00331         compressed = r.compressed; ind = r.ind; nbits = r.nbits;
00332         literalvalue = r.literalvalue; fillbit = r.fillbit; active = r.active;
00333         end = r.end; begin = r.begin; it = r.it;
00334         return *this;}
00335 
00336     inline bool operator*() const;
00337     inline int operator!=(const const_iterator& rhs) const throw ();
00338     inline int operator==(const const_iterator& rhs) const throw ();
00339 
00340     inline const_iterator& operator++();
00341     inline const_iterator& operator--();
00342     const_iterator& operator+=(int incr);
00343 
00344 private:
00345     ibis::bitvector::word_t     compressed;
00346     ibis::bitvector::word_t     ind;
00347     ibis::bitvector::word_t     nbits;
00348     ibis::bitvector::word_t     literalvalue;
00349     int                         fillbit;
00350     const active_word*          active;
00351     array_t<word_t>::const_iterator end;
00352     array_t<word_t>::const_iterator begin;
00353     array_t<word_t>::const_iterator it;
00354 
00355     void decodeWord();
00356 
00357     // give three functions of bitvector access to private variables
00358     friend void ibis::bitvector::erase(word_t i, word_t j);
00359     friend const_iterator ibis::bitvector::begin() const;
00360     friend const_iterator ibis::bitvector::end() const;
00361     friend class ibis::bitvector::iterator;
00362 }; // class ibis::bitvector::const_iterator
00363 
00371 class ibis::bitvector::iterator {
00372 public:
00373     iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), fillbit(0),
00374                  bitv(0), active(0), vec(0) {}
00375     iterator(const iterator& r)
00376         : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
00377           literalvalue(r.literalvalue), fillbit(r.fillbit), bitv(r.bitv),
00378           active(r.active), vec(r.vec), it(r.it) {};
00379     const iterator& operator=(const iterator& r) {
00380         compressed = r.compressed; ind = r.ind; nbits = r.nbits;
00381         literalvalue = r.literalvalue; fillbit = r.fillbit; bitv = r.bitv;
00382         active = r.active; vec = r.vec; it = r.it; return *this;}
00383 
00384     inline bool operator*() const; // still can not modify this
00385     inline int operator!=(const const_iterator& rhs) const throw ();
00386     inline int operator==(const const_iterator& rhs) const throw ();
00387     inline int operator!=(const iterator& rhs) const throw ();
00388     inline int operator==(const iterator& rhs) const throw ();
00389 
00390     inline iterator& operator++();
00391     inline iterator& operator--();
00392     iterator& operator+=(int incr);
00393     const iterator& operator=(int val);
00394 
00395 private:
00396     ibis::bitvector::word_t     compressed;
00397     ibis::bitvector::word_t     ind;
00398     ibis::bitvector::word_t     nbits;
00399     ibis::bitvector::word_t     literalvalue;
00400     int                         fillbit;
00401     ibis::bitvector*            bitv;
00402     active_word*                active;
00403     array_t<word_t>*            vec;
00404     array_t<word_t>::iterator   it;
00405 
00406     void decodeWord();
00407 
00408     friend iterator ibis::bitvector::begin();
00409     friend iterator ibis::bitvector::end();
00410 }; // class ibis::bitvector::iterator
00411 
00421 class FASTBIT_CXX_DLLSPEC ibis::bitvector::indexSet {
00422 public:
00424     indexSet() : it(0), end(0), active(0), nind(0) {}
00426     indexSet(const indexSet& rhs)
00427         : it(rhs.it), end(rhs.end), active(rhs.active), nind(rhs.nind) {
00428         ind[0] = rhs.ind[0];
00429         ind[1] = rhs.ind[1];
00430         ind[2] = rhs.ind[2];
00431         ind[3] = rhs.ind[3];
00432         ind[4] = rhs.ind[4];
00433         ind[5] = rhs.ind[5];
00434         ind[6] = rhs.ind[6];
00435         ind[7] = rhs.ind[7];
00436         ind[8] = rhs.ind[8];
00437         ind[9] = rhs.ind[9];
00438         ind[10] = rhs.ind[10];
00439         ind[11] = rhs.ind[11];
00440         ind[12] = rhs.ind[12];
00441         ind[13] = rhs.ind[13];
00442         ind[14] = rhs.ind[14];
00443         ind[15] = rhs.ind[15];
00444         ind[16] = rhs.ind[16];
00445         ind[17] = rhs.ind[17];
00446         ind[18] = rhs.ind[18];
00447         ind[19] = rhs.ind[19];
00448         ind[20] = rhs.ind[20];
00449         ind[21] = rhs.ind[21];
00450         ind[22] = rhs.ind[22];
00451         ind[23] = rhs.ind[23];
00452         ind[24] = rhs.ind[24];
00453         ind[25] = rhs.ind[25];
00454         ind[26] = rhs.ind[26];
00455         ind[27] = rhs.ind[27];
00456         ind[28] = rhs.ind[28];
00457         ind[29] = rhs.ind[29];
00458         ind[30] = rhs.ind[30];
00459         ind[31] = rhs.ind[31];
00460     }
00462     indexSet& operator=(const indexSet& rhs) {
00463         it = rhs.it;
00464         end = rhs.end;
00465         active = rhs.active;
00466         nind = rhs.nind;
00467         ind[0] = rhs.ind[0];
00468         ind[1] = rhs.ind[1];
00469         ind[2] = rhs.ind[2];
00470         ind[3] = rhs.ind[3];
00471         ind[4] = rhs.ind[4];
00472         ind[5] = rhs.ind[5];
00473         ind[6] = rhs.ind[6];
00474         ind[7] = rhs.ind[7];
00475         ind[8] = rhs.ind[8];
00476         ind[9] = rhs.ind[9];
00477         ind[10] = rhs.ind[10];
00478         ind[11] = rhs.ind[11];
00479         ind[12] = rhs.ind[12];
00480         ind[13] = rhs.ind[13];
00481         ind[14] = rhs.ind[14];
00482         ind[15] = rhs.ind[15];
00483         ind[16] = rhs.ind[16];
00484         ind[17] = rhs.ind[17];
00485         ind[18] = rhs.ind[18];
00486         ind[19] = rhs.ind[19];
00487         ind[20] = rhs.ind[20];
00488         ind[21] = rhs.ind[21];
00489         ind[22] = rhs.ind[22];
00490         ind[23] = rhs.ind[23];
00491         ind[24] = rhs.ind[24];
00492         ind[25] = rhs.ind[25];
00493         ind[26] = rhs.ind[26];
00494         ind[27] = rhs.ind[27];
00495         ind[28] = rhs.ind[28];
00496         ind[29] = rhs.ind[29];
00497         ind[30] = rhs.ind[30];
00498         ind[31] = rhs.ind[31];
00499         return *this;
00500     }
00501 
00502     //int operator!=(const indexSet& rhs) const {return (it != rhs.it);};
00504     bool isRange() const {return (nind>=ibis::bitvector::MAXBITS);}
00506     const word_t* indices() const {return ind;};
00508     word_t nIndices() const {return nind;}
00510     const word_t& currentWord() const {return *it;}
00511     indexSet& operator++();
00512 
00513     // allow bitvector::firstIndexSet() to access private member variables
00514     friend indexSet ibis::bitvector::firstIndexSet() const;
00515 
00516 private:
00517     array_t<word_t>::const_iterator it;
00518     array_t<word_t>::const_iterator end;
00519     const active_word* active; // points back to the active word
00520     word_t nind; // number of indices
00521     word_t ind[32];
00522 }; // class ibis::bitvector::indexSet
00523 
00530 inline void ibis::bitvector::sloppySize(word_t n) const {
00531     nbits = n-active.nbits;
00532 #if defined(WAH_CHECK_SIZE)
00533     word_t nb = do_cnt();
00534     if (nb != nbits) {
00535         const_cast<ibis::bitvector*>(this)->adjustSize(0, nbits);
00536         LOGGER(ibis::gVerbose >= 0)
00537             << "bitvector::sloppySize -- adjust the number of bits to "
00538             << n;
00539     }
00540 #endif
00541 } // ibis::bitvector::sloppySize
00542 
00552 inline ibis::bitvector::word_t ibis::bitvector::sloppyCount() const {
00553     if (nset > 0) {
00554         return nset;
00555     }
00556     else if (active.nbits == 0 || active.val == 0) {
00557         if (m_vec.empty() ||
00558             (m_vec.size() == 1 &&
00559              (m_vec[0] == 0 ||
00560               (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1))))
00561             return 0;
00562         else
00563             return 2;
00564     }
00565     else {
00566         return 1;
00567     }
00568 } // ibis::bitvector::sloppyCount
00569 
00572 inline bool ibis::bitvector::all0s() const {
00573     if (m_vec.empty()) {
00574         return true;
00575     }
00576     else if (m_vec.size() == 1) {
00577         return (m_vec[0] == 0 || (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1));
00578     }
00579     else {
00580         return false;
00581     }
00582 } // ibis::bitvector::all0s
00583 
00586 inline bool ibis::bitvector::all1s() const {
00587     if (m_vec.size() == 1) {
00588         return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
00589     }
00590     else {
00591         return false;
00592     }
00593 } // ibis::bitvector::all1s
00594 
00596 inline ibis::bitvector::word_t
00597 ibis::bitvector::cnt_bits(ibis::bitvector::word_t val) const {
00598     return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
00599 } // ibis::bitvector::cnt_bits
00600 
00602 inline ibis::bitvector::word_t
00603 ibis::bitvector::cnt_ones(ibis::bitvector::word_t val) const {
00604     // number of 1 bits in a value between 0 and 255
00605     static const word_t table[256] = {
00606         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00607         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00608         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00609         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00610         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00611         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00612         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00613         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00614         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00615         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00616         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00617         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00618         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00619         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00620         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00621         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
00622     return table[val&0xFFU] + table[(val>>8)&0xFFU] +
00623         table[(val>>16)&0xFFU] + table[(val>>24)&0xFFU];
00624 } // ibis::bitvector::cnt_ones
00625 
00627 inline ibis::bitvector::word_t ibis::bitvector::size() const throw() {
00628     return ((nbits?nbits:(nbits=do_cnt()))+active.nbits);
00629 } // ibis::bitvector::size
00630 
00632 inline ibis::bitvector::word_t ibis::bitvector::cnt() const {
00633     if (nset==0 && !m_vec.empty())
00634         nbits = do_cnt();
00635     return (nset+cnt_ones(active.val));
00636 } // ibis::bitvector::cnt
00637 
00639 inline ibis::bitvector::word_t ibis::bitvector::numFillWords() const {
00640     word_t cnt=0;
00641     array_t<ibis::bitvector::word_t>::const_iterator it = m_vec.begin();
00642     while (it != m_vec.end()) {
00643         cnt += (*it >> ibis::bitvector::MAXBITS);
00644         it++;
00645     }
00646     return cnt;
00647 } // inline word_t ibis::bitvector::numFillWords() const {
00648 
00652 inline const ibis::bitvector&
00653 ibis::bitvector::operator=(const ibis::bitvector& bv) {
00654     nbits = bv.nbits; nset = bv.nset; active = bv.active;
00655     m_vec.deepCopy(bv.m_vec);
00656     return *this;
00657 }
00658 
00660 inline ibis::bitvector& ibis::bitvector::copy(const ibis::bitvector& bv) {
00661     nbits = bv.nbits; nset = bv.nset; active = bv.active;
00662     m_vec.deepCopy(bv.m_vec);
00663     return *this;
00664 }
00665 
00666 inline ibis::bitvector& ibis::bitvector::swap(bitvector& bv) {
00667     word_t tmp;
00668     tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
00669     tmp = bv.nset; bv.nset = nset; nset = tmp;
00670     active_word atmp = bv.active;
00671     bv.active = active; active = atmp;
00672     m_vec.swap(bv.m_vec);
00673     return *this;
00674 }
00675 
00679 inline void ibis::bitvector::append_active() {
00680 //      std::cout << "before: active.val = " << std::hex << active.val;
00681 //      if (m_vec.size())
00682 //      std::cout << ", m_vec.back() = " << m_vec.back();
00683 //      std::cout << std::dec << std::endl;
00684     if (m_vec.empty()) {
00685         m_vec.push_back(active.val);
00686     }
00687     else if (active.val == 0) {// incoming word is zero
00688         if (m_vec.back() == 0) {
00689             m_vec.back() = (HEADER0 + 2);
00690         }
00691         else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
00692             ++ m_vec.back();
00693         }
00694         else {
00695             m_vec.push_back(active.val);
00696         }
00697     }
00698     else if (active.val == ALLONES) {// incoming word is allones
00699         if (m_vec.back() == ALLONES) {
00700             m_vec.back() = (HEADER1 | 2);
00701         }
00702         else if (m_vec.back() >= HEADER1) {
00703             ++m_vec.back();
00704         }
00705         else {
00706             m_vec.push_back(active.val);
00707         }
00708     }
00709     else { // incoming word contains a mixture of bits
00710         m_vec.push_back(active.val);
00711     }
00712     nbits += MAXBITS;
00713     active.reset();
00714     nset = 0;
00715 //      std::cout << "after: m_vec.back() = " << std::hex << m_vec.back()
00716 //            << std::dec << std::endl;
00717 } // ibis::bitvector::append_active
00718 
00722 inline void ibis::bitvector::append_counter(int val, word_t cnt) {
00723     word_t head = 2 + val;
00724     word_t w = (head << SECONDBIT) + cnt;
00725     nbits += cnt*MAXBITS;
00726     if (m_vec.empty()) {
00727         m_vec.push_back(w);
00728     }
00729     else if ((m_vec.back()>>SECONDBIT) == head) {
00730         m_vec.back() += cnt;
00731     }
00732     else if ((m_vec.back()==ALLONES) && head==3) {
00733         m_vec.back() = w + 1;
00734     }
00735     else if ((m_vec.back() == 0) && head==2) {
00736         m_vec.back() = w + 1;
00737     }
00738     else {
00739         m_vec.push_back(w);
00740     }
00741 } // ibis::bitvector::append_counter
00742 
00743 // append a single bit
00744 inline ibis::bitvector& ibis::bitvector::operator+=(int b) {
00745     active.append(b);
00746     if (active.is_full())
00747         append_active();
00748     return *this;
00749 } // ibis::bitvector::operator+=
00750 
00755 inline void ibis::bitvector::appendFill(int val, word_t n) {
00756     if (n == 0) return;
00757     if (active.nbits > 0) {
00758         word_t tmp = (MAXBITS - active.nbits);
00759         if (tmp > n) tmp = n;
00760         active.nbits += tmp;
00761         active.val <<= tmp;
00762         n -= tmp;
00763         if (val != 0)
00764             active.val |= (1U<<tmp) - 1;
00765         if (active.nbits >= MAXBITS)
00766             append_active();
00767     }
00768     if (n >= MAXBITS) {
00769         word_t cnt = n / MAXBITS;
00770         if (cnt > 1)
00771             append_counter(val, cnt);
00772         else if (val != 0) {
00773             active.val = ALLONES;
00774             append_active();
00775         }
00776         else {
00777             active.val = 0;
00778             append_active();
00779         }
00780         n -= cnt * MAXBITS;
00781     }
00782     if (n > 0) {
00783         active.nbits = n;
00784         active.val = val*((1U<<n)-1);
00785     }
00786 } // ibis::bitvector::appendFill
00787 
00793 inline void ibis::bitvector::copy_runs(run& it, word_t& nw) {
00794     // deal with the first word -- attach it to the last word in m_vec
00795     if (it.isFill != 0) {
00796         append_counter(it.fillBit, it.nWords);
00797         nw -= it.nWords;
00798     }
00799     else {
00800         active.val = *(it.it);
00801         append_active();
00802         -- nw;
00803     }
00804     ++ it.it;
00805     it.nWords = 0;
00806     nset = 0;
00807     nbits += MAXBITS * nw;
00808 
00809     while (nw > 0) { // copy the words
00810         it.decode();
00811         if (nw >= it.nWords) {
00812             m_vec.push_back(*(it.it));
00813             nw -= it.nWords;
00814             it.nWords = 0;
00815             ++ it.it;
00816         }
00817         else {
00818             break;
00819         }
00820     }
00821     nbits -= MAXBITS * nw;
00822 } // ibis::bitvector::copy_runs
00823 
00826 inline void ibis::bitvector::copy_runsn(run& it, word_t& nw) {
00827     // deal with the first word -- need to attach it to the last word in m_vec
00828     if (it.isFill != 0) {
00829         append_counter(!it.fillBit, it.nWords);
00830         nw -= it.nWords;
00831     }
00832     else {
00833         active.val = ALLONES ^ *(it.it);
00834         append_active();
00835         -- nw;
00836     }
00837     ++ it.it; // advance to the next word
00838     it.nWords = 0;
00839     nset = 0;
00840     nbits += MAXBITS * nw;
00841 
00842     while (nw > 0) { // copy the words
00843         it.decode();
00844         if (nw >= it.nWords) {
00845             m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
00846             nw -= it.nWords;
00847             it.nWords = 0;
00848             ++ it.it;
00849         }
00850         else {
00851             break;
00852         }
00853     }
00854     nbits -= MAXBITS * nw;
00855 } // ibis::bitvector::copy_runsn
00856 
00860 inline void ibis::bitvector::copy_fill
00861 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it) {
00862     if (it.fillBit == 0) {
00863         while (it.nWords > 3) {
00864             *jt = 0;
00865             jt[1] = 0;
00866             jt[2] = 0;
00867             jt[3] = 0;
00868             jt += 4;
00869             it.nWords -= 4;
00870         }
00871         if (it.nWords == 1) {
00872             *jt = 0; ++jt;
00873         }
00874         else if (it.nWords == 2) {
00875             *jt = 0; jt[1] = 0; jt += 2;
00876         }
00877         else if (it.nWords == 3) {
00878             *jt = 0; jt[1] = 0; jt[2] = 0; jt += 3;
00879         }
00880     }
00881     else {
00882         while (it.nWords > 3) {
00883             *jt = ALLONES;
00884             jt[1] = ALLONES;
00885             jt[2] = ALLONES;
00886             jt[3] = ALLONES;
00887             jt += 4;
00888             it.nWords -= 4;
00889         }
00890         if (it.nWords == 1) {
00891             *jt = ALLONES; ++jt;
00892         }
00893         else if (it.nWords == 2) {
00894             *jt = ALLONES; jt[1] = ALLONES; jt += 2;
00895         }
00896         else if (it.nWords == 3) {
00897             *jt = ALLONES; jt[1] = ALLONES; jt[2] = ALLONES; jt += 3;
00898         }
00899     }
00900     it.nWords = 0;
00901     ++ it.it;
00902 } // ibis::bitvector::copy_fill
00903 
00908 inline void ibis::bitvector::copy_runs
00909 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
00910     while (nw >= it.nWords && nw > 0) {
00911         if (it.isFill != 0) { // copy a fill
00912             const array_t<word_t>::iterator iend = jt + it.nWords;
00913             if (it.fillBit == 0) {
00914                 while (jt < iend) {
00915                     *jt = 0;
00916                     ++ jt;
00917                 }
00918             }
00919             else {
00920                 while (jt < iend) {
00921                     *jt = ALLONES;
00922                     ++ jt;
00923                 }
00924             }
00925             nw -= it.nWords;
00926         }
00927         else { // copy a single word
00928             *jt = *(it.it);
00929             ++ jt;
00930             -- nw;
00931         }
00932         ++ it.it; // advance to the next word
00933         if (nw > 0) {
00934             it.decode();
00935         }
00936         else {
00937             it.nWords = 0;
00938             return;
00939         }
00940     }
00941 } // ibis::bitvector::copy_runs
00942 
00945 inline void ibis::bitvector::copy_runsn
00946 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
00947     while (nw >= it.nWords) {
00948         if (it.isFill != 0) { // a fill
00949             const array_t<word_t>::iterator iend = jt + it.nWords;
00950             if (it.fillBit == 0) {
00951                 while (jt < iend) {
00952                     *jt = ALLONES;
00953                     ++ jt;
00954                 }
00955             }
00956             else {
00957                 while (jt < iend) {
00958                     *jt = 0;
00959                     ++ jt;
00960                 }
00961             }
00962             nw -= it.nWords;
00963         }
00964         else { // a literal word
00965             *jt = *(it.it) ^ ALLONES;
00966             ++ jt;
00967             -- nw;
00968         }
00969         ++ it.it; // advance to the next word
00970         if (nw > 0) {
00971             it.decode();
00972         }
00973         else {
00974             it.nWords = 0;
00975             return;
00976         }
00977     }
00978 } // ibis::bitvector::copy_runsn
00979 
00980 inline ibis::bitvector::iterator ibis::bitvector::begin() {
00981     iterator it;
00982     it.it     = m_vec.begin();
00983     it.vec    = &m_vec;
00984     it.bitv   = this;
00985     it.active = &active;
00986     it.decodeWord();
00987     return it;
00988 } // ibis::bitvector::begin
00989 
00990 inline ibis::bitvector::iterator ibis::bitvector::end() {
00991     iterator it;
00992     it.ind              = 0;
00993     it.compressed       = 0;
00994     it.nbits            = 0;
00995     it.literalvalue     = 0;
00996     it.fillbit          = 0;
00997     it.it     = m_vec.end() + 1;
00998     it.vec    = &m_vec;
00999     it.bitv   = this;
01000     it.active = &active;
01001     return it;
01002 } // ibis::bitvector::end
01003 
01004 inline ibis::bitvector::const_iterator ibis::bitvector::begin() const {
01005     const_iterator it;
01006     it.it     = m_vec.begin();
01007     it.begin  = m_vec.begin();
01008     it.end    = m_vec.end();
01009     it.active = &active;
01010     it.decodeWord();
01011     return it;
01012 } // ibis::bitvector::begin
01013 
01014 // dereference -- no error checking
01015 inline bool ibis::bitvector::iterator::operator*() const {
01016 #if defined(DEBUG) && DEBUG + 0 > 1
01017     if (vec==0 || it<vec->begin() ||  it>vec->end())
01018         throw "bitvector::const_iterator not initialized correctly.";
01019 #endif
01020     if (compressed != 0)
01021         return (fillbit != 0);
01022     else
01023         return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
01024 } // ibis::bitvector::iterator::operator*
01025 
01026 // comparison only based on the iterator
01027 inline int ibis::bitvector::iterator::operator!=
01028 (const ibis::bitvector::const_iterator& rhs) const throw () {
01029     return (it != rhs.it);
01030 }
01031 inline int ibis::bitvector::iterator::operator==
01032 (const ibis::bitvector::const_iterator& rhs) const throw () {
01033     return (it == rhs.it);
01034 }
01035 inline int ibis::bitvector::iterator::operator!=
01036 (const ibis::bitvector::iterator& rhs) const throw () {
01037     return (it != rhs.it);
01038 }
01039 inline int ibis::bitvector::iterator::operator==
01040 (const ibis::bitvector::iterator& rhs) const throw () {
01041     return (it == rhs.it);
01042 }
01043 
01044 // increment by one
01045 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator++() {
01046 #if defined(DEBUG) && DEBUG + 0 > 1
01047     if (vec==0 || it<vec->begin() || it>vec->end())
01048         throw "bitvector::iterator not formed correctly.";
01049 #endif
01050     if (ind+1<nbits) {++ind;}
01051     else {++ it; decodeWord();}
01052     return *this;
01053 }
01054 
01055 // decrement by one
01056 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator--() {
01057 #if defined(DEBUG) && DEBUG + 0 > 1
01058     if (vec==0 || it<vec->begin() || it>vec->end()+1)
01059         throw "bitvector::iterator not formed correctly.";
01060 #endif
01061     if (ind) -- ind;
01062     else {
01063         if (it <= vec->end()) -- it;
01064         else if (active->nbits) it = vec->end();
01065         else it = vec->end() - 1;
01066         decodeWord();
01067         if (nbits) ind = nbits - 1;
01068     }
01069     return *this;
01070 }
01071 
01072 inline ibis::bitvector::const_iterator ibis::bitvector::end() const {
01073     const_iterator it;
01074     it.ind              = 0;
01075     it.compressed       = 0;
01076     it.nbits            = 0;
01077     it.literalvalue     = 0;
01078     it.fillbit          = 0;
01079     it.it    = m_vec.end() + 1;
01080     it.begin = m_vec.begin();
01081     it.end   = m_vec.end();
01082     it.active = &active;
01083     return it;
01084 } // ibis::bitvector::end()
01085 
01086 // dereference -- no error checking
01087 inline bool ibis::bitvector::const_iterator::operator*() const {
01088 #if defined(DEBUG) && DEBUG + 0 > 1
01089     if (it==0 || end==0 || it>end || nbits<=ind)
01090         throw "bitvector::const_iterator not initialized correctly.";
01091 #endif
01092     if (compressed != 0)
01093         return (fillbit != 0);
01094     else
01095         return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
01096 }
01097 
01098 // comparison only based on the iterator
01099 inline int ibis::bitvector::const_iterator::operator!=
01100 (const ibis::bitvector::const_iterator& rhs) const throw (){
01101     return (it != rhs.it);
01102 }
01103 inline int ibis::bitvector::const_iterator::operator==
01104 (const ibis::bitvector::const_iterator& rhs) const throw () {
01105     return (it == rhs.it);
01106 }
01107 
01108 // increment by one
01109 inline ibis::bitvector::const_iterator&
01110 ibis::bitvector::const_iterator::operator++() {
01111 #if defined(DEBUG) && DEBUG + 0 > 1
01112     if (it==0 || end==0 || it>end)
01113         throw "bitvector::const_iterator not formed correctly.";
01114 #endif
01115     if (ind+1<nbits) {++ind;}
01116     else {++ it; decodeWord();}
01117     return *this;
01118 }
01119 
01120 // decrement by one
01121 inline ibis::bitvector::const_iterator&
01122 ibis::bitvector::const_iterator::operator--() {
01123 #if defined(DEBUG) && DEBUG + 0 > 1
01124     if (it==0 || end==0 || it>end)
01125         throw "bitvector::const_iterator not formed correctly.";
01126 #endif
01127     if (ind) -- ind;
01128     else {
01129         if (it <= end) -- it;
01130         else if (active->nbits) it = end;
01131         else it = end - 1;
01132         decodeWord();
01133         if (nbits) ind = nbits - 1;
01134     }
01135     return *this;
01136 }
01137 
01138 inline ibis::bitvector::indexSet ibis::bitvector::firstIndexSet() const {
01139     indexSet is;
01140     if (m_vec.end() > m_vec.begin()) {
01141         is.it = m_vec.begin() - 1;
01142         is.end = m_vec.end();
01143     }
01144     else {
01145         is.it = 0;
01146         is.end = 0;
01147     }
01148     is.active = &active;
01149     is.ind[0] = static_cast<word_t>(-1);
01150     is.nind = 0;
01151     ++is;
01152     return is;
01153 } // ibis::bitvector::firstIndexSet;
01154 
01158 inline double ibis::bitvector::randomSize(word_t nb, word_t nc) {
01159     double sz = 0.0;
01160     if (nb > 0 && nb >= nc && nb > MAXBITS) {
01161         const double den = static_cast<double>(nc) /
01162             static_cast<double>(nb);
01163         const word_t nw = nb / MAXBITS;
01164         sz = 3.0 + nw - nw * (pow(1.0-den, static_cast<int>(2*MAXBITS))
01165                               + pow(den, static_cast<int>(2*MAXBITS)));
01166     }
01167     return sz*sizeof(word_t);
01168 } // ibis::bitvector::randomSize
01169 
01174 inline double ibis::bitvector::markovSize(word_t nb, word_t nc, double f) {
01175     double sz = 0.0;
01176     if (nb > 0 && nb >= nc && nb > MAXBITS) {
01177         const double den = static_cast<double>(nc) /
01178             static_cast<double>(nb);
01179         const word_t nw = nb / MAXBITS;
01180         if ((den <= 0.5 && f > 1.00001) || (den > 0.5 && (1.0-den)*f > den))
01181             sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f),
01182                                   static_cast<int>(2*MAXBITS-3)) +
01183                   den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3)));
01184         else
01185             sz = (pow(1.0-den, static_cast<int>(2*MAXBITS)) +
01186                   pow(den, static_cast<int>(2*MAXBITS)));
01187         sz = 3.0 + nw * (1.0 - sz);
01188     }
01189     return sz*sizeof(word_t);
01190 } // ibis::bitvector::markovSize
01191 
01193 inline void ibis::bitvector::turnOnRawBit(const word_t ind) {
01194     if (ind < nbits) { // in regular words
01195         m_vec[ind / MAXBITS] |= (1 << (SECONDBIT - (ind % MAXBITS)));
01196         nset = 0;  // don't track nset
01197     }
01198     else { // assume to be in the active word
01199         active.val |= (1 << (active.nbits - (ind - nbits) - 1));
01200 #if defined(DEBUG)
01201         if (ind >= nbits + active.nbits ||
01202             active.val >= (1U << active.nbits)) {
01203             LOGGER(ibis::gVerbose >= 0)
01204                 << "bitvector::turnOnRawBit(" << ind
01205                 << ") found a bad active word";
01206         }
01207 #endif
01208     }
01209 } // ibis::bitvector::turnOnRawBit
01210 
01211 std::ostream& operator<<(std::ostream&, const ibis::bitvector&);
01212 #endif // __BITVECTOR_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive