bitvector64.h
Go to the documentation of this file.
00001 // $Id$
00002 //      Author: John Wu <John.Wu at ACM.org>
00003 //              Lawrence Berkeley National Laboratory
00004 //      Copyright 2000-2012 the Regents of the University of California
00005 #ifndef BITVECTOR64_H
00006 #define BITVECTOR64_H
00007 
00008 
00009 
00010 #include "array_t.h"    // alternative to std::vector
00011 
00012 #include <stdio.h>      // sprintf
00013 
00014 
00054 class ibis::bitvector64 {
00055 public:
00056     typedef uint64_t word_t;
00057 
00058     // constructors of bitvector64 class
00059     bitvector64() : nbits(0), nset(0), active(), m_vec() {};
00060     ~bitvector64() {clear();};
00061     bitvector64(const bitvector64& bv) : nbits(bv.nbits), nset(bv.nset),
00062         active(bv.active), m_vec(bv.m_vec) {};
00063     bitvector64(const array_t<word_t>& arr);
00064     bitvector64(const char* file); 
00065     inline bitvector64& operator=(const bitvector64& bv); 
00066     inline bitvector64& copy(const bitvector64& bv);      
00067     inline bitvector64& swap(bitvector64& bv);
00068     // use bv to replace part of the existing value, match the ith bit with
00069     // the first of bv, return reference to self
00070     //bitvector64& copy(const word_t i, const bitvector64& bv);
00073     void setBit(const word_t i, int val);
00075     void erase(word_t i, word_t j);
00076 
00079     void set(int val, word_t n);
00081     void clear() {nbits = 0; nset = 0; active.reset(); m_vec.clear();}
00082 
00083     bitvector64& operator+=(const bitvector64& bv); 
00084     inline bitvector64& operator+=(int b);      
00085     void appendWord(word_t w);                  
00086 
00087     inline void appendFill(int val, word_t n);
00088 
00090     int operator==(const bitvector64& rhs) const;
00091 
00093     void flip();
00095     void operator&=(const bitvector64& rhs);
00097     bitvector64* operator&(const bitvector64&) const;
00099     void operator|=(const bitvector64& rhs);
00101     bitvector64* operator|(const bitvector64&) const;
00103     void operator^=(const bitvector64& rhs);
00105     bitvector64* operator^(const bitvector64&) const;
00107     void operator-=(const bitvector64& rhs);
00110     bitvector64* operator-(const bitvector64&) const;
00111 
00112     // I/O functions.
00115     void read(const char *fn);
00117     void write(const char *fn) const;
00118     void write(FILE* fptr) const;
00120     void write(array_t<word_t>& arr) const;
00121 
00122     void compress();    
00123     void decompress();  
00124 
00125     word_t compressible() const;
00128     bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);}
00129 
00131     word_t cnt() const {
00132         if (nset==0) do_cnt(); return (nset+cnt_ones(active.val));
00133     };
00134 
00136     word_t size() const throw() {return (nbits+active.nbits);};
00138     inline word_t numFillWords() const;
00140     word_t bytes() const throw() {
00141         return (m_vec.size()*sizeof(word_t) + sizeof(bitvector64));
00142     };
00145     word_t getSerialSize() const throw() {
00146         return (m_vec.size() + 1 + (active.nbits>0));
00147     };
00149     static unsigned bitsPerLiteral() {return MAXBITS;}
00153     inline static double randomSize(word_t nb, word_t nc);
00158     inline static double markovSize(word_t nb, word_t nc, double f);
00161     static double clusteringFactor(word_t nb, word_t nc, word_t nw);
00162 
00168     void adjustSize(word_t nv, word_t nt);
00169     std::ostream& print(std::ostream &) const; 
00170 
00172     class iterator;
00173     inline iterator end();
00174     inline iterator begin();
00175 
00177     class const_iterator;
00178     inline const_iterator end() const;
00179     inline const_iterator begin() const;
00180 
00182     class indexSet;
00183     inline indexSet firstIndexSet() const;
00184 
00185     // give accesses to some friends
00186     friend class indexSet;
00187     friend class iterator;
00188     friend class const_iterator;
00189 
00190 protected:
00191     inline bool all0s() const;
00192     inline bool all1s() const;
00193 
00194 private:
00195     // static members, i.e., constants to be used internally
00196     static const unsigned MAXBITS;
00197     static const unsigned SECONDBIT;
00198     static const word_t FILLBIT;
00199     static const word_t HEADER0;
00200     static const word_t HEADER1;
00201     static const word_t ALLONES;
00202     static const word_t MAXCNT;
00203 
00210     struct active_word {
00211         word_t val;     // the value
00212         word_t nbits;   // total number of bits
00213 
00214         active_word() : val(0), nbits(0) {};
00215         void reset() {val = 0; nbits = 0;};
00216         int is_full() const {return (nbits >= MAXBITS);};
00217         void append(int b) { // append a single bit
00218             val <<= 1; nbits ++; val += b;
00219         };
00220     }; // struct active_word
00221 
00224     struct run {
00225         int isFill;
00226         int fillBit;
00227         word_t nWords;
00228         array_t<word_t>::const_iterator it;
00229         run() : isFill(0), fillBit(0), nWords(0), it(0) {};
00230         void decode() { 
00231             fillBit = (*it > HEADER1);
00232             if (*it > ALLONES) {
00233                 nWords = (*it & MAXCNT);
00234                 isFill = 1;
00235             }
00236             else {
00237                 nWords = 1;
00238                 isFill = 0;
00239             }
00240         };
00243         void operator--() {
00244             if (nWords > 1) {--nWords;}
00245             else {++ it; nWords = 0;}
00246         };
00249         void operator-=(word_t len) {
00250             while (len > 0) {
00251                 if (nWords == 0) decode();
00252                 if (isFill) {
00253                     if (nWords > len) {nWords -= len; len = 0;}
00254                     else if (nWords == len) {nWords = 0; len = 0; ++ it;}
00255                     else {len -= nWords; ++ it; nWords = 0;}
00256                 }
00257                 else {-- len; ++ it; nWords = 0;}
00258             }
00259         };
00260     };
00261     friend struct run;
00262     friend struct active_word;
00263 
00264     // member variables of bitvector64 class
00265     word_t nbits;       
00266     mutable word_t nset;
00267     active_word active; 
00268     array_t<word_t> m_vec;      
00269 
00270     // private functions of bitvector64 class
00271     // The following three functions all performs or operation, _c2 and _c1
00272     // generate compressed solutions, but _c0, _d1, and _d2 generate
00273     // uncompressed solutions.
00274     // or_c2 assumes there are compressed word in both operands
00275     // or_c1 *this may contain compressed word, but not rhs
00276     // or_c0 assumes both operands are not compressed
00277     // or_d1 *this contains no compressed word and is overwritten with the
00278     //       result
00279     // or_d2 both *this and rhs are compressed, but res will not be compressed
00280     void or_c2(const bitvector64& rhs, bitvector64& res) const;
00281     void or_c1(const bitvector64& rhs, bitvector64& res) const;
00282     void or_c0(const bitvector64& rhs);
00283     void or_d1(const bitvector64& rhs);
00284     void or_d2(const bitvector64& rhs, bitvector64& res) const;
00285     void and_c2(const bitvector64& rhs, bitvector64& res) const;
00286     void and_c1(const bitvector64& rhs, bitvector64& res) const;
00287     void and_c0(const bitvector64& rhs);
00288     void and_d1(const bitvector64& rhs);
00289     void and_d2(const bitvector64& rhs, bitvector64& res) const;
00290     void xor_c2(const bitvector64& rhs, bitvector64& res) const;
00291     void xor_c1(const bitvector64& rhs, bitvector64& res) const;
00292     void xor_c0(const bitvector64& rhs);
00293     void xor_d1(const bitvector64& rhs);
00294     void xor_d2(const bitvector64& rhs, bitvector64& res) const;
00295     void minus_c2(const bitvector64& rhs, bitvector64& res) const;
00296     void minus_c1(const bitvector64& rhs, bitvector64& res) const;
00297     void minus_c1x(const bitvector64& rhs, bitvector64& res) const;
00298     void minus_c0(const bitvector64& rhs);
00299     void minus_d1(const bitvector64& rhs);
00300     void minus_d2(const bitvector64& rhs, bitvector64& res) const;
00301     inline void copy_runs(run& it, word_t& nw); // copy nw words
00302     inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate
00303     inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
00304     inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
00305                           word_t& nw);
00306     inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
00307                            word_t& nw);
00308     void decompress(array_t<word_t>& tmp) const;
00309     void copy_comp(array_t<word_t>& tmp) const;
00310     inline void append_active();
00311     inline void append_counter(int val, word_t cnt);
00312     inline unsigned cnt_ones(word_t) const; // number of 1s in a literal word
00313     inline word_t cnt_bits(word_t) const; // number of bits in a word
00314     word_t do_cnt() const; // count the number of bits and number of ones
00315 }; // end class bitvector64
00316 
00326 class ibis::bitvector64::indexSet {
00327 public:
00328     // let the compiler define most of the canonical functions
00329 
00330     // allow bitvector64::firstIndexSet() to access private variables of this
00331     // class
00332     friend indexSet ibis::bitvector64::firstIndexSet() const;
00333 
00334     //int operator!=(const indexSet& rhs) const {return (it != rhs.it);};
00335     bool isRange() const {return (nind>=ibis::bitvector64::MAXBITS);}
00336     const word_t* indices() const {return ind;};
00337     word_t nIndices() const {return nind;}
00338     indexSet& operator++();
00339 
00340 private:
00341     array_t<word_t>::const_iterator it;
00342     array_t<word_t>::const_iterator end;
00343     const active_word* active; // points back to the active word
00344     word_t nind; // number of indices
00345     word_t ind[64];
00346 }; // class ibis::bitvector64::indexSet
00347 
00349 class ibis::bitvector64::const_iterator {
00350 public:
00351     inline bool operator*() const;
00352     inline int operator!=(const const_iterator& rhs) const throw ();
00353     inline int operator==(const const_iterator& rhs) const throw ();
00354 
00355     inline const_iterator& operator++();
00356     inline const_iterator& operator--();
00357     const_iterator& operator+=(int64_t incr);
00358 
00359 private:
00360     ibis::bitvector64::word_t   compressed;
00361     ibis::bitvector64::word_t   ind;
00362     ibis::bitvector64::word_t   nbits;
00363     ibis::bitvector64::word_t   literalvalue;
00364     int                         fillbit;
00365     const active_word*          active;
00366     array_t<word_t>::const_iterator end;
00367     array_t<word_t>::const_iterator begin;
00368     array_t<word_t>::const_iterator it;
00369 
00370     void decodeWord();
00371 
00372     // give three functions of bitvector64 access to private variables
00373     friend void ibis::bitvector64::erase(word_t i, word_t j);
00374     friend const_iterator ibis::bitvector64::begin() const;
00375     friend const_iterator ibis::bitvector64::end() const;
00376     friend class ibis::bitvector64::iterator;
00377 }; // end class ibis::bitvector64::const_iterator
00378 
00386 class ibis::bitvector64::iterator {
00387 public:
00388     inline bool operator*() const; // still can not modify this
00389     inline int operator!=(const const_iterator& rhs) const throw ();
00390     inline int operator==(const const_iterator& rhs) const throw ();
00391     inline int operator!=(const iterator& rhs) const throw ();
00392     inline int operator==(const iterator& rhs) const throw ();
00393 
00394     inline iterator& operator++();
00395     inline iterator& operator--();
00396     iterator& operator+=(int64_t incr);
00397     iterator& operator=(int val);
00398 
00399 private:
00400     ibis::bitvector64::word_t   compressed;
00401     ibis::bitvector64::word_t   ind;
00402     ibis::bitvector64::word_t   nbits;
00403     ibis::bitvector64::word_t   literalvalue;
00404     int                         fillbit;
00405     ibis::bitvector64*          bitv;
00406     active_word*                active;
00407     array_t<word_t>*            vec;
00408     array_t<word_t>::iterator   it;
00409 
00410     void decodeWord();
00411 
00412     friend iterator ibis::bitvector64::begin();
00413     friend iterator ibis::bitvector64::end();
00414 }; // end class ibis::bitvector64::iterator
00415 
00417 inline bool ibis::bitvector64::all0s() const {
00418     if (m_vec.empty()) {
00419         return true;
00420     }
00421     else if (m_vec.size() == 1) {
00422         return (m_vec[0] == 0 || (m_vec[0] >= HEADER0 && m_vec[0] < HEADER1));
00423     }
00424     else {
00425         return false;
00426     }
00427 } // ibis::bitvector64::all0s
00428 
00430 inline bool ibis::bitvector64::all1s() const {
00431     if (m_vec.size() == 1) {
00432         return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
00433     }
00434     else {
00435         return false;
00436     }
00437 } // ibis::bitvector64::all1s()
00438 
00440 inline ibis::bitvector64::word_t
00441 ibis::bitvector64::cnt_bits(ibis::bitvector64::word_t val) const {
00442     return  ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
00443 }
00444 
00446 inline unsigned 
00447 ibis::bitvector64::cnt_ones(ibis::bitvector64::word_t val) const {
00448     // number of 1 bits in a value between 0 and 255
00449     static const unsigned table[256] = {
00450         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00451         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00452         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00453         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00454         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00455         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00456         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00457         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00458         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00459         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00460         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00461         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00462         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00463         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00464         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00465         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
00466     //assert(8 == sizeof(word_t));
00467     return table[val&0xFF] + table[(val>>8)&0xFF] +
00468         table[(val>>16)&0xFF] + table[(val>>24)&0xFF] +
00469         table[(val>>32)&0xFF] + table[(val>>40)&0xFF] +
00470         table[(val>>48)&0xFF] + table[val>>56];
00471 } // inline unsigned ibis::bitvector64::cnt_ones(word_t) const
00472 
00473 inline ibis::bitvector64::word_t
00474 ibis::bitvector64::numFillWords() const {
00475     word_t cnt=0;
00476     array_t<word_t>::const_iterator it = m_vec.begin();
00477     while (it != m_vec.end()) {
00478         cnt += (*it >> ibis::bitvector64::MAXBITS);
00479         it++;
00480     }
00481     return cnt;
00482 } // ibis::bitvector64::numFillWords
00483 
00484 // The assignment operator.  Wanted to use shallow copy for efficiency
00485 // consideration, but SHALLOW copy causes unexpected problem in test
00486 // program bitty.cpp change to deep copy
00487 inline ibis::bitvector64&
00488 ibis::bitvector64::operator=(const ibis::bitvector64& bv) {
00489     nbits = bv.nbits; nset = bv.nset; active = bv.active;
00490     m_vec.deepCopy(bv.m_vec);
00491     return *this;
00492 }
00493 
00494 // deep copy
00495 inline ibis::bitvector64& 
00496 ibis::bitvector64::copy(const ibis::bitvector64& bv) {
00497     nbits = bv.nbits; nset = bv.nset; active = bv.active;
00498     m_vec.deepCopy(bv.m_vec);
00499     return *this;
00500 }
00501 
00502 inline ibis::bitvector64&
00503 ibis::bitvector64::swap(bitvector64& bv) {
00504     word_t tmp;
00505     tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
00506     tmp = bv.nset; bv.nset = nset; nset = tmp;
00507     active_word atmp = bv.active;
00508     bv.active = active; active = atmp;
00509     m_vec.swap(bv.m_vec);
00510     return *this;
00511 }
00512 
00513 // append_active assumes that nbits == MAXBITS and refers to MAXBITS instead
00514 // of nbits
00515 inline void ibis::bitvector64::append_active() {
00516     if (m_vec.empty()) {
00517         m_vec.push_back(active.val);
00518     }
00519     else if (active.val == 0) {// incoming word is zero
00520         if (m_vec.back() == 0) {
00521             m_vec.back() = (HEADER0 + 2);
00522         }
00523         else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
00524             ++ m_vec.back();
00525         }
00526         else {
00527             m_vec.push_back(active.val);
00528         }
00529     }
00530     else if (active.val == ALLONES) {// incoming word is allones
00531         if (m_vec.back() == ALLONES) {
00532             m_vec.back() = (HEADER1 | 2);
00533         }
00534         else if (m_vec.back() >= HEADER1) {
00535             ++m_vec.back();
00536         }
00537         else {
00538             m_vec.push_back(active.val);
00539         }
00540     }
00541     else { // incoming word contains a mixture of bits
00542         m_vec.push_back(active.val);
00543     }
00544     nbits += MAXBITS;
00545     active.reset();
00546     nset = 0;
00547 } // void ibis::bitvector64::append_active()
00548 
00549 // a private function to append a single counter when the active word is
00550 // empty cnt is assumed to be multiples of MAXBITS (more than MAXBITS)
00551 inline void ibis::bitvector64::append_counter(int val, word_t cnt) {
00552     word_t head = 2 + val;
00553     word_t w = (head << SECONDBIT) + cnt;
00554     nbits += cnt*MAXBITS;
00555     if (m_vec.empty()) {
00556         m_vec.push_back(w);
00557     }
00558     else if ((m_vec.back()>>SECONDBIT) == head) {
00559         m_vec.back() += cnt;
00560     }
00561     else if ((m_vec.back()==ALLONES) && head==3) {
00562         m_vec.back() = w + 1;
00563     }
00564     else if ((m_vec.back() == 0) && head==2) {
00565         m_vec.back() = w + 1;
00566     }
00567     else {
00568         m_vec.push_back(w);
00569     }
00570 } // void append_counter()
00571 
00572 // append a single bit
00573 inline ibis::bitvector64& ibis::bitvector64::operator+=(int b) {
00574     active.append(b);
00575     if (active.is_full()) append_active();
00576     return *this;
00577 } // ibis::bitvector64& ibis::bitvector64::operator+=(int b)
00578 
00579 // append n bits of val (n may be arbitrary integer, value must be either 0
00580 // or 1)
00581 inline void ibis::bitvector64::appendFill(int val,
00582                                           ibis::bitvector64::word_t n) {
00583     if (active.nbits > 0) {
00584         word_t tmp = (MAXBITS - active.nbits);
00585         if (tmp > n) tmp = n;
00586         active.nbits += tmp;
00587         active.val <<= tmp;
00588         n -= tmp;
00589         if (val)
00590             active.val |= (static_cast<word_t>(1)<<tmp) - 1;
00591         if (active.nbits >= MAXBITS) append_active();
00592     }
00593     if (n >= MAXBITS) {
00594         word_t cnt = n / MAXBITS;
00595         if (cnt > 1)
00596             append_counter(val, cnt);
00597         else if (val) {
00598             active.val = ALLONES;
00599             append_active();
00600         }
00601         else {
00602             active.val = 0;
00603             append_active();
00604         }
00605         n -= cnt * MAXBITS;
00606     }
00607     if (n > 0) {
00608         active.nbits = n;
00609         active.val = val*((static_cast<word_t>(1)<<n)-1);
00610     }
00611 } // ibis::bitvector64::appendFill(val, n)
00612 
00613 // append nw words starting from 'it' to the current bit vector -- assume
00614 // active is empty
00615 inline void ibis::bitvector64::copy_runs(run& it, word_t& nw) {
00616     // deal with the first word -- need to attach it to the last word in m_vec
00617     if (it.isFill) {
00618         append_counter(it.fillBit, it.nWords);
00619         nw -= it.nWords;
00620     }
00621     else {
00622         active.val = *(it.it);
00623         append_active();
00624         -- nw;
00625     }
00626     ++ it.it; // advance to the next word
00627     //if (nw == 0) {it.nWords = 0; return;}
00628     it.decode();
00629     nset = 0;
00630     nbits += MAXBITS * nw;
00631     while (nw >= it.nWords && nw > 0) { // only need to copy the word
00632         m_vec.push_back(*(it.it));
00633         nw -= it.nWords;
00634         ++ it.it; // advance to the next word
00635         it.decode();
00636     }
00637     nbits -= MAXBITS * nw;
00638 } // ibis::bitvector64::copy_runs
00639 
00640 // append nw words starting from it to the current bit vector -- assume
00641 // active is empty
00642 inline void ibis::bitvector64::copy_runsn(run& it, word_t& nw) {
00643     // deal with the first word -- need to attach it to the last word in m_vec
00644     if (it.isFill) {
00645         append_counter(!it.fillBit, it.nWords);
00646         nw -= it.nWords;
00647     }
00648     else {
00649         active.val = ALLONES ^ *(it.it);
00650         append_active();
00651         -- nw;
00652     }
00653     ++ it.it; // advance to the next word
00654     //if (nw == 0) {it.nWords = 0; return;}
00655     it.decode();
00656     nset = 0;
00657     nbits += MAXBITS * nw;
00658     while (nw >= it.nWords && nw > 0) { // only need to copy the word
00659         m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
00660         nw -= it.nWords;
00661         ++ it.it; // advance to the next word
00662         it.decode();
00663     }
00664     nbits -= MAXBITS * nw;
00665 } // ibis::bitvector64::copy_runsn
00666 
00667 // copy the fill in "run it" as literal words
00668 inline void ibis::bitvector64::copy_fill
00669 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it) {
00670     const array_t<word_t>::iterator iend = jt + it.nWords;
00671     if (it.fillBit == 0) {
00672         switch (it.nWords) {
00673         case 0: break;
00674         case 1:
00675             *jt = 0; ++jt; break;
00676         case 2:
00677             *jt = 0; ++jt; *jt = 0; ++jt; break;
00678         case 3:
00679             *jt = 0; ++jt; *jt = 0; ++jt; *jt = 0; ++jt; break;
00680         default:
00681             while (jt < iend) {
00682                 *jt = 0;
00683                 ++ jt;
00684             }
00685             break;
00686         }
00687     }
00688     else {
00689         switch (it.nWords) {
00690         case 0: break;
00691         case 1:
00692             *jt = ALLONES; ++jt; break;
00693         case 2:
00694             *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; break;
00695         case 3:
00696             *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; *jt = ALLONES; ++jt;
00697             break;
00698         default:
00699             while (jt < iend) {
00700                 *jt = ALLONES;
00701                 ++ jt;
00702             }
00703             break;
00704         }
00705     }
00706     it.nWords = 0;
00707     ++ it.it;
00708 } // ibis::bitvector64::copy_fill
00709 
00710 // copy the next nw words (nw X MAXBITS bits) starting with run it
00711 // to an array_t as uncompressed words
00712 inline void ibis::bitvector64::copy_runs
00713 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) {
00714     while (nw >= it.nWords && nw > 0) { // only need to copy the word
00715         if (it.isFill) {
00716             const array_t<word_t>::iterator iend = jt + it.nWords;
00717             if (it.fillBit == 0) {
00718                 while (jt < iend) {
00719                     *jt = 0;
00720                     ++ jt;
00721                 }
00722             }
00723             else {
00724                 while (jt < iend) {
00725                     *jt = ALLONES;
00726                     ++ jt;
00727                 }
00728             }
00729             nw -= it.nWords;
00730         }
00731         else {
00732             *jt = *(it.it);
00733             ++ jt;
00734             -- nw;
00735         }
00736         ++ it.it; // advance to the next word
00737         it.decode();
00738     }
00739 } // ibis::bitvector64::copy_runs
00740 
00741 // copy the complements of the next nw words (nw X MAXBITS bits)
00742 // starting with "run it" as uncompressed words
00743 inline void ibis::bitvector64::copy_runsn
00744 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) {
00745     while (nw >= it.nWords && nw > 0) { // only need to copy the word
00746         if (it.isFill) {
00747             const array_t<word_t>::iterator iend = jt + it.nWords;
00748             if (it.fillBit == 0) {
00749                 while (jt < iend) {
00750                     *jt = ALLONES;
00751                     ++ jt;
00752                 }
00753             }
00754             else {
00755                 while (jt < iend) {
00756                     *jt = 0;
00757                     ++ jt;
00758                 }
00759             }
00760             nw -= it.nWords;
00761         }
00762         else {
00763             *jt = *(it.it) ^ ALLONES;
00764             ++ jt;
00765             -- nw;
00766         }
00767         ++ it.it; // advance to the next word
00768         it.decode();
00769     }
00770 } // ibis::bitvector64::copy_runsn
00771 
00772 inline ibis::bitvector64::iterator ibis::bitvector64::begin() {
00773     iterator it;
00774     it.it     = m_vec.begin();
00775     it.vec    = &m_vec;
00776     it.bitv   = this;
00777     it.active = &active;
00778     it.decodeWord();
00779     return it;
00780 } // ibis::bitvector64::begin()
00781 
00782 inline ibis::bitvector64::iterator ibis::bitvector64::end() {
00783     iterator it;
00784     it.ind              = 0;
00785     it.compressed       = 0;
00786     it.nbits            = 0;
00787     it.literalvalue     = 0;
00788     it.fillbit          = 0;
00789     it.it     = m_vec.end() + 1;
00790     it.vec    = &m_vec;
00791     it.bitv   = this;
00792     it.active = &active;
00793     return it;
00794 } // ibis::bitvector64::end()
00795 
00796 inline ibis::bitvector64::const_iterator ibis::bitvector64::begin() const {
00797     const_iterator it;
00798     it.it     = m_vec.begin();
00799     it.begin  = m_vec.begin();
00800     it.end    = m_vec.end();
00801     it.active = &active;
00802     it.decodeWord();
00803     return it;
00804 } // ibis::bitvector64::begin()
00805 
00806 // dereference -- no error checking
00807 inline bool ibis::bitvector64::iterator::operator*() const {
00808 #if defined(DEBUG) && DEBUG + 0 > 1
00809     if (vec==0 || it<vec->begin() ||  it>vec->end())
00810         throw "bitvector64::const_iterator not initialized correctly.";
00811 #endif
00812     if (compressed != 0)
00813         return (fillbit != 0);
00814     else
00815         return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind)));
00816 }
00817 
00818 // comparison only based on the iterator
00819 inline int ibis::bitvector64::iterator::operator!=
00820 (const ibis::bitvector64::const_iterator& rhs) const throw () {
00821     return (it != rhs.it);
00822 }
00823 inline int ibis::bitvector64::iterator::operator==
00824 (const ibis::bitvector64::const_iterator& rhs) const throw () {
00825     return (it == rhs.it);
00826 }
00827 inline int ibis::bitvector64::iterator::operator!=
00828 (const ibis::bitvector64::iterator& rhs) const throw () {
00829     return (it != rhs.it);
00830 }
00831 inline int ibis::bitvector64::iterator::operator==
00832 (const ibis::bitvector64::iterator& rhs) const throw () {
00833     return (it == rhs.it);
00834 }
00835 
00836 // increment by one
00837 inline ibis::bitvector64::iterator& ibis::bitvector64::iterator::operator++() {
00838 #if defined(DEBUG) && DEBUG + 0 > 1
00839     if (vec==0 || it<vec->begin() || it>vec->end())
00840         throw "bitvector64::iterator not formed correctly.";
00841 #endif
00842     if (ind+1<nbits) {++ind;}
00843     else {++ it; decodeWord();}
00844     return *this;
00845 }
00846 
00847 // decrement by one
00848 inline ibis::bitvector64::iterator& ibis::bitvector64::iterator::operator--() {
00849 #if defined(DEBUG) && DEBUG + 0 > 1
00850     if (vec==0 || it<vec->begin() || it>vec->end()+1)
00851         throw "bitvector64::iterator not formed correctly.";
00852 #endif
00853     if (ind) -- ind;
00854     else {
00855         if (it <= vec->end()) -- it;
00856         else if (active->nbits) it = vec->end();
00857         else it = vec->end() - 1;
00858         decodeWord();
00859         if (nbits) ind = nbits - 1;
00860     }
00861     return *this;
00862 }
00863 
00864 inline ibis::bitvector64::const_iterator ibis::bitvector64::end() const {
00865     const_iterator it;
00866     it.ind              = 0;
00867     it.compressed       = 0;
00868     it.nbits            = 0;
00869     it.literalvalue     = 0;
00870     it.fillbit          = 0;
00871     it.it    = m_vec.end() + 1;
00872     it.begin = m_vec.begin();
00873     it.end   = m_vec.end();
00874     it.active = &active;
00875     return it;
00876 } // ibis::bitvector64::end()
00877 
00878 // dereference -- no error checking
00879 inline bool ibis::bitvector64::const_iterator::operator*() const {
00880 #if defined(DEBUG) && DEBUG + 0 > 1
00881     if (it==0 || end==0 || it>end || nbits<=ind)
00882         throw "bitvector64::const_iterator not initialized correctly.";
00883 #endif
00884     if (compressed != 0)
00885         return (fillbit != 0);
00886     else
00887         return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind)));
00888 }
00889 
00890 // comparison only based on the iterator
00891 inline int ibis::bitvector64::const_iterator::operator!=
00892 (const ibis::bitvector64::const_iterator& rhs) const throw (){
00893     return (it != rhs.it);
00894 }
00895 inline int ibis::bitvector64::const_iterator::operator==
00896 (const ibis::bitvector64::const_iterator& rhs) const throw () {
00897     return (it == rhs.it);
00898 }
00899 
00900 // increment by one
00901 inline ibis::bitvector64::const_iterator&
00902 ibis::bitvector64::const_iterator::operator++() {
00903 #if defined(DEBUG) && DEBUG + 0 > 1
00904     if (it==0 || end==0 || it>end)
00905         throw "bitvector64::const_iterator not formed correctly.";
00906 #endif
00907     if (ind+1<nbits) {++ind;}
00908     else {++ it; decodeWord();}
00909     return *this;
00910 }
00911 
00912 // decrement by one
00913 inline ibis::bitvector64::const_iterator&
00914 ibis::bitvector64::const_iterator::operator--() {
00915 #if defined(DEBUG) && DEBUG + 0 > 1
00916     if (it==0 || end==0 || it>end)
00917         throw "bitvector64::const_iterator not formed correctly.";
00918 #endif
00919     if (ind) -- ind;
00920     else {
00921         if (it <= end) -- it;
00922         else if (active->nbits) it = end;
00923         else it = end - 1;
00924         decodeWord();
00925         if (nbits) ind = nbits - 1;
00926     }
00927     return *this;
00928 }
00929 
00930 inline ibis::bitvector64::indexSet ibis::bitvector64::firstIndexSet() const {
00931     indexSet is;
00932     if (m_vec.end() > m_vec.begin()) {
00933         is.it = m_vec.begin() - 1;
00934         is.end = m_vec.end();
00935     }
00936     else {
00937         is.it = 0;
00938         is.end = 0;
00939     }
00940     is.active = &active;
00941     is.ind[0] = static_cast<word_t>(-1);
00942     is.nind = 0;
00943     ++is;
00944     return is;
00945 } // end ibis::bitvector64::firstIndexSet;
00946 
00947 inline double ibis::bitvector64::randomSize(word_t nb, word_t nc) {
00948     double sz = 0.0;
00949     if (nb > 0 && nb >= nc) {
00950         const double den = static_cast<double>(nc) /
00951             static_cast<double>(nb);
00952         const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0);
00953         sz = 3.0 + nw - nw *
00954             (pow(1.0-den, static_cast<int>(2*SECONDBIT)) +
00955              pow(den, static_cast<int>(2*SECONDBIT)));
00956     }
00957     return sz*sizeof(word_t);
00958 } // end ibis::bitvector64::randomSize
00959 
00960 inline double
00961 ibis::bitvector64::markovSize(word_t nb, word_t nc, double f) {
00962     double sz = 0.0;
00963     if (nb > 0 && nb >= nc) {
00964         const double den = static_cast<double>(nc) /
00965             static_cast<double>(nb);
00966         const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0);
00967         if ((den <= 0.5 && f > 1.0) || (den > 0.5 && (1.0-den)*f > den))
00968             sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f),
00969                                   static_cast<int>(2*MAXBITS-3)) +
00970                   den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3)));
00971         else
00972             sz = (pow(1.0-den, static_cast<int>(2*SECONDBIT)) +
00973                   pow(den, static_cast<int>(2*SECONDBIT)));
00974         sz = 3.0 + nw * (1.0 - sz);
00975     }
00976     return sz*sizeof(word_t);
00977 } // end ibis::bitvector64::markovSize
00978 
00979 std::ostream& operator<<(std::ostream&, const ibis::bitvector64&);
00980 #endif // __BITVECTOR64_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive