bitvector64.h
Go to the documentation of this file.
00001 // $Id$
00002 //      Author: John Wu <John.Wu at ACM.org>
00003 //              Lawrence Berkeley National Laboratory
00004 //      Copyright 2000-2011 the Regents of the University of California
00005 #ifndef BITVECTOR64_H
00006 #define BITVECTOR64_H
00007 
00008 
00009 
00010 #include "array_t.h"    // alternative to std::vector
00011 
00012 #include <stdio.h>      // sprintf
00013 #include <limits.h>     // INT_MAX
00014 #include <iomanip>      // setw()
00015 
00016 
00056 class ibis::bitvector64 {
00057 public:
00058     typedef uint64_t word_t;
00059 
00060     // constructors of bitvector64 class
00061     bitvector64() : nbits(0), nset(0), active(), m_vec() {};
00062     ~bitvector64() {clear();};
00063     bitvector64(const bitvector64& bv) : nbits(bv.nbits), nset(bv.nset),
00064         active(bv.active), m_vec(bv.m_vec) {};
00065     bitvector64(const array_t<word_t>& arr);
00066     bitvector64(const char* file); 
00067     inline bitvector64& operator=(const bitvector64& bv); 
00068     inline bitvector64& copy(const bitvector64& bv);      
00069     inline bitvector64& swap(bitvector64& bv);
00070     // use bv to replace part of the existing value, match the ith bit with
00071     // the first of bv, return reference to self
00072     //bitvector64& copy(const word_t i, const bitvector64& bv);
00075     void setBit(const word_t i, int val);
00077     void erase(word_t i, word_t j);
00078 
00081     void set(int val, word_t n);
00083     void clear() {nbits = 0; nset = 0; active.reset(); m_vec.clear();}
00084 
00085     bitvector64& operator+=(const bitvector64& bv); 
00086     inline bitvector64& operator+=(int b);      
00087     void appendWord(word_t w);                  
00088 
00089     inline void appendFill(int val, word_t n);
00090 
00092     int operator==(const bitvector64& rhs) const;
00093 
00095     void flip();
00097     void operator&=(const bitvector64& rhs);
00099     bitvector64* operator&(const bitvector64&) const;
00101     void operator|=(const bitvector64& rhs);
00103     bitvector64* operator|(const bitvector64&) const;
00105     void operator^=(const bitvector64& rhs);
00107     bitvector64* operator^(const bitvector64&) const;
00109     void operator-=(const bitvector64& rhs);
00112     bitvector64* operator-(const bitvector64&) const;
00113 
00114     // I/O functions.
00117     void read(const char *fn);
00119     void write(const char *fn) const;
00120     void write(FILE* fptr) const;
00122     void write(array_t<word_t>& arr) const;
00123 
00124     void compress();    
00125     void decompress();  
00126 
00127     word_t compressible() const;
00128 
00130     word_t cnt() const {
00131         if (nset==0) do_cnt(); return (nset+cnt_ones(active.val));
00132     };
00133 
00135     word_t size() const throw() {return (nbits+active.nbits);};
00137     inline word_t numFillWords() const;
00139     word_t bytes() const throw() {
00140         return (m_vec.size()*sizeof(word_t) + sizeof(bitvector64));
00141     };
00144     word_t getSerialSize() const throw() {
00145         return (m_vec.size() + 1 + (active.nbits>0));
00146     };
00148     static unsigned bitsPerLiteral() {return MAXBITS;}
00152     inline static double randomSize(word_t nb, word_t nc);
00157     inline static double markovSize(word_t nb, word_t nc, double f);
00160     static double clusteringFactor(word_t nb, word_t nc, word_t nw);
00161 
00167     void adjustSize(word_t nv, word_t nt);
00168     std::ostream& print(std::ostream &) const; 
00169 
00171     class iterator;
00172     inline iterator end();
00173     inline iterator begin();
00174 
00176     class const_iterator;
00177     inline const_iterator end() const;
00178     inline const_iterator begin() const;
00179 
00181     class indexSet;
00182     inline indexSet firstIndexSet() const;
00183 
00184     // give accesses to some friends
00185     friend class indexSet;
00186     friend class iterator;
00187     friend class const_iterator;
00188 
00189 protected:
00190     bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);}
00191     inline bool all0s() const;
00192     inline bool all1s() const;
00193 
00194 private:
00195     // static members, i.e., constants to be used internally
00196     static const unsigned MAXBITS;
00197     static const unsigned SECONDBIT;
00198     static const word_t FILLBIT;
00199     static const word_t HEADER0;
00200     static const word_t HEADER1;
00201     static const word_t ALLONES;
00202     static const word_t MAXCNT;
00203 
00210     struct active_word {
00211         word_t val;     // the value
00212         word_t nbits;   // total number of bits
00213 
00214         active_word() : val(0), nbits(0) {};
00215         void reset() {val = 0; nbits = 0;};
00216         int is_full() const {return (nbits >= MAXBITS);};
00217         void append(int b) { // append a single bit
00218             val <<= 1; nbits ++; val += b;
00219         };
00220     }; // struct active_word
00221 
00224     struct run {
00225         int isFill;
00226         int fillBit;
00227         word_t nWords;
00228         array_t<word_t>::const_iterator it;
00229         run() : isFill(0), fillBit(0), nWords(0), it(0) {};
00230         void decode() { 
00231             fillBit = (*it > HEADER1);
00232             if (*it > ALLONES) {
00233                 nWords = (*it & MAXCNT);
00234                 isFill = 1;
00235             }
00236             else {
00237                 nWords = 1;
00238                 isFill = 0;
00239             }
00240         };
00243         void operator--() {
00244             if (nWords > 1) {--nWords;}
00245             else {++ it; nWords = 0;}
00246         };
00249         void operator-=(word_t len) {
00250             while (len > 0) {
00251                 if (nWords == 0) decode();
00252                 if (isFill) {
00253                     if (nWords > len) {nWords -= len; len = 0;}
00254                     else if (nWords == len) {nWords = 0; len = 0; ++ it;}
00255                     else {len -= nWords; ++ it; nWords = 0;}
00256                 }
00257                 else {-- len; ++ it; nWords = 0;}
00258             }
00259         };
00260     };
00261     friend struct run;
00262     friend struct active_word;
00263 
00264     // member variables of bitvector64 class
00265     word_t nbits;       
00266     mutable word_t nset;
00267     active_word active; 
00268     array_t<word_t> m_vec;      
00269 
00270     // private functions of bitvector64 class
00271     // The following three functions all performs or operation, _c2 and _c1
00272     // generate compressed solutions, but _c0, _d1, and _d2 generate
00273     // uncompressed solutions.
00274     // or_c2 assumes there are compressed word in both operands
00275     // or_c1 *this may contain compressed word, but not rhs
00276     // or_c0 assumes both operands are not compressed
00277     // or_d1 *this contains no compressed word and is overwritten with the
00278     //       result
00279     // or_d2 both *this and rhs are compressed, but res will not be compressed
00280     void or_c2(const bitvector64& rhs, bitvector64& res) const;
00281     void or_c1(const bitvector64& rhs, bitvector64& res) const;
00282     void or_c0(const bitvector64& rhs);
00283     void or_d1(const bitvector64& rhs);
00284     void or_d2(const bitvector64& rhs, bitvector64& res) const;
00285     void and_c2(const bitvector64& rhs, bitvector64& res) const;
00286     void and_c1(const bitvector64& rhs, bitvector64& res) const;
00287     void and_c0(const bitvector64& rhs);
00288     void and_d1(const bitvector64& rhs);
00289     void and_d2(const bitvector64& rhs, bitvector64& res) const;
00290     void xor_c2(const bitvector64& rhs, bitvector64& res) const;
00291     void xor_c1(const bitvector64& rhs, bitvector64& res) const;
00292     void xor_c0(const bitvector64& rhs);
00293     void xor_d1(const bitvector64& rhs);
00294     void xor_d2(const bitvector64& rhs, bitvector64& res) const;
00295     void minus_c2(const bitvector64& rhs, bitvector64& res) const;
00296     void minus_c1(const bitvector64& rhs, bitvector64& res) const;
00297     void minus_c1x(const bitvector64& rhs, bitvector64& res) const;
00298     void minus_c0(const bitvector64& rhs);
00299     void minus_d1(const bitvector64& rhs);
00300     void minus_d2(const bitvector64& rhs, bitvector64& res) const;
00301     inline void copy_runs(run& it, word_t& nw); // copy nw words
00302     inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate
00303     inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
00304     inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
00305                           word_t& nw);
00306     inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
00307                            word_t& nw);
00308     void decompress(array_t<word_t>& tmp) const;
00309     void copy_comp(array_t<word_t>& tmp) const;
00310     inline void append_active();
00311     inline void append_counter(int val, word_t cnt);
00312     inline unsigned cnt_ones(word_t) const; // number of 1s in a literal word
00313     inline word_t cnt_bits(word_t) const; // number of bits in a word
00314     word_t do_cnt() const; // count the number of bits and number of ones
00315 }; // end class bitvector64
00316 
00326 class ibis::bitvector64::indexSet {
00327 public:
00328     // let the compiler define most of the canonical functions
00329 
00330     // allow bitvector64::firstIndexSet() to access private variables of this
00331     // class
00332     friend indexSet ibis::bitvector64::firstIndexSet() const;
00333 
00334     //int operator!=(const indexSet& rhs) const {return (it != rhs.it);};
00335     bool isRange() const {return (nind>=ibis::bitvector64::MAXBITS);}
00336     const word_t* indices() const {return ind;};
00337     word_t nIndices() const {return nind;}
00338     indexSet& operator++();
00339 
00340 private:
00341     array_t<word_t>::const_iterator it;
00342     array_t<word_t>::const_iterator end;
00343     const active_word* active; // points back to the active word
00344     word_t nind; // number of indices
00345     word_t ind[64];
00346 }; // class ibis::bitvector64::indexSet
00347 
00349 class ibis::bitvector64::const_iterator {
00350 public:
00351     inline bool operator*() const;
00352     inline int operator!=(const const_iterator& rhs) const throw ();
00353     inline int operator==(const const_iterator& rhs) const throw ();
00354 
00355     inline const_iterator& operator++();
00356     inline const_iterator& operator--();
00357     const_iterator& operator+=(int64_t incr);
00358 
00359 private:
00360     ibis::bitvector64::word_t   compressed;
00361     ibis::bitvector64::word_t   ind;
00362     ibis::bitvector64::word_t   nbits;
00363     ibis::bitvector64::word_t   literalvalue;
00364     int                         fillbit;
00365     const active_word*          active;
00366     array_t<word_t>::const_iterator end;
00367     array_t<word_t>::const_iterator begin;
00368     array_t<word_t>::const_iterator it;
00369 
00370     void decodeWord();
00371 
00372     // give three functions of bitvector64 access to private variables
00373     friend void ibis::bitvector64::erase(word_t i, word_t j);
00374     friend const_iterator ibis::bitvector64::begin() const;
00375     friend const_iterator ibis::bitvector64::end() const;
00376     friend class ibis::bitvector64::iterator;
00377 }; // end class ibis::bitvector64::const_iterator
00378 
00386 class ibis::bitvector64::iterator {
00387 public:
00388     inline bool operator*() const; // still can not modify this
00389     inline int operator!=(const const_iterator& rhs) const throw ();
00390     inline int operator==(const const_iterator& rhs) const throw ();
00391     inline int operator!=(const iterator& rhs) const throw ();
00392     inline int operator==(const iterator& rhs) const throw ();
00393 
00394     inline iterator& operator++();
00395     inline iterator& operator--();
00396     iterator& operator+=(int64_t incr);
00397     iterator& operator=(int val);
00398 
00399 private:
00400     ibis::bitvector64::word_t   compressed;
00401     ibis::bitvector64::word_t   ind;
00402     ibis::bitvector64::word_t   nbits;
00403     ibis::bitvector64::word_t   literalvalue;
00404     int                         fillbit;
00405     ibis::bitvector64*          bitv;
00406     active_word*                active;
00407     array_t<word_t>*            vec;
00408     array_t<word_t>::iterator   it;
00409 
00410     void decodeWord();
00411 
00412     friend iterator ibis::bitvector64::begin();
00413     friend iterator ibis::bitvector64::end();
00414 }; // end class ibis::bitvector64::iterator
00415 
00417 inline bool ibis::bitvector64::all0s() const {
00418     if (m_vec.empty()) {
00419         return true;
00420     }
00421     else if (m_vec.size() == 1) {
00422         return (m_vec[0] == 0 || (m_vec[0] >= HEADER0 && m_vec[0] < HEADER1));
00423     }
00424     else {
00425         return false;
00426     }
00427 } // ibis::bitvector64::all0s
00428 
00430 inline bool ibis::bitvector64::all1s() const {
00431     if (m_vec.size() == 1) {
00432         return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
00433     }
00434     else {
00435         return false;
00436     }
00437 } // ibis::bitvector64::all1s()
00438 
00440 inline ibis::bitvector64::word_t
00441 ibis::bitvector64::cnt_bits(ibis::bitvector64::word_t val) const {
00442     return  ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
00443 }
00444 
00446 inline unsigned 
00447 ibis::bitvector64::cnt_ones(ibis::bitvector64::word_t val) const {
00448     // number of 1 bits in a value between 0 and 255
00449     static const unsigned table[256] = {
00450         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00451         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00452         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00453         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00454         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00455         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00456         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00457         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00458         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00459         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00460         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00461         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00462         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00463         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00464         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00465         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
00466     //assert(8 == sizeof(word_t));
00467     return table[val&0xFF] + table[(val>>8)&0xFF] +
00468         table[(val>>16)&0xFF] + table[(val>>24)&0xFF] +
00469         table[(val>>32)&0xFF] + table[(val>>40)&0xFF] +
00470         table[(val>>48)&0xFF] + table[val>>56];
00471 } // inline unsigned ibis::bitvector64::cnt_ones(word_t) const
00472 
00473 inline ibis::bitvector64::word_t
00474 ibis::bitvector64::numFillWords() const {
00475     word_t cnt=0;
00476     array_t<word_t>::const_iterator it = m_vec.begin();
00477     while (it != m_vec.end()) {
00478         cnt += (*it >> ibis::bitvector64::MAXBITS);
00479         it++;
00480     }
00481     return cnt;
00482 } // ibis::bitvector64::numFillWords
00483 
00484 // The assignment operator.  Wanted to use shallow copy for efficiency
00485 // consideration, but SHALLOW copy causes unexpected problem in test
00486 // program bitty.cpp change to deep copy
00487 inline ibis::bitvector64&
00488 ibis::bitvector64::operator=(const ibis::bitvector64& bv) {
00489     nbits = bv.nbits; nset = bv.nset; active = bv.active;
00490     m_vec.deepCopy(bv.m_vec);
00491     return *this;
00492 }
00493 
00494 // deep copy
00495 inline ibis::bitvector64& 
00496 ibis::bitvector64::copy(const ibis::bitvector64& bv) {
00497     nbits = bv.nbits; nset = bv.nset; active = bv.active;
00498     m_vec.deepCopy(bv.m_vec);
00499     return *this;
00500 }
00501 
00502 inline ibis::bitvector64&
00503 ibis::bitvector64::swap(bitvector64& bv) {
00504     word_t tmp;
00505     tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
00506     tmp = bv.nset; bv.nset = nset; nset = tmp;
00507     active_word atmp = bv.active;
00508     bv.active = active; active = atmp;
00509     m_vec.swap(bv.m_vec);
00510     return *this;
00511 }
00512 
00513 // append_active assumes that nbits == MAXBITS and refers to MAXBITS instead
00514 // of nbits
00515 inline void ibis::bitvector64::append_active() {
00516     if (m_vec.empty()) {
00517         m_vec.push_back(active.val);
00518     }
00519     else if (active.val == 0) {// incoming word is zero
00520         if (m_vec.back() == 0) {
00521             m_vec.back() = (HEADER0 + 2);
00522         }
00523         else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
00524             ++ m_vec.back();
00525         }
00526         else {
00527             m_vec.push_back(active.val);
00528         }
00529     }
00530     else if (active.val == ALLONES) {// incoming word is allones
00531         if (m_vec.back() == ALLONES) {
00532             m_vec.back() = (HEADER1 | 2);
00533         }
00534         else if (m_vec.back() >= HEADER1) {
00535             ++m_vec.back();
00536         }
00537         else {
00538             m_vec.push_back(active.val);
00539         }
00540     }
00541     else { // incoming word contains a mixture of bits
00542         m_vec.push_back(active.val);
00543     }
00544     nbits += MAXBITS;
00545     active.reset();
00546     nset = 0;
00547 } // void ibis::bitvector64::append_active()
00548 
00549 // a private function to append a single counter when the active word is
00550 // empty cnt is assumed to be multiples of MAXBITS (more than MAXBITS)
00551 inline void ibis::bitvector64::append_counter(int val, word_t cnt) {
00552     word_t head = 2 + val;
00553     word_t w = (head << SECONDBIT) + cnt;
00554     nbits += cnt*MAXBITS;
00555     if (m_vec.empty()) {
00556         m_vec.push_back(w);
00557     }
00558     else if ((m_vec.back()>>SECONDBIT) == head) {
00559         m_vec.back() += cnt;
00560     }
00561     else if ((m_vec.back()==ALLONES) && head==3) {
00562         m_vec.back() = w + 1;
00563     }
00564     else if ((m_vec.back() == 0) && head==2) {
00565         m_vec.back() = w + 1;
00566     }
00567     else {
00568         m_vec.push_back(w);
00569     }
00570 } // void append_counter()
00571 
00572 // append a single bit
00573 inline ibis::bitvector64& ibis::bitvector64::operator+=(int b) {
00574     active.append(b);
00575     if (active.is_full()) append_active();
00576     return *this;
00577 } // ibis::bitvector64& ibis::bitvector64::operator+=(int b)
00578 
00579 // append n bits of val (n may be arbitrary integer, value must be either 0
00580 // or 1)
00581 inline void ibis::bitvector64::appendFill(int val,
00582                                           ibis::bitvector64::word_t n) {
00583     if (active.nbits > 0) {
00584         word_t tmp = (MAXBITS - active.nbits);
00585         if (tmp > n) tmp = n;
00586         active.nbits += tmp;
00587         active.val <<= tmp;
00588         n -= tmp;
00589         if (val)
00590             active.val |= (static_cast<word_t>(1)<<tmp) - 1;
00591         if (active.nbits >= MAXBITS) append_active();
00592     }
00593     if (n >= MAXBITS) {
00594         word_t cnt = n / MAXBITS;
00595         if (cnt > 1)
00596             append_counter(val, cnt);
00597         else if (val) {
00598             active.val = ALLONES;
00599             append_active();
00600         }
00601         else {
00602             active.val = 0;
00603             append_active();
00604         }
00605         n -= cnt * MAXBITS;
00606     }
00607     if (n > 0) {
00608         active.nbits = n;
00609         active.val = val*((static_cast<word_t>(1)<<n)-1);
00610     }
00611 } // ibis::bitvector64::appendFill(val, n)
00612 
00613 // append nw words starting from 'it' to the current bit vector -- assume
00614 // active is empty
00615 inline void ibis::bitvector64::copy_runs(run& it, word_t& nw) {
00616 #if defined(DEBUG) && DEBUG + 0 > 1
00617     LOGGER(ibis::gVerbose >= 0)
00618         << "bitvector64::copy_runs(0x"
00619         << std::hex << std::setw(8) << std::setfill('0')
00620         << *(it.it) << ", " << std::dec << nw
00621         << ") ... it.nWords = " << it.nWords;
00622 #endif
00623     // deal with the first word -- need to attach it to the last word in m_vec
00624     if (it.isFill) {
00625         append_counter(it.fillBit, it.nWords);
00626         nw -= it.nWords;
00627     }
00628     else {
00629         active.val = *(it.it);
00630         append_active();
00631         -- nw;
00632     }
00633     ++ it.it; // advance to the next word
00634     //if (nw == 0) {it.nWords = 0; return;}
00635     it.decode();
00636     nset = 0;
00637     nbits += MAXBITS * nw;
00638     while (nw >= it.nWords && nw > 0) { // only need to copy the word
00639         m_vec.push_back(*(it.it));
00640         nw -= it.nWords;
00641         ++ it.it; // advance to the next word
00642         it.decode();
00643     }
00644     nbits -= MAXBITS * nw;
00645 } // ibis::bitvector64::copy_runs
00646 
00647 // append nw words starting from it to the current bit vector -- assume
00648 // active is empty
00649 inline void ibis::bitvector64::copy_runsn(run& it, word_t& nw) {
00650 #if defined(DEBUG) && DEBUG + 0 > 1
00651     LOGGER(ibis::gVerbose >= 0)
00652         << "bitvector64::copy_runsn(0x"
00653         << std::hex << std::setw(8) << std::setfill('0')
00654         << *(it.it) << ", " << std::dec << nw
00655         << ") ... it.nWords = " << it.nWords;
00656 #endif
00657     // deal with the first word -- need to attach it to the last word in m_vec
00658     if (it.isFill) {
00659         append_counter(!it.fillBit, it.nWords);
00660         nw -= it.nWords;
00661     }
00662     else {
00663         active.val = ALLONES ^ *(it.it);
00664         append_active();
00665         -- nw;
00666     }
00667     ++ it.it; // advance to the next word
00668     //if (nw == 0) {it.nWords = 0; return;}
00669     it.decode();
00670     nset = 0;
00671     nbits += MAXBITS * nw;
00672     while (nw >= it.nWords && nw > 0) { // only need to copy the word
00673         m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
00674         nw -= it.nWords;
00675         ++ it.it; // advance to the next word
00676         it.decode();
00677     }
00678     nbits -= MAXBITS * nw;
00679 } // ibis::bitvector64::copy_runsn
00680 
00681 // copy the fill in "run it" as literal words
00682 inline void ibis::bitvector64::copy_fill
00683 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it) {
00684     const array_t<word_t>::iterator iend = jt + it.nWords;
00685     if (it.fillBit == 0) {
00686         switch (it.nWords) {
00687         case 0: break;
00688         case 1:
00689             *jt = 0; ++jt; break;
00690         case 2:
00691             *jt = 0; ++jt; *jt = 0; ++jt; break;
00692         case 3:
00693             *jt = 0; ++jt; *jt = 0; ++jt; *jt = 0; ++jt; break;
00694         default:
00695             while (jt < iend) {
00696                 *jt = 0;
00697                 ++ jt;
00698             }
00699             break;
00700         }
00701     }
00702     else {
00703         switch (it.nWords) {
00704         case 0: break;
00705         case 1:
00706             *jt = ALLONES; ++jt; break;
00707         case 2:
00708             *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; break;
00709         case 3:
00710             *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; *jt = ALLONES; ++jt;
00711             break;
00712         default:
00713             while (jt < iend) {
00714                 *jt = ALLONES;
00715                 ++ jt;
00716             }
00717             break;
00718         }
00719     }
00720     it.nWords = 0;
00721     ++ it.it;
00722 } // ibis::bitvector64::copy_fill
00723 
00724 // copy the next nw words (nw X MAXBITS bits) starting with run it
00725 // to an array_t as uncompressed words
00726 inline void ibis::bitvector64::copy_runs
00727 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) {
00728     while (nw >= it.nWords && nw > 0) { // only need to copy the word
00729         if (it.isFill) {
00730             const array_t<word_t>::iterator iend = jt + it.nWords;
00731             if (it.fillBit == 0) {
00732                 while (jt < iend) {
00733                     *jt = 0;
00734                     ++ jt;
00735                 }
00736             }
00737             else {
00738                 while (jt < iend) {
00739                     *jt = ALLONES;
00740                     ++ jt;
00741                 }
00742             }
00743             nw -= it.nWords;
00744         }
00745         else {
00746             *jt = *(it.it);
00747             ++ jt;
00748             -- nw;
00749         }
00750         ++ it.it; // advance to the next word
00751         it.decode();
00752     }
00753 } // ibis::bitvector64::copy_runs
00754 
00755 // copy the complements of the next nw words (nw X MAXBITS bits)
00756 // starting with "run it" as uncompressed words
00757 inline void ibis::bitvector64::copy_runsn
00758 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) {
00759     while (nw >= it.nWords && nw > 0) { // only need to copy the word
00760         if (it.isFill) {
00761             const array_t<word_t>::iterator iend = jt + it.nWords;
00762             if (it.fillBit == 0) {
00763                 while (jt < iend) {
00764                     *jt = ALLONES;
00765                     ++ jt;
00766                 }
00767             }
00768             else {
00769                 while (jt < iend) {
00770                     *jt = 0;
00771                     ++ jt;
00772                 }
00773             }
00774             nw -= it.nWords;
00775         }
00776         else {
00777             *jt = *(it.it) ^ ALLONES;
00778             ++ jt;
00779             -- nw;
00780         }
00781         ++ it.it; // advance to the next word
00782         it.decode();
00783     }
00784 } // ibis::bitvector64::copy_runsn
00785 
00786 inline ibis::bitvector64::iterator ibis::bitvector64::begin() {
00787     iterator it;
00788     it.it     = m_vec.begin();
00789     it.vec    = &m_vec;
00790     it.bitv   = this;
00791     it.active = &active;
00792     it.decodeWord();
00793     return it;
00794 } // ibis::bitvector64::begin()
00795 
00796 inline ibis::bitvector64::iterator ibis::bitvector64::end() {
00797     iterator it;
00798     it.ind              = 0;
00799     it.compressed       = 0;
00800     it.nbits            = 0;
00801     it.literalvalue     = 0;
00802     it.fillbit          = 0;
00803     it.it     = m_vec.end() + 1;
00804     it.vec    = &m_vec;
00805     it.bitv   = this;
00806     it.active = &active;
00807     return it;
00808 } // ibis::bitvector64::end()
00809 
00810 inline ibis::bitvector64::const_iterator ibis::bitvector64::begin() const {
00811     const_iterator it;
00812     it.it     = m_vec.begin();
00813     it.begin  = m_vec.begin();
00814     it.end    = m_vec.end();
00815     it.active = &active;
00816     it.decodeWord();
00817     return it;
00818 } // ibis::bitvector64::begin()
00819 
00820 // dereference -- no error checking
00821 inline bool ibis::bitvector64::iterator::operator*() const {
00822 #if defined(DEBUG) && DEBUG + 0 > 1
00823     if (vec==0 || it<vec->begin() ||  it>vec->end())
00824         throw "bitvector64::const_iterator not initialized correctly.";
00825 #endif
00826     if (compressed != 0)
00827         return (fillbit != 0);
00828     else
00829         return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind)));
00830 }
00831 
00832 // comparison only based on the iterator
00833 inline int ibis::bitvector64::iterator::operator!=
00834 (const ibis::bitvector64::const_iterator& rhs) const throw () {
00835     return (it != rhs.it);
00836 }
00837 inline int ibis::bitvector64::iterator::operator==
00838 (const ibis::bitvector64::const_iterator& rhs) const throw () {
00839     return (it == rhs.it);
00840 }
00841 inline int ibis::bitvector64::iterator::operator!=
00842 (const ibis::bitvector64::iterator& rhs) const throw () {
00843     return (it != rhs.it);
00844 }
00845 inline int ibis::bitvector64::iterator::operator==
00846 (const ibis::bitvector64::iterator& rhs) const throw () {
00847     return (it == rhs.it);
00848 }
00849 
00850 // increment by one
00851 inline ibis::bitvector64::iterator& ibis::bitvector64::iterator::operator++() {
00852 #if defined(DEBUG) && DEBUG + 0 > 1
00853     if (vec==0 || it<vec->begin() || it>vec->end())
00854         throw "bitvector64::iterator not formed correctly.";
00855 #endif
00856     if (ind+1<nbits) {++ind;}
00857     else {++ it; decodeWord();}
00858     return *this;
00859 }
00860 
00861 // decrement by one
00862 inline ibis::bitvector64::iterator& ibis::bitvector64::iterator::operator--() {
00863 #if defined(DEBUG) && DEBUG + 0 > 1
00864     if (vec==0 || it<vec->begin() || it>vec->end()+1)
00865         throw "bitvector64::iterator not formed correctly.";
00866 #endif
00867     if (ind) -- ind;
00868     else {
00869         if (it <= vec->end()) -- it;
00870         else if (active->nbits) it = vec->end();
00871         else it = vec->end() - 1;
00872         decodeWord();
00873         if (nbits) ind = nbits - 1;
00874     }
00875     return *this;
00876 }
00877 
00878 inline ibis::bitvector64::const_iterator ibis::bitvector64::end() const {
00879     const_iterator it;
00880     it.ind              = 0;
00881     it.compressed       = 0;
00882     it.nbits            = 0;
00883     it.literalvalue     = 0;
00884     it.fillbit          = 0;
00885     it.it    = m_vec.end() + 1;
00886     it.begin = m_vec.begin();
00887     it.end   = m_vec.end();
00888     it.active = &active;
00889     return it;
00890 } // ibis::bitvector64::end()
00891 
00892 // dereference -- no error checking
00893 inline bool ibis::bitvector64::const_iterator::operator*() const {
00894 #if defined(DEBUG) && DEBUG + 0 > 1
00895     if (it==0 || end==0 || it>end || nbits<=ind)
00896         throw "bitvector64::const_iterator not initialized correctly.";
00897 #endif
00898     if (compressed != 0)
00899         return (fillbit != 0);
00900     else
00901         return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind)));
00902 }
00903 
00904 // comparison only based on the iterator
00905 inline int ibis::bitvector64::const_iterator::operator!=
00906 (const ibis::bitvector64::const_iterator& rhs) const throw (){
00907     return (it != rhs.it);
00908 }
00909 inline int ibis::bitvector64::const_iterator::operator==
00910 (const ibis::bitvector64::const_iterator& rhs) const throw () {
00911     return (it == rhs.it);
00912 }
00913 
00914 // increment by one
00915 inline ibis::bitvector64::const_iterator&
00916 ibis::bitvector64::const_iterator::operator++() {
00917 #if defined(DEBUG) && DEBUG + 0 > 1
00918     if (it==0 || end==0 || it>end)
00919         throw "bitvector64::const_iterator not formed correctly.";
00920 #endif
00921     if (ind+1<nbits) {++ind;}
00922     else {++ it; decodeWord();}
00923     return *this;
00924 }
00925 
00926 // decrement by one
00927 inline ibis::bitvector64::const_iterator&
00928 ibis::bitvector64::const_iterator::operator--() {
00929 #if defined(DEBUG) && DEBUG + 0 > 1
00930     if (it==0 || end==0 || it>end)
00931         throw "bitvector64::const_iterator not formed correctly.";
00932 #endif
00933     if (ind) -- ind;
00934     else {
00935         if (it <= end) -- it;
00936         else if (active->nbits) it = end;
00937         else it = end - 1;
00938         decodeWord();
00939         if (nbits) ind = nbits - 1;
00940     }
00941     return *this;
00942 }
00943 
00944 inline ibis::bitvector64::indexSet ibis::bitvector64::firstIndexSet() const {
00945     indexSet is;
00946     if (m_vec.end() > m_vec.begin()) {
00947         is.it = m_vec.begin() - 1;
00948         is.end = m_vec.end();
00949     }
00950     else {
00951         is.it = 0;
00952         is.end = 0;
00953     }
00954     is.active = &active;
00955     is.ind[0] = static_cast<word_t>(-1);
00956     is.nind = 0;
00957     ++is;
00958     return is;
00959 } // end ibis::bitvector64::firstIndexSet;
00960 
00961 inline double ibis::bitvector64::randomSize(word_t nb, word_t nc) {
00962     double sz = 0.0;
00963     if (nb > 0 && nb >= nc) {
00964         const double den = static_cast<double>(nc) /
00965             static_cast<double>(nb);
00966         const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0);
00967         sz = 3.0 + nw - nw *
00968             (pow(1.0-den, static_cast<int>(2*SECONDBIT)) +
00969              pow(den, static_cast<int>(2*SECONDBIT)));
00970     }
00971     return sz*sizeof(word_t);
00972 } // end ibis::bitvector64::randomSize
00973 
00974 inline double
00975 ibis::bitvector64::markovSize(word_t nb, word_t nc, double f) {
00976     double sz = 0.0;
00977     if (nb > 0 && nb >= nc) {
00978         const double den = static_cast<double>(nc) /
00979             static_cast<double>(nb);
00980         const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0);
00981         if ((den <= 0.5 && f > 1.0) || (den > 0.5 && (1.0-den)*f > den))
00982             sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f),
00983                                   static_cast<int>(2*MAXBITS-3)) +
00984                   den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3)));
00985         else
00986             sz = (pow(1.0-den, static_cast<int>(2*SECONDBIT)) +
00987                   pow(den, static_cast<int>(2*SECONDBIT)));
00988         sz = 3.0 + nw * (1.0 - sz);
00989     }
00990     return sz*sizeof(word_t);
00991 } // end ibis::bitvector64::markovSize
00992 
00993 std::ostream& operator<<(std::ostream&, const ibis::bitvector64&);
00994 #endif // __BITVECTOR64_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive