category.h
Go to the documentation of this file.
00001 //File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 // Copyright 2000-2011 the Regents of the University of California
00004 #ifndef IBIS_CATEGORY_H
00005 #define IBIS_CATEGORY_H
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #include "irelic.h"
00016 #include "column.h"
00017 #include "dictionary.h"
00018 
// ibis::text -- a column class derived from ibis::column.  It declares
// keywordSearch, stringSearch and patternSearch overloads, estimateCost
// overloads for qString and qMultiString, functions for appending and
// saving rows, functions for selecting values under a mask, and helpers
// for reading individual strings.
00024 class ibis::text : public ibis::column {
00025 public:
          // The destructor calls unloadIndex() before the object is destroyed.
00026     virtual ~text() {unloadIndex();};
          // Constructors: from a part and a FILE*; from a part and a column
          // name (the type argument defaults to ibis::TEXT); and from an
          // existing ibis::column.
00027     text(const part* tbl, FILE* file);
00028     text(const part* tbl, const char* name, ibis::TYPE_T t=ibis::TEXT);
00029     text(const ibis::column& col); // copy from column
00030 
          // keywordSearch comes in two forms: one also takes a non-const
          // bitvector named hits, the other takes only the string.
          // NOTE(review): hits is presumably the output of the search and the
          // one-argument form presumably returns a count or estimate --
          // confirm against the implementation.
00031     virtual long keywordSearch(const char* str, ibis::bitvector& hits) const;
00032     virtual long keywordSearch(const char* str) const;
00033     //     long keywordSearch(const std::vector<std::string>& strs) const;
00034     //     long keywordSearch(const std::vector<std::string>& strs,
00035     //                 ibis::bitvector& hits) const;
00036 
          // stringSearch accepts either a single C string or a vector of
          // strings, each with and without a bitvector argument.
          // patternSearch follows the same with/without-bitvector pairing.
00037     virtual long stringSearch(const char* str, ibis::bitvector& hits) const;
00038     virtual long stringSearch(const std::vector<std::string>& strs,
00039                               ibis::bitvector& hits) const;
00040     virtual long stringSearch(const char* str) const;
00041     virtual long stringSearch(const std::vector<std::string>& strs) const;
00042     virtual long patternSearch(const char*, ibis::bitvector&) const;
00043     virtual long patternSearch(const char*) const;
00044 
          // Bring the estimateCost overloads of ibis::column into this scope;
          // without the using-declaration they would be hidden by the two
          // overloads declared below.
00045     using ibis::column::estimateCost;
00046     virtual double estimateCost(const ibis::qString& cmp) const;
00047     virtual double estimateCost(const ibis::qMultiString& cmp) const;
00048 
          // append has two overloads.  The (const void*, bitvector) form is
          // defined inline and always returns -1.
00049     virtual long append(const char* dt, const char* df, const uint32_t nold,
00050                         const uint32_t nnew, uint32_t nbuf, char* buf);
00051     virtual long append(const void*, const ibis::bitvector&) {return -1;}
00052     virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
00053                               char *buf, uint32_t nbuf);
          // The select* functions take a bitvector mask and return a pointer
          // to a container of values.
          // NOTE(review): ownership of the returned pointer is not visible in
          // this header -- confirm who deletes it.
00055     virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00057     virtual array_t<int64_t>* selectLongs(const bitvector& mask) const;
00058     virtual
00059     std::vector<std::string>* selectStrings(const bitvector& mask) const;
00060     virtual const char* findString(const char* str) const;
          // getString simply forwards to the protected readString(i, val).
00061     virtual void getString(uint32_t i, std::string &val) const {
00062         readString(i, val);}
00063 
00064     virtual void write(FILE* file) const; 
00065     virtual void print(std::ostream& out) const; 
00066 
          // Accessors related to a keyword index.  The two void functions
          // take a std::string by non-const reference.
          // NOTE(review): presumably the string receives the result --
          // confirm against the implementation.
00067     const column* IDColumnForKeywordIndex() const;
00068     void TDListForKeywordIndex(std::string&) const;
00069     void delimitersForKeywordIndex(std::string&) const;
00070 
          // Abstract interface: operator() is pure virtual, so tokenizer must
          // be subclassed.  The virtual destructor has an empty body.
          // NOTE(review): presumably operator() splits the content of buf and
          // stores the pieces in tkns -- confirm.
00073     struct tokenizer {
00080         virtual int operator()(std::vector<const char*>& tkns, char *buf) = 0;
00082         virtual ~tokenizer() {}
00083     }; // struct tokenizer
00084 
00085 protected:
          // Helpers available to derived classes.  readString is overloaded:
          // the two-argument form is the one getString forwards to.
00087     void startPositions(const char *dir, char *buf, uint32_t nbuf) const;
00089     void readString(uint32_t i, std::string &val) const;
00091     int  readString(std::string&, int, long, long, char*, uint32_t,
00092                     uint32_t&, off_t&) const;
00093     int  writeStrings(const char *to, const char *from,
00094                       const char *spto, const char *spfrom,
00095                       ibis::bitvector &msk, const ibis::bitvector &sel,
00096                       char *buf, uint32_t nbuf) const;
00097 
00098 private:
          // Copy assignment is declared private, so code outside this class
          // cannot assign one text object to another.
00099     text& operator=(const text&);
00100 }; // ibis::text
00101 
// ibis::category -- derived from ibis::text.  It holds an ibis::dictionary
// (the private member dic) and declares its own versions of the string and
// pattern searches, cost estimates, append, selectUInts, selectStrings and
// getString, plus key accessors and fillIndex.
00108 class ibis::category : public ibis::text {
00109 public:
00110     virtual ~category();
          // Constructors: from a part and a FILE*; from a part and a column
          // name; and from an existing ibis::column.
00111     category(const part* tbl, FILE* file);
00112     category(const part* tbl, const char* name);
00113     category(const ibis::column& col); // copy from column
00114     // a special construct for meta-tag attributes
          // This form takes a single value string; dir and nevt both default
          // to 0.
00115     category(const part* tbl, const char* name, const char* value,
00116              const char* dir=0, uint32_t nevt=0);
00117 
          // stringSearch accepts either one C string or a vector of strings,
          // each with and without a bitvector argument named hits.
00119     virtual long stringSearch(const char* str, ibis::bitvector& hits) const;
00121     virtual long stringSearch(const std::vector<std::string>& vals,
00122                               ibis::bitvector& hits) const;
00124     virtual long stringSearch(const char* str) const;
00126     virtual long stringSearch(const std::vector<std::string>& vals) const;
00127 
00128     virtual long patternSearch(const char* pat) const;
00129     virtual long patternSearch(const char* pat, ibis::bitvector &hits) const;
          // Bring the estimateCost overloads visible in ibis::text into this
          // scope; the three overloads declared below would otherwise hide
          // them.
00130     using ibis::text::estimateCost;
00131     virtual double estimateCost(const ibis::qLike& cmp) const;
00132     virtual double estimateCost(const ibis::qString& cmp) const;
00133     virtual double estimateCost(const ibis::qMultiString& cmp) const;
00134 
          // append has two overloads.  The (const void*, bitvector) form is
          // defined inline and always returns -1.
00136     virtual long append(const char* dt, const char* df, const uint32_t nold,
00137                         const uint32_t nnew, uint32_t nbuf, char* buf);
00138     virtual long append(const void*, const ibis::bitvector&) {return -1;}
00140     virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00141     virtual std::vector<std::string>*
00142     selectStrings(const bitvector& mask) const;
00143     virtual void getString(uint32_t i, std::string &val) const;
00144 
          // Key accessors.
          // NOTE(review): presumably these consult the dictionary member dic
          // -- confirm against the implementation.
00145     virtual uint32_t getNumKeys() const;
00146     virtual const char* getKey(uint32_t i) const;
00147     virtual const char* isKey(const char* str) const;
00148 
00149     virtual void write(FILE* file) const;
00150     virtual void print(std::ostream& out) const;
00151 
          // Returns a pointer to an ibis::relic; dir defaults to 0.
          // NOTE(review): ownership of the returned pointer is not visible in
          // this header -- confirm who deletes it.
00152     ibis::relic* fillIndex(const char *dir=0) const;
00153 
00154 private:
00155     // private member variables
00156 
00157     // dictionary is mutable in order to delay the reading of dictionary
00158     // from disk as late as possible
00159     mutable ibis::dictionary dic;
00160 
00161     // private member functions
          // Both helpers are const, which is why dic above is mutable.
00162     void prepareMembers() const;
00163     void readDictionary(const char *dir=0) const;
00164 
          // Copy assignment is declared private, so code outside this class
          // cannot assign one category object to another.
00165     category& operator=(const category&);
00166 }; // ibis::category
00167 
// ibis::blob -- a column class derived from ibis::column.  Many of the
// virtual functions declared here have inline stub bodies that return -1,
// return 0, or do nothing.  The second append overload, writeData, write,
// print and the raw-byte functions (countRawBytes, selectRawBytes, getBlob
// and the protected extract*/readBlob helpers) are declared only.
00172 class ibis::blob : public ibis::column {
00173 public:
          // The destructor has an empty body.
00174     virtual ~blob() {};
          // Constructors: from a part and a FILE*; from a part and a C
          // string; and from an existing ibis::column.
00175     blob(const part*, FILE*);
00176     blob(const part*, const char*);
00177     blob(const ibis::column&);
00178 
          // All four stringSearch overloads ignore their arguments and
          // return -1.
00179     virtual long stringSearch(const char*, ibis::bitvector&) const {return -1;}
00180     virtual long stringSearch(const std::vector<std::string>&,
00181                               ibis::bitvector&) const {return -1;}
00182     virtual long stringSearch(const char*) const {return -1;}
00183     virtual long stringSearch(const std::vector<std::string>&) const {
00184         return -1;}
00185 
          // The computeMinMax overloads and loadIndex have empty bodies;
          // indexSize and getValuesArray return -1.  loadIndex carries an
          // empty dynamic exception specification, throw ().
00186     virtual void computeMinMax() {}
00187     virtual void computeMinMax(const char*) {}
00188     virtual void computeMinMax(const char*, double&, double&) const {}
00189     virtual void loadIndex(const char*, int) const throw () {}
00190     virtual long indexSize() const {return -1;}
00191     virtual int  getValuesArray(void*) const {return -1;}
00192 
          // Every typed select* function ignores the mask and returns 0,
          // i.e. a null pointer.
00193     virtual array_t<signed char>* selectBytes(const bitvector&) const {return 0;}
00194     virtual array_t<unsigned char>* selectUBytes(const bitvector&) const {return 0;}
00195     virtual array_t<int16_t>* selectShorts(const bitvector&) const {return 0;}
00196     virtual array_t<uint16_t>* selectUShorts(const bitvector&) const {return 0;}
00197     virtual array_t<int32_t>* selectInts(const bitvector&) const {return 0;}
00198     virtual array_t<uint32_t>* selectUInts(const bitvector&) const {return 0;}
00199     virtual array_t<int64_t>* selectLongs(const bitvector&) const {return 0;}
00200     virtual array_t<uint64_t>* selectULongs(const bitvector&) const {return 0;}
00201     virtual array_t<float>* selectFloats(const bitvector&) const {return 0;}
00202     virtual array_t<double>* selectDoubles(const bitvector&) const {return 0;}
00203     virtual std::vector<std::string>* selectStrings(const bitvector&) const {return 0;}
00204 
00205     // virtual long estimateRange(const ibis::qContinuousRange&,
00206     //                         ibis::bitvector&,
00207     //                         ibis::bitvector&) const {return -1;}
00208     // virtual long estimateRange(const ibis::qDiscreteRange&,
00209     //                         ibis::bitvector&,
00210     //                         ibis::bitvector&) const {return -1;}
00211     // virtual long evaluateRange(const ibis::qContinuousRange&,
00212     //                         const ibis::bitvector&,
00213     //                         ibis::bitvector&) const {return -1;}
00214     // virtual long evaluateRange(const ibis::qDiscreteRange&,
00215     //                         const ibis::bitvector&,
00216     //                         ibis::bitvector&) const {return -1;}
00217     // virtual long estimateRange(const ibis::qContinuousRange&) const {return -1;}
00218     // virtual long estimateRange(const ibis::qDiscreteRange&) const {return -1;}
00219     // virtual double estimateCost(const ibis::qContinuousRange&) const {return 0;}
00220     // virtual double estimateCost(const ibis::qDiscreteRange& cmp) const {return 0;}
00221     // virtual double estimateCost(const ibis::qString&) const {return 0;}
00222     // virtual double estimateCost(const ibis::qMultiString&) const {return 0;}
00223 
00224     // virtual float getUndecidable(const ibis::qContinuousRange&,
00225     //                           ibis::bitvector&) const {return 1;}
00226     // virtual float getUndecidable(const ibis::qDiscreteRange&,
00227     //                           ibis::bitvector&) const {return 1;}
00228 
          // getActualMin returns DBL_MAX and getActualMax returns -DBL_MAX,
          // so the reported minimum is larger than the reported maximum;
          // getSum returns 0.
          // NOTE(review): presumably this inverted range signals that no
          // numeric range applies to this column type -- confirm with callers.
00229     virtual double getActualMin() const {return DBL_MAX;}
00230     virtual double getActualMax() const {return -DBL_MAX;}
00231     virtual double getSum() const {return 0;}
00232 
          // The (const void*, bitvector) append overload always returns -1;
          // the second overload and writeData are declared only.
00233     virtual long append(const void*, const ibis::bitvector&) {return -1;}
00234     virtual long append(const char* dt, const char* df, const uint32_t nold,
00235                         const uint32_t nnew, uint32_t nbuf, char* buf);
00236     virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
00237                            ibis::bitvector& mask, const void *va1,
00238                            void *va2);
00239 
00240     virtual void write(FILE*) const;
00241     virtual void print(std::ostream&) const;
00242 
          // Non-virtual raw-byte interface.  getBlob takes buf and size by
          // non-const reference.
          // NOTE(review): presumably buf and size are outputs, and the
          // allocation and ownership of buf are decided in the implementation
          // -- confirm before calling.
00243     long countRawBytes(const bitvector&) const;
00244     int selectRawBytes(const bitvector&,
00245                        array_t<unsigned char>&, array_t<uint32_t>&) const;
00246     int getBlob(uint32_t ind, unsigned char *&buf, uint32_t &size) const;
00247 
00248 protected:
          // Overloaded helpers.  Each extractAll/extractSome overload takes a
          // bitvector and two non-const arrays, followed by either arrays or
          // C strings.  readBlob takes either an array named starts or a C
          // string named spfile, plus a C string named datafile.
          // NOTE(review): the C strings are presumably file names and the
          // int64_t arrays presumably hold starting positions -- confirm.
00249     int extractAll(const bitvector&,
00250                    array_t<unsigned char>&, array_t<uint32_t>&,
00251                    const array_t<unsigned char>&,
00252                    const array_t<int64_t>&) const;
00253     int extractSome(const bitvector&,
00254                     array_t<unsigned char>&, array_t<uint32_t>&,
00255                     const array_t<unsigned char>&, const array_t<int64_t>&,
00256                     const uint32_t) const;
00257     int extractAll(const bitvector&,
00258                    array_t<unsigned char>&, array_t<uint32_t>&,
00259                    const char*, const array_t<int64_t>&) const;
00260     int extractSome(const bitvector&,
00261                     array_t<unsigned char>&, array_t<uint32_t>&,
00262                     const char*, const array_t<int64_t>&, const uint32_t) const;
00263     int extractSome(const bitvector&,
00264                     array_t<unsigned char>&, array_t<uint32_t>&,
00265                     const char*, const char*, const uint32_t) const;
00266     int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size,
00267                  const array_t<int64_t> &starts, const char *datafile) const;
00268     int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size,
00269                  const char *spfile, const char *datafile) const;
00270 }; // ibis::blob
00271 #endif // IBIS_CATEGORY_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive