category.h
Go to the documentation of this file.
00001 //File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 // Copyright 2000-2012 the Regents of the University of California
00004 #ifndef IBIS_CATEGORY_H
00005 #define IBIS_CATEGORY_H
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #include "column.h"     // ibis::column
00016 #include "dictionary.h" // ibis::dictionary
00017 #include "idirekte.h"   // ibis::direkte
00018 
/// A column class, publicly derived from ibis::column, whose interface is
/// built around searching for and retrieving string values.  Only the
/// declarations are visible here; the member definitions live elsewhere.
00024 class ibis::text : public ibis::column {
00025 public:
          /// Destructor.  Calls unloadIndex() before the object is destroyed.
00026     virtual ~text() {unloadIndex();};
          /// Construct from a data partition and an open FILE.
          /// NOTE(review): presumably reads the column description from
          /// the file -- confirm against the implementation.
00027     text(const part* tbl, FILE* file);
          /// Construct from a partition and a column name.  The type argument
          /// defaults to ibis::TEXT.
00028     text(const part* tbl, const char* name, ibis::TYPE_T t=ibis::TEXT);
00029     text(const ibis::column& col); // copy from column
00030 
          /// Keyword searches.  One overload takes a bitvector (hits) by
          /// non-const reference; the other takes only the search string.
          /// NOTE(review): presumably the first records the matching rows in
          /// hits and the second produces only a count or estimate -- confirm.
00031     virtual long keywordSearch(const char* str, ibis::bitvector& hits) const;
00032     virtual long keywordSearch(const char* str) const;
00033     //     long keywordSearch(const std::vector<std::string>& strs) const;
00034     //     long keywordSearch(const std::vector<std::string>& strs,
00035     //                 ibis::bitvector& hits) const;
00036 
          /// String searches for a single string (str) or a list of strings
          /// (strs), each with and without a hits bitvector.
          /// NOTE(review): meaning of the long return value is not visible
          /// here -- confirm against the implementation.
00037     virtual long stringSearch(const char* str, ibis::bitvector& hits) const;
00038     virtual long stringSearch(const std::vector<std::string>& strs,
00039                               ibis::bitvector& hits) const;
00040     virtual long stringSearch(const char* str) const;
00041     virtual long stringSearch(const std::vector<std::string>& strs) const;
          /// Pattern searches, with and without a bitvector argument.
00042     virtual long patternSearch(const char*, ibis::bitvector&) const;
00043     virtual long patternSearch(const char*) const;
00044 
          // Keep the estimateCost overloads of ibis::column visible; the
          // declarations below would otherwise hide them in this class.
00045     using ibis::column::estimateCost;
          /// Cost estimates for a qString and a qMultiString condition.
00046     virtual double estimateCost(const ibis::qString& cmp) const;
00047     virtual double estimateCost(const ibis::qMultiString& cmp) const;
00048 
          /// Load the index.  Both arguments default to 0 and the function is
          /// declared with an empty exception specification (throw ()).
00049     virtual void loadIndex(const char* iopt=0, int ropt=0) const throw ();
          /// Append using two C-string arguments (dt, df), two counts
          /// (nold, nnew) and a caller-supplied buffer buf of size nbuf.
          /// NOTE(review): dt/df look like destination and source directory
          /// names -- confirm against the implementation.
00050     virtual long append(const char* dt, const char* df, const uint32_t nold,
00051                         const uint32_t nnew, uint32_t nbuf, char* buf);
          /// This overload ignores its arguments and always returns -1.
00052     virtual long append(const void*, const ibis::bitvector&) {return -1;}
          /// NOTE(review): presumably writes the rows selected by sel to dest
          /// using buf (nbuf bytes) as scratch space -- confirm.
00053     virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
00054                               char *buf, uint32_t nbuf);
          /// Return a newly typed array of values for the rows in mask.
          /// NOTE(review): what the integers represent for a string column,
          /// and who owns the returned pointer, is not visible here.
00056     virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00058     virtual array_t<int64_t>* selectLongs(const bitvector& mask) const;
          /// Return the string values for the rows in mask as a pointer to a
          /// vector.  NOTE(review): ownership of the result is not visible here.
00059     virtual
00060     std::vector<std::string>* selectStrings(const bitvector& mask) const;
00061     virtual const char* findString(const char* str) const;
          /// Retrieve the i-th string into val by forwarding to
          /// readString(i, val).
00062     virtual void getString(uint32_t i, std::string &val) const {
00063         readString(i, val);}
00064 
          /// Output routines taking a C FILE and a C++ output stream.
00065     virtual void write(FILE* file) const; 
00066     virtual void print(std::ostream& out) const; 
00067 
          /// Accessors related to the keyword index.
          /// NOTE(review): judging by the names these expose the ID column,
          /// the term-document list name and the delimiters configured for
          /// the keyword index; the string overloads return their result
          /// through the reference argument -- confirm.
00068     const column* IDColumnForKeywordIndex() const;
00069     void TDListForKeywordIndex(std::string&) const;
00070     void delimitersForKeywordIndex(std::string&) const;
00071 
          /// Abstract interface for a tokenizer.  Concrete tokenizers must
          /// implement operator(), which receives a vector of C-string
          /// pointers (tkns) and a non-const character buffer (buf).
          /// NOTE(review): presumably tkns is filled with pointers into buf,
          /// which may be modified in place; the meaning of the int return
          /// value is not visible here -- confirm.
00074     struct tokenizer {
00081         virtual int operator()(std::vector<const char*>& tkns, char *buf) = 0;
              /// Virtual destructor so implementations can be destroyed
              /// through a tokenizer pointer.
00083         virtual ~tokenizer() {}
00084     }; // struct tokenizer
00085 
00086 protected:
          /// NOTE(review): presumably locates the starting position of each
          /// string in the data file under dir, using buf (nbuf bytes) as
          /// work space -- confirm.
00088     void startPositions(const char *dir, char *buf, uint32_t nbuf) const;
          /// Read the i-th string into val.  Used by getString().
00090     void readString(uint32_t i, std::string &val) const;
          /// Lower-level read with unnamed parameters; their roles are not
          /// visible in this header.  The last two are passed by non-const
          /// reference, so they can be updated by the call.
00092     int  readString(std::string&, int, long, long, char*, uint32_t,
00093                     uint32_t&, off_t&) const;
          /// NOTE(review): presumably copies the strings selected by sel from
          /// file "from" to file "to", with spfrom/spto naming the matching
          /// start-position files, and updates msk -- confirm.
00094     int  writeStrings(const char *to, const char *from,
00095                       const char *spto, const char *spfrom,
00096                       ibis::bitvector &msk, const ibis::bitvector &sel,
00097                       char *buf, uint32_t nbuf) const;
00098 
00099 private:
          // Assignment operator is declared private, so code outside this
          // class cannot assign one text object to another.
00100     text& operator=(const text&);
00101 }; // ibis::text
00102 
/// A specialization of ibis::text that additionally owns an
/// ibis::dictionary (member dic).  It redeclares the string and pattern
/// search, cost estimation, index loading, append, selection and output
/// functions of ibis::text, and adds key lookup and dictionary access.
00109 class ibis::category : public ibis::text {
00110 public:
00111     virtual ~category();
          /// Construct from a data partition and an open FILE.
00112     category(const part* tbl, FILE* file);
          /// Construct from a data partition and a column name.
00113     category(const part* tbl, const char* name);
00114     category(const ibis::column& col); // copy from column
00115     // a special construct for meta-tag attributes
          // NOTE(review): presumably every one of the nevt rows takes the
          // single string "value", with dir naming the data directory --
          // confirm against the implementation.
00116     category(const part* tbl, const char* name, const char* value,
00117              const char* dir=0, uint32_t nevt=0);
00118 
          /// String searches for a single string (str) or a list of strings
          /// (vals), each with and without a hits bitvector.  These have the
          /// same signatures as the ibis::text versions.
00120     virtual long stringSearch(const char* str, ibis::bitvector& hits) const;
00122     virtual long stringSearch(const std::vector<std::string>& vals,
00123                               ibis::bitvector& hits) const;
00125     virtual long stringSearch(const char* str) const;
00127     virtual long stringSearch(const std::vector<std::string>& vals) const;
00128 
          /// Pattern searches, with and without a hits bitvector.
00129     virtual long patternSearch(const char* pat) const;
00130     virtual long patternSearch(const char* pat, ibis::bitvector &hits) const;
          // Keep the estimateCost overloads visible through ibis::text from
          // being hidden by the declarations below.
00131     using ibis::text::estimateCost;
          /// Cost estimates for qLike, qString and qMultiString conditions.
00132     virtual double estimateCost(const ibis::qLike& cmp) const;
00133     virtual double estimateCost(const ibis::qString& cmp) const;
00134     virtual double estimateCost(const ibis::qMultiString& cmp) const;
00135 
          /// Load the index.  Both (unnamed) arguments default to 0 and the
          /// function is declared with an empty exception specification.
00136     virtual void loadIndex(const char* =0, int =0) const throw ();
          /// Append with the same parameter list as ibis::text::append.
00138     virtual long append(const char* dt, const char* df, const uint32_t nold,
00139                         const uint32_t nnew, uint32_t nbuf, char* buf);
          /// This overload ignores its arguments and always returns -1.
00140     virtual long append(const void*, const ibis::bitvector&) {return -1;}
          /// Return values for the rows in mask as unsigned integers.
          /// NOTE(review): presumably the integers are the dictionary codes
          /// of the strings; ownership of the result is not visible here.
00142     virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
          /// Return the string values for the rows in mask.
00143     virtual std::vector<std::string>*
00144     selectStrings(const bitvector& mask) const;
          /// Retrieve the i-th string into val.
00145     virtual void getString(uint32_t i, std::string &val) const;
00146 
          /// Key access.  NOTE(review): presumably these query the dictionary
          /// -- the number of distinct keys, the i-th key, and a lookup of
          /// str that returns a pointer when str is a known key -- confirm.
00147     virtual uint32_t getNumKeys() const;
00148     virtual const char* getKey(uint32_t i) const;
00149     virtual const char* isKey(const char* str) const;
00150 
          /// Output routines taking a C FILE and a C++ output stream.
00151     virtual void write(FILE* file) const;
00152     virtual void print(std::ostream& out) const;
00153 
          /// Build an ibis::direkte index; dir defaults to 0.
          /// NOTE(review): ownership of the returned pointer is not visible
          /// in this header -- confirm before deleting or caching it.
00154     ibis::direkte* fillIndex(const char *dir=0) const;
          /// Return the address of the internal dictionary member.  The
          /// pointer refers to storage owned by this object.
00156     const ibis::dictionary* getDictionary() const {return &dic;}
          /// Set the dictionary from the argument.  NOTE(review): meaning of
          /// the int return value is not visible here.
00157     int setDictionary(const dictionary&);
00158 
00159 private:
00160     // private member variables
00161 
00162     // dictionary is mutable in order to delay the reading of dictionary
00163     // from disk as late as possible
00164     mutable ibis::dictionary dic;
00165 
00166     // private member functions
          // Both helpers are const; they can still modify dic because it is
          // declared mutable.
00167     void prepareMembers() const;
00168     void readDictionary(const char *dir=0) const;
00169 
          // Assignment operator is declared private, so code outside this
          // class cannot assign one category object to another.
00170     category& operator=(const category&);
00171 }; // ibis::category
00172 
/// A column class, publicly derived from ibis::column, for values that
/// are accessed as raw bytes (see countRawBytes, selectRawBytes and
/// getBlob).  Many of the virtual functions declared here are inline
/// stubs that do nothing or return a fixed value.
00177 class ibis::blob : public ibis::column {
00178 public:
          /// Destructor with an empty body.
00179     virtual ~blob() {};
          /// Constructors: from a partition and an open FILE, from a
          /// partition and a name, and from an existing column.
00180     blob(const part*, FILE*);
00181     blob(const part*, const char*);
00182     blob(const ibis::column&);
00183 
          /// String searches: every overload ignores its arguments and
          /// returns -1.  NOTE(review): presumably -1 signals "not
          /// supported" -- confirm against the ibis::column convention.
00184     virtual long stringSearch(const char*, ibis::bitvector&) const {return -1;}
00185     virtual long stringSearch(const std::vector<std::string>&,
00186                               ibis::bitvector&) const {return -1;}
00187     virtual long stringSearch(const char*) const {return -1;}
00188     virtual long stringSearch(const std::vector<std::string>&) const {
00189         return -1;}
00190 
          /// Min/max computation and index loading have empty bodies here.
          /// Note that the three-argument computeMinMax leaves its two
          /// double& arguments unmodified.
00191     virtual void computeMinMax() {}
00192     virtual void computeMinMax(const char*) {}
00193     virtual void computeMinMax(const char*, double&, double&) const {}
00194     virtual void loadIndex(const char*, int) const throw () {}
          /// Always return -1.
00195     virtual long indexSize() const {return -1;}
00196     virtual int  getValuesArray(void*) const {return -1;}
00197 
          /// Typed selection functions: every one ignores the bitvector and
          /// returns a null pointer (0), so callers must check the result
          /// before dereferencing it.
00198     virtual array_t<signed char>* selectBytes(const bitvector&) const {return 0;}
00199     virtual array_t<unsigned char>* selectUBytes(const bitvector&) const {return 0;}
00200     virtual array_t<int16_t>* selectShorts(const bitvector&) const {return 0;}
00201     virtual array_t<uint16_t>* selectUShorts(const bitvector&) const {return 0;}
00202     virtual array_t<int32_t>* selectInts(const bitvector&) const {return 0;}
00203     virtual array_t<uint32_t>* selectUInts(const bitvector&) const {return 0;}
00204     virtual array_t<int64_t>* selectLongs(const bitvector&) const {return 0;}
00205     virtual array_t<uint64_t>* selectULongs(const bitvector&) const {return 0;}
00206     virtual array_t<float>* selectFloats(const bitvector&) const {return 0;}
00207     virtual array_t<double>* selectDoubles(const bitvector&) const {return 0;}
00208     virtual std::vector<std::string>* selectStrings(const bitvector&) const {return 0;}
00209 
          // The declarations below are commented out, so this class does not
          // declare its own versions of these functions.
00210     // virtual long estimateRange(const ibis::qContinuousRange&,
00211     //                         ibis::bitvector&,
00212     //                         ibis::bitvector&) const {return -1;}
00213     // virtual long estimateRange(const ibis::qDiscreteRange&,
00214     //                         ibis::bitvector&,
00215     //                         ibis::bitvector&) const {return -1;}
00216     // virtual long evaluateRange(const ibis::qContinuousRange&,
00217     //                         const ibis::bitvector&,
00218     //                         ibis::bitvector&) const {return -1;}
00219     // virtual long evaluateRange(const ibis::qDiscreteRange&,
00220     //                         const ibis::bitvector&,
00221     //                         ibis::bitvector&) const {return -1;}
00222     // virtual long estimateRange(const ibis::qContinuousRange&) const {return -1;}
00223     // virtual long estimateRange(const ibis::qDiscreteRange&) const {return -1;}
00224     // virtual double estimateCost(const ibis::qContinuousRange&) const {return 0;}
00225     // virtual double estimateCost(const ibis::qDiscreteRange& cmp) const {return 0;}
00226     // virtual double estimateCost(const ibis::qString&) const {return 0;}
00227     // virtual double estimateCost(const ibis::qMultiString&) const {return 0;}
00228 
00229     // virtual float getUndecidable(const ibis::qContinuousRange&,
00230     //                           ibis::bitvector&) const {return 1;}
00231     // virtual float getUndecidable(const ibis::qDiscreteRange&,
00232     //                           ibis::bitvector&) const {return 1;}
00233 
          /// Fixed statistics: the reported minimum is DBL_MAX and the
          /// reported maximum is -DBL_MAX, i.e. an inverted (empty) range,
          /// and the sum is 0.
00234     virtual double getActualMin() const {return DBL_MAX;}
00235     virtual double getActualMax() const {return -DBL_MAX;}
00236     virtual double getSum() const {return 0;}
00237 
          /// This overload ignores its arguments and always returns -1.
00238     virtual long append(const void*, const ibis::bitvector&) {return -1;}
          /// Append using two C-string arguments (dt, df), two counts
          /// (nold, nnew) and a caller-supplied buffer buf of size nbuf.
          /// NOTE(review): dt/df look like destination and source directory
          /// names -- confirm against the implementation.
00239     virtual long append(const char* dt, const char* df, const uint32_t nold,
00240                         const uint32_t nnew, uint32_t nbuf, char* buf);
          /// NOTE(review): presumably writes nnew new values to the data
          /// file under dir after nold existing ones, taking the content
          /// from va1 and va2 and updating mask -- the exact roles of va1
          /// and va2 are not visible in this header.
00241     virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
00242                            ibis::bitvector& mask, const void *va1,
00243                            void *va2);
00244 
          /// Output routines taking a C FILE and a C++ output stream.
00245     virtual void write(FILE*) const;
00246     virtual void print(std::ostream&) const;
00247 
          /// Raw-byte access.  selectRawBytes returns its results through
          /// the two array arguments.  NOTE(review): presumably the first
          /// holds the concatenated bytes and the second the positions of
          /// the individual values -- confirm.
00248     long countRawBytes(const bitvector&) const;
00249     int selectRawBytes(const bitvector&,
00250                        array_t<unsigned char>&, array_t<uint32_t>&) const;
          /// Retrieve the value at index ind.  buf is a reference to a
          /// pointer and size is a reference, so both can be changed by the
          /// call.  NOTE(review): who allocates and frees buf is not visible
          /// here -- confirm before use.
00251     int getBlob(uint32_t ind, unsigned char *&buf, uint32_t &size) const;
00252 
00253 protected:
          /// Extraction helpers.  The overloads differ in the type of their
          /// two data-source arguments: in-memory arrays
          /// (array_t<unsigned char>, array_t<int64_t>), a const char* plus
          /// an array_t<int64_t>, or two const char* values.
          /// NOTE(review): the const char* arguments are presumably file
          /// names and the int64_t array the start positions; the trailing
          /// uint32_t of extractSome is presumably a count -- confirm.
00254     int extractAll(const bitvector&,
00255                    array_t<unsigned char>&, array_t<uint32_t>&,
00256                    const array_t<unsigned char>&,
00257                    const array_t<int64_t>&) const;
00258     int extractSome(const bitvector&,
00259                     array_t<unsigned char>&, array_t<uint32_t>&,
00260                     const array_t<unsigned char>&, const array_t<int64_t>&,
00261                     const uint32_t) const;
00262     int extractAll(const bitvector&,
00263                    array_t<unsigned char>&, array_t<uint32_t>&,
00264                    const char*, const array_t<int64_t>&) const;
00265     int extractSome(const bitvector&,
00266                     array_t<unsigned char>&, array_t<uint32_t>&,
00267                     const char*, const array_t<int64_t>&, const uint32_t) const;
00268     int extractSome(const bitvector&,
00269                     array_t<unsigned char>&, array_t<uint32_t>&,
00270                     const char*, const char*, const uint32_t) const;
          /// Read the value at index ind from datafile, locating it either
          /// through an in-memory array (starts) or through a second file
          /// (spfile).  buf and size are references and can be changed by
          /// the call.
00271     int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size,
00272                  const array_t<int64_t> &starts, const char *datafile) const;
00273     int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size,
00274                  const char *spfile, const char *datafile) const;
00275 }; // ibis::blob
00276 #endif // IBIS_CATEGORY_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive