00001 //File: $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Copyright 2000-2012 the Regents of the University of California 00004 #ifndef IBIS_CATEGORY_H 00005 #define IBIS_CATEGORY_H 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 #include "column.h" // ibis::column 00016 #include "dictionary.h" // ibis::dictionary 00017 #include "idirekte.h" // ibis::direkte 00018 00024 class ibis::text : public ibis::column { 00025 public: 00026 virtual ~text() {unloadIndex();}; 00027 text(const part* tbl, FILE* file); 00028 text(const part* tbl, const char* name, ibis::TYPE_T t=ibis::TEXT); 00029 text(const ibis::column& col); // copy from column 00030 00031 virtual long keywordSearch(const char* str, ibis::bitvector& hits) const; 00032 virtual long keywordSearch(const char* str) const; 00033 // long keywordSearch(const std::vector<std::string>& strs) const; 00034 // long keywordSearch(const std::vector<std::string>& strs, 00035 // ibis::bitvector& hits) const; 00036 00037 virtual long stringSearch(const char* str, ibis::bitvector& hits) const; 00038 virtual long stringSearch(const std::vector<std::string>& strs, 00039 ibis::bitvector& hits) const; 00040 virtual long stringSearch(const char* str) const; 00041 virtual long stringSearch(const std::vector<std::string>& strs) const; 00042 virtual long patternSearch(const char*, ibis::bitvector&) const; 00043 virtual long patternSearch(const char*) const; 00044 00045 using ibis::column::estimateCost; 00046 virtual double estimateCost(const ibis::qString& cmp) const; 00047 virtual double estimateCost(const ibis::qMultiString& cmp) const; 00048 00049 virtual void loadIndex(const char* iopt=0, int ropt=0) const throw (); 00050 virtual long append(const char* dt, const char* df, const uint32_t nold, 00051 const uint32_t nnew, uint32_t nbuf, char* buf); 00052 virtual long append(const void*, const ibis::bitvector&) {return -1;} 00053 virtual long saveSelected(const ibis::bitvector& sel, const char *dest, 00054 char *buf, uint32_t nbuf); 00056 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const; 00058 virtual array_t<int64_t>* selectLongs(const bitvector& mask) const; 00059 virtual 00060 std::vector<std::string>* selectStrings(const bitvector& mask) const; 00061 virtual const char* findString(const char* str) const; 00062 virtual void getString(uint32_t i, std::string &val) const { 00063 readString(i, val);} 00064 00065 virtual void write(FILE* file) const; 00066 virtual void print(std::ostream& out) const; 00067 00068 const column* IDColumnForKeywordIndex() const; 00069 void TDListForKeywordIndex(std::string&) const; 00070 void delimitersForKeywordIndex(std::string&) const; 00071 00074 struct tokenizer { 00081 virtual int operator()(std::vector<const char*>& tkns, char *buf) = 0; 00083 virtual ~tokenizer() {} 00084 }; // struct tokenizer 00085 00086 protected: 00088 void startPositions(const char *dir, char *buf, uint32_t nbuf) const; 00090 void readString(uint32_t i, std::string &val) const; 00092 int readString(std::string&, int, long, long, char*, uint32_t, 00093 uint32_t&, off_t&) const; 00094 int writeStrings(const char *to, const char *from, 00095 const char *spto, const char *spfrom, 00096 ibis::bitvector &msk, const ibis::bitvector &sel, 00097 char *buf, uint32_t nbuf) const; 00098 00099 private: 00100 text& operator=(const text&); 00101 }; // ibis::text 00102 00109 class ibis::category : public ibis::text { 00110 public: 00111 virtual ~category(); 00112 category(const part* tbl, FILE* file); 00113 category(const part* tbl, const char* name); 00114 category(const ibis::column& col); // copy from column 00115 // a special construct for meta-tag attributes 00116 category(const part* tbl, const char* name, const char* value, 00117 const char* dir=0, uint32_t nevt=0); 00118 00120 virtual long stringSearch(const char* str, ibis::bitvector& hits) const; 00122 virtual long stringSearch(const std::vector<std::string>& vals, 00123 ibis::bitvector& hits) const; 00125 virtual long stringSearch(const char* str) const; 00127 virtual long stringSearch(const std::vector<std::string>& vals) const; 00128 00129 virtual long patternSearch(const char* pat) const; 00130 virtual long patternSearch(const char* pat, ibis::bitvector &hits) const; 00131 using ibis::text::estimateCost; 00132 virtual double estimateCost(const ibis::qLike& cmp) const; 00133 virtual double estimateCost(const ibis::qString& cmp) const; 00134 virtual double estimateCost(const ibis::qMultiString& cmp) const; 00135 00136 virtual void loadIndex(const char* =0, int =0) const throw (); 00138 virtual long append(const char* dt, const char* df, const uint32_t nold, 00139 const uint32_t nnew, uint32_t nbuf, char* buf); 00140 virtual long append(const void*, const ibis::bitvector&) {return -1;} 00142 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const; 00143 virtual std::vector<std::string>* 00144 selectStrings(const bitvector& mask) const; 00145 virtual void getString(uint32_t i, std::string &val) const; 00146 00147 virtual uint32_t getNumKeys() const; 00148 virtual const char* getKey(uint32_t i) const; 00149 virtual const char* isKey(const char* str) const; 00150 00151 virtual void write(FILE* file) const; 00152 virtual void print(std::ostream& out) const; 00153 00154 ibis::direkte* fillIndex(const char *dir=0) const; 00156 const ibis::dictionary* getDictionary() const {return &dic;} 00157 int setDictionary(const dictionary&); 00158 00159 private: 00160 // private member variables 00161 00162 // dictionary is mutable in order to delay the reading of dictionary 00163 // from disk as late as possible 00164 mutable ibis::dictionary dic; 00165 00166 // private member functions 00167 void prepareMembers() const; 00168 void readDictionary(const char *dir=0) const; 00169 00170 category& operator=(const category&); 00171 }; // ibis::category 00172 00177 class ibis::blob : public ibis::column { 00178 public: 00179 virtual ~blob() {}; 00180 blob(const part*, FILE*); 00181 blob(const part*, const char*); 00182 blob(const ibis::column&); 00183 00184 virtual long stringSearch(const char*, ibis::bitvector&) const {return -1;} 00185 virtual long stringSearch(const std::vector<std::string>&, 00186 ibis::bitvector&) const {return -1;} 00187 virtual long stringSearch(const char*) const {return -1;} 00188 virtual long stringSearch(const std::vector<std::string>&) const { 00189 return -1;} 00190 00191 virtual void computeMinMax() {} 00192 virtual void computeMinMax(const char*) {} 00193 virtual void computeMinMax(const char*, double&, double&) const {} 00194 virtual void loadIndex(const char*, int) const throw () {} 00195 virtual long indexSize() const {return -1;} 00196 virtual int getValuesArray(void*) const {return -1;} 00197 00198 virtual array_t<signed char>* selectBytes(const bitvector&) const {return 0;} 00199 virtual array_t<unsigned char>* selectUBytes(const bitvector&) const {return 0;} 00200 virtual array_t<int16_t>* selectShorts(const bitvector&) const {return 0;} 00201 virtual array_t<uint16_t>* selectUShorts(const bitvector&) const {return 0;} 00202 virtual array_t<int32_t>* selectInts(const bitvector&) const {return 0;} 00203 virtual array_t<uint32_t>* selectUInts(const bitvector&) const {return 0;} 00204 virtual array_t<int64_t>* selectLongs(const bitvector&) const {return 0;} 00205 virtual array_t<uint64_t>* selectULongs(const bitvector&) const {return 0;} 00206 virtual array_t<float>* selectFloats(const bitvector&) const {return 0;} 00207 virtual array_t<double>* selectDoubles(const bitvector&) const {return 0;} 00208 virtual std::vector<std::string>* selectStrings(const bitvector&) const {return 0;} 00209 00210 // virtual long estimateRange(const ibis::qContinuousRange&, 00211 // ibis::bitvector&, 00212 // ibis::bitvector&) const {return -1;} 00213 // virtual long estimateRange(const ibis::qDiscreteRange&, 00214 // ibis::bitvector&, 00215 // ibis::bitvector&) const {return -1;} 00216 // virtual long evaluateRange(const ibis::qContinuousRange&, 00217 // const ibis::bitvector&, 00218 // ibis::bitvector&) const {return -1;} 00219 // virtual long evaluateRange(const ibis::qDiscreteRange&, 00220 // const ibis::bitvector&, 00221 // ibis::bitvector&) const {return -1;} 00222 // virtual long estimateRange(const ibis::qContinuousRange&) const {return -1;} 00223 // virtual long estimateRange(const ibis::qDiscreteRange&) const {return -1;} 00224 // virtual double estimateCost(const ibis::qContinuousRange&) const {return 0;} 00225 // virtual double estimateCost(const ibis::qDiscreteRange& cmp) const {return 0;} 00226 // virtual double estimateCost(const ibis::qString&) const {return 0;} 00227 // virtual double estimateCost(const ibis::qMultiString&) const {return 0;} 00228 00229 // virtual float getUndecidable(const ibis::qContinuousRange&, 00230 // ibis::bitvector&) const {return 1;} 00231 // virtual float getUndecidable(const ibis::qDiscreteRange&, 00232 // ibis::bitvector&) const {return 1;} 00233 00234 virtual double getActualMin() const {return DBL_MAX;} 00235 virtual double getActualMax() const {return -DBL_MAX;} 00236 virtual double getSum() const {return 0;} 00237 00238 virtual long append(const void*, const ibis::bitvector&) {return -1;} 00239 virtual long append(const char* dt, const char* df, const uint32_t nold, 00240 const uint32_t nnew, uint32_t nbuf, char* buf); 00241 virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew, 00242 ibis::bitvector& mask, const void *va1, 00243 void *va2); 00244 00245 virtual void write(FILE*) const; 00246 virtual void print(std::ostream&) const; 00247 00248 long countRawBytes(const bitvector&) const; 00249 int selectRawBytes(const bitvector&, 00250 array_t<unsigned char>&, array_t<uint32_t>&) const; 00251 int getBlob(uint32_t ind, unsigned char *&buf, uint32_t &size) const; 00252 00253 protected: 00254 int extractAll(const bitvector&, 00255 array_t<unsigned char>&, array_t<uint32_t>&, 00256 const array_t<unsigned char>&, 00257 const array_t<int64_t>&) const; 00258 int extractSome(const bitvector&, 00259 array_t<unsigned char>&, array_t<uint32_t>&, 00260 const array_t<unsigned char>&, const array_t<int64_t>&, 00261 const uint32_t) const; 00262 int extractAll(const bitvector&, 00263 array_t<unsigned char>&, array_t<uint32_t>&, 00264 const char*, const array_t<int64_t>&) const; 00265 int extractSome(const bitvector&, 00266 array_t<unsigned char>&, array_t<uint32_t>&, 00267 const char*, const array_t<int64_t>&, const uint32_t) const; 00268 int extractSome(const bitvector&, 00269 array_t<unsigned char>&, array_t<uint32_t>&, 00270 const char*, const char*, const uint32_t) const; 00271 int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size, 00272 const array_t<int64_t> &starts, const char *datafile) const; 00273 int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size, 00274 const char *spfile, const char *datafile) const; 00275 }; // ibis::blob 00276 #endif // IBIS_CATEGORY_H
![]() |