00001 // File: $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Lawrence Berkeley National Laboratory 00004 // Copyright 2000-2012 the Regents of the University of California 00005 #ifndef IBIS_QUERY_H 00006 #define IBIS_QUERY_H 00007 00008 00009 00010 #include "part.h" // ibis::part 00011 #include "whereClause.h" // ibis::whereClause 00012 #include "selectClause.h" // ibis::selectClause 00013 00053 class FASTBIT_CXX_DLLSPEC ibis::query { 00054 public: 00055 enum QUERY_STATE { 00056 UNINITIALIZED, //< The query object is currently empty. 00057 SET_COMPONENTS, //< The query object has a select clause. 00058 SET_RIDS, //< The query object contains a list of RIDs. 00059 SET_PREDICATE, //< The query object has a where clause. 00060 SPECIFIED, //< SET_COMPONENTS & (SET_RIDS | SET_PREDICATE). 00061 QUICK_ESTIMATE, //< A upper and a lower bound are computed. 00062 FULL_EVALUATE, //< The exact hits are computed. 00063 BUNDLES_TRUNCATED, //< Only top-K results are stored. 00064 HITS_TRUNCATED //< The hit vector has been updated to match bundles. 00065 }; 00066 00067 virtual ~query(); 00071 query(const char* dir, const ibis::partList& tl); 00073 query(const char* uid=0, const part* et=0, const char* pref=0); 00074 00076 const char* id() const {return myID;}; 00077 const char* dir() const {return myDir;} 00078 const char* userName() const {return user;} 00079 00080 time_t timestamp() const {return dstime;} 00082 const part* partition() const {return mypart;} 00084 const selectClause& components() const {return comps;}; 00085 00087 int setRIDs(const RIDSet& set); 00089 int setWhereClause(const char *str); 00091 int setWhereClause(const std::vector<const char*>& names, 00092 const std::vector<double>& lbounds, 00093 const std::vector<double>& rbounds); 00095 int setWhereClause(const ibis::qExpr* qexp); 00097 virtual int setSelectClause(const char *str); 00099 int setPartition(const ibis::part* tbl); 00101 int setTable(const ibis::part* tbl) {return setPartition(tbl);} 00103 virtual const char* getWhereClause() const {return conds.getString();} 00105 virtual const char* getSelectClause() const {return *comps;} 00106 00107 void expandQuery(); 00108 void contractQuery(); 00109 std::string removeComplexConditions(); 00110 00112 const RIDSet* getUserRIDs() const {return rids_in;} 00113 00114 // Functions to perform estimation. 00115 00116 int estimate(); 00117 long getMinNumHits() const; 00118 long getMaxNumHits() const; 00119 00120 // Functions related to full evaluation. 00121 00122 int evaluate(const bool evalSelect=false); 00127 const ibis::bitvector* getHitVector() const {return hits;} 00128 long getNumHits() const; 00129 long getHitRows(std::vector<uint32_t> &rids) const; 00130 long countHits() const; 00131 00132 int orderby(const char *names) const; 00133 long limit(const char *names, uint32_t keep, 00134 bool updateHits = true); 00135 00146 array_t<signed char>* getQualifiedBytes(const char* column_name); 00148 array_t<unsigned char>* getQualifiedUBytes(const char* column_name); 00150 array_t<int16_t>* getQualifiedShorts(const char* column_name); 00152 array_t<uint16_t>* getQualifiedUShorts(const char* column_name); 00154 array_t<int32_t>* getQualifiedInts(const char* column_name); 00157 array_t<uint32_t>* getQualifiedUInts(const char* column_name); 00159 array_t<int64_t>* getQualifiedLongs(const char* column_name); 00161 array_t<uint64_t>* getQualifiedULongs(const char* column_name); 00164 array_t<float>* getQualifiedFloats(const char* column_name); 00167 array_t<double>* getQualifiedDoubles(const char* column_name); 00169 std::vector<std::string>* getQualifiedStrings(const char* column_name); 00171 RIDSet* getRIDs() const; 00173 RIDSet* getRIDs(const ibis::bitvector& mask) const; 00175 const RIDSet* getRIDsInBundle(const uint32_t bid) const; 00177 00184 void printSelected(std::ostream& out) const; 00188 void printSelectedWithRID(std::ostream& out) const; 00189 00195 long sequentialScan(ibis::bitvector& bv) const; 00196 00197 long getExpandedHits(ibis::bitvector&) const; 00198 00199 // used by ibis::bundle 00200 RIDSet* readRIDs() const; 00201 void writeRIDs(const RIDSet* rids) const; 00202 00205 void logMessage(const char* event, const char* fmt, ...) const; 00206 00207 // Functions for cleaning up, retrieving query states 00208 // and error messages. 00209 00211 void clear(); 00213 QUERY_STATE getState() const; 00215 const char* getLastError() const {return lastError;} 00217 void clearErrorMessage() const {*lastError=0;} 00218 00221 static bool isValidToken(const char* tok); 00223 // *** the value 16 is hard coded in functions newToken and *** 00224 // *** isValidToken *** 00225 static unsigned tokenLength() {return 16;} 00226 00228 static void removeQueryRecords() 00229 {ibis::gParameters().add("query.purgeTempFiles", "true");} 00231 static void keepQueryRecords() 00232 {ibis::gParameters().add("query.purgeTempFiles", "false");} 00233 00234 class result; // Forward declaration only 00235 class weight; 00236 class readLock; 00237 class writeLock; 00238 friend class readLock; 00239 friend class writeLock; 00240 00241 protected: 00242 char* user; 00243 whereClause conds; 00244 selectClause comps; 00245 QUERY_STATE state; 00246 ibis::bitvector* hits; 00247 ibis::bitvector* sup; 00248 mutable ibis::part::readLock* dslock; 00249 mutable char lastError[MAX_LINE+PATH_MAX]; 00250 00251 void logError(const char* event, const char* fmt, ...) const; 00252 void logWarning(const char* event, const char* fmt, ...) const; 00253 00254 void reorderExpr(); // reorder query expression 00255 00256 bool hasBundles() const; 00257 void getBounds(); 00258 void doEstimate(const qExpr* term, ibis::bitvector& low, 00259 ibis::bitvector& high) const; 00260 00261 int computeHits(); 00262 int doEvaluate(const qExpr* term, ibis::bitvector& hits) const; 00263 int doEvaluate(const qExpr* term, const ibis::bitvector& mask, 00264 ibis::bitvector& hits) const; 00265 int doScan(const qExpr* term, const ibis::bitvector& mask, 00266 ibis::bitvector& hits) const; 00267 int doScan(const qExpr* term, ibis::bitvector& hits) const; 00268 00269 int64_t processJoin(); 00270 00272 virtual void writeQuery(); 00274 void readQuery(const ibis::partList& tl); 00276 void removeFiles(); 00277 00279 void readHits(); 00281 void writeHits() const; 00283 void printRIDs(const RIDSet& ridset) const; 00286 uint32_t countPages(unsigned wordsize) const; 00287 00289 int doExpand(ibis::qExpr* exp0) const; 00291 int doContract(ibis::qExpr* exp0) const; 00292 00293 // A group of functions to count the number of pairs 00294 // satisfying the join conditions. 00295 int64_t sortJoin(const std::vector<const ibis::deprecatedJoin*>& terms, 00296 const ibis::bitvector& mask) const; 00297 int64_t sortJoin(const ibis::deprecatedJoin& cmp, 00298 const ibis::bitvector& mask) const; 00299 int64_t sortEquiJoin(const ibis::deprecatedJoin& cmp, 00300 const ibis::bitvector& mask) const; 00301 int64_t sortRangeJoin(const ibis::deprecatedJoin& cmp, 00302 const ibis::bitvector& mask) const; 00303 int64_t sortEquiJoin(const ibis::deprecatedJoin& cmp, 00304 const ibis::bitvector& mask, 00305 const char* pairfile) const; 00306 int64_t sortRangeJoin(const ibis::deprecatedJoin& cmp, 00307 const ibis::bitvector& mask, 00308 const char* pairfile) const; 00309 void orderPairs(const char* pairfile) const; 00310 int64_t mergePairs(const char* pairfile) const; 00311 00312 template <typename T1, typename T2> 00313 int64_t countEqualPairs(const array_t<T1>& val1, 00314 const array_t<T2>& val2) const; 00315 template <typename T1, typename T2> 00316 int64_t countDeltaPairs(const array_t<T1>& val1, 00317 const array_t<T2>& val2, const T1& delta) const; 00318 template <typename T1, typename T2> 00319 int64_t recordEqualPairs(const array_t<T1>& val1, 00320 const array_t<T2>& val2, 00321 const array_t<uint32_t>& ind1, 00322 const array_t<uint32_t>& ind2, 00323 const char* pairfile) const; 00324 template <typename T1, typename T2> 00325 int64_t recordDeltaPairs(const array_t<T1>& val1, 00326 const array_t<T2>& val2, 00327 const array_t<uint32_t>& ind1, 00328 const array_t<uint32_t>& ind2, 00329 const T1& delta, const char* pairfile) const; 00330 00331 // functions for access control 00332 void gainReadAccess(const char* mesg) const { 00333 if (ibis::gVerbose > 10) 00334 logMessage("gainReadAccess", "acquiring a read lock for %s", 00335 mesg); 00336 if (0 != pthread_rwlock_rdlock(&lock)) 00337 logMessage("gainReadAccess", 00338 "unable to gain read access to rwlock for %s", mesg); 00339 } 00340 void gainWriteAccess(const char* mesg) const { 00341 if (ibis::gVerbose > 10) 00342 logMessage("gainWriteAccess", "acquiring a write lock for %s", 00343 mesg); 00344 if (0 != pthread_rwlock_wrlock(&lock)) 00345 logMessage("gainWriteAccess", 00346 "unable to gain write access to rwlock for %s", mesg); 00347 } 00348 void releaseAccess(const char* mesg) const { 00349 if (ibis::gVerbose > 10) 00350 logMessage("releaseAccess", "releasing rwlock for %s", mesg); 00351 if (0 != pthread_rwlock_unlock(&lock)) 00352 logMessage("releaseAccess", "unable to unlock the rwlock for %s", 00353 mesg); 00354 } 00355 00356 private: 00357 char* myID; // The unique ID of this query object 00358 char* myDir; // Name of the directory containing the query record 00359 RIDSet* rids_in; // Rid list specified in an RID query 00360 const part* mypart; // Data partition used to process the query 00361 time_t dstime; // When query evaluation started 00362 mutable pthread_rwlock_t lock; // Rwlock for access control 00363 00364 // private functions 00365 static char* newToken(const char*); 00366 00367 void setMyDir(const char *pref); 00368 00369 query(const query&); 00370 query& operator=(const query&); 00371 }; // class ibis::query 00372 00373 namespace ibis { 00381 template <> 00382 int64_t query::countEqualPairs(const array_t<int32_t>& val1, 00383 const array_t<uint32_t>& val2) const; 00384 template <> 00385 int64_t query::countEqualPairs(const array_t<uint32_t>& val1, 00386 const array_t<int32_t>& val2) const; 00387 template <> 00388 int64_t query::countDeltaPairs(const array_t<int32_t>& val1, 00389 const array_t<uint32_t>& val2, 00390 const int32_t& delta) const; 00391 template <> 00392 int64_t query::countDeltaPairs(const array_t<uint32_t>& val1, 00393 const array_t<int32_t>& val2, 00394 const uint32_t& delta) const; 00395 template <> 00396 int64_t query::recordEqualPairs(const array_t<int32_t>& val1, 00397 const array_t<uint32_t>& val2, 00398 const array_t<uint32_t>& ind1, 00399 const array_t<uint32_t>& ind2, 00400 const char *pairfile) const; 00401 template <> 00402 int64_t query::recordEqualPairs(const array_t<uint32_t>& val1, 00403 const array_t<int32_t>& val2, 00404 const array_t<uint32_t>& ind1, 00405 const array_t<uint32_t>& ind2, 00406 const char *pairfile) const; 00407 template <> 00408 int64_t query::recordDeltaPairs(const array_t<int32_t>& val1, 00409 const array_t<uint32_t>& val2, 00410 const array_t<uint32_t>& ind1, 00411 const array_t<uint32_t>& ind2, 00412 const int32_t& delta, 00413 const char *pairfile) const; 00414 template <> 00415 int64_t query::recordDeltaPairs(const array_t<uint32_t>& val1, 00416 const array_t<int32_t>& val2, 00417 const array_t<uint32_t>& ind1, 00418 const array_t<uint32_t>& ind2, 00419 const uint32_t& delta, 00420 const char *pairfile) const; 00422 } 00423 00425 class ibis::query::weight : public ibis::qExpr::weight { 00426 public: 00427 virtual double operator()(const ibis::qExpr* ex) const; 00428 weight(const ibis::part* ds) : dataset(ds) {} 00429 00430 private: 00431 const ibis::part* dataset; 00432 }; 00433 00438 class ibis::query::readLock { 00439 public: 00440 readLock(const query* q, const char* m) : theQuery(q), mesg(m) { 00441 theQuery->gainReadAccess(m); 00442 }; 00443 ~readLock() {theQuery->releaseAccess(mesg);} 00444 private: 00445 const query* theQuery; 00446 const char* mesg; 00447 00448 readLock() {}; // no default constructor 00449 readLock(const readLock&) {}; // can not copy 00450 }; // class ibis::query::readLock 00451 00456 class ibis::query::writeLock { 00457 public: 00458 writeLock(const query* q, const char* m) : theQuery(q), mesg(m) { 00459 theQuery->gainWriteAccess(m); 00460 }; 00461 ~writeLock() {theQuery->releaseAccess(mesg);} 00462 private: 00463 const query* theQuery; 00464 const char* mesg; 00465 00466 writeLock() {}; // no default constructor 00467 writeLock(const writeLock&) {}; // can not copy 00468 }; // ibis::query::writeLock 00469 #endif // IBIS_QUERY_H
![]() |