query.h
Go to the documentation of this file.
00001 // File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 //         Lawrence Berkeley National Laboratory
00004 // Copyright 2000-2011 the Regents of the University of California
00005 #ifndef IBIS_QUERY_H
00006 #define IBIS_QUERY_H
00007 
00008 
00009 
00010 #include "part.h"       // ibis::part
00011 #include "whereClause.h"        // ibis::whereClause
00012 #include "selectClause.h"       // ibis::selectClause
00013 
/// A query object.  As declared here it bundles a select clause (comps), a
/// where clause (conds) or a user supplied list of RIDs (rids_in), the data
/// partition used to process the query (mypart), the processing state
/// (state) and the hit vector (hits).  Access to the object is guarded by a
/// pthread read-write lock, see gainReadAccess, gainWriteAccess and
/// releaseAccess as well as the nested classes readLock and writeLock.
/// The copy constructor and the assignment operator are declared private.
00053 class FASTBIT_CXX_DLLSPEC ibis::query {
00054 public:
          /// The stages a query object goes through, as reported by getState.
00055     enum QUERY_STATE {
00056         UNINITIALIZED,  ///< The query object is currently empty.
00057         SET_COMPONENTS, ///< The query object has a select clause.
00058         SET_RIDS,       ///< The query object contains a list of RIDs.
00059         SET_PREDICATE,  ///< The query object has a where clause.
00060         SPECIFIED,      ///< SET_COMPONENTS & (SET_RIDS | SET_PREDICATE).
00061         QUICK_ESTIMATE, ///< An upper and a lower bound are computed.
00062         FULL_EVALUATE,  ///< The exact hits are computed.
00063         BUNDLES_TRUNCATED,      ///< Only top-K results are stored.
00064         HITS_TRUNCATED  ///< The hit vector has been updated to match bundles.
00065     };
00066 
00067     virtual ~query();
          /// NOTE(review): presumably rebuilds a query from the record kept in
          /// directory @c dir using the partitions in @c tl (compare
          /// readQuery) -- confirm against the implementation.
00071     query(const char* dir, const ibis::partList& tl);
          /// All three arguments default to null pointers.
00073     query(const char* uid=0, const part* et=0, const char* pref=0);
00074 
00076     const char* id() const {return myID;};      ///< Return myID, the unique ID of this query object.
00077     const char* dir() const {return myDir;}     ///< Return myDir, the directory containing the query record.
00078     const char* userName() const {return user;} ///< Return the member @c user.
00079 
          /// Return dstime, the time when query evaluation started.
00080     time_t timestamp() const {return dstime;}
          /// Return mypart, the data partition used to process the query.
00082     const part* partition() const {return mypart;}
          /// Return a reference to comps, the select clause object.
00084     const selectClause& components() const {return comps;};
00085 
          // Functions to specify the query.  All of them return an int;
          // NOTE(review): the meaning of the return values is not visible in
          // this header -- confirm against the implementation.
00087     int setRIDs(const RIDSet& set);
00089     int setWhereClause(const char *str);
00091     int setWhereClause(const std::vector<const char*>& names,
00092                        const std::vector<double>& lbounds,
00093                        const std::vector<double>& rbounds);
00095     int setWhereClause(const ibis::qExpr* qexp);
00105     virtual int setSelectClause(const char *str);
00108     int setPartition(const ibis::part* tbl);
          /// Same as setPartition; this function simply forwards to it.
00110     int setTable(const ibis::part* tbl) {return setPartition(tbl);}
          /// Return the string kept by the where clause, conds.getString().
00112     virtual const char* getWhereClause() const {return conds.getString();}
          /// Return the string produced by dereferencing the select clause.
00114     virtual const char* getSelectClause() const {return *comps;}
00115 
00116     void expandQuery();
00117     void contractQuery();
00118     std::string removeComplexConditions();
00119 
          /// Return rids_in, the RID list specified in an RID query.
00121     const RIDSet* getUserRIDs() const {return rids_in;}
00122 
00123     // Functions to perform estimation.
00124 
00125     int estimate();
00126     long getMinNumHits() const;
00127     long getMaxNumHits() const;
00128 
00129     // Functions related to full evaluation.
00130 
00131     int evaluate(const bool evalSelect=false);
          /// Return the member @c hits without copying it.
00135     const ibis::bitvector* getHitVector() const {return hits;}
00136     long getNumHits() const;
00137     long getHitRows(std::vector<uint32_t> &rids) const;
00138     long countHits() const;
00139 
00140     int  orderby(const char *names, int direction) const;
00141     long limit(const char *names, int direction, uint32_t keep,
00142                bool updateHits = true);
00143 
          // The getQualifiedXXX functions retrieve the values of the named
          // column as an array of the element type given in the return type.
          // NOTE(review): presumably only the rows that are hits are
          // returned and the caller owns the returned object -- confirm
          // against the implementation.
00154     array_t<signed char>*   getQualifiedBytes(const char* column_name);
00156     array_t<unsigned char>* getQualifiedUBytes(const char* column_name);
00158     array_t<int16_t>* getQualifiedShorts(const char* column_name);
00160     array_t<uint16_t>* getQualifiedUShorts(const char* column_name);
00162     array_t<int32_t>* getQualifiedInts(const char* column_name);
00165     array_t<uint32_t>* getQualifiedUInts(const char* column_name);
00167     array_t<int64_t>* getQualifiedLongs(const char* column_name);
00169     array_t<uint64_t>* getQualifiedULongs(const char* column_name);
00172     array_t<float>* getQualifiedFloats(const char* column_name);
00175     array_t<double>* getQualifiedDoubles(const char* column_name);
00177     std::vector<std::string>* getQualifiedStrings(const char* column_name);
00179     RIDSet* getRIDs() const;
00181     RIDSet* getRIDs(const ibis::bitvector& mask) const;
00183     const RIDSet* getRIDsInBundle(const uint32_t bid) const;
00185 
00192     void printSelected(std::ostream& out) const;
00196     void printSelectedWithRID(std::ostream& out) const;
00197 
00203     long sequentialScan(ibis::bitvector& bv) const;
00204 
00208     long getExpandedHits(ibis::bitvector&) const;
00209 
00210     // used by ibis::bundle
00211     RIDSet* readRIDs() const;
00212     void writeRIDs(const RIDSet* rids) const;
00213 
          /// Log a message; @c fmt and the arguments after it are printf-style
          /// (see the "%s" formats used by gainReadAccess and friends).
00216     void logMessage(const char* event, const char* fmt, ...) const;
00217 
00218     // Functions for cleaning up, retrieving query states
00219     // and error messages.
00220 
00222     void clear();
00224     QUERY_STATE getState() const;
          /// Return the buffer lastError.
00226     const char* getLastError() const {return lastError;}
          /// Make lastError an empty string by zeroing its first character.
          /// This works in a const function because lastError is mutable.
00228     void clearErrorMessage() const {*lastError=0;}
00229 
00232     static bool isValidToken(const char* tok);
00234     // *** the value 16 is hard coded in functions newToken and ***
00235     // *** isValidToken ***
00236     static unsigned tokenLength() {return 16;}
00237 
          /// Set the global parameter "query.purgeTempFiles" to "true".
00239     static void removeQueryRecords()
00240     {ibis::gParameters().add("query.purgeTempFiles", "true");}
          /// Set the global parameter "query.purgeTempFiles" to "false".
00242     static void keepQueryRecords()
00243     {ibis::gParameters().add("query.purgeTempFiles", "false");}
00244 
00245     class result; // Forward declaration only
00246     class weight;
00247     class readLock;
00248     class writeLock;
          // The two lock classes call the protected access control functions
          // gainReadAccess, gainWriteAccess and releaseAccess.
00249     friend class readLock;
00250     friend class writeLock;
00251 
00252 protected:
00253     char* user;         ///< The string returned by userName().
00254     whereClause conds;  ///< The where clause, see getWhereClause.
00255     selectClause comps; ///< The select clause, see components.
00256     QUERY_STATE state;  ///< One of the QUERY_STATE values.
          /// The hit vector returned by getHitVector.
00257     ibis::bitvector* hits;
          /// NOTE(review): not described in this header; presumably the
          /// upper bound produced by estimation -- confirm.
00258     ibis::bitvector* sup;
00259     mutable ibis::part::readLock* dslock;       ///< A read lock object of ibis::part; mutable so const functions may change it.
00260     mutable char lastError[MAX_LINE+PATH_MAX];  ///< Buffer behind getLastError; mutable so const functions may write to it.
00261 
00262     void logError(const char* event, const char* fmt, ...) const;
00263     void logWarning(const char* event, const char* fmt, ...) const;
00264 
00265     void reorderExpr(); // reorder query expression
00266 
00267     bool hasBundles() const;
00268     int  computeHits();   // generate the hit vector for range queries
00269     void getBounds();     // get the upper and lower bounds for range queries
00270     // use index only to come up with an upper bound and a lower bound
00271     void doEstimate(const qExpr* term, ibis::bitvector& low,
00272                     ibis::bitvector& high) const;
00274     int doScan(const qExpr* term, const ibis::bitvector& mask,
00275                ibis::bitvector& hits) const;
00277     int doScan(const qExpr* term, ibis::bitvector& hits) const;
00279     int doEvaluate(const qExpr* term, ibis::bitvector& hits) const;
00281     int doEvaluate(const qExpr* term, const ibis::bitvector& mask,
00282                    ibis::bitvector& hits) const;
00283 
00285     int64_t processJoin();
00286 
00288     virtual void writeQuery();
00290     void readQuery(const ibis::partList& tl);
00292     void removeFiles();
00293 
00295     void readHits();
00297     void writeHits() const;
00299     void printRIDs(const RIDSet& ridset) const;
00302     uint32_t countPages(unsigned wordsize) const;
00303 
00305     int doExpand(ibis::qExpr* exp0) const;
00307     int doContract(ibis::qExpr* exp0) const;
00308 
00309     // A group of functions to count the number of pairs
00310     // satisfying the join conditions.
00311     int64_t sortJoin(const std::vector<const ibis::deprecatedJoin*>& terms,
00312                      const ibis::bitvector& mask) const;
00313     int64_t sortJoin(const ibis::deprecatedJoin& cmp,
00314                      const ibis::bitvector& mask) const;
00315     int64_t sortEquiJoin(const ibis::deprecatedJoin& cmp,
00316                          const ibis::bitvector& mask) const;
00317     int64_t sortRangeJoin(const ibis::deprecatedJoin& cmp,
00318                           const ibis::bitvector& mask) const;
00319     int64_t sortEquiJoin(const ibis::deprecatedJoin& cmp,
00320                          const ibis::bitvector& mask,
00321                          const char* pairfile) const;
00322     int64_t sortRangeJoin(const ibis::deprecatedJoin& cmp,
00323                           const ibis::bitvector& mask,
00324                           const char* pairfile) const;
00325     void orderPairs(const char* pairfile) const;
00326     int64_t mergePairs(const char* pairfile) const;
00327 
          // Member templates working on two arrays of values.  The
          // recordXXX variants additionally take two arrays of uint32_t
          // (ind1, ind2) and the name of a pair file.
00328     template <typename T1, typename T2>
00329     int64_t countEqualPairs(const array_t<T1>& val1,
00330                             const array_t<T2>& val2) const;
00331     template <typename T1, typename T2>
00332     int64_t countDeltaPairs(const array_t<T1>& val1,
00333                             const array_t<T2>& val2, const T1& delta) const;
00334     template <typename T1, typename T2>
00335     int64_t recordEqualPairs(const array_t<T1>& val1,
00336                              const array_t<T2>& val2,
00337                              const array_t<uint32_t>& ind1,
00338                              const array_t<uint32_t>& ind2,
00339                              const char* pairfile) const;
00340     template <typename T1, typename T2>
00341     int64_t recordDeltaPairs(const array_t<T1>& val1,
00342                              const array_t<T2>& val2,
00343                              const array_t<uint32_t>& ind1,
00344                              const array_t<uint32_t>& ind2,
00345                              const T1& delta, const char* pairfile) const;
00346 
00347     // functions for access control
          /// Acquire the rwlock for reading.  @c mesg is only used in log
          /// messages.  A failure of pthread_rwlock_rdlock is logged but not
          /// reported to the caller.
00348     void gainReadAccess(const char* mesg) const {
00349         if (ibis::gVerbose > 10)
00350             logMessage("gainReadAccess", "acquiring a read lock for %s",
00351                        mesg);
00352         if (0 != pthread_rwlock_rdlock(&lock))
00353             logMessage("gainReadAccess",
00354                        "unable to gain read access to rwlock for %s", mesg);
00355     }
          /// Acquire the rwlock for writing.  @c mesg is only used in log
          /// messages.  A failure of pthread_rwlock_wrlock is logged but not
          /// reported to the caller.
00356     void gainWriteAccess(const char* mesg) const {
00357         if (ibis::gVerbose > 10)
00358             logMessage("gainWriteAccess", "acquiring a write lock for %s",
00359                        mesg);
00360         if (0 != pthread_rwlock_wrlock(&lock))
00361             logMessage("gainWriteAccess",
00362                        "unable to gain write access to rwlock for %s", mesg);
00363     }
          /// Unlock the rwlock, whether it was held for reading or writing.
          /// A failure of pthread_rwlock_unlock is logged but not reported to
          /// the caller.
00364     void releaseAccess(const char* mesg) const {
00365         if (ibis::gVerbose > 10)
00366             logMessage("releaseAccess", "releasing rwlock for %s", mesg);
00367         if (0 != pthread_rwlock_unlock(&lock))
00368             logMessage("releaseAccess", "unable to unlock the rwlock for %s",
00369                        mesg);
00370     }
00371 
00372 private:
00373     char* myID;         // The unique ID of this query object
00374     char* myDir;        // Name of the directory containing the query record
00375     RIDSet* rids_in;    // Rid list specified in an RID query
00376     const part* mypart; // Data partition used to process the query
00377     time_t dstime;              // When query evaluation started
00378     mutable pthread_rwlock_t lock; // Rwlock for access control
00379 
00380     // private functions
00381     static char* newToken(const char*); 
00382 
00383     void setMyDir(const char *pref);
00384 
          // Copying is not part of the public interface: both functions are
          // private and no definition appears in this header.
00385     query(const query&);
00386     query& operator=(const query&);
00387 }; // class ibis::query
00388 
00389 namespace ibis {
00397     template <>
00398     int64_t query::countEqualPairs(const array_t<int32_t>& val1,
00399                                    const array_t<uint32_t>& val2) const;
00400     template <>
00401     int64_t query::countEqualPairs(const array_t<uint32_t>& val1,
00402                                    const array_t<int32_t>& val2) const;
00403     template <>
00404     int64_t query::countDeltaPairs(const array_t<int32_t>& val1,
00405                                    const array_t<uint32_t>& val2,
00406                                    const int32_t& delta) const;
00407     template <>
00408     int64_t query::countDeltaPairs(const array_t<uint32_t>& val1,
00409                                    const array_t<int32_t>& val2,
00410                                    const uint32_t& delta) const;
00411     template <>
00412     int64_t query::recordEqualPairs(const array_t<int32_t>& val1,
00413                                     const array_t<uint32_t>& val2,
00414                                     const array_t<uint32_t>& ind1,
00415                                     const array_t<uint32_t>& ind2,
00416                                     const char *pairfile) const;
00417     template <>
00418     int64_t query::recordEqualPairs(const array_t<uint32_t>& val1,
00419                                     const array_t<int32_t>& val2,
00420                                     const array_t<uint32_t>& ind1,
00421                                     const array_t<uint32_t>& ind2,
00422                                     const char *pairfile) const;
00423     template <>
00424     int64_t query::recordDeltaPairs(const array_t<int32_t>& val1,
00425                                     const array_t<uint32_t>& val2,
00426                                     const array_t<uint32_t>& ind1,
00427                                     const array_t<uint32_t>& ind2,
00428                                     const int32_t& delta,
00429                                     const char *pairfile) const;
00430     template <>
00431     int64_t query::recordDeltaPairs(const array_t<uint32_t>& val1,
00432                                     const array_t<int32_t>& val2,
00433                                     const array_t<uint32_t>& ind1,
00434                                     const array_t<uint32_t>& ind2,
00435                                     const uint32_t& delta,
00436                                     const char *pairfile) const;
00438 }
00439 
00441 class ibis::query::weight : public ibis::qExpr::weight {
00442 public:
00443     virtual double operator()(const ibis::qExpr* ex) const;
00444     weight(const ibis::part* ds) : dataset(ds) {}
00445 
00446 private:
00447     const ibis::part* dataset;
00448 };
00449 
00454 class ibis::query::readLock {
00455 public:
00456     readLock(const query* q, const char* m) : theQuery(q), mesg(m) {
00457         theQuery->gainReadAccess(m);
00458     };
00459     ~readLock() {theQuery->releaseAccess(mesg);}
00460 private:
00461     const query* theQuery;
00462     const char* mesg;
00463 
00464     readLock() {}; // no default constructor
00465     readLock(const readLock&) {}; // can not copy
00466 }; // class ibis::query::readLock
00467 
00472 class ibis::query::writeLock {
00473 public:
00474     writeLock(const query* q, const char* m) : theQuery(q), mesg(m) {
00475         theQuery->gainWriteAccess(m);
00476     };
00477     ~writeLock() {theQuery->releaseAccess(mesg);}
00478 private:
00479     const query* theQuery;
00480     const char* mesg;
00481 
00482     writeLock() {}; // no default constructor
00483     writeLock(const writeLock&) {}; // can not copy
00484 }; // ibis::query::writeLock
00485 #endif // IBIS_QUERY_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive