00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00025 #define XAPIAN_INCLUDED_ENQUIRE_H
00026
00027 #include <string>
00028 #include <time.h>
00029
00030 #include <xapian/base.h>
00031 #include <xapian/error.h>
00032 #include <xapian/types.h>
00033
00034 namespace Xapian {
00035
00036 class Database;
00037 class Document;
00038 class ErrorHandler;
00039 class MSetIterator;
00040 class Query;
00041 class TermIterator;
00042 class Weight;
00043
00047 class MSet {
00048 public:
00049 class Internal;
00051 Xapian::Internal::RefCntPtr<Internal> internal;
00052
00054 explicit MSet(MSet::Internal * internal_);
00055
00057 MSet();
00058
00060 ~MSet();
00061
00063 MSet(const MSet & other);
00064
00066 void operator=(const MSet &other);
00067
00083 void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00084
00087 void fetch(const MSetIterator &item) const;
00088
00091 void fetch() const;
00092
00097 Xapian::percent convert_to_percent(Xapian::weight wt) const;
00098
00100 Xapian::percent convert_to_percent(const MSetIterator &it) const;
00101
00109 Xapian::doccount get_termfreq(const std::string &tname) const;
00110
00118 Xapian::weight get_termweight(const std::string &tname) const;
00119
00127 Xapian::doccount get_firstitem() const;
00128
00138 Xapian::doccount get_matches_lower_bound() const;
00139
00152 Xapian::doccount get_matches_estimated() const;
00153
00163 Xapian::doccount get_matches_upper_bound() const;
00164
00170 Xapian::weight get_max_possible() const;
00171
00185 Xapian::weight get_max_attained() const;
00186
00188 Xapian::doccount size() const;
00189
00191 Xapian::doccount max_size() const { return size(); }
00192
00194 bool empty() const;
00195
00197 void swap(MSet & other);
00198
00200 MSetIterator begin() const;
00201
00203 MSetIterator end() const;
00204
00206 MSetIterator back() const;
00207
00217 MSetIterator operator[](Xapian::doccount i) const;
00218
00220
00221 typedef MSetIterator value_type;
00222 typedef MSetIterator iterator;
00223 typedef MSetIterator const_iterator;
00224 typedef MSetIterator & reference;
00225 typedef MSetIterator & const_reference;
00226 typedef MSetIterator * pointer;
00227 typedef Xapian::doccount_diff difference_type;
00228 typedef Xapian::doccount size_type;
00230
00234 std::string get_description() const;
00235 };
00236
00240 class MSetIterator {
00241 private:
00242 friend class MSet;
00243 friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00244 friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00245
00246 MSetIterator(Xapian::doccount index_, const MSet & mset_)
00247 : index(index_), mset(mset_) { }
00248
00249 Xapian::doccount index;
00250 MSet mset;
00251
00252 public:
00256 MSetIterator() : index(0), mset() { }
00257
00258 ~MSetIterator() { }
00259
00261 MSetIterator(const MSetIterator &other) {
00262 index = other.index;
00263 mset = other.mset;
00264 }
00265
00267 void operator=(const MSetIterator &other) {
00268 index = other.index;
00269 mset = other.mset;
00270 }
00271
00273 MSetIterator & operator++() {
00274 ++index;
00275 return *this;
00276 }
00277
00279 MSetIterator operator++(int) {
00280 MSetIterator tmp = *this;
00281 ++index;
00282 return tmp;
00283 }
00284
00286 MSetIterator & operator--() {
00287 --index;
00288 return *this;
00289 }
00290
00292 MSetIterator operator--(int) {
00293 MSetIterator tmp = *this;
00294 --index;
00295 return tmp;
00296 }
00297
00299 Xapian::docid operator*() const;
00300
00319 Xapian::Document get_document() const;
00320
00327 Xapian::doccount get_rank() const {
00328 return mset.get_firstitem() + index;
00329 }
00330
00332 Xapian::weight get_weight() const;
00333
00350 Xapian::doccount get_collapse_count() const;
00351
00357 Xapian::percent get_percent() const;
00358
00362 std::string get_description() const;
00363
00365
00366 typedef std::bidirectional_iterator_tag iterator_category;
00367 typedef Xapian::docid value_type;
00368 typedef Xapian::doccount_diff difference_type;
00369 typedef Xapian::docid * pointer;
00370 typedef Xapian::docid & reference;
00372 };
00373
00374 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00375 {
00376 return (a.index == b.index);
00377 }
00378
00379 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00380 {
00381 return (a.index != b.index);
00382 }
00383
00384 class ESetIterator;
00385
00390 class ESet {
00391 public:
00392 class Internal;
00394 Xapian::Internal::RefCntPtr<Internal> internal;
00395
00397 ESet();
00398
00400 ~ESet();
00401
00403 ESet(const ESet & other);
00404
00406 void operator=(const ESet &other);
00407
00412 Xapian::termcount get_ebound() const;
00413
00415 Xapian::termcount size() const;
00416
00418 Xapian::termcount max_size() const { return size(); }
00419
00421 bool empty() const;
00422
00424 void swap(ESet & other);
00425
00427 ESetIterator begin() const;
00428
00430 ESetIterator end() const;
00431
00433 ESetIterator back() const;
00434
00436 ESetIterator operator[](Xapian::termcount i) const;
00437
00442 std::string get_description() const;
00443 };
00444
00446 class ESetIterator {
00447 private:
00448 friend class ESet;
00449 friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00450 friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00451
00452 ESetIterator(Xapian::termcount index_, const ESet & eset_)
00453 : index(index_), eset(eset_) { }
00454
00455 Xapian::termcount index;
00456 ESet eset;
00457
00458 public:
00462 ESetIterator() : index(0), eset() { }
00463
00464 ~ESetIterator() { }
00465
00467 ESetIterator(const ESetIterator &other) {
00468 index = other.index;
00469 eset = other.eset;
00470 }
00471
00473 void operator=(const ESetIterator &other) {
00474 index = other.index;
00475 eset = other.eset;
00476 }
00477
00479 ESetIterator & operator++() {
00480 ++index;
00481 return *this;
00482 }
00483
00485 ESetIterator operator++(int) {
00486 ESetIterator tmp = *this;
00487 ++index;
00488 return tmp;
00489 }
00490
00492 ESetIterator & operator--() {
00493 --index;
00494 return *this;
00495 }
00496
00498 ESetIterator operator--(int) {
00499 ESetIterator tmp = *this;
00500 --index;
00501 return tmp;
00502 }
00503
00505 const std::string & operator *() const;
00506
00508 Xapian::weight get_weight() const;
00509
00513 std::string get_description() const;
00514
00516
00517 typedef std::bidirectional_iterator_tag iterator_category;
00518 typedef std::string value_type;
00519 typedef Xapian::termcount_diff difference_type;
00520 typedef std::string * pointer;
00521 typedef std::string & reference;
00523 };
00524
00525 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00526 {
00527 return (a.index == b.index);
00528 }
00529
00530 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00531 {
00532 return (a.index != b.index);
00533 }
00534
00539 class RSet {
00540 public:
00542 class Internal;
00543
00545 Xapian::Internal::RefCntPtr<Internal> internal;
00546
00548 RSet(const RSet &rset);
00549
00551 void operator=(const RSet &rset);
00552
00554 RSet();
00555
00557 ~RSet();
00558
00560 Xapian::doccount size() const;
00561
00563 bool empty() const;
00564
00566 void add_document(Xapian::docid did);
00567
00569 void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00570
00572 void remove_document(Xapian::docid did);
00573
00575 void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00576
00578 bool contains(Xapian::docid did) const;
00579
00581 bool contains(const Xapian::MSetIterator & i) { return contains(*i); }
00582
00587 std::string get_description() const;
00588 };
00589
00592 class MatchDecider {
00593 public:
00596 virtual int operator()(const Xapian::Document &doc) const = 0;
00597
00599 virtual ~MatchDecider() {}
00600 };
00601
/** Abstract functor that is given a term name and returns an int verdict.
 *
 *  Subclass it and pass a pointer to Enquire::get_eset().
 */
class ExpandDecider {
  public:
    /** Decide about the term @a tname.
     *
     *  NOTE(review): presumably a non-zero result accepts the term and zero
     *  rejects it -- confirm against the expand implementation.
     */
    virtual int operator()(const std::string & tname) const = 0;

    /// Virtual destructor, so subclasses can be deleted through this base.
    virtual ~ExpandDecider() {}
};
00613
00624 class Enquire {
00625 private:
00627 Enquire(const Enquire &);
00628
00630 void operator=(const Enquire &);
00631
00632 public:
00633 class Internal;
00635 Xapian::Internal::RefCntPtr<Internal> internal;
00636
00652 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00653
00656 ~Enquire();
00657
00664 void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00665
00672 const Xapian::Query & get_query();
00673
00680 void set_weighting_scheme(const Weight &weight_);
00681
00708 void set_collapse_key(Xapian::valueno collapse_key);
00709
00710 typedef enum {
00711 ASCENDING = 1,
00712 DESCENDING = 0,
00713 DONT_CARE = 2
00714 } docid_order;
00715
00739 void set_docid_order(docid_order order);
00740
00747 XAPIAN_DEPRECATED(void set_sort_forward(bool sort_forward));
00748
00767 void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00768
00785 XAPIAN_DEPRECATED(void set_sorting(Xapian::valueno sort_key, int sort_bands,
00786 bool sort_by_relevance = false));
00787
00790 void set_sort_by_relevance();
00791
00802 void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
00803
00815 void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00816 bool ascending = true);
00817
00818
00819
00820
00821
00833 void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00834
00860 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00861 Xapian::doccount checkatleast = 0,
00862 const RSet * omrset = 0,
00863 const MatchDecider * mdecider = 0) const;
00864 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00865 const RSet * omrset,
00866 const MatchDecider * mdecider = 0) const {
00867 return get_mset(first, maxitems, 0, omrset, mdecider);
00868 }
00869
00870 static const int include_query_terms = 1;
00871 static const int use_exact_termfreq = 2;
00894 ESet get_eset(Xapian::termcount maxitems,
00895 const RSet & omrset,
00896 int flags = 0,
00897 double k = 1.0,
00898 const Xapian::ExpandDecider * edecider = 0) const;
00899
00913 inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00914 const Xapian::ExpandDecider * edecider) const {
00915 return get_eset(maxitems, omrset, 0, 1.0, edecider);
00916 }
00917
00946 TermIterator get_matching_terms_begin(Xapian::docid did) const;
00947
00949 TermIterator get_matching_terms_end(Xapian::docid did) const;
00950
00973 TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00974
00976 TermIterator get_matching_terms_end(const MSetIterator &it) const;
00977
00984 void register_match_decider(const std::string &name,
00985 const MatchDecider *mdecider = NULL);
00986
00990 std::string get_description() const;
00991 };
00992
00993 }
00994
00995 class SocketServer;
00996
00997 namespace Xapian {
00998
01000 class Weight {
01001 friend class Enquire;
01002 friend class ::SocketServer;
01003 public:
01004 class Internal;
01005 protected:
01006 Weight(const Weight &);
01007 private:
01008 void operator=(Weight &);
01009
01019 virtual Weight * clone() const = 0;
01020
01021 protected:
01022 const Internal * internal;
01023 Xapian::doclength querysize;
01024 Xapian::termcount wqf;
01025 std::string tname;
01026
01027 public:
01028 Weight() { }
01029 virtual ~Weight() { }
01030
01043 Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01044 Xapian::termcount wqf_, std::string tname_) const {
01045 Weight * wt = clone();
01046 wt->internal = internal_;
01047 wt->querysize = querysize_;
01048 wt->wqf = wqf_;
01049 wt->tname = tname_;
01050 return wt;
01051 }
01052
01057 virtual std::string name() const = 0;
01058
01060 virtual std::string serialise() const = 0;
01061
01063 virtual Weight * unserialise(const std::string &s) const = 0;
01064
01072 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01073 Xapian::doclength len) const = 0;
01074
01080 virtual Xapian::weight get_maxpart() const = 0;
01081
01090 virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01091
01095 virtual Xapian::weight get_maxextra() const = 0;
01096
01098 virtual bool get_sumpart_needs_doclength() const { return true; }
01099 };
01100
01102 class BoolWeight : public Weight {
01103 public:
01104 BoolWeight * clone() const {
01105 return new BoolWeight;
01106 }
01107 BoolWeight() { }
01108 ~BoolWeight() { }
01109 std::string name() const { return "Bool"; }
01110 std::string serialise() const { return ""; }
01111 BoolWeight * unserialise(const std::string & ) const {
01112 return new BoolWeight;
01113 }
01114 Xapian::weight get_sumpart(Xapian::termcount , Xapian::doclength ) const { return 0; }
01115 Xapian::weight get_maxpart() const { return 0; }
01116
01117 Xapian::weight get_sumextra(Xapian::doclength ) const { return 0; }
01118 Xapian::weight get_maxextra() const { return 0; }
01119
01120 bool get_sumpart_needs_doclength() const { return false; }
01121 };
01122
01135 class BM25Weight : public Weight {
01136 private:
01137 mutable Xapian::weight termweight;
01138 mutable Xapian::doclength lenpart;
01139
01140 double k1, k2, k3, b;
01141 Xapian::doclength min_normlen;
01142
01143 mutable bool weight_calculated;
01144
01145 void calc_termweight() const;
01146
01147 public:
01166 BM25Weight(double k1_, double k2_, double k3_, double b_,
01167 double min_normlen_)
01168 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
01169 weight_calculated(false)
01170 {
01171 if (k1 < 0) k1 = 0;
01172 if (k2 < 0) k2 = 0;
01173 if (k3 < 0) k3 = 0;
01174 if (b < 0) b = 0; else if (b > 1) b = 1;
01175 }
01176 BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01177 weight_calculated(false) { }
01178
01179 BM25Weight * clone() const;
01180 ~BM25Weight() { }
01181 std::string name() const;
01182 std::string serialise() const;
01183 BM25Weight * unserialise(const std::string & s) const;
01184 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01185 Xapian::weight get_maxpart() const;
01186
01187 Xapian::weight get_sumextra(Xapian::doclength len) const;
01188 Xapian::weight get_maxextra() const;
01189
01190 bool get_sumpart_needs_doclength() const;
01191 };
01192
01206 class TradWeight : public Weight {
01207 private:
01208 mutable Xapian::weight termweight;
01209 mutable Xapian::doclength lenpart;
01210
01211 double param_k;
01212
01213 mutable bool weight_calculated;
01214
01215 void calc_termweight() const;
01216
01217 public:
01225 explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01226 if (param_k < 0) param_k = 0;
01227 }
01228
01229 TradWeight() : param_k(1.0), weight_calculated(false) { }
01230
01231 TradWeight * clone() const;
01232 ~TradWeight() { }
01233 std::string name() const;
01234 std::string serialise() const;
01235 TradWeight * unserialise(const std::string & s) const;
01236
01237 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01238 Xapian::weight get_maxpart() const;
01239
01240 Xapian::weight get_sumextra(Xapian::doclength len) const;
01241 Xapian::weight get_maxextra() const;
01242
01243 bool get_sumpart_needs_doclength() const;
01244 };
01245
01246 }
01247
01248 #endif