1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 r"""searchconnection.py: A connection to the search engine for searching.
19
20 """
21 __docformat__ = "restructuredtext en"
22
23 import _checkxapian
24 import os as _os
25 import cPickle as _cPickle
26 import math
27
28 import xapian as _xapian
29 from datastructures import *
30 from fieldactions import *
31 import fieldmappings as _fieldmappings
32 import highlight as _highlight
33 import errors as _errors
34 import indexerconnection as _indexerconnection
35 import re as _re
36 from replaylog import log as _log
37
39 """A result from a search.
40
41 As well as being a ProcessedDocument representing the document in the
42 database, the result has several members which may be used to get
43 information about how well the document matches the search:
44
45 - `rank`: The rank of the document in the search results, starting at 0
46 (ie, 0 is the "top" result, 1 is the second result, etc).
47
48 - `weight`: A floating point number indicating the weight of the result
49 document. The value is only meaningful relative to other results for a
50 given search - a different search, or the same search with a different
51 database, may give an entirely different scale to the weights. This
52 should not usually be displayed to users, but may be useful if trying to
53 perform advanced reweighting operations on search results.
54
55 - `percent`: A percentage value for the weight of a document. This is
56 just a rescaled form of the `weight` member. It doesn't represent any
57 kind of probability value; the only real meaning of the numbers is that,
58 within a single set of results, a document with a higher percentage
59 corresponds to a better match. Because the percentage doesn't really
60 represent a probability, or a confidence value, it is probably unhelpful
61 to display it to most users, since they tend to place an over emphasis
62 on its meaning. However, it is included because it may be useful
63 occasionally.
64
65 """
def __init__(self, msetitem, results):
    """Initialise a search result.

    - `msetitem`: the Xapian MSet item for this hit (supplies the
      document, rank, weight and percent).
    - `results`: the SearchResults object which produced this hit.

    """
    ProcessedDocument.__init__(self, results._fieldmappings, msetitem.document)
    self.rank = msetitem.rank
    self.weight = msetitem.weight
    self.percent = msetitem.percent
    # Keep a reference back to the result set so that methods such as
    # summarise() can reach the connection and the query which was run.
    self._results = results
72
74 """Get the language that should be used for a given field.
75
76 Raises a KeyError if the field is not known.
77
78 """
79 actions = self._results._conn._field_actions[field]._actions
80 for action, kwargslist in actions.iteritems():
81 if action == FieldActions.INDEX_FREETEXT:
82 for kwargs in kwargslist:
83 try:
84 return kwargs['language']
85 except KeyError:
86 pass
87 return 'none'
88
def summarise(self, field, maxlen=600, hl=('<b>', '</b>'), query=None):
    """Return a summarised version of the field specified.

    This will return a summary of the contents of the field stored in the
    search result, with words which match the query highlighted.

    The maximum length of the summary (in characters) may be set using the
    maxlen parameter.

    The return value will be a string holding the summary, with
    highlighting applied.  If there are multiple instances of the field in
    the document, the instances will be joined with a newline character.

    To turn off highlighting, set hl to None.  Each highlight will consist
    of the first entry in the `hl` list being placed before the word, and
    the second entry in the `hl` list being placed after the word.

    Any XML or HTML style markup tags in the field will be stripped before
    the summarisation algorithm is applied.

    If `query` is supplied, it should contain a Query object, as returned
    from SearchConnection.query_parse() or related methods, which will be
    used as the basis of the summarisation and highlighting rather than the
    query which was used for the search.

    Raises KeyError if the field is not known.

    """
    highlighter = _highlight.Highlighter(language_code=self._get_language(field))
    field = self.data[field]
    # Multiple field instances are summarised as a single newline-joined text.
    text = '\n'.join(field)
    if query is None:
        query = self._results._query
    return highlighter.makeSample(text, query, maxlen, hl)
124
def highlight(self, field, hl=('<b>', '</b>'), strip_tags=False, query=None):
    """Return a highlighted version of the field specified.

    This will return all the contents of the field stored in the search
    result, with words which match the query highlighted.

    The return value will be a list of strings (corresponding to the list
    of strings which is the raw field data).

    Each highlight will consist of the first entry in the `hl` list being
    placed before the word, and the second entry in the `hl` list being
    placed after the word.

    If `strip_tags` is True, any XML or HTML style markup tags in the field
    will be stripped before highlighting is applied.

    If `query` is supplied, it should contain a Query object, as returned
    from SearchConnection.query_parse() or related methods, which will be
    used as the basis of the summarisation and highlighting rather than the
    query which was used for the search.

    Raises KeyError if the field is not known.

    """
    highlighter = _highlight.Highlighter(language_code=self._get_language(field))
    field = self.data[field]
    results = []
    if query is None:
        query = self._results._query
    # Highlight each stored instance of the field separately.
    for text in field:
        results.append(highlighter.highlight(text, query, hl, strip_tags))
    return results
157
def __repr__(self):
    """Return a debug representation of this search result."""
    return ('<SearchResult(rank=%d, id=%r, data=%r)>' %
            (self.rank, self.id, self.data))
161
162
164 """An iterator over a set of results from a search.
165
166 """
def __init__(self, results, order):
    """Initialise the iterator.

    - `results`: the SearchResults object to iterate over.
    - `order`: None to iterate the mset in its natural order, or a
      sequence of mset indices giving a custom iteration order.

    """
    self._results = results
    self._order = order
    if self._order is None:
        self._iter = iter(results._mset)
    else:
        self._iter = iter(self._order)
174
def next(self):
    """Return the next SearchResult, raising StopIteration at the end."""
    if self._order is None:
        # Iterating the mset directly yields MSetItems.
        msetitem = self._iter.next()
    else:
        # Iterating a reordering: look the hit up by its mset index.
        index = self._iter.next()
        msetitem = self._results._mset.get_hit(index)
    return SearchResult(msetitem, self._results)
182
183
185 """Get the significant digits of value which are constrained by the
186 (inclusive) lower and upper bounds.
187
188 If there are no significant digits which are definitely within the
189 bounds, exactly one significant digit will be returned in the result.
190
191 >>> _get_significant_digits(15,15,15)
192 15
193 >>> _get_significant_digits(15,15,17)
194 20
195 >>> _get_significant_digits(4777,208,6000)
196 5000
197 >>> _get_significant_digits(4777,4755,4790)
198 4800
199 >>> _get_significant_digits(4707,4695,4710)
200 4700
201 >>> _get_significant_digits(4719,4717,4727)
202 4720
203 >>> _get_significant_digits(0,0,0)
204 0
205 >>> _get_significant_digits(9,9,10)
206 9
207 >>> _get_significant_digits(9,9,100)
208 9
209
210 """
211 assert(lower <= value)
212 assert(value <= upper)
213 diff = upper - lower
214
215
216
217 if diff == 0:
218 pos_pow_10 = 1
219 else:
220 pos_pow_10 = int(10 ** math.ceil(math.log10(diff)))
221
222
223
224 if pos_pow_10 > value:
225 if value == 0:
226 pos_pow_10 = 1
227 else:
228 pos_pow_10 = int(10 ** math.floor(math.log10(value)))
229
230
231 return ((value + pos_pow_10 // 2) // pos_pow_10) * pos_pow_10
232
234 """A set of results of a search.
235
236 """
def __init__(self, conn, enq, query, mset, fieldmappings, tagspy,
             tagfields, facetspy, facetfields, facethierarchy,
             facetassocs):
    """Initialise the result set with the state of the search that
    produced it: the connection, the Xapian Enquire object, the query,
    the mset, and the tag/facet collection state.

    """
    self._conn = conn
    self._enq = enq
    self._query = query
    self._mset = mset
    # _mset_order is None until one of the _reorder_* methods runs.
    self._mset_order = None
    self._fieldmappings = fieldmappings
    self._tagspy = tagspy
    if tagfields is None:
        self._tagfields = None
    else:
        self._tagfields = set(tagfields)
    self._facetspy = facetspy
    self._facetfields = facetfields
    self._facethierarchy = facethierarchy
    self._facetassocs = facetassocs
    # Maps fieldname -> None once numeric facet ranges have been built
    # (see get_suggested_facets).
    self._numeric_ranges_built = {}
256
def _cluster(self, num_clusters, maxdocs, fields=None):
    """Cluster results based on similarity.

    Note: this method is experimental, and will probably disappear or
    change in the future.

    The number of clusters is specified by num_clusters: unless there are
    too few results, there will be exactly this number of clusters in the
    result.

    Returns a dict mapping cluster id (0-based, in order of first
    appearance) to the list of ranks of the hits in that cluster.

    """
    clusterer = _xapian.ClusterSingleLink()
    xapclusters = _xapian.ClusterAssignments()
    docsim = _xapian.DocSimCosine()
    source = _xapian.MSetDocumentSource(self._mset, maxdocs)

    if fields is None:
        clusterer.cluster(self._conn._index, xapclusters, docsim, source, num_clusters)
    else:
        # Restrict the terms considered for similarity to those fields.
        decider = self._make_expand_decider(fields)
        clusterer.cluster(self._conn._index, xapclusters, docsim, source, decider, num_clusters)

    # Renumber xapian's cluster ids to be contiguous from 0, in order of
    # first appearance in the mset.
    newid = 0
    idmap = {}
    clusters = {}
    for item in self._mset:
        docid = item.docid
        clusterid = xapclusters.cluster(docid)
        if clusterid not in idmap:
            idmap[clusterid] = newid
            newid += 1
        clusterid = idmap[clusterid]
        if clusterid not in clusters:
            clusters[clusterid] = []
        clusters[clusterid].append(item.rank)
    return clusters
293
295 """Reorder the mset based on some clusters.
296
297 """
298 if self.startrank != 0:
299 raise _errors.SearchError("startrank must be zero to reorder by clusters")
300 reordered = False
301 tophits = []
302 nottophits = []
303
304 clusterstarts = dict(((c[0], None) for c in clusters.itervalues()))
305 for i in xrange(self.endrank):
306 if i in clusterstarts:
307 tophits.append(i)
308 else:
309 nottophits.append(i)
310 self._mset_order = tophits
311 self._mset_order.extend(nottophits)
312
314 """Make an expand decider which accepts only terms in the specified
315 field.
316
317 """
318 prefixes = {}
319 if isinstance(fields, basestring):
320 fields = [fields]
321 for field in fields:
322 try:
323 actions = self._conn._field_actions[field]._actions
324 except KeyError:
325 continue
326 for action, kwargslist in actions.iteritems():
327 if action == FieldActions.INDEX_FREETEXT:
328 prefix = self._conn._field_mappings.get_prefix(field)
329 prefixes[prefix] = None
330 prefixes['Z' + prefix] = None
331 if action in (FieldActions.INDEX_EXACT,
332 FieldActions.TAG,
333 FieldActions.FACET,):
334 prefix = self._conn._field_mappings.get_prefix(field)
335 prefixes[prefix] = None
336 prefix_re = _re.compile('|'.join([_re.escape(x) + '[^A-Z]' for x in prefixes.keys()]))
337 class decider(_xapian.ExpandDecider):
338 def __call__(self, term):
339 return prefix_re.match(term) is not None
340 return decider()
341
344 """Reorder results based on similarity.
345
346 The top `count` documents will be chosen such that they are relatively
347 dissimilar. `maxcount` documents will be considered for moving around,
348 and `max_similarity` is a value between 0 and 1 indicating the maximum
349 similarity to the previous document before a document is moved down the
350 result set.
351
352 Note: this method is experimental, and will probably disappear or
353 change in the future.
354
355 """
356 if self.startrank != 0:
357 raise _errors.SearchError("startrank must be zero to reorder by similiarity")
358 ds = _xapian.DocSimCosine()
359 ds.set_termfreqsource(_xapian.DatabaseTermFreqSource(self._conn._index))
360
361 if fields is not None:
362 ds.set_expand_decider(self._make_expand_decider(fields))
363
364 tophits = []
365 nottophits = []
366 full = False
367 reordered = False
368
369 sim_count = 0
370 new_order = []
371 end = min(self.endrank, maxcount)
372 for i in xrange(end):
373 if full:
374 new_order.append(i)
375 continue
376 hit = self._mset.get_hit(i)
377 if len(tophits) == 0:
378 tophits.append(hit)
379 continue
380
381
382 maxsim = 0.0
383 for tophit in tophits[-1:]:
384 sim_count += 1
385 sim = ds.similarity(hit.document, tophit.document)
386 if sim > maxsim:
387 maxsim = sim
388
389
390 if maxsim < max_similarity:
391 tophits.append(hit)
392 else:
393 nottophits.append(hit)
394 reordered = True
395
396
397 if len(tophits) >= count:
398 for hit in tophits:
399 new_order.append(hit.rank)
400 for hit in nottophits:
401 new_order.append(hit.rank)
402 full = True
403 if not full:
404 for hit in tophits:
405 new_order.append(hit.rank)
406 for hit in nottophits:
407 new_order.append(hit.rank)
408 if end != self.endrank:
409 new_order.extend(range(end, self.endrank))
410 assert len(new_order) == self.endrank
411 if reordered:
412 self._mset_order = new_order
413 else:
414 assert new_order == range(self.endrank)
415
417 return ("<SearchResults(startrank=%d, "
418 "endrank=%d, "
419 "more_matches=%s, "
420 "matches_lower_bound=%d, "
421 "matches_upper_bound=%d, "
422 "matches_estimated=%d, "
423 "estimate_is_exact=%s)>" %
424 (
425 self.startrank,
426 self.endrank,
427 self.more_matches,
428 self.matches_lower_bound,
429 self.matches_upper_bound,
430 self.matches_estimated,
431 self.estimate_is_exact,
432 ))
433
def _get_more_matches(self):
    # NOTE(review): the getter body was missing from this copy of the file.
    # Reconstructed as "the lower bound on the match count extends past the
    # last rank held in this result set" - confirm against upstream xappy.
    return self.matches_lower_bound > self.endrank
more_matches = property(_get_more_matches, doc=
"""Check whether there are further matches after those in this result set.

""")
442
def _get_startrank(self):
    return self._mset.get_firstitem()
startrank = property(_get_startrank, doc=
"""Get the rank of the first item in the search results.

This corresponds to the "startrank" parameter passed to the search() method.

""")

def _get_endrank(self):
    return self._mset.get_firstitem() + len(self._mset)
endrank = property(_get_endrank, doc=
"""Get the rank of the item after the end of the search results.

If there are sufficient results in the index, this corresponds to the
"endrank" parameter passed to the search() method.

""")

def _get_lower_bound(self):
    return self._mset.get_matches_lower_bound()
matches_lower_bound = property(_get_lower_bound, doc=
"""Get a lower bound on the total number of matching documents.

""")

def _get_upper_bound(self):
    return self._mset.get_matches_upper_bound()
matches_upper_bound = property(_get_upper_bound, doc=
"""Get an upper bound on the total number of matching documents.

""")

def _get_human_readable_estimate(self):
    lower = self._mset.get_matches_lower_bound()
    upper = self._mset.get_matches_upper_bound()
    est = self._mset.get_matches_estimated()
    return _get_significant_digits(est, lower, upper)
matches_human_readable_estimate = property(_get_human_readable_estimate,
                                           doc=
"""Get a human readable estimate of the number of matching documents.

This consists of the value returned by the "matches_estimated" property,
rounded to an appropriate number of significant digits (as determined by
the values of the "matches_lower_bound" and "matches_upper_bound"
properties).

""")

def _get_estimated(self):
    return self._mset.get_matches_estimated()
matches_estimated = property(_get_estimated, doc=
"""Get an estimate for the total number of matching documents.

""")

def _estimate_is_exact(self):
    return self._mset.get_matches_lower_bound() == \
           self._mset.get_matches_upper_bound()
estimate_is_exact = property(_estimate_is_exact, doc=
"""Check whether the estimated number of matching documents is exact.

If this returns true, the estimate given by the `matches_estimated`
property is guaranteed to be correct.

If this returns false, it is possible that the actual number of matching
documents is different from the number given by the `matches_estimated`
property.

""")
513
515 """Get the hit with a given index.
516
517 """
518 if self._mset_order is None:
519 msetitem = self._mset.get_hit(index)
520 else:
521 msetitem = self._mset.get_hit(self._mset_order[index])
522 return SearchResult(msetitem, self)
523 __getitem__ = get_hit
524
526 """Get an iterator over the hits in the search result.
527
528 The iterator returns the results in increasing order of rank.
529
530 """
531 return SearchResultIter(self, self._mset_order)
532
534 """Get the number of hits in the search result.
535
536 Note that this is not (usually) the number of matching documents for
537 the search. If startrank is non-zero, it's not even the rank of the
538 last document in the search result. It's simply the number of hits
539 stored in the search result.
540
541 It is, however, the number of items returned by the iterator produced
542 by calling iter() on this SearchResults object.
543
544 """
545 return len(self._mset)
546
565
def get_suggested_facets(self, maxfacets=5, desired_num_of_categories=7,
                         required_facets=None):
    """Get a suggested set of facets, to present to the user.

    This returns a list, in descending order of the usefulness of the
    facet, in which each item is a tuple holding:

    - fieldname of facet.
    - sequence of 2-tuples holding the suggested values or ranges for that
      field:

      For facets of type 'string', the first item in the 2-tuple will
      simply be the string supplied when the facet value was added to its
      document.  For facets of type 'float', it will be a 2-tuple, holding
      floats giving the start and end of the suggested value range.

      The second item in the 2-tuple will be the frequency of the facet
      value or range in the result set.

    If required_facets is not None, it must be a field name, or a sequence
    of field names.  Any field names mentioned in required_facets will be
    returned if there are any facet values at all in the search results for
    that field.  The facet will only be omitted if there are no facet
    values at all for the field.

    The value of maxfacets will be respected as far as possible; the
    exception is that if there are too many fields listed in
    required_facets with at least one value in the search results, extra
    facets will be returned (ie, obeying the required_facets parameter is
    considered more important than the maxfacets parameter).

    If facet_hierarchy was indicated when search() was called, and the
    query included facets, then only subfacets of those query facets and
    top-level facets will be included in the returned list.  Furthermore
    top-level facets will only be returned if there are remaining places
    in the list after it has been filled with subfacets.  Note that
    required_facets is still respected regardless of the facet hierarchy.

    If a query type was specified when search() was called, and the query
    included facets, then facets with an association of Never to the
    query type are never returned, even if mentioned in required_facets.
    Facets with an association of Preferred are listed before others in
    the returned list.

    """
    if 'facets' in _checkxapian.missing_features:
        # Bug fix: this module imports the errors module as _errors, so the
        # unprefixed name raised a NameError instead of a SearchError.
        raise _errors.SearchError("Facets unsupported with this release of xapian")
    if self._facetspy is None:
        raise _errors.SearchError("Facet selection wasn't enabled when the search was run")
    if isinstance(required_facets, basestring):
        required_facets = [required_facets]
    scores = []
    facettypes = {}
    for field, slot, kwargslist in self._facetfields:
        type = None
        for kwargs in kwargslist:
            type = kwargs.get('type', None)
            if type is not None: break
        if type is None: type = 'string'

        if type == 'float':
            # Build the numeric ranges for this slot once per result set.
            if field not in self._numeric_ranges_built:
                self._facetspy.build_numeric_ranges(slot, desired_num_of_categories)
                self._numeric_ranges_built[field] = None
        facettypes[field] = type
        score = self._facetspy.score_categorisation(slot, desired_num_of_categories)
        scores.append((score, field, slot))

    # Prepend sort keys so that subfacets of query facets, and facets
    # preferred for the query type, sort ahead of the others.  (tuple[-2]
    # is the field name in each scores tuple.)
    if self._facethierarchy:
        scores = [(tuple[-2] not in self._facethierarchy,) + tuple for tuple in scores]
    if self._facetassocs:
        preferred = _indexerconnection.IndexerConnection.FacetQueryType_Preferred
        scores = [(self._facetassocs.get(tuple[-2]) != preferred,) + tuple for tuple in scores]
    scores.sort()
    # Strip off whichever extra sort keys were prepended above.
    if self._facethierarchy:
        index = 1
    else:
        index = 0
    if self._facetassocs:
        index += 1
    if index > 0:
        scores = [tuple[index:] for tuple in scores]

    results = []
    required_results = []
    for score, field, slot in scores:
        # Check if the facet is required.
        required = False
        if required_facets is not None:
            required = field in required_facets

        # If we've got enough facets, and the current field isn't
        # required, skip it.
        if not required and len(results) + len(required_results) >= maxfacets:
            continue

        # Get the values.
        values = self._facetspy.get_values_as_dict(slot)
        if field in self._numeric_ranges_built:
            if '' in values:
                del values['']

        # Required facets must occur at least once; other facets must
        # occur at least twice to be worth suggesting.
        if required:
            if len(values) < 1:
                continue
        else:
            if len(values) <= 1:
                continue

        newvalues = []
        if facettypes[field] == 'float':
            # Numeric range values are stored as two serialised floats
            # concatenated (9 bytes each); unserialise them back to a pair.
            for value, frequency in values.iteritems():
                if len(value) <= 9:
                    value1 = _log(_xapian.sortable_unserialise, value)
                    value2 = value1
                else:
                    value1 = _log(_xapian.sortable_unserialise, value[:9])
                    value2 = _log(_xapian.sortable_unserialise, value[9:])
                newvalues.append(((value1, value2), frequency))
        else:
            for value, frequency in values.iteritems():
                newvalues.append((value, frequency))

        newvalues.sort()
        if required:
            required_results.append((score, field, newvalues))
        else:
            results.append((score, field, newvalues))

    # Make room for the required facets, which are always included.
    maxfacets = maxfacets - len(required_results)
    if maxfacets <= 0:
        results = required_results
    else:
        results = results[:maxfacets]
        results.extend(required_results)
        results.sort()

    # Drop the scores; they are not meaningful to callers.
    results = [(field, newvalues) for (score, field, newvalues) in results]
    return results
715
716
718 """A connection to the search engine for searching.
719
720 The connection will access a view of the database.
721
722 """
# Base QueryParser flag sets used when parsing user queries.
_qp_flags_base = _xapian.QueryParser.FLAG_LOVEHATE
_qp_flags_phrase = _xapian.QueryParser.FLAG_PHRASE
_qp_flags_synonym = (_xapian.QueryParser.FLAG_AUTO_SYNONYMS |
                     _xapian.QueryParser.FLAG_AUTO_MULTIWORD_SYNONYMS)
_qp_flags_bool = _xapian.QueryParser.FLAG_BOOLEAN

# The xapian Database; None once the connection has been closed.
_index = None
730
732 """Create a new connection to the index for searching.
733
734 There may only an arbitrary number of search connections for a
735 particular database open at a given time (regardless of whether there
736 is a connection for indexing open as well).
737
738 If the database doesn't exist, an exception will be raised.
739
740 """
741 self._index = _log(_xapian.Database, indexpath)
742 self._indexpath = indexpath
743
744
745 self._load_config()
746
747 self._close_handlers = []
748
751
753 """Append a callback to the list of close handlers.
754
755 These will be called when the SearchConnection is closed. This happens
756 when the close() method is called, or when the SearchConnection object
757 is deleted. The callback will be passed two arguments: the path to the
758 SearchConnection object, and the userdata supplied to this method.
759
760 The handlers will be called in the order in which they were added.
761
762 The handlers will be called after the connection has been closed, so
763 cannot prevent it closing: their return value will be ignored. In
764 addition, they should not raise any exceptions.
765
766 """
767 self._close_handlers.append((handler, userdata))
768
770 """Get the sort type that should be used for a given field.
771
772 """
773 try:
774 actions = self._field_actions[field]._actions
775 except KeyError:
776 actions = {}
777 for action, kwargslist in actions.iteritems():
778 if action == FieldActions.SORT_AND_COLLAPSE:
779 for kwargs in kwargslist:
780 return kwargs['type']
781
783 """Load the configuration for the database.
784
785 """
786
787
788 assert self._index is not None
789
790 config_str = _log(self._index.get_metadata, '_xappy_config')
791 if len(config_str) == 0:
792 self._field_actions = {}
793 self._field_mappings = _fieldmappings.FieldMappings()
794 self._facet_hierarchy = {}
795 self._facet_query_table = {}
796 return
797
798 try:
799 (self._field_actions, mappings, self._facet_hierarchy, self._facet_query_table, self._next_docid) = _cPickle.loads(config_str)
800 except ValueError:
801
802 (self._field_actions, mappings, self._next_docid) = _cPickle.loads(config_str)
803 self._facet_hierarchy = {}
804 self._facet_query_table = {}
805 self._field_mappings = _fieldmappings.FieldMappings(mappings)
806
808 """Reopen the connection.
809
810 This updates the revision of the index which the connection references
811 to the latest flushed revision.
812
813 """
814 if self._index is None:
815 raise _errors.SearchError("SearchConnection has been closed")
816 self._index.reopen()
817
818 self._load_config()
819
821 """Close the connection to the database.
822
823 It is important to call this method before allowing the class to be
824 garbage collected to ensure that the connection is cleaned up promptly.
825
826 No other methods may be called on the connection after this has been
827 called. (It is permissible to call close() multiple times, but
828 only the first call will have any effect.)
829
830 If an exception occurs, the database will be closed, but changes since
831 the last call to flush may be lost.
832
833 """
834 if self._index is None:
835 return
836
837
838 indexpath = self._indexpath
839
840
841
842
843
844
845
846
847 self._index = None
848 self._indexpath = None
849 self._field_actions = None
850 self._field_mappings = None
851
852
853 for handler, userdata in self._close_handlers:
854 try:
855 handler(indexpath, userdata)
856 except Exception, e:
857 import sys, traceback
858 print >>sys.stderr, "WARNING: unhandled exception in handler called by SearchConnection.close(): %s" % traceback.format_exception_only(type(e), e)
859
861 """Count the number of documents in the database.
862
863 This count will include documents which have been added or removed but
864 not yet flushed().
865
866 """
867 if self._index is None:
868 raise _errors.SearchError("SearchConnection has been closed")
869 return self._index.get_doccount()
870
# Re-exported query operators, for use with query_composite().
OP_AND = _xapian.Query.OP_AND
OP_OR = _xapian.Query.OP_OR

def query_composite(self, operator, queries):
    """Build a composite query from a list of queries.

    The queries are combined with the supplied operator, which is either
    SearchConnection.OP_AND or SearchConnection.OP_OR.

    """
    if self._index is None:
        raise _errors.SearchError("SearchConnection has been closed")
    return _log(_xapian.Query, operator, list(queries))
883
885 """Build a query which modifies the weights of a subquery.
886
887 This produces a query which returns the same documents as the subquery,
888 and in the same order, but with the weights assigned to each document
889 multiplied by the value of "multiplier". "multiplier" may be any floating
890 point value, but negative values will be clipped to 0, since Xapian
891 doesn't support negative weights.
892
893 This can be useful when producing queries to be combined with
894 query_composite, because it allows the relative importance of parts of
895 the query to be adjusted.
896
897 """
898 return _log(_xapian.Query, _xapian.Query.OP_SCALE_WEIGHT, query, multiplier)
899
901 """Filter a query with another query.
902
903 If exclude is False (or not specified), documents will only match the
904 resulting query if they match the both the first and second query: the
905 results of the first query are "filtered" to only include those which
906 also match the second query.
907
908 If exclude is True, documents will only match the resulting query if
909 they match the first query, but not the second query: the results of
910 the first query are "filtered" to only include those which do not match
911 the second query.
912
913 Documents will always be weighted according to only the first query.
914
915 - `query`: The query to filter.
916 - `filter`: The filter to apply to the query.
917 - `exclude`: If True, the sense of the filter is reversed - only
918 documents which do not match the second query will be returned.
919
920 """
921 if self._index is None:
922 raise _errors.SearchError("SearchConnection has been closed")
923 if not isinstance(filter, _xapian.Query):
924 raise _errors.SearchError("Filter must be a Xapian Query object")
925 if exclude:
926 return _log(_xapian.Query, _xapian.Query.OP_AND_NOT, query, filter)
927 else:
928 return _log(_xapian.Query, _xapian.Query.OP_FILTER, query, filter)
929
931 """Adjust the weights of one query with a secondary query.
932
933 Documents will be returned from the resulting query if and only if they
934 match the primary query (specified by the "primary" parameter).
935 However, the weights (and hence, the relevance rankings) of the
936 documents will be adjusted by adding weights from the secondary query
937 (specified by the "secondary" parameter).
938
939 """
940 if self._index is None:
941 raise _errors.SearchError("SearchConnection has been closed")
942 return _log(_xapian.Query, _xapian.Query.OP_AND_MAYBE, primary, secondary)
943
945 """Create a query for a range search.
946
947 This creates a query which matches only those documents which have a
948 field value in the specified range.
949
950 Begin and end must be appropriate values for the field, according to
951 the 'type' parameter supplied to the SORTABLE action for the field.
952
953 The begin and end values are both inclusive - any documents with a
954 value equal to begin or end will be returned (unless end is less than
955 begin, in which case no documents will be returned).
956
957 Begin or end may be set to None in order to create an open-ended
958 range. (They may also both be set to None, which will generate a query
959 which matches all documents containing any value for the field.)
960
961 """
962 if self._index is None:
963 raise _errors.SearchError("SearchConnection has been closed")
964
965 if begin is None and end is None:
966
967 return _log(_xapian.Query, '')
968
969 try:
970 slot = self._field_mappings.get_slot(field, 'collsort')
971 except KeyError:
972
973 return _log(_xapian.Query)
974
975 sorttype = self._get_sort_type(field)
976 marshaller = SortableMarshaller(False)
977 fn = marshaller.get_marshall_function(field, sorttype)
978
979 if begin is not None:
980 begin = fn(field, begin)
981 if end is not None:
982 end = fn(field, end)
983
984 if begin is None:
985 return _log(_xapian.Query, _xapian.Query.OP_VALUE_LE, slot, end)
986
987 if end is None:
988 return _log(_xapian.Query, _xapian.Query.OP_VALUE_GE, slot, begin)
989
990 return _log(_xapian.Query, _xapian.Query.OP_VALUE_RANGE, slot, begin, end)
991
993 """Create a query for a facet value.
994
995 This creates a query which matches only those documents which have a
996 facet value in the specified range.
997
998 For a numeric range facet, val should be a tuple holding the start and
999 end of the range, or a comma separated string holding two floating
1000 point values. For other facets, val should be the value to look
1001 for.
1002
1003 The start and end values are both inclusive - any documents with a
1004 value equal to start or end will be returned (unless end is less than
1005 start, in which case no documents will be returned).
1006
1007 """
1008 if self._index is None:
1009 raise _errors.SearchError("SearchConnection has been closed")
1010 if 'facets' in _checkxapian.missing_features:
1011 raise errors.SearchError("Facets unsupported with this release of xapian")
1012
1013 try:
1014 actions = self._field_actions[field]._actions
1015 except KeyError:
1016 actions = {}
1017 facettype = None
1018 for action, kwargslist in actions.iteritems():
1019 if action == FieldActions.FACET:
1020 for kwargs in kwargslist:
1021 facettype = kwargs.get('type', None)
1022 if facettype is not None:
1023 break
1024 if facettype is not None:
1025 break
1026
1027 if facettype == 'float':
1028 if isinstance(val, basestring):
1029 val = [float(v) for v in val.split(',', 2)]
1030 assert(len(val) == 2)
1031 try:
1032 slot = self._field_mappings.get_slot(field, 'facet')
1033 except KeyError:
1034 return _log(_xapian.Query)
1035
1036 sorttype = 'float'
1037 marshaller = SortableMarshaller(False)
1038 fn = marshaller.get_marshall_function(field, sorttype)
1039 begin = fn(field, val[0])
1040 end = fn(field, val[1])
1041 return _log(_xapian.Query, _xapian.Query.OP_VALUE_RANGE, slot, begin, end)
1042 else:
1043 assert(facettype == 'string' or facettype is None)
1044 prefix = self._field_mappings.get_prefix(field)
1045 return _log(_xapian.Query, prefix + val.lower())
1046
1047
1050 """Prepare (and return) a query parser using the specified fields and
1051 operator.
1052
1053 """
1054 if self._index is None:
1055 raise _errors.SearchError("SearchConnection has been closed")
1056
1057 if isinstance(allow, basestring):
1058 allow = (allow, )
1059 if isinstance(deny, basestring):
1060 deny = (deny, )
1061 if allow is not None and len(allow) == 0:
1062 allow = None
1063 if deny is not None and len(deny) == 0:
1064 deny = None
1065 if allow is not None and deny is not None:
1066 raise _errors.SearchError("Cannot specify both `allow` and `deny` "
1067 "(got %r and %r)" % (allow, deny))
1068
1069 if isinstance(default_allow, basestring):
1070 default_allow = (default_allow, )
1071 if isinstance(default_deny, basestring):
1072 default_deny = (default_deny, )
1073 if default_allow is not None and len(default_allow) == 0:
1074 default_allow = None
1075 if default_deny is not None and len(default_deny) == 0:
1076 default_deny = None
1077 if default_allow is not None and default_deny is not None:
1078 raise _errors.SearchError("Cannot specify both `default_allow` and `default_deny` "
1079 "(got %r and %r)" % (default_allow, default_deny))
1080
1081 qp = _log(_xapian.QueryParser)
1082 qp.set_database(self._index)
1083 qp.set_default_op(default_op)
1084
1085 if allow is None:
1086 allow = [key for key in self._field_actions]
1087 if deny is not None:
1088 allow = [key for key in allow if key not in deny]
1089
1090 for field in allow:
1091 try:
1092 actions = self._field_actions[field]._actions
1093 except KeyError:
1094 actions = {}
1095 for action, kwargslist in actions.iteritems():
1096 if action == FieldActions.INDEX_EXACT:
1097
1098
1099 qp.add_prefix(field, self._field_mappings.get_prefix(field))
1100 if action == FieldActions.INDEX_FREETEXT:
1101 allow_field_specific = True
1102 for kwargs in kwargslist:
1103 allow_field_specific = allow_field_specific or kwargs.get('allow_field_specific', True)
1104 if not allow_field_specific:
1105 continue
1106 qp.add_prefix(field, self._field_mappings.get_prefix(field))
1107 for kwargs in kwargslist:
1108 try:
1109 lang = kwargs['language']
1110 my_stemmer = _log(_xapian.Stem, lang)
1111 qp.my_stemmer = my_stemmer
1112 qp.set_stemmer(my_stemmer)
1113 qp.set_stemming_strategy(qp.STEM_SOME)
1114 except KeyError:
1115 pass
1116
1117 if default_allow is not None or default_deny is not None:
1118 if default_allow is None:
1119 default_allow = [key for key in self._field_actions]
1120 if default_deny is not None:
1121 default_allow = [key for key in default_allow if key not in default_deny]
1122 for field in default_allow:
1123 try:
1124 actions = self._field_actions[field]._actions
1125 except KeyError:
1126 actions = {}
1127 for action, kwargslist in actions.iteritems():
1128 if action == FieldActions.INDEX_FREETEXT:
1129 qp.add_prefix('', self._field_mappings.get_prefix(field))
1130
1131
1132 return qp
1133
1135 """Parse a query, with an optional prefix.
1136
1137 """
1138 if prefix is None:
1139 return qp.parse_query(string, flags)
1140 else:
1141 return qp.parse_query(string, flags, prefix)
1142
1144 """Parse a query with various flags.
1145
1146 If the initial boolean pass fails, fall back to not using boolean
1147 operators.
1148
1149 """
1150 try:
1151 q1 = self._query_parse_with_prefix(qp, string,
1152 self._qp_flags_base |
1153 self._qp_flags_phrase |
1154 self._qp_flags_synonym |
1155 self._qp_flags_bool,
1156 prefix)
1157 except _xapian.QueryParserError, e:
1158
1159
1160 q1 = self._query_parse_with_prefix(qp, string,
1161 self._qp_flags_base |
1162 self._qp_flags_phrase |
1163 self._qp_flags_synonym,
1164 prefix)
1165
1166 qp.set_stemming_strategy(qp.STEM_NONE)
1167 try:
1168 q2 = self._query_parse_with_prefix(qp, string,
1169 self._qp_flags_base |
1170 self._qp_flags_bool,
1171 prefix)
1172 except _xapian.QueryParserError, e:
1173
1174
1175 q2 = self._query_parse_with_prefix(qp, string,
1176 self._qp_flags_base,
1177 prefix)
1178
1179 return _log(_xapian.Query, _xapian.Query.OP_AND_MAYBE, q1, q2)
1180
1181 - def query_parse(self, string, allow=None, deny=None, default_op=OP_AND,
1182 default_allow=None, default_deny=None):
1183 """Parse a query string.
1184
1185 This is intended for parsing queries entered by a user. If you wish to
1186 combine structured queries, it is generally better to use the other
1187 query building methods, such as `query_composite` (though you may wish
1188 to create parts of the query to combine with such methods with this
1189 method).
1190
1191 The string passed to this method can have various operators in it. In
1192 particular, it may contain field specifiers (ie, field names, followed
1193 by a colon, followed by some text to search for in that field). For
1194 example, if "author" is a field in the database, the search string
1195 could contain "author:richard", and this would be interpreted as
1196 "search for richard in the author field". By default, any fields in
1197 the database which are indexed with INDEX_EXACT or INDEX_FREETEXT will
1198 be available for field specific searching in this way - however, this
1199 can be modified using the "allow" or "deny" parameters, and also by the
1200 allow_field_specific tag on INDEX_FREETEXT fields.
1201
1202 Any text which isn't prefixed by a field specifier is used to search
1203 the "default set" of fields. By default, this is the full set of
1204 fields in the database which are indexed with INDEX_FREETEXT and for
1205 which the search_by_default flag set (ie, if the text is found in any
1206 of those fields, the query will match). However, this may be modified
1207 with the "default_allow" and "default_deny" parameters. (Note that
1208 fields which are indexed with INDEX_EXACT aren't allowed to be used in
1209 the default list of fields.)
1210
1211 - `string`: The string to parse.
1212 - `allow`: A list of fields to allow in the query.
1213 - `deny`: A list of fields not to allow in the query.
1214 - `default_op`: The default operator to combine query terms with.
1215 - `default_allow`: A list of fields to search for by default.
1216 - `default_deny`: A list of fields not to search for by default.
1217
1218 Only one of `allow` and `deny` may be specified.
1219
1220 Only one of `default_allow` and `default_deny` may be specified.
1221
1222 If any of the entries in `allow` are not present in the configuration
1223 for the database, or are not specified for indexing (either as
1224 INDEX_EXACT or INDEX_FREETEXT), they will be ignored. If any of the
1225 entries in `deny` are not present in the configuration for the
1226 database, they will be ignored.
1227
1228 Returns a Query object, which may be passed to the search() method, or
1229 combined with other queries.
1230
1231 """
1232 qp = self._prepare_queryparser(allow, deny, default_op, default_allow,
1233 default_deny)
1234 return self._query_parse_with_fallback(qp, string)
1235
1237 """A query for a single field.
1238
1239 """
1240 if self._index is None:
1241 raise _errors.SearchError("SearchConnection has been closed")
1242 try:
1243 actions = self._field_actions[field]._actions
1244 except KeyError:
1245 actions = {}
1246
1247
1248 for action, kwargslist in actions.iteritems():
1249 if action in (FieldActions.INDEX_EXACT,
1250 FieldActions.TAG,
1251 FieldActions.FACET,):
1252 prefix = self._field_mappings.get_prefix(field)
1253 if len(value) > 0:
1254 chval = ord(value[0])
1255 if chval >= ord('A') and chval <= ord('Z'):
1256 prefix = prefix + ':'
1257 return _log(_xapian.Query, prefix + value)
1258 if action == FieldActions.INDEX_FREETEXT:
1259 qp = _log(_xapian.QueryParser)
1260 qp.set_default_op(default_op)
1261 prefix = self._field_mappings.get_prefix(field)
1262 for kwargs in kwargslist:
1263 try:
1264 lang = kwargs['language']
1265 qp.set_stemmer(_log(_xapian.Stem, lang))
1266 qp.set_stemming_strategy(qp.STEM_SOME)
1267 except KeyError:
1268 pass
1269 return self._query_parse_with_fallback(qp, value, prefix)
1270
1271 return _log(_xapian.Query)
1272
1273 - def query_similar(self, ids, allow=None, deny=None, simterms=10):
1274 """Get a query which returns documents which are similar to others.
1275
1276 The list of document IDs to base the similarity search on is given in
1277 `ids`. This should be an iterable, holding a list of strings. If
1278 any of the supplied IDs cannot be found in the database, they will be
1279 ignored. (If no IDs can be found in the database, the resulting query
1280 will not match any documents.)
1281
1282 By default, all fields which have been indexed for freetext searching
1283 will be used for the similarity calculation. The list of fields used
1284 for this can be customised using the `allow` and `deny` parameters
1285 (only one of which may be specified):
1286
1287 - `allow`: A list of fields to base the similarity calculation on.
1288 - `deny`: A list of fields not to base the similarity calculation on.
1289 - `simterms`: Number of terms to use for the similarity calculation.
1290
1291 For convenience, any of `ids`, `allow`, or `deny` may be strings, which
1292 will be treated the same as a list of length 1.
1293
1294 Regardless of the setting of `allow` and `deny`, only fields which have
1295 been indexed for freetext searching will be used for the similarity
1296 measure - all other fields will always be ignored for this purpose.
1297
1298 """
1299 eterms, prefixes = self._get_eterms(ids, allow, deny, simterms)
1300
1301
1302
1303 q = _log(_xapian.Query, _xapian.Query.OP_ELITE_SET, eterms, simterms)
1304 return q
1305
1307 """Get a set of "significant" terms for a document, or documents.
1308
1309 This has a similar interface to query_similar(): it takes a list of
1310 ids, and an optional specification of a set of fields to consider.
1311 Instead of returning a query, it returns a list of terms from the
1312 document (or documents), which appear "significant". Roughly,
1313 in this situation significant means that the terms occur more
1314 frequently in the specified document than in the rest of the corpus.
1315
1316 The list is in decreasing order of "significance".
1317
1318 By default, all terms related to fields which have been indexed for
1319 freetext searching will be considered for the list of significant
1320 terms. The list of fields used for this can be customised using the
1321 `allow` and `deny` parameters (only one of which may be specified):
1322
1323 - `allow`: A list of fields to consider.
1324 - `deny`: A list of fields not to consider.
1325
1326 For convenience, any of `ids`, `allow`, or `deny` may be strings, which
1327 will be treated the same as a list of length 1.
1328
1329 Regardless of the setting of `allow` and `deny`, only fields which have
1330 been indexed for freetext searching will be considered - all other
1331 fields will always be ignored for this purpose.
1332
1333 The maximum number of terms to return may be specified by the maxterms
1334 parameter.
1335
1336 """
1337 eterms, prefixes = self._get_eterms(ids, allow, deny, maxterms)
1338 terms = []
1339 for term in eterms:
1340 pos = 0
1341 for char in term:
1342 if not char.isupper():
1343 break
1344 pos += 1
1345 field = prefixes[term[:pos]]
1346 value = term[pos:]
1347 terms.append((field, value))
1348 return terms
1349
1351 """Get a set of terms for an expand
1352
1353 """
1354 if self._index is None:
1355 raise _errors.SearchError("SearchConnection has been closed")
1356 if allow is not None and deny is not None:
1357 raise _errors.SearchError("Cannot specify both `allow` and `deny`")
1358
1359 if isinstance(ids, basestring):
1360 ids = (ids, )
1361 if isinstance(allow, basestring):
1362 allow = (allow, )
1363 if isinstance(deny, basestring):
1364 deny = (deny, )
1365
1366
1367 if allow is None:
1368 allow = [key for key in self._field_actions]
1369 if deny is not None:
1370 allow = [key for key in allow if key not in deny]
1371
1372
1373 prefixes = {}
1374 for field in allow:
1375 try:
1376 actions = self._field_actions[field]._actions
1377 except KeyError:
1378 actions = {}
1379 for action, kwargslist in actions.iteritems():
1380 if action == FieldActions.INDEX_FREETEXT:
1381 prefixes[self._field_mappings.get_prefix(field)] = field
1382
1383
1384 while True:
1385 try:
1386 eterms = self._perform_expand(ids, prefixes, simterms)
1387 break;
1388 except _xapian.DatabaseModifiedError, e:
1389 self.reopen()
1390 return eterms, prefixes
1391
1396
1398 pos = 0
1399 for char in term:
1400 if not char.isupper():
1401 break
1402 pos += 1
1403 if term[:pos] in self._prefixes:
1404 return True
1405 return False
1406
1431
1433 """A query which matches all the documents in the database.
1434
1435 """
1436 return _log(_xapian.Query, '')
1437
1439 """A query which matches no documents in the database.
1440
1441 This may be useful as a placeholder in various situations.
1442
1443 """
1444 return _log(_xapian.Query)
1445
1446 - def spell_correct(self, querystr, allow=None, deny=None, default_op=OP_AND,
1447 default_allow=None, default_deny=None):
1448 """Correct a query spelling.
1449
1450 This returns a version of the query string with any misspelt words
1451 corrected.
1452
1453 - `allow`: A list of fields to allow in the query.
1454 - `deny`: A list of fields not to allow in the query.
1455 - `default_op`: The default operator to combine query terms with.
1456 - `default_allow`: A list of fields to search for by default.
1457 - `default_deny`: A list of fields not to search for by default.
1458
1459 Only one of `allow` and `deny` may be specified.
1460
1461 Only one of `default_allow` and `default_deny` may be specified.
1462
1463 If any of the entries in `allow` are not present in the configuration
1464 for the database, or are not specified for indexing (either as
1465 INDEX_EXACT or INDEX_FREETEXT), they will be ignored. If any of the
1466 entries in `deny` are not present in the configuration for the
1467 database, they will be ignored.
1468
1469 Note that it is possible that the resulting spell-corrected query will
1470 still match no documents - the user should usually check that some
1471 documents are matched by the corrected query before suggesting it to
1472 users.
1473
1474 """
1475 qp = self._prepare_queryparser(allow, deny, default_op, default_allow,
1476 default_deny)
1477 try:
1478 qp.parse_query(querystr,
1479 self._qp_flags_base |
1480 self._qp_flags_phrase |
1481 self._qp_flags_synonym |
1482 self._qp_flags_bool |
1483 qp.FLAG_SPELLING_CORRECTION)
1484 except _xapian.QueryParserError:
1485 qp.parse_query(querystr,
1486 self._qp_flags_base |
1487 self._qp_flags_phrase |
1488 self._qp_flags_synonym |
1489 qp.FLAG_SPELLING_CORRECTION)
1490 corrected = qp.get_corrected_query_string()
1491 if len(corrected) == 0:
1492 if isinstance(querystr, unicode):
1493
1494
1495 return querystr.encode('utf-8')
1496 return querystr
1497 return corrected
1498
1500 """Check if this database supports collapsing on a specified field.
1501
1502 """
1503 if self._index is None:
1504 raise _errors.SearchError("SearchConnection has been closed")
1505 try:
1506 self._field_mappings.get_slot(field, 'collsort')
1507 except KeyError:
1508 return False
1509 return True
1510
1512 """Check if this database supports sorting on a specified field.
1513
1514 """
1515 if self._index is None:
1516 raise _errors.SearchError("SearchConnection has been closed")
1517 try:
1518 self._field_mappings.get_slot(field, 'collsort')
1519 except KeyError:
1520 return False
1521 return True
1522
1524 """Get the prefix of a term.
1525
1526 Prefixes are any initial capital letters, with the exception that R always
1527 ends a prefix, even if followed by capital letters.
1528
1529 """
1530 for p in xrange(len(term)):
1531 if term[p].islower():
1532 return term[:p]
1533 elif term[p] == 'R':
1534 return term[:p+1]
1535 return term
1536
1538 """Check if a facet must never be returned by a particular query type.
1539
1540 Returns True if the facet must never be returned.
1541
1542     Returns False if the facet may be returned - either because there is no
1543 entry for the query type, or because the entry is not
1544 FacetQueryType_Never.
1545
1546 """
1547 if query_type is None:
1548 return False
1549 if query_type not in self._facet_query_table:
1550 return False
1551 if facet not in self._facet_query_table[query_type]:
1552 return False
1553 return self._facet_query_table[query_type][facet] == _indexerconnection.IndexerConnection.FacetQueryType_Never
1554
1555 - def search(self, query, startrank, endrank,
1556 checkatleast=0, sortby=None, collapse=None,
1557 gettags=None,
1558 getfacets=None, allowfacets=None, denyfacets=None, usesubfacets=None,
1559 percentcutoff=None, weightcutoff=None,
1560 query_type=None):
1561 """Perform a search, for documents matching a query.
1562
1563 - `query` is the query to perform.
1564 - `startrank` is the rank of the start of the range of matching
1565 documents to return (ie, the result with this rank will be returned).
1566 ranks start at 0, which represents the "best" matching document.
1567 - `endrank` is the rank at the end of the range of matching documents
1568 to return. This is exclusive, so the result with this rank will not
1569 be returned.
1570 - `checkatleast` is the minimum number of results to check for: the
1571 estimate of the total number of matches will always be exact if
1572 the number of matches is less than `checkatleast`. A value of ``-1``
1573 can be specified for the checkatleast parameter - this has the
1574 special meaning of "check all matches", and is equivalent to passing
1575 the result of get_doccount().
1576 - `sortby` is the name of a field to sort by. It may be preceded by a
1577 '+' or a '-' to indicate ascending or descending order
1578 (respectively). If the first character is neither '+' or '-', the
1579 sort will be in ascending order.
1580 - `collapse` is the name of a field to collapse the result documents
1581 on. If this is specified, there will be at most one result in the
1582 result set for each value of the field.
1583 - `gettags` is the name of a field to count tag occurrences in, or a
1584 list of fields to do so.
1585 - `getfacets` is a boolean - if True, the matching documents will be
1586 examined to build up a list of the facet values contained in them.
1587 - `allowfacets` is a list of the fieldnames of facets to consider.
1588 - `denyfacets` is a list of fieldnames of facets which will not be
1589 considered.
1590 - `usesubfacets` is a boolean - if True, only top-level facets and
1591 subfacets of facets appearing in the query are considered (taking
1592 precedence over `allowfacets` and `denyfacets`).
1593 - `percentcutoff` is the minimum percentage a result must have to be
1594 returned.
1595 - `weightcutoff` is the minimum weight a result must have to be
1596 returned.
1597 - `query_type` is a value indicating the type of query being
1598 performed. If not None, the value is used to influence which facets
1599 are be returned by the get_suggested_facets() function. If the
1600 value of `getfacets` is False, it has no effect.
1601
1602 If neither 'allowfacets' or 'denyfacets' is specified, all fields
1603 holding facets will be considered (but see 'usesubfacets').
1604
1605 """
1606 if self._index is None:
1607 raise _errors.SearchError("SearchConnection has been closed")
1608 if 'facets' in _checkxapian.missing_features:
1609 if getfacets is not None or \
1610 allowfacets is not None or \
1611 denyfacets is not None or \
1612 usesubfacets is not None or \
1613 query_type is not None:
1614 raise errors.SearchError("Facets unsupported with this release of xapian")
1615 if 'tags' in _checkxapian.missing_features:
1616 if gettags is not None:
1617 raise errors.SearchError("Tags unsupported with this release of xapian")
1618 if checkatleast == -1:
1619 checkatleast = self._index.get_doccount()
1620
1621 enq = _log(_xapian.Enquire, self._index)
1622 enq.set_query(query)
1623
1624 if sortby is not None:
1625 asc = True
1626 if sortby[0] == '-':
1627 asc = False
1628 sortby = sortby[1:]
1629 elif sortby[0] == '+':
1630 sortby = sortby[1:]
1631
1632 try:
1633 slotnum = self._field_mappings.get_slot(sortby, 'collsort')
1634 except KeyError:
1635 raise _errors.SearchError("Field %r was not indexed for sorting" % sortby)
1636
1637
1638
1639
1640
1641 enq.set_sort_by_value_then_relevance(slotnum, not asc)
1642
1643 if collapse is not None:
1644 try:
1645 slotnum = self._field_mappings.get_slot(collapse, 'collsort')
1646 except KeyError:
1647 raise _errors.SearchError("Field %r was not indexed for collapsing" % collapse)
1648 enq.set_collapse_key(slotnum)
1649
1650 maxitems = max(endrank - startrank, 0)
1651
1652
1653 checkatleast = max(checkatleast, endrank + 1)
1654
1655
1656 matchspies = []
1657
1658
1659 if isinstance(gettags, basestring):
1660 if len(gettags) != 0:
1661 gettags = [gettags]
1662 tagspy = None
1663 if gettags is not None and len(gettags) != 0:
1664 tagspy = _log(_xapian.TermCountMatchSpy)
1665 for field in gettags:
1666 try:
1667 prefix = self._field_mappings.get_prefix(field)
1668 tagspy.add_prefix(prefix)
1669 except KeyError:
1670 raise _errors.SearchError("Field %r was not indexed for tagging" % field)
1671 matchspies.append(tagspy)
1672
1673
1674
1675 facetspy = None
1676 facetfields = []
1677 if getfacets:
1678 if allowfacets is not None and denyfacets is not None:
1679 raise _errors.SearchError("Cannot specify both `allowfacets` and `denyfacets`")
1680 if allowfacets is None:
1681 allowfacets = [key for key in self._field_actions]
1682 if denyfacets is not None:
1683 allowfacets = [key for key in allowfacets if key not in denyfacets]
1684
1685
1686
1687
1688 queryfacets = set([None])
1689 if usesubfacets:
1690
1691 termsiter = query.get_terms_begin()
1692 termsend = query.get_terms_end()
1693 while termsiter != termsend:
1694 prefix = self._get_prefix_from_term(termsiter.get_term())
1695 field = self._field_mappings.get_fieldname_from_prefix(prefix)
1696 if field and FieldActions.FACET in self._field_actions[field]._actions:
1697 queryfacets.add(field)
1698 termsiter.next()
1699
1700 for field in allowfacets:
1701 try:
1702 actions = self._field_actions[field]._actions
1703 except KeyError:
1704 actions = {}
1705 for action, kwargslist in actions.iteritems():
1706 if action == FieldActions.FACET:
1707
1708
1709 if usesubfacets and self._facet_hierarchy.get(field) not in queryfacets:
1710 continue
1711
1712 if self._facet_query_never(field, query_type):
1713 continue
1714 slot = self._field_mappings.get_slot(field, 'facet')
1715 if facetspy is None:
1716 facetspy = _log(_xapian.CategorySelectMatchSpy)
1717 facettype = None
1718 for kwargs in kwargslist:
1719 facettype = kwargs.get('type', None)
1720 if facettype is not None:
1721 break
1722 if facettype is None or facettype == 'string':
1723 facetspy.add_slot(slot, True)
1724 else:
1725 facetspy.add_slot(slot)
1726 facetfields.append((field, slot, kwargslist))
1727
1728 if facetspy is None:
1729
1730
1731
1732
1733 facetspy = False
1734 else:
1735 matchspies.append(facetspy)
1736
1737
1738
1739 if len(matchspies) == 0:
1740 matchspy = None
1741 elif len(matchspies) == 1:
1742 matchspy = matchspies[0]
1743 else:
1744 matchspy = _log(_xapian.MultipleMatchDecider)
1745 for spy in matchspies:
1746 matchspy.append(spy)
1747
1748 enq.set_docid_order(enq.DONT_CARE)
1749
1750
1751 if percentcutoff is not None or weightcutoff is not None:
1752 if percentcutoff is None:
1753 percentcutoff = 0
1754 if weightcutoff is None:
1755 weightcutoff = 0
1756 enq.set_cutoff(percentcutoff, weightcutoff)
1757
1758
1759 while True:
1760 try:
1761 if matchspy is None:
1762 mset = enq.get_mset(startrank, maxitems, checkatleast)
1763 else:
1764 mset = enq.get_mset(startrank, maxitems, checkatleast,
1765 None, None, matchspy)
1766 break
1767 except _xapian.DatabaseModifiedError, e:
1768 self.reopen()
1769 facet_hierarchy = None
1770 if usesubfacets:
1771 facet_hierarchy = self._facet_hierarchy
1772
1773 return SearchResults(self, enq, query, mset, self._field_mappings,
1774 tagspy, gettags, facetspy, facetfields,
1775 facet_hierarchy,
1776 self._facet_query_table.get(query_type))
1777
1779 """Get an iterator which returns all the ids in the database.
1780
1781         The unique IDs are currently returned in binary lexicographical sort
1782 order, but this should not be relied on.
1783
1784 Note that the iterator returned by this method may raise a
1785 xapian.DatabaseModifiedError exception if modifications are committed
1786 to the database while the iteration is in progress. If this happens,
1787 the search connection must be reopened (by calling reopen) and the
1788 iteration restarted.
1789
1790 """
1791 if self._index is None:
1792 raise _errors.SearchError("SearchConnection has been closed")
1793 return _indexerconnection.PrefixedTermIter('Q', self._index.allterms())
1794
1796 """Get the document with the specified unique ID.
1797
1798 Raises a KeyError if there is no such document. Otherwise, it returns
1799 a ProcessedDocument.
1800
1801 """
1802 if self._index is None:
1803 raise _errors.SearchError("SearchConnection has been closed")
1804 while True:
1805 try:
1806 postlist = self._index.postlist('Q' + id)
1807 try:
1808 plitem = postlist.next()
1809 except StopIteration:
1810
1811 raise KeyError('Unique ID %r not found' % id)
1812 try:
1813 postlist.next()
1814 raise _errors.IndexerError("Multiple documents "
1815 "found with same unique ID")
1816 except StopIteration:
1817
1818 pass
1819
1820 result = ProcessedDocument(self._field_mappings)
1821 result.id = id
1822 result._doc = self._index.get_document(plitem.docid)
1823 return result
1824 except _xapian.DatabaseModifiedError, e:
1825 self.reopen()
1826
1828 """Get an iterator over the synonyms.
1829
1830 - `prefix`: if specified, only synonym keys with this prefix will be
1831 returned.
1832
1833 The iterator returns 2-tuples, in which the first item is the key (ie,
1834 a 2-tuple holding the term or terms which will be synonym expanded,
1835 followed by the fieldname specified (or None if no fieldname)), and the
1836 second item is a tuple of strings holding the synonyms for the first
1837 item.
1838
1839 These return values are suitable for the dict() builtin, so you can
1840 write things like:
1841
1842 >>> conn = _indexerconnection.IndexerConnection('foo')
1843 >>> conn.add_synonym('foo', 'bar')
1844 >>> conn.add_synonym('foo bar', 'baz')
1845 >>> conn.add_synonym('foo bar', 'foo baz')
1846 >>> conn.flush()
1847 >>> conn = SearchConnection('foo')
1848 >>> dict(conn.iter_synonyms())
1849 {('foo', None): ('bar',), ('foo bar', None): ('baz', 'foo baz')}
1850
1851 """
1852 if self._index is None:
1853 raise _errors.SearchError("SearchConnection has been closed")
1854 return _indexerconnection.SynonymIter(self._index, self._field_mappings, prefix)
1855
1870
if __name__ == '__main__':
    # Run this module's doctests when executed directly.
    import doctest
    import sys
    doctest.testmod(sys.modules[__name__])