[Checkins] SVN: zope.index/trunk/ Merge the sorting branch.
Dan Korostelev
nadako at gmail.com
Mon Dec 29 07:33:19 EST 2008
Log message for revision 94415:
Merge the sorting branch.
Changed:
U zope.index/trunk/CHANGES.txt
U zope.index/trunk/buildout.cfg
U zope.index/trunk/src/zope/index/field/README.txt
U zope.index/trunk/src/zope/index/field/index.py
U zope.index/trunk/src/zope/index/field/tests.py
U zope.index/trunk/src/zope/index/interfaces.py
U zope.index/trunk/src/zope/index/keyword/__init__.py
U zope.index/trunk/src/zope/index/keyword/index.py
U zope.index/trunk/src/zope/index/keyword/interfaces.py
U zope.index/trunk/src/zope/index/keyword/tests.py
U zope.index/trunk/src/zope/index/topic/__init__.py
U zope.index/trunk/src/zope/index/topic/filter.py
U zope.index/trunk/src/zope/index/topic/index.py
U zope.index/trunk/src/zope/index/topic/interfaces.py
U zope.index/trunk/src/zope/index/topic/tests/test_topicindex.py
-=-
Modified: zope.index/trunk/CHANGES.txt
===================================================================
--- zope.index/trunk/CHANGES.txt 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/CHANGES.txt 2008-12-29 12:33:18 UTC (rev 94415)
@@ -1,15 +1,40 @@
Changes
=======
-3.4.2 (unreleased)
+3.5.0 (unreleased)
------------------
-Remove zope.testing from dependencies, as it's not really needed.
+- Remove zope.testing from dependencies, as it's not really needed.
+- Define IIndexSort interface for indexes that support sorting.
+
+- Implement sorting for FieldIndex (adapted from repoze.catalog/ZCatalog).
+
+- Add an ``apply`` method for KeywordIndex/TopicIndex, making them
+ implement IIndexSearch that can be useful in catalog.
+
+- Optimize the ``search`` method of KeywordIndex/TopicIndex by using
+ multiunion for the ``or`` operator and sorting before intersection for ``and``.
+
+- IMPORTANT: KeywordIndex/TopicIndex now use IFSets instead of IISets.
+ This makes it more compatible with other indexes (for example, when
+ using in catalog). This change can lead to problems, if your code somehow
+ depends on the II nature of sets, as it was before.
+
+ Also, FilteredSets used to use IFSets as well, if you have any
+ FilteredSets pickled in the database, you need to migrate them to
+ IFSets yourself. You can do it like that:
+
+ filter._ids = filter.family.IF.Set(filter._ids)
+
+ Where ``filter`` is an instance of FilteredSet.
+
+Thanks Chris McDonough of repoze for supporting and useful code.
+
3.4.1 (2007-09-28)
------------------
-Fixed bug in package metadata (wrong homepage URL).
+- Fixed bug in package metadata (wrong homepage URL).
3.4.0 (2007-09-28)
------------------
Modified: zope.index/trunk/buildout.cfg
===================================================================
--- zope.index/trunk/buildout.cfg 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/buildout.cfg 2008-12-29 12:33:18 UTC (rev 94415)
@@ -1,6 +1,6 @@
[buildout]
develop = .
-parts = test coverage-test coverage-report
+parts = test coverage-test coverage-report python
find-links = http://download.zope.org/distribution/
[test]
@@ -17,3 +17,8 @@
eggs = z3c.coverage
scripts = coverage=coverage-report
arguments = ('coverage', 'coverage/report')
+
+[python]
+recipe = zc.recipe.egg
+eggs = zope.index
+interpreter = python
Modified: zope.index/trunk/src/zope/index/field/README.txt
===================================================================
--- zope.index/trunk/src/zope/index/field/README.txt 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/field/README.txt 2008-12-29 12:33:18 UTC (rev 94415)
@@ -108,6 +108,45 @@
>>> index.apply((30, 70))
IFSet([])
+Sorting
+-------
+
+Field indexes also implement IIndexSort interface that
+provides a method for sorting document ids by their indexed
+values.
+
+ >>> index.index_doc(1, 9)
+ >>> index.index_doc(2, 8)
+ >>> index.index_doc(3, 7)
+ >>> index.index_doc(4, 6)
+ >>> index.index_doc(5, 5)
+ >>> index.index_doc(6, 4)
+ >>> index.index_doc(7, 3)
+ >>> index.index_doc(8, 2)
+ >>> index.index_doc(9, 1)
+
+ >>> list(index.sort([4, 2, 9, 7, 3, 1, 5]))
+ [9, 7, 5, 4, 3, 2, 1]
+
+We can also specify the ``reverse`` argument to reverse results:
+
+ >>> list(index.sort([4, 2, 9, 7, 3, 1, 5], reverse=True))
+ [1, 2, 3, 4, 5, 7, 9]
+
+And as per IIndexSort, we can limit results by specifying the ``limit``
+argument:
+
+ >>> list(index.sort([4, 2, 9, 7, 3, 1, 5], limit=3))
+ [9, 7, 5]
+
+If we pass an id that is not indexed by this index, it won't be included
+in the result.
+
+ >>> list(index.sort([2, 10]))
+ [2]
+
+ >>> index.clear()
+
Bugfix testing:
---------------
Happened at least once that the value dropped out of the forward index,
Modified: zope.index/trunk/src/zope/index/field/index.py
===================================================================
--- zope.index/trunk/src/zope/index/field/index.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/field/index.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -15,14 +15,15 @@
$Id$
"""
-import persistent
+import heapq
+import bisect
+from itertools import islice
import BTrees
-
+import persistent
+import zope.interface
from BTrees.Length import Length
-import zope.interface
-
from zope.index import interfaces
class FieldIndex(persistent.Persistent):
@@ -31,6 +32,7 @@
interfaces.IInjection,
interfaces.IStatistics,
interfaces.IIndexSearch,
+ interfaces.IIndexSort,
)
family = BTrees.family32
@@ -107,3 +109,102 @@
raise TypeError("two-length tuple expected", query)
return self.family.IF.multiunion(
self._fwd_index.values(*query))
+
+ def sort(self, docids, limit=None, reverse=False):
+ if (limit is not None) and (limit < 1):
+ raise ValueError('limit value must be 1 or greater')
+
+ numdocs = self._num_docs.value
+ if not numdocs:
+ raise StopIteration
+
+ if not isinstance(docids, (self.family.IF.Set, self.family.IF.TreeSet)):
+ docids = self.family.IF.Set(docids)
+ if not docids:
+ raise StopIteration
+
+ rlen = len(docids)
+
+ fwd_index = self._fwd_index
+ rev_index = self._rev_index
+ getValue = rev_index.get
+ marker = object()
+
+ # use_lazy and use_nbest computations lifted wholesale from
+ # Zope2 catalog without questioning reasoning
+ use_lazy = rlen > numdocs * (rlen / 100 + 1)
+ use_nbest = limit and limit * 4 < rlen
+
+ # overrides for unit tests
+ if getattr(self, '_use_lazy', False):
+ use_lazy = True
+ if getattr(self, '_use_nbest', False):
+ use_nbest = True
+
+ if use_nbest:
+ # this is a sort with a limit that appears useful, try to
+ # take advantage of the fact that we can keep a smaller
+ # set of simultaneous values in memory; use generators
+ # and heapq functions to do so.
+
+ def nsort():
+ for docid in docids:
+ val = getValue(docid, marker)
+ if val is not marker:
+ yield (val, docid)
+
+ iterable = nsort()
+
+ if reverse:
+ # we use a generator as an iterable in the reverse
+ # sort case because the nlargest implementation does
+ # not manifest the whole thing into memory at once if
+ # we do so.
+ for val in heapq.nlargest(limit, iterable):
+ yield val[1]
+ else:
+ # lifted from heapq.nsmallest
+ it = iter(iterable)
+ result = sorted(islice(it, 0, limit))
+ if not result:
+ raise StopIteration
+ insort = bisect.insort
+ pop = result.pop
+ los = result[-1] # los --> Largest of the nsmallest
+ for elem in it:
+ if los <= elem:
+ continue
+ insort(result, elem)
+ pop()
+ los = result[-1]
+ for val in result:
+ yield val[1]
+
+ else:
+ if use_lazy and not reverse:
+ # Since this the sort is not reversed, and the number
+ # of results in the search result set is much larger
+ # than the number of items in this index, we assume it
+ # will be fastest to iterate over all of our forward
+ # BTree's items instead of using a full sort, as our
+ # forward index is already sorted in ascending order
+ # by value. The Zope 2 catalog implementation claims
+ # that this case is rarely exercised in practice.
+ n = 0
+ for stored_docids in fwd_index.values():
+ for docid in self.family.IF.intersection(docids, stored_docids):
+ n += 1
+ yield docid
+ if limit and n >= limit:
+ raise StopIteration
+ else:
+ # If the result set is not much larger than the number
+ # of documents in this index, or if we need to sort in
+ # reverse order, use a non-lazy sort.
+ n = 0
+ for docid in sorted(docids, key=getValue, reverse=reverse):
+ if getValue(docid, marker) is not marker:
+ n += 1
+ yield docid
+ if limit and n >= limit:
+ raise StopIteration
Modified: zope.index/trunk/src/zope/index/field/tests.py
===================================================================
--- zope.index/trunk/src/zope/index/field/tests.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/field/tests.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -16,10 +16,134 @@
$Id$
"""
import unittest
+from BTrees.IFBTree import IFSet
from zope.testing import doctest
+from zope.index.field import FieldIndex
+
+class TestFieldIndexSorting(unittest.TestCase):
+
+ def _populateIndex(self, index):
+ index.index_doc(5, 1) # docid, obj
+ index.index_doc(2, 2)
+ index.index_doc(1, 3)
+ index.index_doc(3, 4)
+ index.index_doc(4, 5)
+ index.index_doc(8, 6)
+ index.index_doc(9, 7)
+ index.index_doc(7, 8)
+ index.index_doc(6, 9)
+ index.index_doc(11, 10)
+ index.index_doc(10, 11)
+
+ def test_sort_lazy_nolimit(self):
+ index = FieldIndex()
+ index._use_lazy = True
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1)
+ self.assertEqual(list(result), [5, 2, 1, 3, 4])
+
+ def test_sort_lazy_withlimit(self):
+ index = FieldIndex()
+ index._use_lazy = True
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1, limit=3)
+ self.assertEqual(list(result), [5, 2, 1])
+
+ def test_sort_nonlazy_nolimit(self):
+ index = FieldIndex()
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1)
+ self.assertEqual(list(result), [5, 2, 1, 3, 4])
+
+ def test_sort_nonlazy_missingdocid(self):
+ index = FieldIndex()
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5, 99])
+ result = index.sort(c1)
+ self.assertEqual(list(result), [5, 2, 1, 3, 4]) # 99 not present
+
+ def test_sort_nonlazy_withlimit(self):
+ index = FieldIndex()
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1, limit=3)
+ self.assertEqual(list(result), [5, 2, 1])
+
+ def test_sort_nonlazy_reverse_nolimit(self):
+ index = FieldIndex()
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1, reverse=True)
+ self.assertEqual(list(result), [4, 3, 1, 2, 5])
+
+ def test_sort_nonlazy_reverse_withlimit(self):
+ index = FieldIndex()
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1, reverse=True, limit=3)
+ self.assertEqual(list(result), [4, 3, 1])
+
+ def test_sort_nbest(self):
+ index = FieldIndex()
+ index._use_nbest = True
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1, limit=3)
+ self.assertEqual(list(result), [5, 2, 1])
+
+ def test_sort_nbest_reverse(self):
+ index = FieldIndex()
+ index._use_nbest = True
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1, reverse=True, limit=3)
+ self.assertEqual(list(result), [4, 3, 1])
+
+ def test_sort_nbest_missing(self):
+ index = FieldIndex()
+ index._use_nbest = True
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5, 99])
+ result = index.sort(c1, limit=3)
+ self.assertEqual(list(result), [5, 2, 1])
+
+ def test_sort_nbest_missing_reverse(self):
+ index = FieldIndex()
+ index._use_nbest = True
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5, 99])
+ result = index.sort(c1, reverse=True, limit=3)
+ self.assertEqual(list(result), [4, 3, 1])
+
+ def test_sort_nodocs(self):
+ index = FieldIndex()
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1)
+ self.assertEqual(list(result), [])
+
+ def test_sort_nodocids(self):
+ index = FieldIndex()
+ self._populateIndex(index)
+ c1 = IFSet()
+ result = index.sort(c1)
+ self.assertEqual(list(result), [])
+
+ def test_sort_badlimit(self):
+ index = FieldIndex()
+ self._populateIndex(index)
+ c1 = IFSet([1, 2, 3, 4, 5])
+ result = index.sort(c1, limit=0)
+ self.assertRaises(ValueError, list, result)
+
def test_suite():
- return doctest.DocFileSuite('README.txt')
+ return unittest.TestSuite((
+ doctest.DocFileSuite('README.txt', optionflags=doctest.ELLIPSIS),
+ unittest.makeSuite(TestFieldIndexSorting),
+ ))
if __name__=='__main__':
unittest.main(defaultTest='test_suite')
Modified: zope.index/trunk/src/zope/index/interfaces.py
===================================================================
--- zope.index/trunk/src/zope/index/interfaces.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/interfaces.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -82,6 +82,19 @@
"""
+class IIndexSort(Interface):
+
+ def sort(docids, limit=None, reverse=False):
+ """Sort document ids sequence using indexed values
+
+ If some of docids are not indexed they are skipped
+ from resulting iterable.
+
+ Return an iterable of document ids. Limited by
+ value of the "limit" argument and optionally
+ reversed, using the "reverse" argument.
+ """
+
class IStatistics(Interface):
"""An index that provides statistical information about itself."""
Modified: zope.index/trunk/src/zope/index/keyword/__init__.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/__init__.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/keyword/__init__.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -1 +1 @@
-from zope.index.keyword.index import KeywordIndex
+from zope.index.keyword.index import KeywordIndex, CaseSensitiveKeywordIndex
Modified: zope.index/trunk/src/zope/index/keyword/index.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/index.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/keyword/index.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -21,7 +21,7 @@
from BTrees.Length import Length
-from zope.index.interfaces import IInjection, IStatistics
+from zope.index.interfaces import IInjection, IStatistics, IIndexSearch
from zope.index.keyword.interfaces import IKeywordQuerying
from zope.interface import implements
@@ -31,7 +31,7 @@
family = BTrees.family32
normalize = True
- implements(IInjection, IStatistics, IKeywordQuerying)
+ implements(IInjection, IStatistics, IIndexSearch, IKeywordQuerying)
def __init__(self, family=None):
if family is not None:
@@ -117,7 +117,7 @@
has_key = idx.has_key
for word in words:
if not has_key(word):
- idx[word] = self.family.II.Set()
+ idx[word] = self.family.IF.Set()
idx[word].insert(docid)
def _insert_reverse(self, docid, words):
@@ -134,20 +134,38 @@
if self.normalize:
query = [w.lower() for w in query]
- f = {'and' : self.family.II.intersection,
- 'or' : self.family.II.union,
- }[operator]
-
- rs = None
+ sets = []
for word in query:
- docids = self._fwd_index.get(word, self.family.II.Set())
- rs = f(rs, docids)
-
+ docids = self._fwd_index.get(word, self.family.IF.Set())
+ sets.append(docids)
+
+ if operator == 'or':
+ rs = self.family.IF.multiunion(sets)
+ elif operator == 'and':
+ # sort smallest to largest set so we intersect the smallest
+ # number of document identifiers possible
+ sets.sort(key=len)
+ rs = None
+ for set in sets:
+ rs = self.family.IF.intersection(rs, set)
+ if not rs:
+ break
+ else:
+ raise TypeError('Keyword index only supports `and` and `or` operators, not `%s`.' % operator)
+
if rs:
return rs
else:
- return self.family.II.Set()
+ return self.family.IF.Set()
+ def apply(self, query):
+ operator = 'and'
+ if isinstance(query, dict):
+ if 'operator' in query:
+ operator = query.pop('operator')
+ query = query['query']
+ return self.search(query, operator=operator)
+
class CaseSensitiveKeywordIndex(KeywordIndex):
""" A case-sensitive keyword index """
normalize = False
Modified: zope.index/trunk/src/zope/index/keyword/interfaces.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/interfaces.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/keyword/interfaces.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -27,5 +27,5 @@
'operator' can be either 'and' or 'or' to search for documents
containing all keywords or any keyword.
- Return an IISet of docids
+ Return an IFSet of docids
"""
Modified: zope.index/trunk/src/zope/index/keyword/tests.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/tests.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/keyword/tests.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -23,7 +23,7 @@
class KeywordIndexTest(TestCase):
- from BTrees.IIBTree import IISet
+ from BTrees.IFBTree import IFSet
def setUp(self):
self.index = KeywordIndex()
@@ -41,7 +41,7 @@
results = self.index.search(query, mode)
- # results and expected are IISets() but we can not
+ # results and expected are IFSets() but we can not
# compare them directly since __eq__() does not seem
# to be implemented for BTrees
@@ -53,6 +53,18 @@
def _search_or(self, query, expected):
return self._search(query, expected, 'or')
+ def _apply(self, query, expected, mode='and'):
+ results = self.index.apply(query)
+ self.assertEqual(results.keys(), expected.keys())
+
+ def _apply_and(self, query, expected):
+ results = self.index.apply({'operator': 'and', 'query': query})
+ self.assertEqual(results.keys(), expected.keys())
+
+ def _apply_or(self, query, expected):
+ results = self.index.apply({'operator': 'or', 'query': query})
+ self.assertEqual(results.keys(), expected.keys())
+
def test_interface(self):
verifyClass(IInjection, KeywordIndex)
verifyClass(IStatistics, KeywordIndex)
@@ -85,13 +97,13 @@
self.index.index_doc(1, ('foo', 'bar', 'doom'))
self.index.index_doc(1, ('bar', 'blabla'))
self.assertEqual(self.index.documentCount(), 3)
- self._search('quick', self.IISet())
- self._search('foo', self.IISet())
- self._search('bar', self.IISet([1]))
- self._search(['doom'], self.IISet())
- self._search(['blabla'], self.IISet([1]))
- self._search_and(('bar', 'blabla'), self.IISet([1]))
- self._search(['cmf'], self.IISet([5]))
+ self._search('quick', self.IFSet())
+ self._search('foo', self.IFSet())
+ self._search('bar', self.IFSet([1]))
+ self._search(['doom'], self.IFSet())
+ self._search(['blabla'], self.IFSet([1]))
+ self._search_and(('bar', 'blabla'), self.IFSet([1]))
+ self._search(['cmf'], self.IFSet([5]))
def test_hasdoc(self):
self._populate_index()
@@ -104,26 +116,47 @@
def test_simplesearch(self):
self._populate_index()
- self._search([''], self.IISet())
- self._search(['cmf'], self.IISet([1, 5]))
- self._search(['zope'], self.IISet([1, 3]))
- self._search(['zope3'], self.IISet([1]))
- self._search(['foo'], self.IISet())
+ self._search([''], self.IFSet())
+ self._search(['cmf'], self.IFSet([1, 5]))
+ self._search(['zope'], self.IFSet([1, 3]))
+ self._search(['zope3'], self.IFSet([1]))
+ self._search(['foo'], self.IFSet())
def test_search_and(self):
self._populate_index()
- self._search_and(('cmf', 'zope3'), self.IISet([1]))
- self._search_and(('cmf', 'zope'), self.IISet([1]))
- self._search_and(('cmf', 'zope4'), self.IISet())
- self._search_and(('zope', 'ZOPE'), self.IISet([1, 3]))
+ self._search_and(('cmf', 'zope3'), self.IFSet([1]))
+ self._search_and(('cmf', 'zope'), self.IFSet([1]))
+ self._search_and(('cmf', 'zope4'), self.IFSet())
+ self._search_and(('zope', 'ZOPE'), self.IFSet([1, 3]))
def test_search_or(self):
self._populate_index()
- self._search_or(('cmf', 'zope3'), self.IISet([1, 5]))
- self._search_or(('cmf', 'zope'), self.IISet([1, 3, 5]))
- self._search_or(('cmf', 'zope4'), self.IISet([1, 5]))
- self._search_or(('zope', 'ZOPE'), self.IISet([1,3]))
+ self._search_or(('cmf', 'zope3'), self.IFSet([1, 5]))
+ self._search_or(('cmf', 'zope'), self.IFSet([1, 3, 5]))
+ self._search_or(('cmf', 'zope4'), self.IFSet([1, 5]))
+ self._search_or(('zope', 'ZOPE'), self.IFSet([1,3]))
+ def test_apply(self):
+ self._populate_index()
+ self._apply(('cmf', 'zope3'), self.IFSet([1]))
+ self._apply(('cmf', 'zope'), self.IFSet([1]))
+ self._apply(('cmf', 'zope4'), self.IFSet())
+ self._apply(('zope', 'ZOPE'), self.IFSet([1, 3]))
+
+ def test_apply_and(self):
+ self._populate_index()
+ self._apply_and(('cmf', 'zope3'), self.IFSet([1]))
+ self._apply_and(('cmf', 'zope'), self.IFSet([1]))
+ self._apply_and(('cmf', 'zope4'), self.IFSet())
+ self._apply_and(('zope', 'ZOPE'), self.IFSet([1, 3]))
+
+ def test_apply_or(self):
+ self._populate_index()
+ self._apply_or(('cmf', 'zope3'), self.IFSet([1, 5]))
+ self._apply_or(('cmf', 'zope'), self.IFSet([1, 3, 5]))
+ self._apply_or(('cmf', 'zope4'), self.IFSet([1, 5]))
+ self._apply_or(('zope', 'ZOPE'), self.IFSet([1,3]))
+
def test_index_input(self):
self.assertRaises(
TypeError, self.index.index_doc, 1, "non-sequence-string")
@@ -131,7 +164,7 @@
class KeywordIndexTest64(KeywordIndexTest):
- from BTrees.LLBTree import LLSet as IISet
+ from BTrees.LFBTree import LFSet as IFSet
def setUp(self):
self.index = KeywordIndex(family=BTrees.family64)
Modified: zope.index/trunk/src/zope/index/topic/__init__.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/__init__.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/__init__.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -1,2 +1 @@
-#
-# This file is necessary to make this directory a package.
+from zope.index.topic.index import TopicIndex
Modified: zope.index/trunk/src/zope/index/topic/filter.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/filter.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/filter.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -36,19 +36,21 @@
def __init__(self, id, expr, family=None):
if family is not None:
self.family = family
- self.id = id
+ self.id = id
self.expr = expr
self.clear()
def clear(self):
- self._ids = self.family.II.Set()
+ self._ids = self.family.IF.Set()
def index_doc(self, docid, context):
raise NotImplementedError
def unindex_doc(self, docid):
- try: self._ids.remove(docid)
- except KeyError: pass
+ try:
+ self._ids.remove(docid)
+ except KeyError:
+ pass
def getId(self):
return self.id
@@ -59,11 +61,11 @@
def setExpression(self, expr):
self.expr = expr
- def getIds(self):
+ def getIds(self):
return self._ids
def __repr__(self):
- return '%s: (%s) %s' % (self.id, self.expr, map(None, self._ids))
+ return '%s: (%s) %s' % (self.id, self.expr, list(self._ids))
__str__ = __repr__
@@ -73,6 +75,7 @@
def index_doc(self, docid, context):
try:
- if eval(self.expr): self._ids.insert(docid)
+ if eval(self.expr):
+ self._ids.insert(docid)
except:
pass # ignore errors
Modified: zope.index/trunk/src/zope/index/topic/index.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/index.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/index.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -21,13 +21,13 @@
from zope.interface import implements
-from zope.index.interfaces import IInjection
+from zope.index.interfaces import IInjection, IIndexSearch
from zope.index.topic.interfaces import ITopicQuerying
class TopicIndex(Persistent):
- implements(IInjection, ITopicQuerying)
+ implements(IInjection, ITopicQuerying, IIndexSearch)
family = BTrees.family32
@@ -61,20 +61,42 @@
f.unindex_doc(docid)
def search(self, query, operator='and'):
- if isinstance(query, basestring): query = [query]
+ if isinstance(query, basestring):
+ query = [query]
+
if not isinstance(query, (tuple, list)):
raise TypeError(
'query argument must be a list/tuple of filter ids')
- f = {'and': self.family.II.intersection,
- 'or': self.family.II.union,
- }[operator]
-
- rs = None
+ sets = []
for id in self._filters.keys():
if id in query:
docids = self._filters[id].getIds()
- rs = f(rs, docids)
+ sets.append(docids)
+
+ if operator == 'or':
+ rs = self.family.IF.multiunion(sets)
+ elif operator == 'and':
+ # sort smallest to largest set so we intersect the smallest
+ # number of document identifiers possible
+ sets.sort(key=len)
+ rs = None
+ for set in sets:
+ rs = self.family.IF.intersection(rs, set)
+ if not rs:
+ break
+ else:
+ raise TypeError('Topic index only supports `and` and `or` operators, not `%s`.' % operator)
- if rs: return rs
- else: return self.family.II.Set()
+ if rs:
+ return rs
+ else:
+ return self.family.IF.Set()
+
+ def apply(self, query):
+ operator = 'and'
+ if isinstance(query, dict):
+ if 'operator' in query:
+ operator = query.pop('operator')
+ query = query['query']
+ return self.search(query, operator=operator)
Modified: zope.index/trunk/src/zope/index/topic/interfaces.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/interfaces.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/interfaces.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -25,7 +25,7 @@
'operator' can be 'and' or 'or' to search for matches in all
or any filter.
- Return an IISet of docids
+ Return an IFSet of docids
"""
class ITopicFilteredSet(Interface):
@@ -50,4 +50,4 @@
"""Return the filter expression."""
def getIds():
- """Return an IISet of docids."""
+ """Return an IFSet of docids."""
Modified: zope.index/trunk/src/zope/index/topic/tests/test_topicindex.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/tests/test_topicindex.py 2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/tests/test_topicindex.py 2008-12-29 12:33:18 UTC (rev 94415)
@@ -65,6 +65,18 @@
def _search_and(self, query, expected):
return self._search(query, expected, 'and')
+ def _apply(self, query, expected, operator='and'):
+ result = self.index.apply(query)
+ self.assertEqual(result.keys(), expected)
+
+ def _apply_or(self, query, expected):
+ result = self.index.apply({'query': query, 'operator': 'or'})
+ self.assertEqual(result.keys(), expected)
+
+ def _apply_and(self, query, expected):
+ result = self.index.apply({'query': query, 'operator': 'and'})
+ self.assertEqual(result.keys(), expected)
+
def testInterfaces(self):
for iface in implementedBy(PythonFilteredSet):
verifyClass(iface, PythonFilteredSet)
@@ -96,7 +108,27 @@
self._search_and(['doc2'], [3,4])
self._search_and(['doc1','doc2'], [])
+ def test_apply_or(self):
+ self._apply_or('doc1', [1,2])
+ self._apply_or(['doc1'],[1,2])
+ self._apply_or('doc2', [3,4]),
+ self._apply_or(['doc2'],[3,4])
+ self._apply_or(['doc1','doc2'], [1,2,3,4])
+ def test_apply_and(self):
+ self._apply_and('doc1', [1,2])
+ self._apply_and(['doc1'], [1,2])
+ self._apply_and('doc2', [3,4])
+ self._apply_and(['doc2'], [3,4])
+ self._apply_and(['doc1','doc2'], [])
+
+ def test_apply(self):
+ self._apply('doc1', [1,2])
+ self._apply(['doc1'], [1,2])
+ self._apply('doc2', [3,4])
+ self._apply(['doc2'], [3,4])
+ self._apply(['doc1','doc2'], [])
+
class TopicIndexTest64(TopicIndexTest):
family = BTrees.family64
More information about the Checkins
mailing list