[Checkins] SVN: zope.index/trunk/ Merge the sorting branch.

Dan Korostelev nadako at gmail.com
Mon Dec 29 07:33:19 EST 2008


Log message for revision 94415:
  Merge the sorting branch.

Changed:
  U   zope.index/trunk/CHANGES.txt
  U   zope.index/trunk/buildout.cfg
  U   zope.index/trunk/src/zope/index/field/README.txt
  U   zope.index/trunk/src/zope/index/field/index.py
  U   zope.index/trunk/src/zope/index/field/tests.py
  U   zope.index/trunk/src/zope/index/interfaces.py
  U   zope.index/trunk/src/zope/index/keyword/__init__.py
  U   zope.index/trunk/src/zope/index/keyword/index.py
  U   zope.index/trunk/src/zope/index/keyword/interfaces.py
  U   zope.index/trunk/src/zope/index/keyword/tests.py
  U   zope.index/trunk/src/zope/index/topic/__init__.py
  U   zope.index/trunk/src/zope/index/topic/filter.py
  U   zope.index/trunk/src/zope/index/topic/index.py
  U   zope.index/trunk/src/zope/index/topic/interfaces.py
  U   zope.index/trunk/src/zope/index/topic/tests/test_topicindex.py

-=-
Modified: zope.index/trunk/CHANGES.txt
===================================================================
--- zope.index/trunk/CHANGES.txt	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/CHANGES.txt	2008-12-29 12:33:18 UTC (rev 94415)
@@ -1,15 +1,40 @@
 Changes
 =======
 
-3.4.2 (unreleased)
+3.5.0 (unreleased)
 ------------------
 
-Remove zope.testing from dependencies, as it's not really needed.
+- Remove zope.testing from dependencies, as it's not really needed.
 
+- Define IIndexSort interface for indexes that support sorting.
+
+- Implement sorting for FieldIndex (adapted from repoze.catalog/ZCatalog).
+
+- Add an ``apply`` method for KeywordIndex/TopicIndex, making them
+  implement IIndexSearch that can be useful in catalog.
+
+- Optimize the ``search`` method of KeywordIndex/TopicIndex by using
+  multiunion for the ``or`` operator and sorting before intersection for ``and``.
+
+- IMPORTANT: KeywordIndex/TopicIndex now use IFSets instead of IISets.
+  This makes it more compatible with other indexes (for example, when
+  using in catalog). This change can lead to problems, if your code somehow
+  depends on the II nature of sets, as it was before.
+  
+  Also, FilteredSets used to use IFSets as well, if you have any
+  FilteredSets pickled in the database, you need to migrate them to
+  IFSets yourself. You can do it like that:
+  
+      filter._ids = filter.family.IF.Set(filter._ids)
+  
+  Where ``filter`` is an instance of FilteredSet.
+
+Thanks Chris McDonough of repoze for supporting and useful code.
+
 3.4.1 (2007-09-28)
 ------------------
 
-Fixed bug in package metadata (wrong homepage URL).
+- Fixed bug in package metadata (wrong homepage URL).
 
 3.4.0 (2007-09-28)
 ------------------

Modified: zope.index/trunk/buildout.cfg
===================================================================
--- zope.index/trunk/buildout.cfg	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/buildout.cfg	2008-12-29 12:33:18 UTC (rev 94415)
@@ -1,6 +1,6 @@
 [buildout]
 develop = .
-parts = test coverage-test coverage-report
+parts = test coverage-test coverage-report python
 find-links = http://download.zope.org/distribution/
 
 [test]
@@ -17,3 +17,8 @@
 eggs = z3c.coverage
 scripts = coverage=coverage-report
 arguments = ('coverage', 'coverage/report')
+
+[python]
+recipe = zc.recipe.egg
+eggs = zope.index
+interpreter = python

Modified: zope.index/trunk/src/zope/index/field/README.txt
===================================================================
--- zope.index/trunk/src/zope/index/field/README.txt	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/field/README.txt	2008-12-29 12:33:18 UTC (rev 94415)
@@ -108,6 +108,45 @@
     >>> index.apply((30, 70))
     IFSet([])
 
+Sorting
+-------
+
+Field indexes also implement IIndexSort interface that
+provides a method for sorting document ids by their indexed
+values.
+
+    >>> index.index_doc(1, 9)
+    >>> index.index_doc(2, 8)
+    >>> index.index_doc(3, 7)
+    >>> index.index_doc(4, 6)
+    >>> index.index_doc(5, 5)
+    >>> index.index_doc(6, 4)
+    >>> index.index_doc(7, 3)
+    >>> index.index_doc(8, 2)
+    >>> index.index_doc(9, 1)
+
+    >>> list(index.sort([4, 2, 9, 7, 3, 1, 5]))
+    [9, 7, 5, 4, 3, 2, 1]
+
+We can also specify the ``reverse`` argument to reverse results:
+
+    >>> list(index.sort([4, 2, 9, 7, 3, 1, 5], reverse=True))
+    [1, 2, 3, 4, 5, 7, 9]
+
+And as per IIndexSort, we can limit results by specifying the ``limit``
+argument:
+
+    >>> list(index.sort([4, 2, 9, 7, 3, 1, 5], limit=3)) 
+    [9, 7, 5]
+
+If we pass an id that is not indexed by this index, it won't be included
+in the result.
+
+    >>> list(index.sort([2, 10]))
+    [2]
+
+    >>> index.clear()
+
 Bugfix testing:
 ---------------
 Happened at least once that the value dropped out of the forward index,

Modified: zope.index/trunk/src/zope/index/field/index.py
===================================================================
--- zope.index/trunk/src/zope/index/field/index.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/field/index.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -15,14 +15,15 @@
 
 $Id$
 """
-import persistent
+import heapq
+import bisect
+from itertools import islice
 
 import BTrees
-
+import persistent
+import zope.interface
 from BTrees.Length import Length
 
-import zope.interface
-
 from zope.index import interfaces
 
 class FieldIndex(persistent.Persistent):
@@ -31,6 +32,7 @@
         interfaces.IInjection,
         interfaces.IStatistics,
         interfaces.IIndexSearch,
+        interfaces.IIndexSort,
         )
 
     family = BTrees.family32
@@ -107,3 +109,102 @@
             raise TypeError("two-length tuple expected", query)
         return self.family.IF.multiunion(
             self._fwd_index.values(*query))
+
+    def sort(self, docids, limit=None, reverse=False):
+        if (limit is not None) and (limit < 1):
+            raise ValueError('limit value must be 1 or greater')
+
+        numdocs = self._num_docs.value
+        if not numdocs:
+            raise StopIteration
+
+        if not isinstance(docids, (self.family.IF.Set, self.family.IF.TreeSet)):
+            docids = self.family.IF.Set(docids)
+        if not docids:
+            raise StopIteration
+
+        rlen = len(docids)
+
+        fwd_index = self._fwd_index
+        rev_index = self._rev_index
+        getValue = rev_index.get
+        marker = object()
+
+        # use_lazy and use_nbest computations lifted wholesale from
+        # Zope2 catalog without questioning reasoning
+        use_lazy = rlen > numdocs * (rlen / 100 + 1)
+        use_nbest = limit and limit * 4 < rlen
+
+        # overrides for unit tests
+        if getattr(self, '_use_lazy', False):
+            use_lazy = True
+        if getattr(self, '_use_nbest', False):
+            use_nbest = True
+        
+        if use_nbest:
+            # this is a sort with a limit that appears useful, try to
+            # take advantage of the fact that we can keep a smaller
+            # set of simultaneous values in memory; use generators
+            # and heapq functions to do so.
+
+            def nsort():
+                for docid in docids:
+                    val = getValue(docid, marker)
+                    if val is not marker:
+                        yield (val, docid)
+
+            iterable = nsort()
+
+            if reverse:
+                # we use a generator as an iterable in the reverse
+                # sort case because the nlargest implementation does
+                # not manifest the whole thing into memory at once if
+                # we do so.
+                for val in heapq.nlargest(limit, iterable):
+                    yield val[1]
+            else:
+                # lifted from heapq.nsmallest
+                it = iter(iterable)
+                result = sorted(islice(it, 0, limit))
+                if not result:
+                    raise StopIteration
+                insort = bisect.insort
+                pop = result.pop
+                los = result[-1]    # los --> Largest of the nsmallest
+                for elem in it:
+                    if los <= elem:
+                        continue
+                    insort(result, elem)
+                    pop()
+                    los = result[-1]
+                for val in result:
+                    yield val[1]
+
+        else:
+            if use_lazy and not reverse:
+                # Since this the sort is not reversed, and the number
+                # of results in the search result set is much larger
+                # than the number of items in this index, we assume it
+                # will be fastest to iterate over all of our forward
+                # BTree's items instead of using a full sort, as our
+                # forward index is already sorted in ascending order
+                # by value. The Zope 2 catalog implementation claims
+                # that this case is rarely exercised in practice.
+                n = 0
+                for stored_docids in fwd_index.values():
+                    for docid in self.family.IF.intersection(docids, stored_docids):
+                        n += 1
+                        yield docid
+                        if limit and n >= limit:
+                            raise StopIteration
+            else:
+                # If the result set is not much larger than the number
+                # of documents in this index, or if we need to sort in
+                # reverse order, use a non-lazy sort.
+                n = 0
+                for docid in sorted(docids, key=getValue, reverse=reverse):
+                    if getValue(docid, marker) is not marker:
+                        n += 1
+                        yield docid
+                        if limit and n >= limit:
+                            raise StopIteration

Modified: zope.index/trunk/src/zope/index/field/tests.py
===================================================================
--- zope.index/trunk/src/zope/index/field/tests.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/field/tests.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -16,10 +16,134 @@
 $Id$
 """
 import unittest
+from BTrees.IFBTree import IFSet
 from zope.testing import doctest
 
+from zope.index.field import FieldIndex
+
+class TestFieldIndexSorting(unittest.TestCase):
+
+    def _populateIndex(self, index):
+        index.index_doc(5, 1) # docid, obj
+        index.index_doc(2, 2)
+        index.index_doc(1, 3)
+        index.index_doc(3, 4) 
+        index.index_doc(4, 5)
+        index.index_doc(8, 6)
+        index.index_doc(9, 7)
+        index.index_doc(7, 8)
+        index.index_doc(6, 9)
+        index.index_doc(11, 10)
+        index.index_doc(10, 11)
+
+    def test_sort_lazy_nolimit(self):
+        index = FieldIndex()
+        index._use_lazy = True
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1)
+        self.assertEqual(list(result), [5, 2, 1, 3, 4])
+
+    def test_sort_lazy_withlimit(self):
+        index = FieldIndex()
+        index._use_lazy = True
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1, limit=3)
+        self.assertEqual(list(result), [5, 2, 1])
+
+    def test_sort_nonlazy_nolimit(self):
+        index = FieldIndex()
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1)
+        self.assertEqual(list(result), [5, 2, 1, 3, 4])
+
+    def test_sort_nonlazy_missingdocid(self):
+        index = FieldIndex()
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5, 99])
+        result = index.sort(c1)
+        self.assertEqual(list(result), [5, 2, 1, 3, 4]) # 99 not present
+
+    def test_sort_nonlazy_withlimit(self):
+        index = FieldIndex()
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1, limit=3)
+        self.assertEqual(list(result), [5, 2, 1])
+
+    def test_sort_nonlazy_reverse_nolimit(self):
+        index = FieldIndex()
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1, reverse=True)
+        self.assertEqual(list(result), [4, 3, 1, 2, 5])
+
+    def test_sort_nonlazy_reverse_withlimit(self):
+        index = FieldIndex()
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1, reverse=True, limit=3)
+        self.assertEqual(list(result), [4, 3, 1])
+
+    def test_sort_nbest(self):
+        index = FieldIndex()
+        index._use_nbest = True
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1, limit=3)
+        self.assertEqual(list(result), [5, 2, 1])
+
+    def test_sort_nbest_reverse(self):
+        index = FieldIndex()
+        index._use_nbest = True
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1, reverse=True, limit=3)
+        self.assertEqual(list(result), [4, 3, 1])
+
+    def test_sort_nbest_missing(self):
+        index = FieldIndex()
+        index._use_nbest = True
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5, 99])
+        result = index.sort(c1, limit=3)
+        self.assertEqual(list(result), [5, 2, 1])
+
+    def test_sort_nbest_missing_reverse(self):
+        index = FieldIndex()
+        index._use_nbest = True
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5, 99])
+        result = index.sort(c1, reverse=True, limit=3)
+        self.assertEqual(list(result), [4, 3, 1])
+
+    def test_sort_nodocs(self):
+        index = FieldIndex()
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1)
+        self.assertEqual(list(result), [])
+
+    def test_sort_nodocids(self):
+        index = FieldIndex()
+        self._populateIndex(index)
+        c1 = IFSet()
+        result = index.sort(c1)
+        self.assertEqual(list(result), [])
+
+    def test_sort_badlimit(self):
+        index = FieldIndex()
+        self._populateIndex(index)
+        c1 = IFSet([1, 2, 3, 4, 5])
+        result = index.sort(c1, limit=0)
+        self.assertRaises(ValueError, list, result)
+
 def test_suite():
-    return doctest.DocFileSuite('README.txt')
+    return unittest.TestSuite((
+        doctest.DocFileSuite('README.txt', optionflags=doctest.ELLIPSIS),
+        unittest.makeSuite(TestFieldIndexSorting),
+        ))
 
 if __name__=='__main__':
     unittest.main(defaultTest='test_suite')

Modified: zope.index/trunk/src/zope/index/interfaces.py
===================================================================
--- zope.index/trunk/src/zope/index/interfaces.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/interfaces.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -82,6 +82,19 @@
 
         """
 
+class IIndexSort(Interface):
+
+    def sort(docids, limit=None, reverse=False):
+        """Sort document ids sequence using indexed values
+        
+        If some of docids are not indexed they are skipped
+        from resulting iterable.
+        
+        Return an iterable of document ids. Limited by
+        value of the "limit" argument and optionally
+        reversed, using the "reverse" argument.
+        """
+
 class IStatistics(Interface):
     """An index that provides statistical information about itself."""
 

Modified: zope.index/trunk/src/zope/index/keyword/__init__.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/__init__.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/keyword/__init__.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -1 +1 @@
-from zope.index.keyword.index import KeywordIndex
+from zope.index.keyword.index import KeywordIndex, CaseSensitiveKeywordIndex

Modified: zope.index/trunk/src/zope/index/keyword/index.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/index.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/keyword/index.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -21,7 +21,7 @@
 
 from BTrees.Length import Length
 
-from zope.index.interfaces import IInjection, IStatistics
+from zope.index.interfaces import IInjection, IStatistics, IIndexSearch
 from zope.index.keyword.interfaces import IKeywordQuerying
 from zope.interface import implements
 
@@ -31,7 +31,7 @@
 
     family = BTrees.family32
     normalize = True
-    implements(IInjection, IStatistics, IKeywordQuerying)
+    implements(IInjection, IStatistics, IIndexSearch, IKeywordQuerying)
 
     def __init__(self, family=None):
         if family is not None:
@@ -117,7 +117,7 @@
         has_key = idx.has_key
         for word in words:
             if not has_key(word):
-                idx[word] = self.family.II.Set()
+                idx[word] = self.family.IF.Set()
             idx[word].insert(docid)
 
     def _insert_reverse(self, docid, words):
@@ -134,20 +134,38 @@
         if self.normalize:
             query = [w.lower() for w in query]
 
-        f = {'and' : self.family.II.intersection,
-             'or' : self.family.II.union,
-             }[operator]
-    
-        rs = None
+        sets = []
         for word in query:
-            docids = self._fwd_index.get(word, self.family.II.Set())
-            rs = f(rs, docids)
-            
+            docids = self._fwd_index.get(word, self.family.IF.Set())
+            sets.append(docids)
+
+        if operator == 'or':
+            rs = self.family.IF.multiunion(sets)
+        elif operator == 'and':
+            # sort smallest to largest set so we intersect the smallest
+            # number of document identifiers possible
+            sets.sort(key=len)
+            rs = None
+            for set in sets:
+                rs = self.family.IF.intersection(rs, set)
+                if not rs:
+                    break
+        else:
+            raise TypeError('Keyword index only supports `and` and `or` operators, not `%s`.' % operator)
+        
         if rs:
             return rs
         else:
-            return self.family.II.Set()
+            return self.family.IF.Set()
 
+    def apply(self, query):
+        operator = 'and'
+        if isinstance(query, dict):
+            if 'operator' in query:
+                operator = query.pop('operator')
+            query = query['query']
+        return self.search(query, operator=operator)
+
 class CaseSensitiveKeywordIndex(KeywordIndex):
     """ A case-sensitive keyword index """
     normalize = False        

Modified: zope.index/trunk/src/zope/index/keyword/interfaces.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/interfaces.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/keyword/interfaces.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -27,5 +27,5 @@
         'operator' can be either 'and' or 'or' to search for documents
         containing all keywords or any keyword. 
 
-        Return an IISet of docids
+        Return an IFSet of docids
         """

Modified: zope.index/trunk/src/zope/index/keyword/tests.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/tests.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/keyword/tests.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -23,7 +23,7 @@
 
 class KeywordIndexTest(TestCase):
 
-    from BTrees.IIBTree import IISet
+    from BTrees.IFBTree import IFSet
 
     def setUp(self):
         self.index = KeywordIndex()
@@ -41,7 +41,7 @@
 
         results = self.index.search(query, mode)
 
-        # results and expected are IISets() but we can not
+        # results and expected are IFSets() but we can not
         # compare them directly since __eq__() does not seem
         # to be implemented for BTrees
 
@@ -53,6 +53,18 @@
     def _search_or(self, query, expected):
         return self._search(query, expected, 'or')
 
+    def _apply(self, query, expected, mode='and'):
+        results = self.index.apply(query)
+        self.assertEqual(results.keys(), expected.keys())
+
+    def _apply_and(self, query, expected):
+        results = self.index.apply({'operator': 'and', 'query': query})
+        self.assertEqual(results.keys(), expected.keys())
+
+    def _apply_or(self, query, expected):
+        results = self.index.apply({'operator': 'or', 'query': query})
+        self.assertEqual(results.keys(), expected.keys())
+
     def test_interface(self):
         verifyClass(IInjection, KeywordIndex)
         verifyClass(IStatistics, KeywordIndex)
@@ -85,13 +97,13 @@
         self.index.index_doc(1,  ('foo', 'bar', 'doom'))
         self.index.index_doc(1,  ('bar', 'blabla'))
         self.assertEqual(self.index.documentCount(), 3)
-        self._search('quick',   self.IISet())
-        self._search('foo',   self.IISet())
-        self._search('bar',   self.IISet([1]))
-        self._search(['doom'],   self.IISet())
-        self._search(['blabla'],   self.IISet([1]))
-        self._search_and(('bar', 'blabla'),   self.IISet([1]))
-        self._search(['cmf'],   self.IISet([5]))
+        self._search('quick',   self.IFSet())
+        self._search('foo',   self.IFSet())
+        self._search('bar',   self.IFSet([1]))
+        self._search(['doom'],   self.IFSet())
+        self._search(['blabla'],   self.IFSet([1]))
+        self._search_and(('bar', 'blabla'),   self.IFSet([1]))
+        self._search(['cmf'],   self.IFSet([5]))
 
     def test_hasdoc(self):
         self._populate_index()
@@ -104,26 +116,47 @@
 
     def test_simplesearch(self):
         self._populate_index()
-        self._search([''],      self.IISet())
-        self._search(['cmf'],   self.IISet([1, 5]))
-        self._search(['zope'],  self.IISet([1, 3]))
-        self._search(['zope3'], self.IISet([1]))
-        self._search(['foo'],   self.IISet())
+        self._search([''],      self.IFSet())
+        self._search(['cmf'],   self.IFSet([1, 5]))
+        self._search(['zope'],  self.IFSet([1, 3]))
+        self._search(['zope3'], self.IFSet([1]))
+        self._search(['foo'],   self.IFSet())
 
     def test_search_and(self):
         self._populate_index()
-        self._search_and(('cmf', 'zope3'), self.IISet([1]))
-        self._search_and(('cmf', 'zope'),  self.IISet([1]))
-        self._search_and(('cmf', 'zope4'), self.IISet())
-        self._search_and(('zope', 'ZOPE'), self.IISet([1, 3]))
+        self._search_and(('cmf', 'zope3'), self.IFSet([1]))
+        self._search_and(('cmf', 'zope'),  self.IFSet([1]))
+        self._search_and(('cmf', 'zope4'), self.IFSet())
+        self._search_and(('zope', 'ZOPE'), self.IFSet([1, 3]))
 
     def test_search_or(self):
         self._populate_index()
-        self._search_or(('cmf', 'zope3'), self.IISet([1, 5]))
-        self._search_or(('cmf', 'zope'),  self.IISet([1, 3, 5]))
-        self._search_or(('cmf', 'zope4'), self.IISet([1, 5]))
-        self._search_or(('zope', 'ZOPE'), self.IISet([1,3]))
+        self._search_or(('cmf', 'zope3'), self.IFSet([1, 5]))
+        self._search_or(('cmf', 'zope'),  self.IFSet([1, 3, 5]))
+        self._search_or(('cmf', 'zope4'), self.IFSet([1, 5]))
+        self._search_or(('zope', 'ZOPE'), self.IFSet([1,3]))
 
+    def test_apply(self):
+        self._populate_index()
+        self._apply(('cmf', 'zope3'), self.IFSet([1]))
+        self._apply(('cmf', 'zope'),  self.IFSet([1]))
+        self._apply(('cmf', 'zope4'), self.IFSet())
+        self._apply(('zope', 'ZOPE'), self.IFSet([1, 3]))
+
+    def test_apply_and(self):
+        self._populate_index()
+        self._apply_and(('cmf', 'zope3'), self.IFSet([1]))
+        self._apply_and(('cmf', 'zope'),  self.IFSet([1]))
+        self._apply_and(('cmf', 'zope4'), self.IFSet())
+        self._apply_and(('zope', 'ZOPE'), self.IFSet([1, 3]))
+
+    def test_apply_or(self):
+        self._populate_index()
+        self._apply_or(('cmf', 'zope3'), self.IFSet([1, 5]))
+        self._apply_or(('cmf', 'zope'),  self.IFSet([1, 3, 5]))
+        self._apply_or(('cmf', 'zope4'), self.IFSet([1, 5]))
+        self._apply_or(('zope', 'ZOPE'), self.IFSet([1,3]))
+
     def test_index_input(self):
         self.assertRaises(
             TypeError, self.index.index_doc, 1, "non-sequence-string")
@@ -131,7 +164,7 @@
 
 class KeywordIndexTest64(KeywordIndexTest):
 
-    from BTrees.LLBTree import LLSet as IISet
+    from BTrees.LFBTree import LFSet as IFSet
 
     def setUp(self):
         self.index = KeywordIndex(family=BTrees.family64)

Modified: zope.index/trunk/src/zope/index/topic/__init__.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/__init__.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/__init__.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -1,2 +1 @@
-#
-# This file is necessary to make this directory a package.
+from zope.index.topic.index import TopicIndex

Modified: zope.index/trunk/src/zope/index/topic/filter.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/filter.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/filter.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -36,19 +36,21 @@
     def __init__(self, id, expr, family=None):
         if family is not None:
             self.family = family
-        self.id   = id
+        self.id = id
         self.expr = expr
         self.clear()
 
     def clear(self):
-        self._ids = self.family.II.Set()
+        self._ids = self.family.IF.Set()
 
     def index_doc(self, docid, context):
         raise NotImplementedError
 
     def unindex_doc(self, docid):
-        try: self._ids.remove(docid)
-        except KeyError: pass
+        try:
+            self._ids.remove(docid)
+        except KeyError:
+            pass
 
     def getId(self):            
         return self.id
@@ -59,11 +61,11 @@
     def setExpression(self, expr): 
         self.expr = expr
 
-    def getIds(self): 
+    def getIds(self):
         return self._ids
 
     def __repr__(self):
-        return '%s: (%s) %s' % (self.id, self.expr, map(None, self._ids))
+        return '%s: (%s) %s' % (self.id, self.expr, list(self._ids))
 
     __str__ = __repr__
 
@@ -73,6 +75,7 @@
 
     def index_doc(self, docid, context):
         try:
-            if eval(self.expr): self._ids.insert(docid)
+            if eval(self.expr):
+                self._ids.insert(docid)
         except:
             pass  # ignore errors 

Modified: zope.index/trunk/src/zope/index/topic/index.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/index.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/index.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -21,13 +21,13 @@
 
 from zope.interface import implements
 
-from zope.index.interfaces import IInjection
+from zope.index.interfaces import IInjection, IIndexSearch
 from zope.index.topic.interfaces import ITopicQuerying
 
 
 class TopicIndex(Persistent):
 
-    implements(IInjection, ITopicQuerying)
+    implements(IInjection, ITopicQuerying, IIndexSearch)
 
     family = BTrees.family32
 
@@ -61,20 +61,42 @@
             f.unindex_doc(docid)
 
     def search(self, query, operator='and'):
-        if isinstance(query, basestring): query = [query]
+        if isinstance(query, basestring):
+            query = [query]
+
         if not isinstance(query, (tuple, list)):
             raise TypeError(
                 'query argument must be a list/tuple of filter ids')
 
-        f = {'and': self.family.II.intersection,
-             'or': self.family.II.union,
-             }[operator]
-    
-        rs = None
+        sets = []
         for id in self._filters.keys():
             if id in query:
                 docids = self._filters[id].getIds()
-                rs = f(rs, docids)
+                sets.append(docids)
+
+        if operator == 'or':
+            rs = self.family.IF.multiunion(sets)
+        elif operator == 'and':
+            # sort smallest to largest set so we intersect the smallest
+            # number of document identifiers possible
+            sets.sort(key=len)
+            rs = None
+            for set in sets:
+                rs = self.family.IF.intersection(rs, set)
+                if not rs:
+                    break
+        else:
+            raise TypeError('Topic index only supports `and` and `or` operators, not `%s`.' % operator)
             
-        if rs: return rs
-        else: return self.family.II.Set()
+        if rs:
+            return rs
+        else:
+            return self.family.IF.Set()
+
+    def apply(self, query):
+        operator = 'and'
+        if isinstance(query, dict):
+            if 'operator' in query:
+                operator = query.pop('operator')
+            query = query['query']
+        return self.search(query, operator=operator)

Modified: zope.index/trunk/src/zope/index/topic/interfaces.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/interfaces.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/interfaces.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -25,7 +25,7 @@
           'operator' can be 'and' or 'or' to search for matches in all
            or any filter.
 
-           Return an IISet of docids
+           Return an IFSet of docids
         """
 
 class ITopicFilteredSet(Interface):
@@ -50,4 +50,4 @@
         """Return the filter expression."""
 
     def getIds():
-        """Return an IISet of docids."""
+        """Return an IFSet of docids."""

Modified: zope.index/trunk/src/zope/index/topic/tests/test_topicindex.py
===================================================================
--- zope.index/trunk/src/zope/index/topic/tests/test_topicindex.py	2008-12-29 11:41:41 UTC (rev 94414)
+++ zope.index/trunk/src/zope/index/topic/tests/test_topicindex.py	2008-12-29 12:33:18 UTC (rev 94415)
@@ -65,6 +65,18 @@
     def _search_and(self, query, expected):
         return self._search(query, expected, 'and')
 
+    def _apply(self, query, expected, operator='and'):
+        result = self.index.apply(query)
+        self.assertEqual(result.keys(), expected)
+
+    def _apply_or(self, query, expected):
+        result = self.index.apply({'query': query, 'operator': 'or'})
+        self.assertEqual(result.keys(), expected)
+         
+    def _apply_and(self, query, expected):
+        result = self.index.apply({'query': query, 'operator': 'and'})
+        self.assertEqual(result.keys(), expected)
+
     def testInterfaces(self):
         for iface in implementedBy(PythonFilteredSet):
             verifyClass(iface, PythonFilteredSet)
@@ -96,7 +108,27 @@
         self._search_and(['doc2'], [3,4])
         self._search_and(['doc1','doc2'], [])
 
+    def test_apply_or(self):
+        self._apply_or('doc1',  [1,2])
+        self._apply_or(['doc1'],[1,2])
+        self._apply_or('doc2',  [3,4]),
+        self._apply_or(['doc2'],[3,4])
+        self._apply_or(['doc1','doc2'], [1,2,3,4])
 
+    def test_apply_and(self):
+        self._apply_and('doc1',   [1,2])
+        self._apply_and(['doc1'], [1,2])
+        self._apply_and('doc2',   [3,4])
+        self._apply_and(['doc2'], [3,4])
+        self._apply_and(['doc1','doc2'], [])
+
+    def test_apply(self):
+        self._apply('doc1',   [1,2])
+        self._apply(['doc1'], [1,2])
+        self._apply('doc2',   [3,4])
+        self._apply(['doc2'], [3,4])
+        self._apply(['doc1','doc2'], [])
+
 class TopicIndexTest64(TopicIndexTest):
 
     family = BTrees.family64



More information about the Checkins mailing list