[Checkins] SVN: zope.index/trunk/ Move FieldIndex's sorting functionality to a mixin class so it can be reused by zc.catalog's ValueIndex.

Dan Korostelev nadako at gmail.com
Fri Feb 27 05:33:42 EST 2009


Log message for revision 97337:
  Move FieldIndex's sorting functionality to a mixin class so it can be reused by zc.catalog's ValueIndex.

Changed:
  U   zope.index/trunk/CHANGES.txt
  U   zope.index/trunk/src/zope/index/field/index.py
  A   zope.index/trunk/src/zope/index/field/sorting.py

-=-
Modified: zope.index/trunk/CHANGES.txt
===================================================================
--- zope.index/trunk/CHANGES.txt	2009-02-27 10:28:19 UTC (rev 97336)
+++ zope.index/trunk/CHANGES.txt	2009-02-27 10:33:42 UTC (rev 97337)
@@ -15,6 +15,9 @@
 
 - Don't modify given query dictionary in the KeywordIndex.apply method.
 
+- Move FieldIndex's sorting functionality to a mixin class so it can
+  be reused by zc.catalog's ValueIndex.
+
 3.5.0 (2008-12-30)
 ------------------
 

Modified: zope.index/trunk/src/zope/index/field/index.py
===================================================================
--- zope.index/trunk/src/zope/index/field/index.py	2009-02-27 10:28:19 UTC (rev 97336)
+++ zope.index/trunk/src/zope/index/field/index.py	2009-02-27 10:33:42 UTC (rev 97337)
@@ -15,24 +15,20 @@
 
 $Id$
 """
-import heapq
-import bisect
-from itertools import islice
-
 import BTrees
 import persistent
 import zope.interface
 from BTrees.Length import Length
 
 from zope.index import interfaces
+from zope.index.field.sorting import SortingIndexMixin
 
-class FieldIndex(persistent.Persistent):
+class FieldIndex(SortingIndexMixin, persistent.Persistent):
 
     zope.interface.implements(
         interfaces.IInjection,
         interfaces.IStatistics,
         interfaces.IIndexSearch,
-        interfaces.IIndexSort,
         )
 
     family = BTrees.family32
@@ -109,102 +105,3 @@
             raise TypeError("two-length tuple expected", query)
         return self.family.IF.multiunion(
             self._fwd_index.values(*query))
-
-    def sort(self, docids, reverse=False, limit=None):
-        if (limit is not None) and (limit < 1):
-            raise ValueError('limit value must be 1 or greater')
-
-        numdocs = self._num_docs.value
-        if not numdocs:
-            raise StopIteration
-
-        if not isinstance(docids, (self.family.IF.Set, self.family.IF.TreeSet)):
-            docids = self.family.IF.Set(docids)
-        if not docids:
-            raise StopIteration
-
-        rlen = len(docids)
-
-        fwd_index = self._fwd_index
-        rev_index = self._rev_index
-        getValue = rev_index.get
-        marker = object()
-
-        # use_lazy and use_nbest computations lifted wholesale from
-        # Zope2 catalog without questioning reasoning
-        use_lazy = rlen > numdocs * (rlen / 100 + 1)
-        use_nbest = limit and limit * 4 < rlen
-
-        # overrides for unit tests
-        if getattr(self, '_use_lazy', False):
-            use_lazy = True
-        if getattr(self, '_use_nbest', False):
-            use_nbest = True
-        
-        if use_nbest:
-            # this is a sort with a limit that appears useful, try to
-            # take advantage of the fact that we can keep a smaller
-            # set of simultaneous values in memory; use generators
-            # and heapq functions to do so.
-
-            def nsort():
-                for docid in docids:
-                    val = getValue(docid, marker)
-                    if val is not marker:
-                        yield (val, docid)
-
-            iterable = nsort()
-
-            if reverse:
-                # we use a generator as an iterable in the reverse
-                # sort case because the nlargest implementation does
-                # not manifest the whole thing into memory at once if
-                # we do so.
-                for val in heapq.nlargest(limit, iterable):
-                    yield val[1]
-            else:
-                # lifted from heapq.nsmallest
-                it = iter(iterable)
-                result = sorted(islice(it, 0, limit))
-                if not result:
-                    raise StopIteration
-                insort = bisect.insort
-                pop = result.pop
-                los = result[-1]    # los --> Largest of the nsmallest
-                for elem in it:
-                    if los <= elem:
-                        continue
-                    insort(result, elem)
-                    pop()
-                    los = result[-1]
-                for val in result:
-                    yield val[1]
-
-        else:
-            if use_lazy and not reverse:
-                # Since this the sort is not reversed, and the number
-                # of results in the search result set is much larger
-                # than the number of items in this index, we assume it
-                # will be fastest to iterate over all of our forward
-                # BTree's items instead of using a full sort, as our
-                # forward index is already sorted in ascending order
-                # by value. The Zope 2 catalog implementation claims
-                # that this case is rarely exercised in practice.
-                n = 0
-                for stored_docids in fwd_index.values():
-                    for docid in self.family.IF.intersection(docids, stored_docids):
-                        n += 1
-                        yield docid
-                        if limit and n >= limit:
-                            raise StopIteration
-            else:
-                # If the result set is not much larger than the number
-                # of documents in this index, or if we need to sort in
-                # reverse order, use a non-lazy sort.
-                n = 0
-                for docid in sorted(docids, key=getValue, reverse=reverse):
-                    if getValue(docid, marker) is not marker:
-                        n += 1
-                        yield docid
-                        if limit and n >= limit:
-                            raise StopIteration

Added: zope.index/trunk/src/zope/index/field/sorting.py
===================================================================
--- zope.index/trunk/src/zope/index/field/sorting.py	                        (rev 0)
+++ zope.index/trunk/src/zope/index/field/sorting.py	2009-02-27 10:33:42 UTC (rev 97337)
@@ -0,0 +1,130 @@
+##############################################################################
+#
+# Copyright (c) 2009 Zope Foundation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""A sorting mixin class for FieldIndex-like indexes.
+
+$Id$
+"""
+import heapq
+import bisect
+from itertools import islice
+
+from zope.interface import implements
+from zope.index.interfaces import IIndexSort
+
+class SortingIndexMixin(object):
+
+    implements(IIndexSort)
+
+    _sorting_num_docs_attr = '_num_docs'   # Length object
+    _sorting_fwd_index_attr = '_fwd_index' # forward BTree index
+    _sorting_rev_index_attr = '_rev_index' # reverse BTree index
+
+    def sort(self, docids, reverse=False, limit=None):
+        if (limit is not None) and (limit < 1):
+            raise ValueError('limit value must be 1 or greater')
+
+        numdocs = getattr(self, self._sorting_num_docs_attr).value
+        if not numdocs:
+            raise StopIteration
+
+        if not isinstance(docids, (self.family.IF.Set, self.family.IF.TreeSet)):
+            docids = self.family.IF.Set(docids)
+        if not docids:
+            raise StopIteration
+
+        rlen = len(docids)
+
+        fwd_index = getattr(self, self._sorting_fwd_index_attr)
+        rev_index = getattr(self, self._sorting_rev_index_attr)
+        getValue = rev_index.get
+        marker = object()
+
+        # use_lazy and use_nbest computations lifted wholesale from
+        # Zope2 catalog without questioning reasoning
+        use_lazy = rlen > numdocs * (rlen / 100 + 1)
+        use_nbest = limit and limit * 4 < rlen
+
+        # overrides for unit tests
+        if getattr(self, '_use_lazy', False):
+            use_lazy = True
+        if getattr(self, '_use_nbest', False):
+            use_nbest = True
+        
+        if use_nbest:
+            # this is a sort with a limit that appears useful, try to
+            # take advantage of the fact that we can keep a smaller
+            # set of simultaneous values in memory; use generators
+            # and heapq functions to do so.
+
+            def nsort():
+                for docid in docids:
+                    val = getValue(docid, marker)
+                    if val is not marker:
+                        yield (val, docid)
+
+            iterable = nsort()
+
+            if reverse:
+                # we use a generator as an iterable in the reverse
+                # sort case because the nlargest implementation does
+                # not manifest the whole thing into memory at once if
+                # we do so.
+                for val in heapq.nlargest(limit, iterable):
+                    yield val[1]
+            else:
+                # lifted from heapq.nsmallest
+                it = iter(iterable)
+                result = sorted(islice(it, 0, limit))
+                if not result:
+                    raise StopIteration
+                insort = bisect.insort
+                pop = result.pop
+                los = result[-1]    # los --> Largest of the nsmallest
+                for elem in it:
+                    if los <= elem:
+                        continue
+                    insort(result, elem)
+                    pop()
+                    los = result[-1]
+                for val in result:
+                    yield val[1]
+
+        else:
+            if use_lazy and not reverse:
+                # Since this the sort is not reversed, and the number
+                # of results in the search result set is much larger
+                # than the number of items in this index, we assume it
+                # will be fastest to iterate over all of our forward
+                # BTree's items instead of using a full sort, as our
+                # forward index is already sorted in ascending order
+                # by value. The Zope 2 catalog implementation claims
+                # that this case is rarely exercised in practice.
+                n = 0
+                for stored_docids in fwd_index.values():
+                    for docid in self.family.IF.intersection(docids, stored_docids):
+                        n += 1
+                        yield docid
+                        if limit and n >= limit:
+                            raise StopIteration
+            else:
+                # If the result set is not much larger than the number
+                # of documents in this index, or if we need to sort in
+                # reverse order, use a non-lazy sort.
+                n = 0
+                for docid in sorted(docids, key=getValue, reverse=reverse):
+                    if getValue(docid, marker) is not marker:
+                        n += 1
+                        yield docid
+                        if limit and n >= limit:
+                            raise StopIteration


Property changes on: zope.index/trunk/src/zope/index/field/sorting.py
___________________________________________________________________
Added: svn:keywords
   + Id



More information about the Checkins mailing list