[Checkins] SVN: Products.ZCatalog/trunk/ Optimize sorting in presence of batching arguments. If a batch from the end of the result set is requested, we internally reverse the sorting order and at the end reverse the lazy sequence again. In a sequence with 100 entries, if we request the batch with items 80 to 90, we now reverse sort 20 items (100 to 80), slice of the first ten items and then reverse them. Before we would had to sort the first 90 items and then slice of the last 10.

Hanno Schlichting hannosch at hannosch.eu
Wed Jan 26 17:47:17 EST 2011


Log message for revision 119954:
  Optimize sorting in presence of batching arguments. If a batch from the end of the result set is requested, we internally reverse the sorting order and at the end reverse the lazy sequence again. In a sequence with 100 entries, if we request the batch with items 80 to 90, we now reverse sort 20 items (100 to 80), slice of the first ten items and then reverse them. Before we would had to sort the first 90 items and then slice of the last 10.
  

Changed:
  U   Products.ZCatalog/trunk/CHANGES.txt
  U   Products.ZCatalog/trunk/src/Products/ZCatalog/Catalog.py
  U   Products.ZCatalog/trunk/src/Products/ZCatalog/tests/test_catalog.py

-=-
Modified: Products.ZCatalog/trunk/CHANGES.txt
===================================================================
--- Products.ZCatalog/trunk/CHANGES.txt	2011-01-26 22:23:34 UTC (rev 119953)
+++ Products.ZCatalog/trunk/CHANGES.txt	2011-01-26 22:47:16 UTC (rev 119954)
@@ -4,8 +4,15 @@
 2.13.4 (unreleased)
 -------------------
 
+- Optimize sorting in presence of batching arguments. If a batch from the end
+  of the result set is requested, we internally reverse the sorting order and
+  at the end reverse the lazy sequence again. In a sequence with 100 entries,
+  if we request the batch with items 80 to 90, we now reverse sort 20 items
+  (100 to 80), slice of the first ten items and then reverse them. Before we
+  would had to sort the first 90 items and then slice of the last 10.
+
 - If batching arguments are provided, limit the returned lazy sequence to the
-  items in the required batch instead of returning trailing items falling
+  items in the required batch instead of returning leading items falling
   outside of the requested batch.
 
 - Fixed inline `IISet` to `IITreeSet` conversion code inside DateRangeIndex'

Modified: Products.ZCatalog/trunk/src/Products/ZCatalog/Catalog.py
===================================================================
--- Products.ZCatalog/trunk/src/Products/ZCatalog/Catalog.py	2011-01-26 22:23:34 UTC (rev 119953)
+++ Products.ZCatalog/trunk/src/Products/ZCatalog/Catalog.py	2011-01-26 22:47:16 UTC (rev 119954)
@@ -480,11 +480,14 @@
         order.sort()
         return [i[1] for i in order]
 
-    def _limit_sequence(self, sequence, slen, b_start=0, b_size=None):
+    def _limit_sequence(self, sequence, slen, b_start=0, b_size=None,
+                        switched_reverse=False):
         if b_size is not None:
             sequence = sequence[b_start:b_start + b_size]
             if slen:
                 slen = len(sequence)
+        if switched_reverse:
+            sequence.reverse()
         return (sequence, slen)
 
     def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
@@ -687,6 +690,25 @@
         if limit is not None and limit >= rlen:
             limit = rlen
 
+        # if we want a batch from the end of the resultset, reverse sorting
+        # order and limit it, then reverse the resultset again
+        switched_reverse = False
+        if b_start and b_start > rlen / 2:
+            reverse = not reverse
+            switched_reverse = True
+            b_end = b_start + b_size
+            if b_end >= rlen:
+                overrun = rlen - b_end
+                if b_start >= rlen:
+                    # bail out, we are outside the possible range
+                    return LazyCat([], 0, actual_result_count)
+                else:
+                    b_size += overrun
+                b_start = 0
+            else:
+                b_start = b_end - b_start
+            limit = b_start + b_size
+
         if merge and limit is None and (
             rlen > (len(sort_index) * (rlen / 100 + 1))):
             # The result set is much larger than the sorted index,
@@ -721,7 +743,7 @@
             else:
                 result.sort()
             sequence, slen = self._limit_sequence(result, length, b_start,
-                b_size)
+                b_size, switched_reverse)
             result = LazyCat(LazyValues(sequence), slen, actual_result_count)
         elif limit is None or (limit * 4 > rlen):
             # Iterate over the result set getting sort keys from the index
@@ -744,11 +766,13 @@
                     result.sort()
                 if limit is not None:
                     result = result[:limit]
-                sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+                    switched_reverse)
                 result = LazyValues(sequence)
                 result.actual_result_count = actual_result_count
             else:
-                sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+                    switched_reverse)
                 return sequence
         elif reverse:
             # Limit/sort results using N-Best algorithm
@@ -776,11 +800,13 @@
                     worst = keys[0]
             result.reverse()
             if merge:
-                sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+                    switched_reverse)
                 result = LazyValues(sequence)
                 result.actual_result_count = actual_result_count
             else:
-                sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+                    switched_reverse)
                 return sequence
         elif not reverse:
             # Limit/sort results using N-Best algorithm in reverse (N-Worst?)
@@ -805,11 +831,13 @@
                         n += 1
                     best = keys[-1]
             if merge:
-                sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+                    switched_reverse)
                 result = LazyValues(sequence)
                 result.actual_result_count = actual_result_count
             else:
-                sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+                    switched_reverse)
                 return sequence
 
         return LazyMap(self.__getitem__, result, len(result),

Modified: Products.ZCatalog/trunk/src/Products/ZCatalog/tests/test_catalog.py
===================================================================
--- Products.ZCatalog/trunk/src/Products/ZCatalog/tests/test_catalog.py	2011-01-26 22:23:34 UTC (rev 119953)
+++ Products.ZCatalog/trunk/src/Products/ZCatalog/tests/test_catalog.py	2011-01-26 22:47:16 UTC (rev 119954)
@@ -361,12 +361,30 @@
         self.assertEqual(result.actual_result_count, 100)
         self.assertEqual([r.num for r in result], range(11, 28))
 
+    def testSortLimitViaBatchingArgsSecondHalf(self):
+        query = dict(att1='att1', sort_on='num', b_start=70, b_size=15)
+        result = self._catalog(query)
+        self.assertEqual(result.actual_result_count, 100)
+        self.assertEqual([r.num for r in result], range(70, 85))
+
     def testSortLimitViaBatchingArgsEnd(self):
+        query = dict(att1='att1', sort_on='num', b_start=90, b_size=10)
+        result = self._catalog(query)
+        self.assertEqual(result.actual_result_count, 100)
+        self.assertEqual([r.num for r in result], range(90, 100))
+
+    def testSortLimitViaBatchingArgsOverEnd(self):
         query = dict(att1='att1', sort_on='num', b_start=90, b_size=15)
         result = self._catalog(query)
         self.assertEqual(result.actual_result_count, 100)
         self.assertEqual([r.num for r in result], range(90, 100))
 
+    def testSortLimitViaBatchingArgsOutside(self):
+        query = dict(att1='att1', sort_on='num', b_start=110, b_size=10)
+        result = self._catalog(query)
+        self.assertEqual(result.actual_result_count, 100)
+        self.assertEqual([r.num for r in result], [])
+
     # _get_sort_attr
     # _getSortIndex
     # searchResults



More information about the checkins mailing list