[Checkins] SVN: Products.ZCatalog/trunk/ Optimize sorting in presence of batching arguments. If a batch from the end of the result set is requested, we internally reverse the sorting order and at the end reverse the lazy sequence again. In a sequence with 100 entries, if we request the batch with items 80 to 90, we now reverse sort 20 items (100 to 80), slice of the first ten items and then reverse them. Before we would had to sort the first 90 items and then slice of the last 10.
Hanno Schlichting
hannosch at hannosch.eu
Wed Jan 26 17:47:17 EST 2011
Log message for revision 119954:
Optimize sorting in presence of batching arguments. If a batch from the end of the result set is requested, we internally reverse the sorting order and at the end reverse the lazy sequence again. In a sequence with 100 entries, if we request the batch with items 80 to 90, we now reverse sort 20 items (100 to 80), slice of the first ten items and then reverse them. Before we would had to sort the first 90 items and then slice of the last 10.
Changed:
U Products.ZCatalog/trunk/CHANGES.txt
U Products.ZCatalog/trunk/src/Products/ZCatalog/Catalog.py
U Products.ZCatalog/trunk/src/Products/ZCatalog/tests/test_catalog.py
-=-
Modified: Products.ZCatalog/trunk/CHANGES.txt
===================================================================
--- Products.ZCatalog/trunk/CHANGES.txt 2011-01-26 22:23:34 UTC (rev 119953)
+++ Products.ZCatalog/trunk/CHANGES.txt 2011-01-26 22:47:16 UTC (rev 119954)
@@ -4,8 +4,15 @@
2.13.4 (unreleased)
-------------------
+- Optimize sorting in presence of batching arguments. If a batch from the end
+ of the result set is requested, we internally reverse the sorting order and
+ at the end reverse the lazy sequence again. In a sequence with 100 entries,
+ if we request the batch with items 80 to 90, we now reverse sort 20 items
+ (100 to 80), slice of the first ten items and then reverse them. Before we
+ would had to sort the first 90 items and then slice of the last 10.
+
- If batching arguments are provided, limit the returned lazy sequence to the
- items in the required batch instead of returning trailing items falling
+ items in the required batch instead of returning leading items falling
outside of the requested batch.
- Fixed inline `IISet` to `IITreeSet` conversion code inside DateRangeIndex'
Modified: Products.ZCatalog/trunk/src/Products/ZCatalog/Catalog.py
===================================================================
--- Products.ZCatalog/trunk/src/Products/ZCatalog/Catalog.py 2011-01-26 22:23:34 UTC (rev 119953)
+++ Products.ZCatalog/trunk/src/Products/ZCatalog/Catalog.py 2011-01-26 22:47:16 UTC (rev 119954)
@@ -480,11 +480,14 @@
order.sort()
return [i[1] for i in order]
- def _limit_sequence(self, sequence, slen, b_start=0, b_size=None):
+ def _limit_sequence(self, sequence, slen, b_start=0, b_size=None,
+ switched_reverse=False):
if b_size is not None:
sequence = sequence[b_start:b_start + b_size]
if slen:
slen = len(sequence)
+ if switched_reverse:
+ sequence.reverse()
return (sequence, slen)
def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
@@ -687,6 +690,25 @@
if limit is not None and limit >= rlen:
limit = rlen
+ # if we want a batch from the end of the resultset, reverse sorting
+ # order and limit it, then reverse the resultset again
+ switched_reverse = False
+ if b_start and b_start > rlen / 2:
+ reverse = not reverse
+ switched_reverse = True
+ b_end = b_start + b_size
+ if b_end >= rlen:
+ overrun = rlen - b_end
+ if b_start >= rlen:
+ # bail out, we are outside the possible range
+ return LazyCat([], 0, actual_result_count)
+ else:
+ b_size += overrun
+ b_start = 0
+ else:
+ b_start = b_end - b_start
+ limit = b_start + b_size
+
if merge and limit is None and (
rlen > (len(sort_index) * (rlen / 100 + 1))):
# The result set is much larger than the sorted index,
@@ -721,7 +743,7 @@
else:
result.sort()
sequence, slen = self._limit_sequence(result, length, b_start,
- b_size)
+ b_size, switched_reverse)
result = LazyCat(LazyValues(sequence), slen, actual_result_count)
elif limit is None or (limit * 4 > rlen):
# Iterate over the result set getting sort keys from the index
@@ -744,11 +766,13 @@
result.sort()
if limit is not None:
result = result[:limit]
- sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+ sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+ switched_reverse)
result = LazyValues(sequence)
result.actual_result_count = actual_result_count
else:
- sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+ sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+ switched_reverse)
return sequence
elif reverse:
# Limit/sort results using N-Best algorithm
@@ -776,11 +800,13 @@
worst = keys[0]
result.reverse()
if merge:
- sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+ sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+ switched_reverse)
result = LazyValues(sequence)
result.actual_result_count = actual_result_count
else:
- sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+ sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+ switched_reverse)
return sequence
elif not reverse:
# Limit/sort results using N-Best algorithm in reverse (N-Worst?)
@@ -805,11 +831,13 @@
n += 1
best = keys[-1]
if merge:
- sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+ sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+ switched_reverse)
result = LazyValues(sequence)
result.actual_result_count = actual_result_count
else:
- sequence, _ = self._limit_sequence(result, 0, b_start, b_size)
+ sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
+ switched_reverse)
return sequence
return LazyMap(self.__getitem__, result, len(result),
Modified: Products.ZCatalog/trunk/src/Products/ZCatalog/tests/test_catalog.py
===================================================================
--- Products.ZCatalog/trunk/src/Products/ZCatalog/tests/test_catalog.py 2011-01-26 22:23:34 UTC (rev 119953)
+++ Products.ZCatalog/trunk/src/Products/ZCatalog/tests/test_catalog.py 2011-01-26 22:47:16 UTC (rev 119954)
@@ -361,12 +361,30 @@
self.assertEqual(result.actual_result_count, 100)
self.assertEqual([r.num for r in result], range(11, 28))
+ def testSortLimitViaBatchingArgsSecondHalf(self):
+ query = dict(att1='att1', sort_on='num', b_start=70, b_size=15)
+ result = self._catalog(query)
+ self.assertEqual(result.actual_result_count, 100)
+ self.assertEqual([r.num for r in result], range(70, 85))
+
def testSortLimitViaBatchingArgsEnd(self):
+ query = dict(att1='att1', sort_on='num', b_start=90, b_size=10)
+ result = self._catalog(query)
+ self.assertEqual(result.actual_result_count, 100)
+ self.assertEqual([r.num for r in result], range(90, 100))
+
+ def testSortLimitViaBatchingArgsOverEnd(self):
query = dict(att1='att1', sort_on='num', b_start=90, b_size=15)
result = self._catalog(query)
self.assertEqual(result.actual_result_count, 100)
self.assertEqual([r.num for r in result], range(90, 100))
+ def testSortLimitViaBatchingArgsOutside(self):
+ query = dict(att1='att1', sort_on='num', b_start=110, b_size=10)
+ result = self._catalog(query)
+ self.assertEqual(result.actual_result_count, 100)
+ self.assertEqual([r.num for r in result], [])
+
# _get_sort_attr
# _getSortIndex
# searchResults
More information about the checkins
mailing list