[Checkins] SVN: Products.ZCatalog/trunk/ Extend BooleanIndex by making the indexed value variable instead of hardcoding it to `True`. The indexed value will determine the smaller set automatically and choose its best value. An inline switch is done once the indexed value set grows larger than 60% of the total length. 60% was chosen to avoid constant switching for indexes that have an almost equal distribution of `True/False`.
Hanno Schlichting
hannosch at hannosch.eu
Tue May 17 08:41:54 EDT 2011
Log message for revision 121686:
Extend BooleanIndex by making the indexed value variable instead of hardcoding it to `True`. The indexed value will determine the smaller set automatically and choose its best value. An inline switch is done once the indexed value set grows larger than 60% of the total length. 60% was chosen to avoid constant switching for indexes that have an almost equal distribution of `True/False`.
Changed:
U Products.ZCatalog/trunk/CHANGES.txt
U Products.ZCatalog/trunk/src/Products/PluginIndexes/BooleanIndex/BooleanIndex.py
U Products.ZCatalog/trunk/src/Products/PluginIndexes/BooleanIndex/tests.py
-=-
Modified: Products.ZCatalog/trunk/CHANGES.txt
===================================================================
--- Products.ZCatalog/trunk/CHANGES.txt 2011-05-17 12:33:54 UTC (rev 121685)
+++ Products.ZCatalog/trunk/CHANGES.txt 2011-05-17 12:41:54 UTC (rev 121686)
@@ -4,6 +4,13 @@
2.13.14 (unreleased)
--------------------
+- Extend BooleanIndex by making the indexed value variable instead of
+ hardcoding it to `True`. The indexed value will determine the smaller set
+ automatically and choose its best value. An inline switch is done once the
+ indexed value set grows larger than 60% of the total length. 60% was chosen
+ to avoid constant switching for indexes that have an almost equal
+ distribution of `True/False`.
+
- Substitute catalog entry in UUIDIndex error message.
2.13.13 (2011-05-04)
Modified: Products.ZCatalog/trunk/src/Products/PluginIndexes/BooleanIndex/BooleanIndex.py
===================================================================
--- Products.ZCatalog/trunk/src/Products/PluginIndexes/BooleanIndex/BooleanIndex.py 2011-05-17 12:33:54 UTC (rev 121685)
+++ Products.ZCatalog/trunk/src/Products/PluginIndexes/BooleanIndex/BooleanIndex.py 2011-05-17 12:41:54 UTC (rev 121686)
@@ -32,6 +32,9 @@
self._index = set([documentId1, documentId2])
self._unindex = {documentId:[True/False]}
+ self._length is the length of the unindex
+ self._index_length is the length of the index
+
False doesn't have actual entries in _index.
"""
@@ -50,33 +53,93 @@
manage_main._setName('manage_main')
manage_browse = DTMLFile('../dtml/browseIndex', globals())
+ _index_value = 1
+ _index_length = None
+
def clear(self):
- self._length = BTrees.Length.Length()
self._index = IITreeSet()
+ self._index_length = BTrees.Length.Length()
+ self._index_value = 1
self._unindex = IIBTree()
+ self._length = BTrees.Length.Length()
+ def histogram(self):
+ """Return a mapping which provides a histogram of the number of
+ elements found at each point in the index.
+ """
+ histogram = {}
+ indexed = bool(self._index_value)
+ histogram[indexed] = self._index_length.value
+ histogram[not indexed] = self._length.value - self._index_length.value
+ return histogram
+
+ def _invert_index(self, documentId):
+ self._index_value = indexed = int(not self._index_value)
+ self._index.clear()
+ length = 0
+ for rid, value in self._unindex.iteritems():
+ # documentId is the rid of the currently processed object that
+ # triggered the invert. in the case of unindexing, the rid hasn't
+ # been removed from the unindex yet. While indexing, the rid will
+ # be added to the index and unindex after this method is done
+ if value == indexed and rid != documentId:
+ self._index.add(rid)
+ length += 1
+ self._index_length = BTrees.Length.Length(length)
+
def insertForwardIndexEntry(self, entry, documentId):
- """If True, insert directly into treeset
+ """If the value matches the indexed one, insert into treeset
"""
- if entry:
+ # when we get the first entry, decide to index the opposite of what
+ # we got, as indexing zero items is fewer than one
+ length = self._length.value
+ index_length = self._index_length.value
+ if length == 0:
+ self._index_value = int(not bool(entry))
+
+ if bool(entry) is bool(self._index_value):
+ # is the index (after adding the current entry) larger than 60%
+ # of the total length? than switch the indexed value
+ if (index_length + 1) >= ((length + 1) * 0.6):
+ self._invert_index(documentId)
+ return
+
self._index.insert(documentId)
- self._length.change(1)
+ # BBB inline migration
+ length = self._index_length
+ if length is None:
+ self._index_length = BTrees.Length.Length(len(self._index))
+ else:
+ length.change(1)
- def removeForwardIndexEntry(self, entry, documentId):
+ def removeForwardIndexEntry(self, entry, documentId, check=True):
"""Take the entry provided and remove any reference to documentId
in its entry in the index.
"""
- try:
- if entry:
+ if bool(entry) is bool(self._index_value):
+ try:
self._index.remove(documentId)
- self._length.change(-1)
- except ConflictError:
- raise
- except Exception:
- LOG.exception('%s: unindex_object could not remove '
- 'documentId %s from index %s. This '
- 'should not happen.' % (self.__class__.__name__,
- str(documentId), str(self.id)))
+ # BBB inline migration
+ length = self._index_length
+ if length is None:
+ self._index_length = BTrees.Length.Length(len(self._index))
+ else:
+ length.change(-1)
+ except ConflictError:
+ raise
+ except Exception:
+ LOG.exception('%s: unindex_object could not remove '
+ 'documentId %s from index %s. This '
+ 'should not happen.' % (self.__class__.__name__,
+ str(documentId), str(self.id)))
+ elif check:
+ length = self._length.value
+ index_length = self._index_length.value
+ # is the index (after removing the current entry) larger than
+ # 60% of the total length? than switch the indexed value
+ if (index_length) <= ((length - 1) * 0.6):
+ self._invert_index(documentId)
+ return
def _index_object(self, documentId, obj, threshold=None, attr=''):
""" index and object 'obj' with integer id 'documentId'"""
@@ -94,10 +157,11 @@
oldDatum = self._unindex.get(documentId, _marker)
if datum != oldDatum:
if oldDatum is not _marker:
- self.removeForwardIndexEntry(oldDatum, documentId)
+ self.removeForwardIndexEntry(oldDatum, documentId, check=False)
if datum is _marker:
try:
del self._unindex[documentId]
+ self._length.change(-1)
except ConflictError:
raise
except Exception:
@@ -106,24 +170,44 @@
documentId)
if datum is not _marker:
- if datum:
- self.insertForwardIndexEntry(datum, documentId)
+ self.insertForwardIndexEntry(datum, documentId)
self._unindex[documentId] = datum
+ self._length.change(1)
returnStatus = 1
return returnStatus
+ def unindex_object(self, documentId):
+ """ Unindex the object with integer id 'documentId' and don't
+ raise an exception if we fail
+ """
+ unindexRecord = self._unindex.get(documentId, _marker)
+ if unindexRecord is _marker:
+ return None
+
+ self.removeForwardIndexEntry(unindexRecord, documentId)
+
+ try:
+ del self._unindex[documentId]
+ self._length.change(-1)
+ except ConflictError:
+ raise
+ except:
+ LOG.debug('Attempt to unindex nonexistent document'
+ ' with id %s' % documentId,exc_info=True)
+
def _apply_index(self, request, resultset=None):
record = parseIndexRequest(request, self.id, self.query_options)
if record.keys is None:
return None
index = self._index
+ indexed = self._index_value
for key in record.keys:
- if key:
- # If True, check index
+ if bool(key) is bool(indexed):
+ # If we match the indexed value, check index
return (intersection(index, resultset), (self.id, ))
else:
# Otherwise, remove from resultset or _unindex
@@ -139,11 +223,14 @@
return 2
def items(self):
- items = []
- for v, k in self._unindex.items():
- if isinstance(v, int):
- v = IISet((v, ))
- items.append((k, v))
+ # return a list of value to int set of rid tuples
+ indexed = self._index_value
+ items = [(bool(indexed), self._index.keys())]
+ false = IISet()
+ for rid, value in self._unindex.iteritems():
+ if value != indexed:
+ false.add(rid)
+ items.append((not bool(indexed), false))
return items
manage_addBooleanIndexForm = DTMLFile('dtml/addBooleanIndex', globals())
Modified: Products.ZCatalog/trunk/src/Products/PluginIndexes/BooleanIndex/tests.py
===================================================================
--- Products.ZCatalog/trunk/src/Products/PluginIndexes/BooleanIndex/tests.py 2011-05-17 12:33:54 UTC (rev 121685)
+++ Products.ZCatalog/trunk/src/Products/PluginIndexes/BooleanIndex/tests.py 2011-05-17 12:41:54 UTC (rev 121686)
@@ -36,15 +36,15 @@
index = self._makeOne()
obj = Dummy(1, True)
index._index_object(obj.id, obj, attr='truth')
- self.failUnless(1 in index._unindex)
- self.failUnless(1 in index._index)
+ self.assertTrue(1 in index._unindex)
+ self.assertFalse(1 in index._index)
def test_index_false(self):
index = self._makeOne()
obj = Dummy(1, False)
index._index_object(obj.id, obj, attr='truth')
- self.failUnless(1 in index._unindex)
- self.failIf(1 in index._index)
+ self.assertTrue(1 in index._unindex)
+ self.assertFalse(1 in index._index)
def test_index_missing_attribute(self):
index = self._makeOne()
@@ -61,8 +61,8 @@
index._index_object(obj.id, obj, attr='truth')
res, idx = index._apply_index({'truth': True})
- self.failUnlessEqual(idx, ('truth', ))
- self.failUnlessEqual(list(res), [1])
+ self.assertEqual(idx, ('truth', ))
+ self.assertEqual(list(res), [1])
def test_search_false(self):
index = self._makeOne()
@@ -72,8 +72,8 @@
index._index_object(obj.id, obj, attr='truth')
res, idx = index._apply_index({'truth': False})
- self.failUnlessEqual(idx, ('truth', ))
- self.failUnlessEqual(list(res), [2])
+ self.assertEqual(idx, ('truth', ))
+ self.assertEqual(list(res), [2])
def test_search_inputresult(self):
index = self._makeOne()
@@ -83,27 +83,121 @@
index._index_object(obj.id, obj, attr='truth')
res, idx = index._apply_index({'truth': True}, resultset=IISet([]))
- self.failUnlessEqual(idx, ('truth', ))
- self.failUnlessEqual(list(res), [])
+ self.assertEqual(idx, ('truth', ))
+ self.assertEqual(list(res), [])
res, idx = index._apply_index({'truth': True}, resultset=IISet([2]))
- self.failUnlessEqual(idx, ('truth', ))
- self.failUnlessEqual(list(res), [])
+ self.assertEqual(idx, ('truth', ))
+ self.assertEqual(list(res), [])
res, idx = index._apply_index({'truth': True}, resultset=IISet([1]))
- self.failUnlessEqual(idx, ('truth', ))
- self.failUnlessEqual(list(res), [1])
+ self.assertEqual(idx, ('truth', ))
+ self.assertEqual(list(res), [1])
res, idx = index._apply_index({'truth': True}, resultset=IISet([1, 2]))
- self.failUnlessEqual(idx, ('truth', ))
- self.failUnlessEqual(list(res), [1])
+ self.assertEqual(idx, ('truth', ))
+ self.assertEqual(list(res), [1])
res, idx = index._apply_index({'truth': False},
resultset=IISet([1, 2]))
- self.failUnlessEqual(idx, ('truth', ))
- self.failUnlessEqual(list(res), [2])
+ self.assertEqual(idx, ('truth', ))
+ self.assertEqual(list(res), [2])
+ def test_index_many_true(self):
+ index = self._makeOne()
+ for i in range(0, 100):
+ obj = Dummy(i, i < 80 and True or False)
+ index._index_object(obj.id, obj, attr='truth')
+ self.assertEqual(list(index._index), range(80, 100))
+ self.assertEqual(len(index._unindex), 100)
+ res, idx = index._apply_index({'truth': True})
+ self.assertEqual(list(res), range(0, 80))
+ res, idx = index._apply_index({'truth': False})
+ self.assertEqual(list(res), range(80, 100))
+
+ def test_index_many_false(self):
+ index = self._makeOne()
+ for i in range(0, 100):
+ obj = Dummy(i, i >= 80 and True or False)
+ index._index_object(obj.id, obj, attr='truth')
+ self.assertEqual(list(index._index), range(80, 100))
+ self.assertEqual(len(index._unindex), 100)
+
+ res, idx = index._apply_index({'truth': False})
+ self.assertEqual(list(res), range(0, 80))
+ res, idx = index._apply_index({'truth': True})
+ self.assertEqual(list(res), range(80, 100))
+
+ def test_index_many_change(self):
+ index = self._makeOne()
+ def add(i, value):
+ obj = Dummy(i, value)
+ index._index_object(obj.id, obj, attr='truth')
+ # First lets index only True values
+ for i in range(0, 4):
+ add(i, True)
+ self.assertEqual(list(index._index), [])
+ self.assertEqual(len(index._unindex), 4)
+ # Now add an equal number of False values
+ for i in range(4, 8):
+ add(i, False)
+ self.assertEqual(list(index._index), range(4, 8))
+ self.assertEqual(len(index._unindex), 8)
+ # Once False gets to be more than 60% of the indexed set, we switch
+ add(8, False)
+ self.assertEqual(list(index._index), range(4, 9))
+ add(9, False)
+ self.assertEqual(list(index._index), range(0, 4))
+ res, idx = index._apply_index({'truth': True})
+ self.assertEqual(list(res), range(0, 4))
+ res, idx = index._apply_index({'truth': False})
+ self.assertEqual(list(res), range(4, 10))
+ # and we can again switch if the percentages change again
+ for i in range(6, 10):
+ index.unindex_object(i)
+ self.assertEqual(list(index._index), range(4, 6))
+ self.assertEqual(len(index._unindex), 6)
+ res, idx = index._apply_index({'truth': True})
+ self.assertEqual(list(res), range(0, 4))
+ res, idx = index._apply_index({'truth': False})
+ self.assertEqual(list(res), range(4, 6))
+
+ def test_items(self):
+ index = self._makeOne()
+ # test empty
+ items = dict(index.items())
+ self.assertEqual(len(items[True]), 0)
+ self.assertEqual(len(items[False]), 0)
+ # test few trues
+ for i in range(0, 20):
+ obj = Dummy(i, i < 5 and True or False)
+ index._index_object(obj.id, obj, attr='truth')
+ items = dict(index.items())
+ self.assertEqual(len(items[True]), 5)
+ self.assertEqual(len(items[False]), 15)
+ # test many trues
+ for i in range(7, 20):
+ index.unindex_object(i)
+ items = dict(index.items())
+ self.assertEqual(len(items[True]), 5)
+ self.assertEqual(len(items[False]), 2)
+
+ def test_histogram(self):
+ index = self._makeOne()
+ # test empty
+ hist = index.histogram()
+ self.assertEqual(hist[True], 0)
+ self.assertEqual(hist[False], 0)
+ # test few trues
+ for i in range(0, 20):
+ obj = Dummy(i, i < 5 and True or False)
+ index._index_object(obj.id, obj, attr='truth')
+ hist = index.histogram()
+ self.assertEqual(hist[True], 5)
+ self.assertEqual(hist[False], 15)
+
+
def test_suite():
from unittest import TestSuite, makeSuite
suite = TestSuite()
More information about the checkins
mailing list