[Checkins] SVN: z3c.indexer/trunk/ implemented TextIndex with family64 BTree support which makes it possible to use long as index ids
Roger Ineichen
roger at projekt01.ch
Sun Dec 27 09:09:08 EST 2009
Log message for revision 107173:
Implemented TextIndex with family64 BTree support, which makes it
possible to use long integers as index ids. Added tests for the new TextIndex64.
Changed:
U z3c.indexer/trunk/CHANGES.txt
U z3c.indexer/trunk/src/z3c/indexer/index.py
U z3c.indexer/trunk/src/z3c/indexer/interfaces.py
U z3c.indexer/trunk/src/z3c/indexer/tests.py
-=-
Modified: z3c.indexer/trunk/CHANGES.txt
===================================================================
--- z3c.indexer/trunk/CHANGES.txt 2009-12-27 13:45:38 UTC (rev 107172)
+++ z3c.indexer/trunk/CHANGES.txt 2009-12-27 14:09:08 UTC (rev 107173)
@@ -2,11 +2,13 @@
CHANGES
=======
-0.6.0dev (unreleased)
----------------------
+0.6.1 (unreleased)
+------------------
-- ...
+- Feature: implemented TextIndex with family64 BTree support which makes it
+ possible to use long as index ids. Added tests for the new TextIndex64.
+
0.6.0 (2009-02-22)
------------------
@@ -33,6 +35,7 @@
- Use new lightweight and ZMI-less versions of packages: zope.keyreference,
zope.intid, zope.site, zope.container and zope.catalog (for tests).
+
0.5.1 (2008-12-11)
------------------
Modified: z3c.indexer/trunk/src/z3c/indexer/index.py
===================================================================
--- z3c.indexer/trunk/src/z3c/indexer/index.py 2009-12-27 13:45:38 UTC (rev 107172)
+++ z3c.indexer/trunk/src/z3c/indexer/index.py 2009-12-27 14:09:08 UTC (rev 107173)
@@ -16,14 +16,19 @@
"""
__docformat__ = "reStructuredText"
+import BTrees
from BTrees.IFBTree import IFBTree
from BTrees.IFBTree import union
from BTrees.IFBTree import difference
import zope.interface
+from zope.container import contained
from zope.index.field import index as fieldindex
+from zope.index.text import lexicon as lex
from zope.index.text import textindex
-from zope.container import contained
+from BTrees.Length import Length
+from zope.index.text import okapiindex
+
from zc.catalog import index as zcindex
from z3c.indexer import interfaces
@@ -46,6 +51,44 @@
zope.interface.implements(interfaces.ITextIndex)
+class Lexicon64(lex.Lexicon):
+ """Lexicon with long key support used by p01.oid."""
+
+ def __init__(self, *pipeline):
+ family64 = BTrees.family64
+ #self._wids = OIBTree() # word -> wid
+ self._wids = family64.OI.BTree()
+ #self._words = IOBTree() # wid -> word
+ self._words = family64.IO.BTree()
+ # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
+ # of vocabulary). This can happen, e.g., if a query contains a word
+ # we never saw before, and that isn't a known stopword (or otherwise
+ # filtered out). Returning a special wid value for OOV words is a
+ # way to let clients know when an OOV word appears.
+ self.wordCount = Length()
+ self._pipeline = pipeline
+
+
+class TextIndex64(TextIndex):
+ """TextIndex with long key support used by p01.oid."""
+
+ zope.interface.implements(interfaces.ITextIndex64)
+
+ def __init__(self, lexicon=None, index=None):
+ """Provisional constructor.
+
+ This creates the lexicon and index if not passed in.
+ """
+ family64 = BTrees.family64
+ if lexicon is None:
+ lexicon = Lexicon64(lex.Splitter(), lex.CaseNormalizer(),
+ lex.StopWordRemover())
+ if index is None:
+ index = okapiindex.OkapiIndex(lexicon, family64)
+ self.lexicon = lexicon
+ self.index = index
+
+
class FieldIndex(IndexMixin, fieldindex.FieldIndex,
contained.Contained):
"""Field index based on zope.index.field.index.TextIndex
@@ -61,7 +104,7 @@
def applyNotEq(self, not_value):
all = self.apply((None, None))
r = self.apply((not_value, not_value))
- return difference(all, r)
+ return self.family.IF.difference(all, r)
def applyBetween(self, min_value, max_value, exclude_min=False,
exclude_max=False):
@@ -84,11 +127,11 @@
if not results:
# no applicable terms at all
- return IFBTree()
+ return self.family.IF.BTree()
result = results.pop(0)
for res in results:
- result = union(result, res)
+ result = self.family.IF.union(result, res)
return result
Modified: z3c.indexer/trunk/src/z3c/indexer/interfaces.py
===================================================================
--- z3c.indexer/trunk/src/z3c/indexer/interfaces.py 2009-12-27 13:45:38 UTC (rev 107172)
+++ z3c.indexer/trunk/src/z3c/indexer/interfaces.py 2009-12-27 14:09:08 UTC (rev 107173)
@@ -144,6 +144,10 @@
"""Apply text query."""
+class ITextIndex64(ITextIndex):
+ """ITextIndex with family 64 BTree support."""
+
+
class IFieldIndex(IIndex):
"""Value index."""
Modified: z3c.indexer/trunk/src/z3c/indexer/tests.py
===================================================================
--- z3c.indexer/trunk/src/z3c/indexer/tests.py 2009-12-27 13:45:38 UTC (rev 107172)
+++ z3c.indexer/trunk/src/z3c/indexer/tests.py 2009-12-27 14:09:08 UTC (rev 107173)
@@ -17,8 +17,9 @@
__docformat__ = "reStructuredText"
import unittest
+import doctest
+import BTrees
import zope.component
-from zope.testing import doctest
from zope.intid import IntIds
from zope.intid.interfaces import IIntIds
from zope.keyreference.interfaces import IKeyReference
@@ -60,6 +61,46 @@
return index.TextIndex
+class TestTextIndex64(z3c.testing.InterfaceBaseTest):
+
+ def getTestInterface(self):
+ return interfaces.ITextIndex64
+
+ def getTestClass(self):
+ return index.TextIndex64
+
+ def test_long_key(self):
+ idx = self.makeTestObject()
+
+ # test int as id
+ intID = int(42)
+ idx.index_doc(intID, u'foo')
+ self.assertEqual(idx.documentCount(), 1)
+ # test query
+ self.assertEqual(len(idx.apply(u'foo')), 1)
+ self.assertEqual(len(idx.apply(u'bar')), 0)
+ # test btree type
+ self.assertEqual(type(idx.apply(u'foo')), BTrees.LFBTree.LFBucket)
+ self.assertEqual(type(idx.apply(u'bar')), BTrees.LFBTree.LFBucket)
+ # test unindex with long as id
+ idx.unindex_doc(intID)
+ self.assertEqual(idx.documentCount(), 0)
+
+ # test long as id
+ longID = int(123456789123456789)
+ idx.index_doc(longID, u'foofoo')
+ self.assertEqual(idx.documentCount(), 1)
+ # test query
+ self.assertEqual(len(idx.apply(u'foofoo')), 1)
+ self.assertEqual(len(idx.apply(u'barbar')), 0)
+ # test btree type
+ self.assertEqual(type(idx.apply(u'foofoo')), BTrees.LFBTree.LFBucket)
+ self.assertEqual(type(idx.apply(u'barbar')), BTrees.LFBTree.LFBucket)
+ # test unindex with long as id
+ idx.unindex_doc(longID)
+ self.assertEqual(idx.documentCount(), 0)
+
+
class TestFieldIndex(z3c.testing.InterfaceBaseTest):
def getTestInterface(self):
@@ -268,6 +309,7 @@
optionflags=doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS,
),
unittest.makeSuite(TestTextIndex),
+ unittest.makeSuite(TestTextIndex64),
unittest.makeSuite(TestFieldIndex),
unittest.makeSuite(TestValueIndex),
unittest.makeSuite(TestSetIndex),
More information about the checkins
mailing list