[Checkins] SVN: z3c.indexer/trunk/ implemented TextIndex with family64 BTree support which makes it possible to use long as index ids

Roger Ineichen roger at projekt01.ch
Sun Dec 27 09:09:08 EST 2009


Log message for revision 107173:
  implemented TextIndex with family64 BTree support which makes it
  possible to use long as index ids. Added tests for the new TextIndex64.

Changed:
  U   z3c.indexer/trunk/CHANGES.txt
  U   z3c.indexer/trunk/src/z3c/indexer/index.py
  U   z3c.indexer/trunk/src/z3c/indexer/interfaces.py
  U   z3c.indexer/trunk/src/z3c/indexer/tests.py

-=-
Modified: z3c.indexer/trunk/CHANGES.txt
===================================================================
--- z3c.indexer/trunk/CHANGES.txt	2009-12-27 13:45:38 UTC (rev 107172)
+++ z3c.indexer/trunk/CHANGES.txt	2009-12-27 14:09:08 UTC (rev 107173)
@@ -2,11 +2,13 @@
 CHANGES
 =======
 
-0.6.0dev (unreleased)
----------------------
+0.6.1 (unreleased)
+------------------
 
-- ...
+- Feature: implemented TextIndex with family64 BTree support which makes it
+  possible to use long as index ids. Added tests for the new TextIndex64.
 
+
 0.6.0 (2009-02-22)
 ------------------
 
@@ -33,6 +35,7 @@
 - Use new lightweight and ZMI-less versions of packages: zope.keyreference,
   zope.intid, zope.site, zope.container and zope.catalog (for tests).
 
+
 0.5.1 (2008-12-11)
 ------------------
 

Modified: z3c.indexer/trunk/src/z3c/indexer/index.py
===================================================================
--- z3c.indexer/trunk/src/z3c/indexer/index.py	2009-12-27 13:45:38 UTC (rev 107172)
+++ z3c.indexer/trunk/src/z3c/indexer/index.py	2009-12-27 14:09:08 UTC (rev 107173)
@@ -16,14 +16,19 @@
 """
 __docformat__ = "reStructuredText"
 
+import BTrees
 from BTrees.IFBTree import IFBTree
 from BTrees.IFBTree import union
 from BTrees.IFBTree import difference
 
 import zope.interface
+from zope.container import contained
 from zope.index.field import index as fieldindex
+from zope.index.text import lexicon as lex
 from zope.index.text import textindex
-from zope.container import contained
+from BTrees.Length import Length
+from zope.index.text import okapiindex
+
 from zc.catalog import index as zcindex
 from z3c.indexer import interfaces
 
@@ -46,6 +51,44 @@
     zope.interface.implements(interfaces.ITextIndex)
 
 
+class Lexicon64(lex.Lexicon):
+    """Lexicon with long key support used by p01.oid."""
+
+    def __init__(self, *pipeline):
+        family64 = BTrees.family64
+        #self._wids = OIBTree()  # word -> wid
+        self._wids = family64.OI.BTree()
+        #self._words = IOBTree() # wid -> word
+        self._words = family64.IO.BTree()
+        # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
+        # of vocabulary).  This can happen, e.g., if a query contains a word
+        # we never saw before, and that isn't a known stopword (or otherwise
+        # filtered out).  Returning a special wid value for OOV words is a
+        # way to let clients know when an OOV word appears.
+        self.wordCount = Length()
+        self._pipeline = pipeline
+
+    
+class TextIndex64(TextIndex):
+    """TextIndex with long key support used by p01.oid."""
+
+    zope.interface.implements(interfaces.ITextIndex64)
+
+    def __init__(self, lexicon=None, index=None):
+        """Provisional constructor.
+
+        This creates the lexicon and index if not passed in.
+        """
+        family64 = BTrees.family64
+        if lexicon is None:
+            lexicon = Lexicon64(lex.Splitter(), lex.CaseNormalizer(),
+                lex.StopWordRemover())
+        if index is None:
+            index = okapiindex.OkapiIndex(lexicon, family64)
+        self.lexicon = lexicon
+        self.index = index
+
+
 class FieldIndex(IndexMixin, fieldindex.FieldIndex,
     contained.Contained):
     """Field index based on zope.index.field.index.TextIndex
@@ -61,7 +104,7 @@
     def applyNotEq(self, not_value):
         all = self.apply((None, None))
         r = self.apply((not_value, not_value))
-        return difference(all, r)
+        return self.family.IF.difference(all, r)
 
     def applyBetween(self, min_value, max_value, exclude_min=False,
         exclude_max=False):
@@ -84,11 +127,11 @@
 
         if not results:
             # no applicable terms at all
-            return IFBTree()
+            return self.family.IF.BTree()
 
         result = results.pop(0)
         for res in results:
-            result = union(result, res)
+            result = self.family.IF.union(result, res)
         return result
 
 

Modified: z3c.indexer/trunk/src/z3c/indexer/interfaces.py
===================================================================
--- z3c.indexer/trunk/src/z3c/indexer/interfaces.py	2009-12-27 13:45:38 UTC (rev 107172)
+++ z3c.indexer/trunk/src/z3c/indexer/interfaces.py	2009-12-27 14:09:08 UTC (rev 107173)
@@ -144,6 +144,10 @@
         """Apply text query."""
 
 
+class ITextIndex64(ITextIndex):
+    """ITextIndex with family 64 BTree support."""
+
+
 class IFieldIndex(IIndex):
     """Value index."""
 

Modified: z3c.indexer/trunk/src/z3c/indexer/tests.py
===================================================================
--- z3c.indexer/trunk/src/z3c/indexer/tests.py	2009-12-27 13:45:38 UTC (rev 107172)
+++ z3c.indexer/trunk/src/z3c/indexer/tests.py	2009-12-27 14:09:08 UTC (rev 107173)
@@ -17,8 +17,9 @@
 __docformat__ = "reStructuredText"
 
 import unittest
+import doctest
+import BTrees
 import zope.component
-from zope.testing import doctest
 from zope.intid import IntIds
 from zope.intid.interfaces import IIntIds
 from zope.keyreference.interfaces import IKeyReference
@@ -60,6 +61,46 @@
         return index.TextIndex
 
 
+class TestTextIndex64(z3c.testing.InterfaceBaseTest):
+
+    def getTestInterface(self):
+        return interfaces.ITextIndex64
+
+    def getTestClass(self):
+        return index.TextIndex64
+
+    def test_long_key(self):
+        idx = self.makeTestObject()
+
+        # test int as id
+        intID = int(42)
+        idx.index_doc(intID, u'foo')
+        self.assertEqual(idx.documentCount(), 1)
+        # test query
+        self.assertEqual(len(idx.apply(u'foo')), 1)
+        self.assertEqual(len(idx.apply(u'bar')), 0)
+        # test btree type
+        self.assertEqual(type(idx.apply(u'foo')), BTrees.LFBTree.LFBucket)
+        self.assertEqual(type(idx.apply(u'bar')), BTrees.LFBTree.LFBucket)
+        # test unindex with int as id
+        idx.unindex_doc(intID)
+        self.assertEqual(idx.documentCount(), 0)
+
+        # test long as id
+        longID = int(123456789123456789)
+        idx.index_doc(longID, u'foofoo')
+        self.assertEqual(idx.documentCount(), 1)
+        # test query
+        self.assertEqual(len(idx.apply(u'foofoo')), 1)
+        self.assertEqual(len(idx.apply(u'barbar')), 0)
+        # test btree type
+        self.assertEqual(type(idx.apply(u'foofoo')), BTrees.LFBTree.LFBucket)
+        self.assertEqual(type(idx.apply(u'barbar')), BTrees.LFBTree.LFBucket)
+        # test unindex with long as id
+        idx.unindex_doc(longID)
+        self.assertEqual(idx.documentCount(), 0)
+
+
 class TestFieldIndex(z3c.testing.InterfaceBaseTest):
 
     def getTestInterface(self):
@@ -268,6 +309,7 @@
             optionflags=doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS,
             ),
         unittest.makeSuite(TestTextIndex),
+        unittest.makeSuite(TestTextIndex64),
         unittest.makeSuite(TestFieldIndex),
         unittest.makeSuite(TestValueIndex),
         unittest.makeSuite(TestSetIndex),



More information about the checkins mailing list