[Zope3-checkins] SVN: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/ Updated IInjection to emphasize indexing of values (for documents),

Jim Fulton jim at zope.com
Mon Dec 6 10:04:19 EST 2004


Log message for revision 28574:
  Updated IInjection to emphasize indexing of values (for documents),
  rather than documents.
  
  Added IIndexSearch, which provides a search that returns integer sets
  or mappings.
  
  Updated field indexes to provide IIndexSearch as their only search
  method.
  
  Replaced the field-index tests with a doctest.
  

Changed:
  A   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/README.txt
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/index.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/tests/
  A   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/tests.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py

-=-
Added: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/README.txt
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/README.txt	2004-12-06 14:50:20 UTC (rev 28573)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/README.txt	2004-12-06 15:04:19 UTC (rev 28574)
@@ -0,0 +1,100 @@
+Field Indexes
+=============
+
+Field indexes index orderable values.  Note that they don't check for
+orderability. That is, all of the values added to the index must be
+orderable together. It is up to applications to provide only mutually
+orderable values.
+
+    >>> from zope.index.field import FieldIndex
+
+    >>> index = FieldIndex()
+    >>> index.index_doc(0, 6)
+    >>> index.index_doc(1, 26)
+    >>> index.index_doc(2, 94)
+    >>> index.index_doc(3, 68)
+    >>> index.index_doc(4, 30)
+    >>> index.index_doc(5, 68)
+    >>> index.index_doc(6, 82)
+    >>> index.index_doc(7, 30)
+    >>> index.index_doc(8, 43)
+    >>> index.index_doc(9, 15)
+
+Field indexes are searched with apply_index.  The argument is a tuple
+with a minimum and maximum value:
+
+    >>> index.apply_index((30, 70))
+    IISet([3, 4, 5, 7, 8])
+
+Open-ended ranges can be provided by providing None as an end point:
+
+    >>> index.apply_index((30, None))
+    IISet([2, 3, 4, 5, 6, 7, 8])
+
+    >>> index.apply_index((None, 70))
+    IISet([0, 1, 3, 4, 5, 7, 8, 9])
+
+    >>> index.apply_index((None, None))
+    IISet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+
+To do an exact value search, supply equal minimum and maximum values:
+
+    >>> index.apply_index((30, 30))
+    IISet([4, 7])
+
+    >>> index.apply_index((70, 70))
+    IISet([])
+
+Field indexes support basic statistics:
+
+    >>> index.documentCount()
+    10
+    >>> index.wordCount()
+    8
+
+Documents can be reindexed:
+
+    >>> index.apply_index((15, 15))
+    IISet([9])
+    >>> index.index_doc(9, 14)
+
+    >>> index.apply_index((15, 15))
+    IISet([])
+    >>> index.apply_index((14, 14))
+    IISet([9])
+    
+Documents can be unindexed:
+
+    >>> index.unindex_doc(7)
+    >>> index.documentCount()
+    9
+    >>> index.wordCount()
+    8
+    >>> index.unindex_doc(8)
+    >>> index.documentCount()
+    8
+    >>> index.wordCount()
+    7
+
+    >>> index.apply_index((30, 70))
+    IISet([3, 4, 5])
+
+Unindexing a document id that isn't present is ignored:
+
+    >>> index.unindex_doc(8)
+    >>> index.unindex_doc(80)
+    >>> index.documentCount()
+    8
+    >>> index.wordCount()
+    7
+
+We can also clear the index entirely:
+
+    >>> index.clear()
+    >>> index.documentCount()
+    0
+    >>> index.wordCount()
+    0
+
+    >>> index.apply_index((30, 70))
+    IISet([])

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/index.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/index.py	2004-12-06 14:50:20 UTC (rev 28573)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/index.py	2004-12-06 15:04:19 UTC (rev 28574)
@@ -15,24 +15,25 @@
 
 $Id$
 """
-from persistent import Persistent
+import persistent
 
 from BTrees.IOBTree import IOBTree
 from BTrees.OOBTree import OOBTree
-from BTrees.IIBTree import IITreeSet, IISet, union
+from BTrees.IIBTree import IITreeSet, IISet, multiunion
 from BTrees.Length import Length
 
-from types import ListType, TupleType
-from zope.interface import implements
+import zope.interface
 
-from zope.index.interfaces import IInjection, ISimpleQuery
-from zope.index.interfaces import IStatistics, IRangeQuerying
+from zope.index import interfaces
 
+class FieldIndex(persistent.Persistent):
 
-class FieldIndex(Persistent):
+    zope.interface.implements(
+        interfaces.IInjection,
+        interfaces.IStatistics,
+        interfaces.IIndexSearch,
+        )
 
-    implements(IRangeQuerying, IInjection, ISimpleQuery, IStatistics)
-
     def __init__(self):
         self.clear()
 
@@ -52,71 +53,47 @@
         """See interface IStatistics"""
         return len(self._fwd_index)
 
-    def has_doc(self, docid):
-        return bool(self._rev_index.has_key(docid))
-
     def index_doc(self, docid, value):
         """See interface IInjection"""
-        if self.has_doc(docid):       # unindex doc if present
+        rev_index = self._rev_index
+        if docid in rev_index:
+            # unindex doc if present
             self.unindex_doc(docid)
-        self._insert_forward(docid, value)
-        self._insert_reverse(docid, value)
 
+        # Insert into forward index.
+        set = self._fwd_index.get(value)
+        if set is None:
+            set = IITreeSet()
+            self._fwd_index[value] = set
+        set.insert(docid)
+
+        # increment doc count
+        self._num_docs.change(1)
+
+        # Insert into reverse index.
+        rev_index[docid] = value
+
     def unindex_doc(self, docid):
         """See interface IInjection"""
-        try:      # ignore non-existing docids, don't raise
-            value = self._rev_index[docid]
-        except KeyError:
-            return
+        rev_index = self._rev_index
+        value = rev_index.get(docid)
+        if value is None:
+            return # not in index
 
-        del self._rev_index[docid]
+        del rev_index[docid]
 
         try:
-            self._fwd_index[value].remove(docid)
-            if len(self._fwd_index[value]) == 0:
-                del self._fwd_index[value]
+            set = self._fwd_index[value]
+            set.remove(docid)
         except KeyError:
+            # This is fishy, but we don't want to raise an error.
+            # We should probably log something.
             pass
-        self._num_docs.change(-1)
 
-    def search(self, values):
-        "See interface ISimpleQuerying"
-        # values can either be a single value or a sequence of
-        # values to be searched.
-        if isinstance(values, (ListType, TupleType)):
-            result = IISet()
-            for value in values:
-                try:
-                    r = IISet(self._fwd_index[value])
-                except KeyError:
-                    continue
-                # the results of all subsearches are combined using OR
-                result = union(result, r)
-        else:
-            try:
-                result = IISet(self._fwd_index[values])
-            except KeyError:
-                result = IISet()
+        if not set:
+            del self._fwd_index[value]
 
-        return result
+        self._num_docs.change(-1)
 
-    def query(self, querytext, start=0, count=None):
-        """See interface IQuerying"""
-        res = self.search(querytext)
-        if start or count:
-            res = res[start:start+count]
-        return res
-
-    def rangesearch(self, minvalue, maxvalue):
-        return IISet(self._fwd_index.keys(minvalue, maxvalue))
-
-    def _insert_forward(self, docid, value):
-        """Insert into forward index."""
-        if not self._fwd_index.has_key(value):
-            self._fwd_index[value] = IITreeSet()
-        self._fwd_index[value].insert(docid)
-        self._num_docs.change(1)
-
-    def _insert_reverse(self, docid, value):
-        """Insert into reverse index."""
-        self._rev_index[docid] = value
+    def apply_index(self, query):
+        return multiunion(self._fwd_index.values(*query))        

Copied: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/tests.py (from rev 28563, Zope3/trunk/src/zope/index/field/tests/test_fieldindex.py)
===================================================================
--- Zope3/trunk/src/zope/index/field/tests/test_fieldindex.py	2004-12-04 19:04:40 UTC (rev 28563)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/tests.py	2004-12-06 15:04:19 UTC (rev 28574)
@@ -0,0 +1,25 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Test field index
+
+$Id$
+"""
+
+def test_suite():
+    from zope.testing.doctest import DocFileSuite
+    return DocFileSuite('README.txt')
+
+if __name__=='__main__':
+    import unittest
+    unittest.main(defaultTest='test_suite')

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py	2004-12-06 14:50:20 UTC (rev 28573)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py	2004-12-06 15:04:19 UTC (rev 28574)
@@ -21,11 +21,13 @@
 class IInjection(Interface):
     """Interface for injecting documents into an index."""
 
-    def index_doc(docid, doc):
+    def index_doc(docid, value):
         """Add a document to the index.
 
         docid: int, identifying the document
-        doc: the document to be indexed
+
+        value: the value to be indexed
+
         return: None
 
         This can also be used to reindex documents.
@@ -35,6 +37,7 @@
         """Remove a document from the index.
 
         docid: int, identifying the document
+
         return: None
 
         This call is a no-op if the docid isn't in the index, however,
@@ -45,6 +48,40 @@
         """Unindex all documents indexed by the index
         """
 
+class IIndexSearch(Interface):
+
+    def apply_index(query):
+        """Apply an index to the given query
+
+        The type of the query is index specific.
+
+        TODO
+            This is somewhat problematic. It means that application
+            code that calls apply_index has to be aware of the
+            expected query type. This isn't too much of a problem now,
+            as we have no more general query language nor do we have
+            any sort of automatic query-form generation.
+
+            It would be nice to have a system later for having
+            query-form generation or, perhaps, some sort of query
+            language. At that point, we'll need some sort of way to
+            determine query types, presumably through introspection of
+            the index objects.
+
+        A result is returned that is:
+
+        - An IIBTree or an IIBucket mapping document ids to integer
+          scores for document ids of documents that match the query,
+
+        - An IISet or IITreeSet containing document ids of documents
+          that match the query, or
+
+        - None, indicating that the index could not use the query and
+          that the result should have no impact on determining a final
+          result.
+
+        """
+
 class IQuerying(Interface):
     """An index that can be queried by some text and returns a result set."""
 
@@ -113,22 +150,6 @@
         query.
         """
 
-class IRangeQuerying(Interface):
-    """Query over a range of objects."""
-
-    def rangesearch(minval, maxval):
-        """Execute a range search.
-
-           Return an IISet of docids for all docs where
-
-           minval <= value <= maxval   if minval<=maxval and 
-                                       both minval and maxval are not None
-
-           Value <= maxval             if minval is not None 
-
-           value >= minval             if maxval is not None
-        """             
-
 class IKeywordQuerying(Interface):
     """Query over a set of keywords, seperated by white space."""
 



More information about the Zope3-Checkins mailing list