[Zope-Checkins] CVS: Zope/lib/python/Products/ZCTextIndex - BaseIndex.py:1.29.12.1 CosineIndex.py:1.22.74.1 IIndex.py:1.11.70.1 OkapiIndex.py:1.29.74.3

Casey Duncan casey@zope.com
Thu, 5 Jun 2003 15:02:51 -0400


Update of /cvs-repository/Zope/lib/python/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv14371

Modified Files:
      Tag: casey-zctextindex-fewer-conflicts-branch
	BaseIndex.py CosineIndex.py IIndex.py OkapiIndex.py 
Log Message:
Fix flaw in query mechanism by removing a len(BTree) and replacing it with a cached length lookup. This should improve performance and scalability.
Add document_count method for this purpose. This method is overridden by a BTrees.Length.Length object in instances.
Added code to opportunistically cache the length when the index is changed.
Added tests for length, and upgrade code.
Updated interface.


=== Zope/lib/python/Products/ZCTextIndex/BaseIndex.py 1.29 => 1.29.12.1 ===
--- Zope/lib/python/Products/ZCTextIndex/BaseIndex.py:1.29	Tue Feb  4 13:29:41 2003
+++ Zope/lib/python/Products/ZCTextIndex/BaseIndex.py	Thu Jun  5 15:02:20 2003
@@ -20,7 +20,7 @@
 from BTrees.IOBTree import IOBTree
 from BTrees.IIBTree import IIBTree, IIBucket, IITreeSet
 from BTrees.IIBTree import intersection, difference
-import BTrees.Length
+from BTrees.Length import Length
 
 from Products.ZCTextIndex.IIndex import IIndex
 from Products.ZCTextIndex import WidCode
@@ -83,12 +83,18 @@
         self._docwords = IOBTree()
 
         # Use a BTree length for efficient length computation w/o conflicts
-        self.length = BTrees.Length.Length()
+        self.length = Length()
+        self.document_count = Length()
 
     def length(self):
         """Return the number of words in the index."""
         # This is overridden per instance
         return len(self._wordinfo)
+        
+    def document_count(self):
+        """Return the number of documents in the index"""
+        # This is overridden per instance
+        return len(self._docweight)        
 
     def get_words(self, docid):
         """Return a list of the wordids for a given docid."""
@@ -104,6 +110,11 @@
         self._mass_add_wordinfo(wid2weight, docid)
         self._docweight[docid] = docweight
         self._docwords[docid] = WidCode.encode(wids)
+        try:
+            self.document_count.change(1)
+        except AttributeError:
+            # Upgrade document_count to Length object
+            self.document_count = Length(self.document_count())
         return len(wids)
 
     # A subclass may wish to extend or override this.  This is for adjusting
@@ -165,6 +176,11 @@
             self._del_wordinfo(wid, docid)
         del self._docwords[docid]
         del self._docweight[docid]
+        try:
+            self.document_count.change(-1)
+        except AttributeError:
+            # Upgrade document_count to Length object
+            self.document_count = Length(self.document_count())
 
     def search(self, term):
         wids = self._lexicon.termToWordIds(term)


=== Zope/lib/python/Products/ZCTextIndex/CosineIndex.py 1.22 => 1.22.74.1 ===
--- Zope/lib/python/Products/ZCTextIndex/CosineIndex.py:1.22	Tue May 28 19:42:20 2002
+++ Zope/lib/python/Products/ZCTextIndex/CosineIndex.py	Thu Jun  5 15:02:20 2003
@@ -69,7 +69,7 @@
     def _search_wids(self, wids):
         if not wids:
             return []
-        N = float(len(self._docweight))
+        N = float(self.document_count())
         L = []
         DictType = type({})
         for wid in wids:
@@ -86,7 +86,7 @@
         wids = []
         for term in terms:
             wids += self._lexicon.termToWordIds(term)
-        N = float(len(self._docweight))
+        N = float(self.document_count())
         sum = 0.0
         for wid in self._remove_oov_wids(wids):
             wt = inverse_doc_frequency(len(self._wordinfo[wid]), N)


=== Zope/lib/python/Products/ZCTextIndex/IIndex.py 1.11 => 1.11.70.1 ===
--- Zope/lib/python/Products/ZCTextIndex/IIndex.py:1.11	Wed Aug 14 18:25:14 2002
+++ Zope/lib/python/Products/ZCTextIndex/IIndex.py	Thu Jun  5 15:02:20 2003
@@ -20,6 +20,9 @@
     """Interface for an Index."""
 
     def length():
+        """Return the number of words in the index."""
+        
+    def document_count():
         """Return the number of documents in the index."""
 
     def get_words(docid):
@@ -62,10 +65,13 @@
         """
 
     def index_doc(docid, text):
-        "XXX"
+        """Add a document with the specified id and text to the index. If a
+        document by that id already exists, replace its text with the new
+        text provided
+        """
 
     def unindex_doc(docid):
-        "XXX"
+        """Remove the document with the specified id from the index"""
 
     def has_doc(docid):
         """Returns true if docid is an id of a document in the index"""


=== Zope/lib/python/Products/ZCTextIndex/OkapiIndex.py 1.29.74.2 => 1.29.74.3 ===
--- Zope/lib/python/Products/ZCTextIndex/OkapiIndex.py:1.29.74.2	Thu Jun  5 00:41:58 2003
+++ Zope/lib/python/Products/ZCTextIndex/OkapiIndex.py	Thu Jun  5 15:02:20 2003
@@ -86,7 +86,7 @@
     def _search_wids(self, wids):
         if not wids:
             return []
-        N = float(len(self._docweight))  # total # of docs
+        N = float(self.document_count())  # total # of docs
         try:
             doclen = self._totaldoclen()
         except TypeError:
@@ -135,7 +135,7 @@
     def _search_wids(self, wids):
         if not wids:
             return []
-        N = float(len(self._docweight))  # total # of docs
+        N = float(self.document_count())  # total # of docs
         try:
             doclen = self._totaldoclen()
         except TypeError: