[Zope-Checkins] CVS: Zope/lib/python/Products/ZCTextIndex - Lexicon.py:1.18.26.2

Casey Duncan casey@zope.com
Wed, 4 Jun 2003 23:52:24 -0400


Update of /cvs-repository/Zope/lib/python/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv5611

Modified Files:
      Tag: casey-zctextindex-fewer-conflicts-branch
	Lexicon.py 
Log Message:
Finish refactor of Lexicon to remove conflict-causing counters
Move new wid generation back where it belongs
Add a sanity check to the wid generation
Remove counter references in mhindex test module


=== Zope/lib/python/Products/ZCTextIndex/Lexicon.py 1.18.26.1 => 1.18.26.2 ===
--- Zope/lib/python/Products/ZCTextIndex/Lexicon.py:1.18.26.1	Thu May 29 23:53:11 2003
+++ Zope/lib/python/Products/ZCTextIndex/Lexicon.py	Wed Jun  4 23:51:53 2003
@@ -39,13 +39,8 @@
         # filtered out).  Returning a special wid value for OOV words is a
         # way to let clients know when an OOV word appears.
         self.length = Length()
-        self._nextwid = 1
         self._pipeline = pipeline
 
-        # Keep some statistics about indexing
-        self._nbytes = 0 # Number of bytes indexed (at start of pipeline)
-        self._nwords = 0 # Number of words indexed (after pipeline)
-
     def length(self):
         """Return the number of unique terms in the lexicon."""
         # Overridden in instances
@@ -62,11 +57,8 @@
 
     def sourceToWordIds(self, text):
         last = _text2list(text)
-        #for t in last:
-        #    self._nbytes += len(t)
         for element in self._pipeline:
             last = element.process(last)
-        #self._nwords += len(last)
         return map(self._getWordIdCreate, last)
 
     def termToWordIds(self, text):
@@ -135,16 +127,16 @@
     def _getWordIdCreate(self, word):
         wid = self._wids.get(word)
         if wid is None:
-            self.length.change(1)
-            wid = self.length()
+            wid = self._new_wid()
             self._wids[word] = wid
             self._words[wid] = word
         return wid
 
     def _new_wid(self):
-        wid = self._nextwid
-        self._nextwid += 1
-        return wid
+        self.length.change(1)
+        while self._words.has_key(self.length()): # just to be safe
+            self.length.change(1)
+        return self.length()
 
 def _text2list(text):
     # Helper: splitter input may be a string or a list of strings