[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.41

Andreas Jung andreas@digicool.com
Thu, 21 Feb 2002 20:35:20 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv22963/lib/python/Products/PluginIndexes/TextIndexNG

Modified Files:
      Tag: ajung-textindexng-branch
	TextIndexNG.py 
Log Message:
- stemmer and splitter support is now fully functional
- near search is mostly working (positionMap handling still needs some
  tweaks to handle phrase searches with positional arguments)


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.40 => 1.2.2.41 ===
 
         # allow single characters
-        self.splitterCasefolding   = getattr(extra,'splitterCasefolding',1) or 1
+        self.splitterCasefolding   = getattr(extra,'splitterCasefolding',1) 
 
         # name of stemmer or None
-        self.useStemmer    = getattr(extra,'useStemmer',    None) 
+        self.useStemmer    = getattr(extra,'useStemmer',    None) or None
 
         # default operator to combine queries
         self.useOperator   = getattr(extra,'useOperator',   'and')
@@ -250,19 +250,6 @@
             self._v_Similarityfunc = getattr(Similarity,self.useSimilarity)
 
 
-        # get splitter function
-
-        self.splitterfunc = self.stemmerfunc = None
-
-        if self.useSplitter:
-            self.splitterfunc = Splitter.getSplitter(self.useSplitter)
-
-
-        # stemmer function
-
-        if self.useStemmer:
-            self.stemmerfunc = Stemmer.Stemmer(self.useStemmer).stem
-
         if self.lexicon:
 
             # try to get lexicon through acquisition
@@ -275,9 +262,9 @@
                 self._LEXICON = GlobbingLexiconNG()
                 debug('created new globbing lexicon')
 
-                if self.stemmerfunc:
+                if self.useStemmer:
                     debug('stemming disabled because globbing enabled')
-                    self.stemmerfunc = None
+                    self.useStemmer= None
 
             else:
                 self._LEXICON = LexiconNG()
@@ -414,15 +401,17 @@
 
 
         # Split the text into a list of words
-        # The splitterfunc just returns an iterator-like object.
 
-        words = self.splitterfunc(source,
-                            encoding     = encoding,
-                            casefolding  = self.splitterCasefolding,
-                            maxlen       = self.splitterMaxLen,
-                            indexnumbers = self.splitterIndexNumbers,
-                            singlechar   = self.splitterSingleChars
-                            ).split()
+        SP = Splitter.getSplitter(self.useSplitter)
+
+        words = SP( source,
+                    encoding     = encoding,
+                    casefolding  = self.splitterCasefolding,
+                    maxlen       = self.splitterMaxLen,
+                    indexnumbers = self.splitterIndexNumbers,
+                    singlechar   = self.splitterSingleChars
+                    ).split()
+
         T("Splitter")
 
         # apply stopwords list 
@@ -444,8 +433,9 @@
 
         # Stem all words in one run
 
-        if self.stemmerfunc:
-            words = self.stemmerfunc(words)
+        if self.useStemmer:
+            ST = Stemmer.Stemmer( self.useStemmer )
+            words = ST.stem(words)
 
         T("Stemmer")
 
@@ -579,7 +569,8 @@
 
         # Stem the word if necessary        
         if self.useStemmer:
-            word = self.stemmerfunc(word)
+            ST = Stemmer.Stemmer( self.useStemmer )
+            word = ST.stem(word)
             debug("\tStemming: ", word)
 
         # perform casefolding if necessary
@@ -808,7 +799,7 @@
 
         # Split retrieved document and obtain list of word positions
 
-        SP = self.splitterfunc(data)
+        SP = Splitter.getSplitter( self.useSplitter )( data )
 
         for word in words: