[Zope-CVS] CVS: Products/ZCTextIndex - ZCTextIndex.py:1.1.2.9 Splitter.py:NONE

Guido van Rossum guido@python.org
Fri, 3 May 2002 13:10:11 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv9684

Modified Files:
      Tag: TextIndexDS9-branch
	ZCTextIndex.py 
Removed Files:
      Tag: TextIndexDS9-branch
	Splitter.py 
Log Message:
Don't use the cumbersome and broken ZopeSplitter.
A plain re.findall(r"\w+", text) is fast enough.


=== Products/ZCTextIndex/ZCTextIndex.py 1.1.2.8 => 1.1.2.9 ===
-from Products.ZCTextIndex.Lexicon import Lexicon
-from Products.ZCTextIndex.NBest import NBest
-from Products.ZCTextIndex.QueryEngine import QueryEngine
-from Products.ZCTextIndex.QueryParser import QueryParser
-from Products.ZCTextIndex.Splitter import Splitter
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+# 
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+# 
+##############################################################################
 
-from Products.ZCTextIndex.StopDict import get_stopdict
+import re
 
-from Products.PluginIndexes.TextIndex.Splitter.ZopeSplitter.ZopeSplitter \
-     import ZopeSplitter
+import ZODB
+from Persistence import Persistent
 
 from Products.PluginIndexes.common.PluggableIndex \
      import PluggableIndexInterface
 
-import ZODB
-from Persistence import Persistent
+from Products.ZCTextIndex.Index import Index
+from Products.ZCTextIndex.Lexicon import Lexicon
+from Products.ZCTextIndex.NBest import NBest
+from Products.ZCTextIndex.QueryEngine import QueryEngine
+from Products.ZCTextIndex.QueryParser import QueryParser
+from Products.ZCTextIndex.StopDict import get_stopdict
 
 class ZCTextIndex(Persistent):
     __implements__ = PluggableIndexInterface
 
     def __init__(self, doc_attr="text"):
         self._fieldname = doc_attr
-        self.lexicon = Lexicon(Splitter(ZopeSplitter, get_stopdict(),
-                                        index_numbers=1))
+        self.lexicon = Lexicon(Splitter(),
+                               [CaseNormalizer(), StopWordRemover()])
         self.engine = QueryEngine()
         self.index = Index(self.lexicon)
         self.parser = QueryParser()
@@ -52,3 +63,23 @@
             return x()
         else:
             return x
+
+# Trivial pipeline elements
+
+class Splitter:
+
+    def process(self, text):
+        return re.findall(r"\w+", text)
+
+class CaseNormalizer:
+
+    def process(self, lst):
+        return [w.lower() for w in lst]
+
+class StopWordRemover:
+
+    dict = get_stopdict()
+
+    def process(self, lst):
+        d = self.dict
+        return [w for w in lst if len(w) > 1 and not d.has_key(w)]

=== Removed File Products/ZCTextIndex/Splitter.py ===