[Zope-CVS] CVS: Products/ZCTextIndex - IPipelineElementFactory.py:1.1 PipelineFactory.py:1.1 HTMLSplitter.py:1.7 Lexicon.py:1.13 ZCTextIndex.py:1.20 __init__.py:1.6

Casey Duncan casey@zope.com
Wed, 22 May 2002 13:13:09 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv28796

Modified Files:
	HTMLSplitter.py Lexicon.py ZCTextIndex.py __init__.py 
Added Files:
	IPipelineElementFactory.py PipelineFactory.py 
Log Message:
Improved Zope integration

  * A pipeline factory registry now allows registration of possible
    pipeline elements for use by Zope lexicons.

  * ZMI constructor form for lexicon uses pipeline registry to generate form
    fields

  * ZMI constructor form for ZCTextIndex allows you to choose between
    the Okapi and Cosine relevance algorithms


=== Added File Products/ZCTextIndex/IPipelineElementFactory.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################

from Interface import Base as Interface

class IPipelineElementFactory(Interface):
    """Interface for a registry that creates pipeline elements by name.

    The methods below are declared without ``self`` because this class
    only describes the contract; it is not an implementation.
    """

    def registerFactory(name, factory):
        """Register the pipeline element factory ``factory`` under ``name``.

        Each name can be registered only once. Registering a name that is
        already in use raises a ValueError.
        """
        
    def getFactoryNames():
        """Return a sorted list of the registered pipeline factory names.
        """
        
    def instantiate(name):
        """Instantiate and return the pipeline element registered as ``name``.

        Raises a KeyError if ``name`` is not registered.
        """


=== Added File Products/ZCTextIndex/PipelineFactory.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################

from Products.ZCTextIndex.IPipelineElementFactory \
     import IPipelineElementFactory
     
class PipelineElementFactory:
    """Registry that maps names to pipeline element factories.

    Implements IPipelineElementFactory: factories are registered once
    under a unique name and can later be listed or instantiated by name.
    """

    __implements__ = IPipelineElementFactory

    def __init__(self):
        # Maps each registered name to its factory callable.
        self._elements = {}

    def registerFactory(self, name, factory):
        """Register ``factory`` under ``name``.

        Raises a ValueError, naming the offending entry, if ``name`` has
        already been registered.
        """
        if self._elements.has_key(name):
            # The name is wrapped in a 1-tuple so that a tuple-valued name
            # is formatted as a single value instead of being unpacked.
            raise ValueError(
                'ZCTextIndex pipeline element named "%s" already registered'
                % (name,))

        self._elements[name] = factory

    def getFactoryNames(self):
        """Return the registered names as a sorted list."""
        names = self._elements.keys()
        names.sort()
        return names

    def instantiate(self, name):
        """Call the factory registered under ``name`` and return the result.

        The factory is called with no arguments. Raises a KeyError if
        ``name`` is not registered.
        """
        return self._elements[name]()
        

# Module-level registry for splitter factories. Splitter classes register
# themselves here when their defining module is imported.
splitter_factory = PipelineElementFactory()

# Module-level registry for the other pipeline element factories, such as
# the case normalizer and the stop word remover.
element_factory = PipelineElementFactory()


=== Products/ZCTextIndex/HTMLSplitter.py 1.6 => 1.7 ===
 
 from Products.ZCTextIndex.ISplitter import ISplitter
+from Products.ZCTextIndex.PipelineFactory import splitter_factory
 
 import re
 
@@ -43,6 +44,8 @@
         rx = re.compile("[A-Za-z]")
         return [word for word in text.split()
                 if len(word) > 1 and rx.search(word)]
+                
+splitter_factory.registerFactory('HTML Word Splitter', HTMLWordSplitter)
 
 if __name__ == "__main__":
     import sys


=== Products/ZCTextIndex/Lexicon.py 1.12 => 1.13 ===
 from Products.ZCTextIndex.ILexicon import ILexicon
 from Products.ZCTextIndex.StopDict import get_stopdict
+from PipelineFactory import splitter_factory, element_factory
 
 class Lexicon:
 
@@ -140,11 +141,15 @@
         for s in lst:
             result += self.rxGlob.findall(s)
         return result
+        
+splitter_factory.registerFactory('Regex Splitter', Splitter)
 
 class CaseNormalizer:
 
     def process(self, lst):
         return [w.lower() for w in lst]
+        
+element_factory.registerFactory('Case Normalizer', CaseNormalizer)
 
 class StopWordRemover:
 
@@ -161,3 +166,6 @@
     else:
         def process(self, lst):
             return self._process(self.dict, lst)
+            
+            
+element_factory.registerFactory('Stop Word Remover', StopWordRemover)


=== Products/ZCTextIndex/ZCTextIndex.py 1.19 => 1.20 ===
 from Products.PluginIndexes.common.util import parseIndexRequest
 
-from Products.ZCTextIndex.OkapiIndex import OkapiIndex
 from Products.ZCTextIndex.ILexicon import ILexicon
 from Products.ZCTextIndex.Lexicon import \
      Lexicon, Splitter, CaseNormalizer, StopWordRemover
 from Products.ZCTextIndex.NBest import NBest
 from Products.ZCTextIndex.QueryParser import QueryParser
+from PipelineFactory import splitter_factory, element_factory
+
+from Products.ZCTextIndex.CosineIndex import CosineIndex
+from Products.ZCTextIndex.OkapiIndex import OkapiIndex
+index_types = {'Okapi BM25 Rank':OkapiIndex, 
+               'Cosine Measure':CosineIndex}
 
 class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
     """Persistent TextIndex"""
@@ -50,7 +55,7 @@
 
     ## Constructor ##
 
-    def __init__(self, id, extra, caller, index_factory=OkapiIndex):
+    def __init__(self, id, extra, caller, index_factory=None):
         self.id = id
         self._fieldname = extra.doc_attr
         lexicon = getattr(caller, extra.lexicon_id, None)
@@ -64,7 +69,15 @@
                 % lexicon.getId()
 
         self.lexicon = lexicon
-        self._index_factory = index_factory
+
+        if index_factory is None:
+            if extra.index_type not in index_types.keys():
+                raise ValueError, 'Invalid index type "%s"' % extra.index_type
+            self._index_factory = index_types[extra.index_type]
+            self._index_type = extra.index_type
+        else:
+            self._index_factory = index_factory
+            
         self.clear()
 
     ## External methods not in the Pluggable Index API ##
@@ -144,6 +157,10 @@
     ## User Interface Methods ##
 
     manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
+    
+    def getIndexType(self):
+        """Return index type string"""
+        return getattr(self, '_index_type', self._index_factory.__name__)
 
 InitializeClass(ZCTextIndex)
 
@@ -157,29 +174,39 @@
 
 manage_addLexiconForm = DTMLFile('dtml/addLexicon', globals())
 
-def manage_addLexicon(self, id, title, splitter=None, normalizer=None,
-                      stopwords=None, REQUEST=None):
+def manage_addLexicon(self, id, title='', splitter_name=None, 
+                      element_names=None, REQUEST=None):
     """Add ZCTextIndex Lexicon"""
-    elements = []
-    if splitter:
-        elements.append(Splitter())
-    if normalizer:
-        elements.append(CaseNormalizer())
-    if stopwords:
-        elements.append(StopWordRemover())
+    
+    elements = [element_factory.instantiate(name) for name in element_names]
+    
+    if splitter_name:
+        elements.insert(0, splitter_factory.instantiate(splitter_name))
+
     lexicon = PLexicon(id, title, *elements)
     self._setObject(id, lexicon)
     if REQUEST is not None:
         return self.manage_main(self, REQUEST, update_menu=1)
 
 class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
-    """Persistent Lexcion for ZCTextIndex"""
+    """Persistent Lexicon for ZCTextIndex"""
 
     meta_type = 'ZCTextIndex Lexicon'
+    
+    manage_options = ({'label':'Overview', 'action':'manage_main'},) + \
+                     SimpleItem.manage_options
 
     def __init__(self, id, title='', *pipeline):
         self.id = str(id)
         self.title = str(title)
         PLexicon.inheritedAttribute('__init__')(self, *pipeline)
+        
+    ## User Interface Methods ##
+        
+    def getPipelineNames(self):
+        """Return list of names of pipeline element classes"""
+        return [element.__class__.__name__ for element in self._pipeline]
+         
+    manage_main = DTMLFile('dtml/manageLexicon', globals())
 
 InitializeClass(PLexicon)


=== Products/ZCTextIndex/__init__.py 1.5 => 1.6 ===
 """
 
+from PipelineFactory import splitter_factory, element_factory
+from Products.ZCTextIndex import ZCTextIndex, HTMLSplitter
+
 def initialize(context):
-    from Products.ZCTextIndex import ZCTextIndex
 
     context.registerClass(
         ZCTextIndex.ZCTextIndex,
         permission = 'Add Pluggable Index',
         constructors = (ZCTextIndex.manage_addZCTextIndexForm,
-                      ZCTextIndex.manage_addZCTextIndex),
+                        ZCTextIndex.manage_addZCTextIndex,
+                        getIndexTypes),
         icon='www/index.gif',
         visibility=None
     )
@@ -32,6 +35,19 @@
         ZCTextIndex.PLexicon,
         permission = 'Add Vocabularies',
         constructors = (ZCTextIndex.manage_addLexiconForm,
-                        ZCTextIndex.manage_addLexicon),
+                        ZCTextIndex.manage_addLexicon,
+                        getSplitterNames, getElementNames),
         icon='www/lexicon.gif'
     )
+    
+## Functions below are for use in the ZMI constructor forms ##
+    
+def getSplitterNames(self):
+    return splitter_factory.getFactoryNames()
+    
+def getElementNames(self):
+    return element_factory.getFactoryNames()
+    
+def getIndexTypes(self):
+    return ZCTextIndex.index_types.keys()
+