[Zope-CVS] CVS: Products/ZCTextIndex - HTMLSplitter.py:1.5 Lexicon.py:1.6 RiceCode.py:1.3 ZCTextIndex.py:1.8

Tim Peters tim.one@comcast.net
Tue, 14 May 2002 23:50:07 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv17116

Modified Files:
	HTMLSplitter.py Lexicon.py RiceCode.py ZCTextIndex.py 
Log Message:
Whitespace normalization.


=== Products/ZCTextIndex/HTMLSplitter.py 1.4 => 1.5 ===
         for t in text:
             splat += self._split(t)
-        return splat    
+        return splat
 
-    def _split(self, text):    
+    def _split(self, text):
         text = text.lower()
         remove = ["<[^>]*>",
                   "&[A-Za-z]+;",


=== Products/ZCTextIndex/Lexicon.py 1.5 => 1.6 ===
                 wids.append(wid)
         return wids
-        
+
     def get_word(self, wid):
         """Return the word for the given word id"""
         return self._words[wid]


=== Products/ZCTextIndex/RiceCode.py 1.2 => 1.3 ===
 
 Based on a Java implementation by Glen McCluskey described in a Usenix
- ;login: article at 
+ ;login: article at
 http://www.usenix.org/publications/login/2000-4/features/java.html
 
 McCluskey's article explains the approach as follows.  The encoding
@@ -33,7 +33,7 @@
 
     def __getitem__(self, i):
         byte, offset = divmod(i, 8)
-        mask = 2 ** offset 
+        mask = 2 ** offset
         if self.bytes[byte] & mask:
             return 1
         else:
@@ -41,12 +41,12 @@
 
     def __setitem__(self, i, val):
         byte, offset = divmod(i, 8)
-        mask = 2 ** offset 
+        mask = 2 ** offset
         if val:
             self.bytes[byte] |= mask
         else:
             self.bytes[byte] &= ~mask
-        
+
     def __len__(self):
         return self.nbits
 
@@ -78,7 +78,7 @@
 
     def init(self, m):
         self.m = m
-        self.lower = (1 << m) - 1 
+        self.lower = (1 << m) - 1
         self.mask = 1 << (m - 1)
 
     def append(self, val):
@@ -123,7 +123,7 @@
 
     def tostring(self):
         """Return a binary string containing the encoded data.
-        
+
         The binary string may contain some extra zeros at the end.
         """
         return self.bits.tostring()


=== Products/ZCTextIndex/ZCTextIndex.py 1.7 => 1.8 ===
 class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
     """Persistent TextIndex"""
-    
+
     __implements__ = PluggableIndexInterface
-    
+
     meta_type = 'ZCTextIndex'
-    
+
     manage_options= (
         {'label': 'Settings', 'action': 'manage_main'},
     )
-    
+
     query_options = ['query']
 
     def __init__(self, id, extra, caller, index_factory=Index):
         self.id = id
         self._fieldname = extra.doc_attr
         lexicon = getattr(caller, extra.lexicon_id, None)
-        
+
         if lexicon is None:
             raise LookupError, 'Lexicon "%s" not found' % extra.lexicon_id
-        
+
         if not ILexicon.isImplementedBy(lexicon):
             raise ValueError, \
                 'Object "%s" does not implement lexicon interface' \
@@ -63,7 +63,7 @@
         self.lexicon = lexicon
         self.index = index_factory(self.lexicon)
         self.parser = QueryParser()
-        
+
     ## Pluggable Index APIs ##
 
     def index_object(self, docid, obj, threshold=None):
@@ -78,7 +78,7 @@
 
     def _apply_index(self, request, cid=''):
         """Apply query specified by request, a mapping containing the query.
-           
+
         Returns two object on success, the resultSet containing the
         matching record numbers and a tuple containing the names of
         the fields used
@@ -86,7 +86,7 @@
         Returns None if request is not valid for this index.
         """
         record = parseIndexRequest(request, self.id, self.query_options)
-        if record.keys is None: 
+        if record.keys is None:
             return None
         query_str = ' '.join(record.keys)
         tree = self.parser.parseQuery(query_str)
@@ -100,11 +100,11 @@
         chooser = NBest(nbest)
         chooser.addmany(results.items())
         return chooser.getbest()
-    
+
     def numObjects(self):
         """Return number of object indexed"""
         return self.index.length()
-        
+
     def getEntryForObject(self, documentId, default=None):
         """Return the list of words indexed for documentId"""
         try:
@@ -113,28 +113,28 @@
             return default
         get_word = self.lexicon.get_word
         return [get_word(wid) for wid in word_ids]
-        
+
     def clear(self):
         """reinitialize the index"""
         self.index = Index(self.lexicon)
-        
+
     def _get_object_text(self, obj):
         x = getattr(obj, self._fieldname)
         if callable(x):
             return x()
         else:
             return x
-            
+
     ## User Interface Methods ##
-    
+
     manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
 
 InitializeClass(ZCTextIndex)
 
-def manage_addZCTextIndex(self, id, extra=None, REQUEST=None, 
+def manage_addZCTextIndex(self, id, extra=None, REQUEST=None,
                           RESPONSE=None):
     """Add a text index"""
-    return self.manage_addIndex(id, 'ZCTextIndex', extra, 
+    return self.manage_addIndex(id, 'ZCTextIndex', extra,
                                 REQUEST, RESPONSE, REQUEST.URL3)
 
 manage_addZCTextIndexForm = DTMLFile('dtml/addZCTextIndex', globals())
@@ -155,17 +155,15 @@
     self._setObject(id, lexicon)
     if REQUEST is not None:
         return self.manage_main(self, REQUEST, update_menu=1)
-        
+
 class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
     """Persistent Lexcion for ZCTextIndex"""
-    
+
     meta_type = 'ZCTextIndex Lexicon'
-    
+
     def __init__(self, id, title='', *pipeline):
         self.id = str(id)
         self.title = str(title)
         PLexicon.inheritedAttribute('__init__')(self, *pipeline)
-        
+
 InitializeClass(PLexicon)
-    
-