[Checkins] SVN: zope.index/trunk/ Refactor KeywordIndex not to do normalizing by default, as keywords can be non-string objects as well.

Dan Korostelev nadako at gmail.com
Tue Dec 30 06:12:16 EST 2008


Log message for revision 94439:
  Refactor KeywordIndex not to do normalizing by default, as keywords can be non-string objects as well.

Changed:
  U   zope.index/trunk/CHANGES.txt
  U   zope.index/trunk/src/zope/index/interfaces.py
  U   zope.index/trunk/src/zope/index/keyword/__init__.py
  U   zope.index/trunk/src/zope/index/keyword/index.py
  U   zope.index/trunk/src/zope/index/keyword/tests.py

-=-
Modified: zope.index/trunk/CHANGES.txt
===================================================================
--- zope.index/trunk/CHANGES.txt	2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/CHANGES.txt	2008-12-30 11:12:16 UTC (rev 94439)
@@ -29,6 +29,28 @@
   
   Where ``filter`` is an instance of FilteredSet.
 
+- IMPORTANT: KeywordIndex is now non-normalizing, because
+  it can be used with non-string keywords, for which
+  case-normalizing doesn't make any sense. Instead, it provides
+  the ``normalize`` method that can be overridden by subclasses
+  to provide some normalization.
+  
+  The CaseInsensitiveKeywordIndex class is now provided; it
+  does case-normalization for string-based keywords. The old
+  CaseSensitiveKeywordIndex is gone; applications should use
+  KeywordIndex instead.
+
+It looks like KeywordIndex/TopicIndex were sort of abandonware
+and weren't used by application developers, so after some
+discussion we decided to refactor them to make them more
+usable, optimal and compatible with other indexes and the catalog.
+
+Porting applications from the old KeywordIndex/TopicIndex to the
+new ones is rather easy and is explained above, so we believe
+that it isn't a problem. Please use the zope3-users at zope.org or
+zope-dev at zope.org mailing lists if you have any problems
+with migration.
+
 Thanks Chris McDonough of repoze for supporting and useful code.
 
 3.4.1 (2007-09-28)

Modified: zope.index/trunk/src/zope/index/interfaces.py
===================================================================
--- zope.index/trunk/src/zope/index/interfaces.py	2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/src/zope/index/interfaces.py	2008-12-30 11:12:16 UTC (rev 94439)
@@ -90,7 +90,7 @@
         If some of docids are not indexed they are skipped
         from resulting iterable.
         
-        Return an iterable of document ids. Limited by
+        Return a sorted iterable of document ids. Limited by
         value of the "limit" argument and optionally
         reversed, using the "reverse" argument.
         """

Modified: zope.index/trunk/src/zope/index/keyword/__init__.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/__init__.py	2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/src/zope/index/keyword/__init__.py	2008-12-30 11:12:16 UTC (rev 94439)
@@ -1 +1 @@
-from zope.index.keyword.index import KeywordIndex, CaseSensitiveKeywordIndex
+from zope.index.keyword.index import KeywordIndex, CaseInsensitiveKeywordIndex

Modified: zope.index/trunk/src/zope/index/keyword/index.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/index.py	2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/src/zope/index/keyword/index.py	2008-12-30 11:12:16 UTC (rev 94439)
@@ -27,11 +27,10 @@
 
 
 class KeywordIndex(Persistent):
-    """ A case-insensitive keyword index """
+    """Keyword index"""
 
+    implements(IInjection, IStatistics, IIndexSearch, IKeywordQuerying)
     family = BTrees.family32
-    normalize = True
-    implements(IInjection, IStatistics, IIndexSearch, IKeywordQuerying)
 
     def __init__(self, family=None):
         if family is not None:
@@ -61,6 +60,15 @@
     def has_doc(self, docid):
         return bool(self._rev_index.has_key(docid))
 
+    def normalize(self, seq):
+        """Perform normalization on sequence of keywords.
+        
+        Return normalized sequence. This method may be
+        overridden by subclasses.
+        
+        """
+        return seq
+
     def index_doc(self, docid, seq):
         if isinstance(seq, basestring):
             raise TypeError('seq argument must be a list/tuple of strings')
@@ -68,8 +76,7 @@
         if not seq:
             return
 
-        if self.normalize:
-            seq = [w.lower() for w in seq]
+        seq = self.normalize(seq)
 
         old_kw = self._rev_index.get(docid, None)
         new_kw = self.family.OO.Set(seq)
@@ -131,8 +138,7 @@
         if isinstance(query, basestring):
             query = [query]
 
-        if self.normalize:
-            query = [w.lower() for w in query]
+        query = self.normalize(query)
 
         sets = []
         for word in query:
@@ -166,6 +172,8 @@
             query = query['query']
         return self.search(query, operator=operator)
 
-class CaseSensitiveKeywordIndex(KeywordIndex):
-    """ A case-sensitive keyword index """
-    normalize = False        
+class CaseInsensitiveKeywordIndex(KeywordIndex):
+    """A case-normalizing keyword index (for strings as keywords)"""
+
+    def normalize(self, seq):
+        return [w.lower() for w in seq]

Modified: zope.index/trunk/src/zope/index/keyword/tests.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/tests.py	2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/src/zope/index/keyword/tests.py	2008-12-30 11:12:16 UTC (rev 94439)
@@ -16,8 +16,8 @@
 
 import BTrees
 
-from zope.index.keyword.index import KeywordIndex
-from zope.index.interfaces import IInjection, IStatistics
+from zope.index.keyword.index import CaseInsensitiveKeywordIndex
+from zope.index.interfaces import IInjection, IStatistics, IIndexSearch
 from zope.index.keyword.interfaces import IKeywordQuerying
 from zope.interface.verify import verifyClass
 
@@ -26,7 +26,7 @@
     from BTrees.IFBTree import IFSet
 
     def setUp(self):
-        self.index = KeywordIndex()
+        self.index = CaseInsensitiveKeywordIndex()
 
     def _populate_index(self):
 
@@ -38,13 +38,11 @@
 
 
     def _search(self, query, expected, mode='and'):
-
         results = self.index.search(query, mode)
 
         # results and expected are IFSets() but we can not
         # compare them directly since __eq__() does not seem
         # to be implemented for BTrees
-
         self.assertEqual(results.keys(), expected.keys())
 
     def _search_and(self, query, expected):
@@ -66,9 +64,10 @@
         self.assertEqual(results.keys(), expected.keys())
 
     def test_interface(self):
-        verifyClass(IInjection, KeywordIndex)
-        verifyClass(IStatistics, KeywordIndex)
-        verifyClass(IKeywordQuerying, KeywordIndex)
+        verifyClass(IInjection, CaseInsensitiveKeywordIndex)
+        verifyClass(IStatistics, CaseInsensitiveKeywordIndex)
+        verifyClass(IIndexSearch, CaseInsensitiveKeywordIndex)
+        verifyClass(IKeywordQuerying, CaseInsensitiveKeywordIndex)
 
     def test_empty_index(self):
         self.assertEqual(self.index.documentCount(), 0)
@@ -167,7 +166,7 @@
     from BTrees.LFBTree import LFSet as IFSet
 
     def setUp(self):
-        self.index = KeywordIndex(family=BTrees.family64)
+        self.index = CaseInsensitiveKeywordIndex(family=BTrees.family64)
 
 
 def test_suite():



More information about the Checkins mailing list