[Checkins] SVN: zope.index/trunk/ Refactor KeywordIndex not to do normalizing by default, as keywords can be non-string objects as well.

Chris McDonough chrism at plope.com
Tue Dec 30 06:19:36 EST 2008


Hi Dan,

Looks good!  FTR, if you don't use the CaseInsensitive index personally, it
might be reasonable to not include it at all within zope.index, as now it's easy
enough for people to subclass to get the behavior back.  Case normalization is
really also the application's job, not the index's job, so it'd be good to not
encourage folks to use the index to get it unless they're willing to subclass.

That said, if you use it, it's fine.

- C



Dan Korostelev wrote:
> Log message for revision 94439:
>   Refactor KeywordIndex not to do normalizing by default, as keywords can be non-string objects as well.
> 
> Changed:
>   U   zope.index/trunk/CHANGES.txt
>   U   zope.index/trunk/src/zope/index/interfaces.py
>   U   zope.index/trunk/src/zope/index/keyword/__init__.py
>   U   zope.index/trunk/src/zope/index/keyword/index.py
>   U   zope.index/trunk/src/zope/index/keyword/tests.py
> 
> -=-
> Modified: zope.index/trunk/CHANGES.txt
> ===================================================================
> --- zope.index/trunk/CHANGES.txt	2008-12-30 08:23:27 UTC (rev 94438)
> +++ zope.index/trunk/CHANGES.txt	2008-12-30 11:12:16 UTC (rev 94439)
> @@ -29,6 +29,28 @@
>    
>    Where ``filter`` is an instance of FilteredSet.
>  
> +- IMPORTANT: KeywordIndex is now non-normalizing, because
> +  it can be used with non-string keywords, where case-normalizing
> +  doesn't make any sense. Instead, it provides the ``normalize``
> +  method that can be overridden by subclasses to provide some
> +  normalization.
> +  
> +  A CaseInsensitiveKeywordIndex class is now provided that
> +  does case-normalization for string-based keywords. The old
> +  CaseSensitiveKeywordIndex is gone; applications should use
> +  KeywordIndex for that.
> +
> +Looks like the KeywordIndex/TopicIndex were sort of abandonware
> +and weren't used by application developers, so after some
> +discussion we decided to refactor them to make them more
> +usable, optimal and compatible with other indexes and catalog.
> +
> +Porting applications from the old KeywordIndex/TopicIndex to the
> +new ones is rather easy and is explained above, so we believe that
> +it isn't a problem. Please use the zope3-users at zope.org or
> +zope-dev at zope.org mailing lists if you have any problems
> +with migration.
> +
>  Thanks Chris McDonough of repoze for supporting and useful code.
>  
>  3.4.1 (2007-09-28)
> 
> Modified: zope.index/trunk/src/zope/index/interfaces.py
> ===================================================================
> --- zope.index/trunk/src/zope/index/interfaces.py	2008-12-30 08:23:27 UTC (rev 94438)
> +++ zope.index/trunk/src/zope/index/interfaces.py	2008-12-30 11:12:16 UTC (rev 94439)
> @@ -90,7 +90,7 @@
>          If some of docids are not indexed they are skipped
>          from resulting iterable.
>          
> -        Return an iterable of document ids. Limited by
> +        Return a sorted iterable of document ids. Limited by
>          value of the "limit" argument and optionally
>          reversed, using the "reverse" argument.
>          """
> 
> Modified: zope.index/trunk/src/zope/index/keyword/__init__.py
> ===================================================================
> --- zope.index/trunk/src/zope/index/keyword/__init__.py	2008-12-30 08:23:27 UTC (rev 94438)
> +++ zope.index/trunk/src/zope/index/keyword/__init__.py	2008-12-30 11:12:16 UTC (rev 94439)
> @@ -1 +1 @@
> -from zope.index.keyword.index import KeywordIndex, CaseSensitiveKeywordIndex
> +from zope.index.keyword.index import KeywordIndex, CaseInsensitiveKeywordIndex
> 
> Modified: zope.index/trunk/src/zope/index/keyword/index.py
> ===================================================================
> --- zope.index/trunk/src/zope/index/keyword/index.py	2008-12-30 08:23:27 UTC (rev 94438)
> +++ zope.index/trunk/src/zope/index/keyword/index.py	2008-12-30 11:12:16 UTC (rev 94439)
> @@ -27,11 +27,10 @@
>  
>  
>  class KeywordIndex(Persistent):
> -    """ A case-insensitive keyword index """
> +    """Keyword index"""
>  
> +    implements(IInjection, IStatistics, IIndexSearch, IKeywordQuerying)
>      family = BTrees.family32
> -    normalize = True
> -    implements(IInjection, IStatistics, IIndexSearch, IKeywordQuerying)
>  
>      def __init__(self, family=None):
>          if family is not None:
> @@ -61,6 +60,15 @@
>      def has_doc(self, docid):
>          return bool(self._rev_index.has_key(docid))
>  
> +    def normalize(self, seq):
> +        """Perform normalization on sequence of keywords.
> +        
> +        Return normalized sequence. This method may be
> +        overridden by subclasses.
> +        
> +        """
> +        return seq
> +
>      def index_doc(self, docid, seq):
>          if isinstance(seq, basestring):
>              raise TypeError('seq argument must be a list/tuple of strings')
> @@ -68,8 +76,7 @@
>          if not seq:
>              return
>  
> -        if self.normalize:
> -            seq = [w.lower() for w in seq]
> +        seq = self.normalize(seq)
>  
>          old_kw = self._rev_index.get(docid, None)
>          new_kw = self.family.OO.Set(seq)
> @@ -131,8 +138,7 @@
>          if isinstance(query, basestring):
>              query = [query]
>  
> -        if self.normalize:
> -            query = [w.lower() for w in query]
> +        query = self.normalize(query)
>  
>          sets = []
>          for word in query:
> @@ -166,6 +172,8 @@
>              query = query['query']
>          return self.search(query, operator=operator)
>  
> -class CaseSensitiveKeywordIndex(KeywordIndex):
> -    """ A case-sensitive keyword index """
> -    normalize = False        
> +class CaseInsensitiveKeywordIndex(KeywordIndex):
> +    """A case-normalizing keyword index (for strings as keywords)"""
> +
> +    def normalize(self, seq):
> +        return [w.lower() for w in seq]
> 
> Modified: zope.index/trunk/src/zope/index/keyword/tests.py
> ===================================================================
> --- zope.index/trunk/src/zope/index/keyword/tests.py	2008-12-30 08:23:27 UTC (rev 94438)
> +++ zope.index/trunk/src/zope/index/keyword/tests.py	2008-12-30 11:12:16 UTC (rev 94439)
> @@ -16,8 +16,8 @@
>  
>  import BTrees
>  
> -from zope.index.keyword.index import KeywordIndex
> -from zope.index.interfaces import IInjection, IStatistics
> +from zope.index.keyword.index import CaseInsensitiveKeywordIndex
> +from zope.index.interfaces import IInjection, IStatistics, IIndexSearch
>  from zope.index.keyword.interfaces import IKeywordQuerying
>  from zope.interface.verify import verifyClass
>  
> @@ -26,7 +26,7 @@
>      from BTrees.IFBTree import IFSet
>  
>      def setUp(self):
> -        self.index = KeywordIndex()
> +        self.index = CaseInsensitiveKeywordIndex()
>  
>      def _populate_index(self):
>  
> @@ -38,13 +38,11 @@
>  
>  
>      def _search(self, query, expected, mode='and'):
> -
>          results = self.index.search(query, mode)
>  
>          # results and expected are IFSets() but we can not
>          # compare them directly since __eq__() does not seem
>          # to be implemented for BTrees
> -
>          self.assertEqual(results.keys(), expected.keys())
>  
>      def _search_and(self, query, expected):
> @@ -66,9 +64,10 @@
>          self.assertEqual(results.keys(), expected.keys())
>  
>      def test_interface(self):
> -        verifyClass(IInjection, KeywordIndex)
> -        verifyClass(IStatistics, KeywordIndex)
> -        verifyClass(IKeywordQuerying, KeywordIndex)
> +        verifyClass(IInjection, CaseInsensitiveKeywordIndex)
> +        verifyClass(IStatistics, CaseInsensitiveKeywordIndex)
> +        verifyClass(IIndexSearch, CaseInsensitiveKeywordIndex)
> +        verifyClass(IKeywordQuerying, CaseInsensitiveKeywordIndex)
>  
>      def test_empty_index(self):
>          self.assertEqual(self.index.documentCount(), 0)
> @@ -167,7 +166,7 @@
>      from BTrees.LFBTree import LFSet as IFSet
>  
>      def setUp(self):
> -        self.index = KeywordIndex(family=BTrees.family64)
> +        self.index = CaseInsensitiveKeywordIndex(family=BTrees.family64)
>  
>  
>  def test_suite():
> 
> _______________________________________________
> Checkins mailing list
> Checkins at zope.org
> http://mail.zope.org/mailman/listinfo/checkins
> 



More information about the Checkins mailing list