[Checkins] SVN: zope.index/trunk/ Refactor KeywordIndex not to do normalizing by default, as keywords can be non-string objects as well.
Dan Korostelev
nadako at gmail.com
Tue Dec 30 06:12:16 EST 2008
Log message for revision 94439:
Refactor KeywordIndex not to do normalizing by default, as keywords can be non-string objects as well.
Changed:
U zope.index/trunk/CHANGES.txt
U zope.index/trunk/src/zope/index/interfaces.py
U zope.index/trunk/src/zope/index/keyword/__init__.py
U zope.index/trunk/src/zope/index/keyword/index.py
U zope.index/trunk/src/zope/index/keyword/tests.py
-=-
Modified: zope.index/trunk/CHANGES.txt
===================================================================
--- zope.index/trunk/CHANGES.txt 2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/CHANGES.txt 2008-12-30 11:12:16 UTC (rev 94439)
@@ -29,6 +29,28 @@
Where ``filter`` is an instance of FilteredSet.
+- IMPORTANT: KeywordIndex is now non-normalizing, because
+ it can be used with non-string keywords, where case-normalizing
+ doesn't make any sense. Instead, it provides the ``normalize``
+ method that can be overridden by subclasses to provide some
+ normalization.
+
+ The CaseInsensitiveKeywordIndex class is now provided, which
+ does case-normalization for string-based keywords. The old
+ CaseSensitiveKeywordIndex is gone; applications should use
+ KeywordIndex for that.
+
+Looks like the KeywordIndex/TopicIndex was sort of abandonware
+and wasn't used by application developers, so after some
+discussion we decided to refactor them to make them more
+usable, optimal and compatible with other indexes and catalog.
+
+Porting applications from the old KeywordIndex/TopicIndex to the new
+ones is rather easy and explained above, so we believe that
+it isn't a problem. Please use the zope3-users at zope.org or
+zope-dev at zope.org mailing lists if you have any problems
+with migration.
+
Thanks Chris McDonough of repoze for supporting and useful code.
3.4.1 (2007-09-28)
Modified: zope.index/trunk/src/zope/index/interfaces.py
===================================================================
--- zope.index/trunk/src/zope/index/interfaces.py 2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/src/zope/index/interfaces.py 2008-12-30 11:12:16 UTC (rev 94439)
@@ -90,7 +90,7 @@
If some of docids are not indexed they are skipped
from resulting iterable.
- Return an iterable of document ids. Limited by
+ Return a sorted iterable of document ids. Limited by
value of the "limit" argument and optionally
reversed, using the "reverse" argument.
"""
Modified: zope.index/trunk/src/zope/index/keyword/__init__.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/__init__.py 2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/src/zope/index/keyword/__init__.py 2008-12-30 11:12:16 UTC (rev 94439)
@@ -1 +1 @@
-from zope.index.keyword.index import KeywordIndex, CaseSensitiveKeywordIndex
+from zope.index.keyword.index import KeywordIndex, CaseInsensitiveKeywordIndex
Modified: zope.index/trunk/src/zope/index/keyword/index.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/index.py 2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/src/zope/index/keyword/index.py 2008-12-30 11:12:16 UTC (rev 94439)
@@ -27,11 +27,10 @@
class KeywordIndex(Persistent):
- """ A case-insensitive keyword index """
+ """Keyword index"""
+ implements(IInjection, IStatistics, IIndexSearch, IKeywordQuerying)
family = BTrees.family32
- normalize = True
- implements(IInjection, IStatistics, IIndexSearch, IKeywordQuerying)
def __init__(self, family=None):
if family is not None:
@@ -61,6 +60,15 @@
def has_doc(self, docid):
return bool(self._rev_index.has_key(docid))
+ def normalize(self, seq):
+ """Perform normalization on sequence of keywords.
+
+ Return normalized sequence. This method may be
+ overridden by subclasses.
+
+ """
+ return seq
+
def index_doc(self, docid, seq):
if isinstance(seq, basestring):
raise TypeError('seq argument must be a list/tuple of strings')
@@ -68,8 +76,7 @@
if not seq:
return
- if self.normalize:
- seq = [w.lower() for w in seq]
+ seq = self.normalize(seq)
old_kw = self._rev_index.get(docid, None)
new_kw = self.family.OO.Set(seq)
@@ -131,8 +138,7 @@
if isinstance(query, basestring):
query = [query]
- if self.normalize:
- query = [w.lower() for w in query]
+ query = self.normalize(query)
sets = []
for word in query:
@@ -166,6 +172,8 @@
query = query['query']
return self.search(query, operator=operator)
-class CaseSensitiveKeywordIndex(KeywordIndex):
- """ A case-sensitive keyword index """
- normalize = False
+class CaseInsensitiveKeywordIndex(KeywordIndex):
+ """A case-normalizing keyword index (for strings as keywords)"""
+
+ def normalize(self, seq):
+ return [w.lower() for w in seq]
Modified: zope.index/trunk/src/zope/index/keyword/tests.py
===================================================================
--- zope.index/trunk/src/zope/index/keyword/tests.py 2008-12-30 08:23:27 UTC (rev 94438)
+++ zope.index/trunk/src/zope/index/keyword/tests.py 2008-12-30 11:12:16 UTC (rev 94439)
@@ -16,8 +16,8 @@
import BTrees
-from zope.index.keyword.index import KeywordIndex
-from zope.index.interfaces import IInjection, IStatistics
+from zope.index.keyword.index import CaseInsensitiveKeywordIndex
+from zope.index.interfaces import IInjection, IStatistics, IIndexSearch
from zope.index.keyword.interfaces import IKeywordQuerying
from zope.interface.verify import verifyClass
@@ -26,7 +26,7 @@
from BTrees.IFBTree import IFSet
def setUp(self):
- self.index = KeywordIndex()
+ self.index = CaseInsensitiveKeywordIndex()
def _populate_index(self):
@@ -38,13 +38,11 @@
def _search(self, query, expected, mode='and'):
-
results = self.index.search(query, mode)
# results and expected are IFSets() but we can not
# compare them directly since __eq__() does not seem
# to be implemented for BTrees
-
self.assertEqual(results.keys(), expected.keys())
def _search_and(self, query, expected):
@@ -66,9 +64,10 @@
self.assertEqual(results.keys(), expected.keys())
def test_interface(self):
- verifyClass(IInjection, KeywordIndex)
- verifyClass(IStatistics, KeywordIndex)
- verifyClass(IKeywordQuerying, KeywordIndex)
+ verifyClass(IInjection, CaseInsensitiveKeywordIndex)
+ verifyClass(IStatistics, CaseInsensitiveKeywordIndex)
+ verifyClass(IIndexSearch, CaseInsensitiveKeywordIndex)
+ verifyClass(IKeywordQuerying, CaseInsensitiveKeywordIndex)
def test_empty_index(self):
self.assertEqual(self.index.documentCount(), 0)
@@ -167,7 +166,7 @@
from BTrees.LFBTree import LFSet as IFSet
def setUp(self):
- self.index = KeywordIndex(family=BTrees.family64)
+ self.index = CaseInsensitiveKeywordIndex(family=BTrees.family64)
def test_suite():
More information about the Checkins
mailing list