[Checkins] SVN: Zope/trunk/ LP #142478: normalize terms passed to ``PLexicon.queryLexicon``

Tres Seaver tseaver at palladion.com
Mon Apr 12 08:44:02 EDT 2010


Log message for revision 110743:
  LP #142478:  normalize terms passed to ``PLexicon.queryLexicon``
  
  o Use the lexicon's pipeline (e.g., case flattening, stop word removal)
  
  o Forward-port from the 2.12 branch.
  

Changed:
  U   Zope/trunk/doc/CHANGES.rst
  U   Zope/trunk/src/Products/ZCTextIndex/ZCTextIndex.py
  U   Zope/trunk/src/Products/ZCTextIndex/tests/testZCTextIndex.py

-=-
Modified: Zope/trunk/doc/CHANGES.rst
===================================================================
--- Zope/trunk/doc/CHANGES.rst	2010-04-12 12:26:43 UTC (rev 110742)
+++ Zope/trunk/doc/CHANGES.rst	2010-04-12 12:44:01 UTC (rev 110743)
@@ -153,6 +153,9 @@
 Bugs Fixed
 ++++++++++
 
+- LP #142478:  normalize terms passed to ``PLexicon.queryLexicon`` using
+  the lexicon's pipeline (e.g., case flattening, stop word removal, etc.)
+
 - LP #143604: Removed top-level database-quota-size from zope.conf, some
   storages support a quota option instead.
 

Modified: Zope/trunk/src/Products/ZCTextIndex/ZCTextIndex.py
===================================================================
--- Zope/trunk/src/Products/ZCTextIndex/ZCTextIndex.py	2010-04-12 12:26:43 UTC (rev 110742)
+++ Zope/trunk/src/Products/ZCTextIndex/ZCTextIndex.py	2010-04-12 12:44:01 UTC (rev 110743)
@@ -358,7 +358,7 @@
         """
         if words:
             wids = []
-            for word in words:
+            for word in self.parseTerms(words):
                 wids.extend(self.globToWordIds(word))
             words = [self.get_word(wid) for wid in wids]
         else:
@@ -384,17 +384,21 @@
             columns.append(words[i:i + rows])
             i += rows
 
-        return self._queryLexicon(self, REQUEST,
-                                  page=page,
-                                  rows=rows,
-                                  cols=cols,
-                                  start_word=start+1,
-                                  end_word=end,
-                                  word_count=word_count,
-                                  page_count=page_count,
-                                  page_range=xrange(page_count),
-                                  page_columns=columns)
+        info = dict(page=page,
+                    rows=rows,
+                    cols=cols,
+                    start_word=start+1,
+                    end_word=end,
+                    word_count=word_count,
+                    page_count=page_count,
+                    page_range=xrange(page_count),
+                    page_columns=columns)
 
+        if REQUEST is not None:
+            return self._queryLexicon(self, REQUEST, **info)
+
+        return info
+
     security.declareProtected(LexiconMgmtPerm, 'manage_main')
     manage_main = DTMLFile('dtml/manageLexicon', globals())
 

Modified: Zope/trunk/src/Products/ZCTextIndex/tests/testZCTextIndex.py
===================================================================
--- Zope/trunk/src/Products/ZCTextIndex/tests/testZCTextIndex.py	2010-04-12 12:26:43 UTC (rev 110742)
+++ Zope/trunk/src/Products/ZCTextIndex/tests/testZCTextIndex.py	2010-04-12 12:44:01 UTC (rev 110743)
@@ -245,6 +245,7 @@
                     nbest, total = self.zc_index.query(w)
                     self.assertEqual(total, 0, "did not expect to find %s" % w)
 
+
 class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest):
 
     # A fairly involved test of the ranking calculations based on
@@ -566,15 +567,146 @@
 
 class PLexiconTests(unittest.TestCase):
 
-    def test_z3interfaces(self):
+    def _getTargetClass(self):
+        from Products.ZCTextIndex.ZCTextIndex import PLexicon
+        return PLexicon
+
+    def _makeOne(self, id='testing', title='Testing', *pipeline):
+        return self._getTargetClass()(id, title, *pipeline)
+
+    def test_class_conforms_to_ILexicon(self):
         from Products.ZCTextIndex.interfaces import ILexicon
+        from zope.interface.verify import verifyClass
+        verifyClass(ILexicon, self._getTargetClass())
+
+    def test_instance_conforms_to_ILexicon(self):
+        from Products.ZCTextIndex.interfaces import ILexicon
+        from zope.interface.verify import verifyObject
+        verifyObject(ILexicon, self._makeOne())
+
+    def test_class_conforms_to_IZCLexicon(self):
         from Products.ZCTextIndex.interfaces import IZCLexicon
         from zope.interface.verify import verifyClass
+        verifyClass(IZCLexicon, self._getTargetClass())
 
-        verifyClass(ILexicon, PLexicon)
-        verifyClass(IZCLexicon, PLexicon)
+    def test_instance_conforms_to_IZCLexicon(self):
+        from Products.ZCTextIndex.interfaces import IZCLexicon
+        from zope.interface.verify import verifyObject
+        verifyObject(IZCLexicon, self._makeOne())
 
+    def test_queryLexicon_defaults_empty(self):
+        lexicon = self._makeOne()
+        info = lexicon.queryLexicon(REQUEST=None, words=None)
+        self.assertEqual(info['page'], 0)
+        self.assertEqual(info['rows'], 20)
+        self.assertEqual(info['cols'], 4)
+        self.assertEqual(info['start_word'], 1)
+        self.assertEqual(info['end_word'], 0)
+        self.assertEqual(info['word_count'], 0)
+        self.assertEqual(list(info['page_range']), [])
+        self.assertEqual(info['page_columns'], [])
 
+    def test_queryLexicon_defaults_non_empty(self):
+        WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds(WORDS)
+        info = lexicon.queryLexicon(REQUEST=None, words=None)
+        self.assertEqual(info['page'], 0)
+        self.assertEqual(info['rows'], 20)
+        self.assertEqual(info['cols'], 4)
+        self.assertEqual(info['start_word'], 1)
+        self.assertEqual(info['end_word'], 7)
+        self.assertEqual(info['word_count'], 7)
+        self.assertEqual(list(info['page_range']), [0])
+        self.assertEqual(info['page_columns'], [WORDS])
+
+    def test_queryLexicon_row_breaks(self):
+        WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds(WORDS)
+        info = lexicon.queryLexicon(REQUEST=None, words=None, rows=4)
+        self.assertEqual(info['page'], 0)
+        self.assertEqual(info['rows'], 4)
+        self.assertEqual(info['cols'], 4)
+        self.assertEqual(info['start_word'], 1)
+        self.assertEqual(info['end_word'], 7)
+        self.assertEqual(info['word_count'], 7)
+        self.assertEqual(list(info['page_range']), [0])
+        self.assertEqual(info['page_columns'], [WORDS[0:4], WORDS[4:]])
+
+    def test_queryLexicon_page_breaks(self):
+        WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds(WORDS)
+        info = lexicon.queryLexicon(REQUEST=None, words=None, rows=2, cols=2)
+        self.assertEqual(info['page'], 0)
+        self.assertEqual(info['rows'], 2)
+        self.assertEqual(info['cols'], 2)
+        self.assertEqual(info['start_word'], 1)
+        self.assertEqual(info['end_word'], 4)
+        self.assertEqual(info['word_count'], 7)
+        self.assertEqual(list(info['page_range']), [0, 1])
+        self.assertEqual(info['page_columns'], [WORDS[0:2], WORDS[2:4]])
+
+    def test_queryLexicon_page_break_not_first(self):
+        WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds(WORDS)
+        info = lexicon.queryLexicon(REQUEST=None, words=None,
+                                    page=1, rows=2, cols=2)
+        self.assertEqual(info['page'], 1)
+        self.assertEqual(info['rows'], 2)
+        self.assertEqual(info['cols'], 2)
+        self.assertEqual(info['start_word'], 5)
+        self.assertEqual(info['end_word'], 7)
+        self.assertEqual(info['word_count'], 7)
+        self.assertEqual(list(info['page_range']), [0, 1])
+        self.assertEqual(info['page_columns'], [WORDS[4:6], WORDS[6:]])
+
+    def test_queryLexicon_words_no_globbing(self):
+        WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds(WORDS)
+        info = lexicon.queryLexicon(REQUEST=None, words=['aaa', 'bbb'])
+        self.assertEqual(info['page'], 0)
+        self.assertEqual(info['rows'], 20)
+        self.assertEqual(info['cols'], 4)
+        self.assertEqual(info['start_word'], 1)
+        self.assertEqual(info['end_word'], 2)
+        self.assertEqual(info['word_count'], 2)
+        self.assertEqual(list(info['page_range']), [0])
+        self.assertEqual(info['page_columns'], [['aaa', 'bbb']])
+
+    def test_queryLexicon_words_w_globbing(self):
+        WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds(WORDS)
+        info = lexicon.queryLexicon(REQUEST=None, words=['aa*', 'bbb*'])
+        self.assertEqual(info['page'], 0)
+        self.assertEqual(info['rows'], 20)
+        self.assertEqual(info['cols'], 4)
+        self.assertEqual(info['start_word'], 1)
+        self.assertEqual(info['end_word'], 2)
+        self.assertEqual(info['word_count'], 2)
+        self.assertEqual(list(info['page_range']), [0])
+        self.assertEqual(info['page_columns'], [['aaa', 'bbb']])
+
+    def test_queryLexicon_uses_pipeline_for_normalization(self):
+        from Products.ZCTextIndex.Lexicon import CaseNormalizer
+        WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+        lexicon = self._makeOne('test', 'Testing', CaseNormalizer())
+        lexicon.sourceToWordIds(WORDS)
+        info = lexicon.queryLexicon(REQUEST=None, words=['AA*', 'Bbb*'])
+        self.assertEqual(info['page'], 0)
+        self.assertEqual(info['rows'], 20)
+        self.assertEqual(info['cols'], 4)
+        self.assertEqual(info['start_word'], 1)
+        self.assertEqual(info['end_word'], 2)
+        self.assertEqual(info['word_count'], 2)
+        self.assertEqual(list(info['page_range']), [0])
+        self.assertEqual(info['page_columns'], [['aaa', 'bbb']])
+
+
 def test_suite():
     s = unittest.TestSuite()
     for klass in (CosineIndexTests, OkapiIndexTests,



More information about the checkins mailing list