[Zope-Checkins] CVS: Zope/lib/python/Products/ZCTextIndex/tests - mhindex.py:1.16.8.1 testIndex.py:1.11.10.1 testLexicon.py:1.3.10.3 testStopper.py:1.2.10.1 testZCTextIndex.py:1.36.6.1

Casey Duncan casey@zope.com
Thu, 5 Jun 2003 16:37:06 -0400


Update of /cvs-repository/Zope/lib/python/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv28323/lib/python/Products/ZCTextIndex/tests

Modified Files:
      Tag: Zope-2_6-branch
	mhindex.py testIndex.py testLexicon.py testStopper.py 
	testZCTextIndex.py 
Log Message:
Backport casey-zctextindex-fewer-conflicts-branch:

  - Indexes and Lexicon are now much less likely to generate write conflicts.
    Previously *any* concurrent index/unindex operation would conflict.

  - Performance and scalability fix for queries



=== Zope/lib/python/Products/ZCTextIndex/tests/mhindex.py 1.16 => 1.16.8.1 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/mhindex.py:1.16	Wed Jul 10 14:02:09 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/mhindex.py	Thu Jun  5 16:37:05 2003
@@ -441,8 +441,6 @@
             self.updatefolder(f, f.listmessages())
             print "Total", len(self.docpaths)
         self.commit()
-        print "Indexed", self.index.lexicon._nbytes, "bytes and",
-        print self.index.lexicon._nwords, "words;",
         print len(self.index.lexicon._words), "unique words."
 
     def updatefolder(self, f, msgs):
@@ -468,7 +466,7 @@
             self.index.index_text(docid, text)
             self.maycommit()
         # Remove messages from the folder that no longer exist
-        for path in self.path2docid.keys(f.name):
+        for path in list(self.path2docid.keys(f.name)):
             if not path.startswith(f.name + "/"):
                 break
             if self.getmtime(path) == 0:
@@ -544,7 +542,7 @@
             st = os.stat(path)
         except os.error, msg:
             return 0
-        return st[ST_MTIME]
+        return int(st[ST_MTIME])
 
     def maycommit(self):
         self.trans_count += 1


=== Zope/lib/python/Products/ZCTextIndex/tests/testIndex.py 1.11 => 1.11.10.1 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testIndex.py:1.11	Wed Jun 12 17:45:53 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testIndex.py	Thu Jun  5 16:37:05 2003
@@ -12,8 +12,10 @@
 #
 ##############################################################################
 
+import os
 from unittest import TestCase, TestSuite, main, makeSuite
 
+from BTrees.Length import Length
 from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
 from Products.ZCTextIndex.CosineIndex import CosineIndex
 from Products.ZCTextIndex.OkapiIndex import OkapiIndex
@@ -34,6 +36,8 @@
         self.assert_(self.index.has_doc(DOCID))
         self.assert_(self.index._docweight[DOCID])
         self.assertEqual(len(self.index._docweight), 1)
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         self.assertEqual(len(self.index._wordinfo), 5)
         self.assertEqual(len(self.index._docwords), 1)
         self.assertEqual(len(self.index.get_words(DOCID)), 5)
@@ -48,6 +52,8 @@
         self.test_index_document(DOCID)
         self.index.unindex_doc(DOCID)
         self.assertEqual(len(self.index._docweight), 0)
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         self.assertEqual(len(self.index._wordinfo), 0)
         self.assertEqual(len(self.index._docwords), 0)
         self.assertEqual(len(self.index._wordinfo),
@@ -60,6 +66,8 @@
         self.index.index_doc(DOCID, doc)
         self.assert_(self.index._docweight[DOCID])
         self.assertEqual(len(self.index._docweight), 2)
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         self.assertEqual(len(self.index._wordinfo), 8)
         self.assertEqual(len(self.index._docwords), 2)
         self.assertEqual(len(self.index.get_words(DOCID)), 4)
@@ -82,6 +90,8 @@
         self.index.unindex_doc(1)
         DOCID = 2
         self.assertEqual(len(self.index._docweight), 1)
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         self.assert_(self.index._docweight[DOCID])
         self.assertEqual(len(self.index._wordinfo), 4)
         self.assertEqual(len(self.index._docwords), 1)
@@ -101,6 +111,8 @@
         self.assertEqual(len(self.index.get_words(DOCID)), 7)
         self.assertEqual(len(self.index._wordinfo),
                          self.index.length())
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         wids = self.lexicon.termToWordIds("repeat")
         self.assertEqual(len(wids), 1)
         repititive_wid = wids[0]
@@ -145,9 +157,130 @@
 class OkapiIndexTest(IndexTest):
     IndexFactory = OkapiIndex
 
+class TestIndexConflict(TestCase):
+    
+    storage = None
+
+    def tearDown(self):
+        if self.storage is not None:
+            self.storage.close()
+
+    def openDB(self):
+        from ZODB.FileStorage import FileStorage
+        from ZODB.DB import DB
+        n = 'fs_tmp__%s' % os.getpid()
+        self.storage = FileStorage(n)
+        self.db = DB(self.storage)
+        
+    def test_index_doc_conflict(self):
+        self.index = OkapiIndex(Lexicon())
+        self.openDB()
+        r1 = self.db.open().root()
+        r1['i'] = self.index
+        get_transaction().commit()
+        
+        r2 = self.db.open().root()
+        copy = r2['i']
+        # Make sure the data is loaded
+        list(copy._docweight.items())
+        list(copy._docwords.items())
+        list(copy._wordinfo.items())
+        list(copy._lexicon._wids.items())
+        list(copy._lexicon._words.items())
+        
+        self.assertEqual(self.index._p_serial, copy._p_serial)
+        
+        self.index.index_doc(0, 'The time has come')
+        get_transaction().commit()
+        
+        copy.index_doc(1, 'That time has gone')
+        get_transaction().commit()
+
+    def test_reindex_doc_conflict(self):
+        self.index = OkapiIndex(Lexicon())
+        self.index.index_doc(0, 'Sometimes change is good')
+        self.index.index_doc(1, 'Then again, who asked')
+        self.openDB()
+        r1 = self.db.open().root()
+        r1['i'] = self.index
+        get_transaction().commit()
+        
+        r2 = self.db.open().root()
+        copy = r2['i']
+        # Make sure the data is loaded
+        list(copy._docweight.items())
+        list(copy._docwords.items())
+        list(copy._wordinfo.items())
+        list(copy._lexicon._wids.items())
+        list(copy._lexicon._words.items())
+        
+        self.assertEqual(self.index._p_serial, copy._p_serial)
+        
+        self.index.index_doc(0, 'Sometimes change isn\'t bad')
+        get_transaction().commit()
+        
+        copy.index_doc(1, 'Then again, who asked you?')
+        get_transaction().commit()
+        
+class TestUpgrade(TestCase):
+
+    def test_query_before_totaldoclen_upgrade(self):
+        self.index1 = OkapiIndex(Lexicon(Splitter()))
+        self.index1.index_doc(0, 'The quiet of night')
+        # Revert index1 back to a long to simulate an older index instance
+        self.index1._totaldoclen = long(self.index1._totaldoclen())
+        self.assertEqual(len(self.index1.search('night')), 1)
+    
+    def test_upgrade_totaldoclen(self):
+        self.index1 = OkapiIndex(Lexicon())
+        self.index2 = OkapiIndex(Lexicon())
+        self.index1.index_doc(0, 'The quiet of night')
+        self.index2.index_doc(0, 'The quiet of night')
+        # Revert index1 back to a long to simulate an older index instance
+        self.index1._totaldoclen = long(self.index1._totaldoclen())
+        self.index1.index_doc(1, 'gazes upon my shadow')
+        self.index2.index_doc(1, 'gazes upon my shadow')
+        self.assertEqual(
+            self.index1._totaldoclen(), self.index2._totaldoclen())
+        self.index1._totaldoclen = long(self.index1._totaldoclen())
+        self.index1.unindex_doc(0)
+        self.index2.unindex_doc(0)
+        self.assertEqual(
+            self.index1._totaldoclen(), self.index2._totaldoclen())
+
+    def test_query_before_document_count_upgrade(self):
+        self.index1 = OkapiIndex(Lexicon(Splitter()))
+        self.index1.index_doc(0, 'The quiet of night')
+        # Revert index1 back to simulate an older index instance
+        del self.index1.document_count
+        self.assertEqual(len(self.index1.search('night')), 1)
+    
+    def test_upgrade_document_count(self):
+        self.index1 = OkapiIndex(Lexicon())
+        self.index2 = OkapiIndex(Lexicon())
+        self.index1.index_doc(0, 'The quiet of night')
+        self.index2.index_doc(0, 'The quiet of night')
+        # Revert index1 back to simulate an older index instance
+        del self.index1.document_count
+        self.index1.index_doc(1, 'gazes upon my shadow')
+        self.index2.index_doc(1, 'gazes upon my shadow')
+        self.assert_(self.index1.document_count.__class__ is Length)
+        self.assertEqual(
+            self.index1.document_count(), self.index2.document_count())
+        del self.index1.document_count
+        self.index1.unindex_doc(0)
+        self.index2.unindex_doc(0)
+        self.assert_(self.index1.document_count.__class__ is Length)
+        self.assertEqual(
+            self.index1.document_count(), self.index2.document_count())
+        
+        
+        
 def test_suite():
     return TestSuite((makeSuite(CosineIndexTest),
                       makeSuite(OkapiIndexTest),
+                      makeSuite(TestIndexConflict),
+                      makeSuite(TestUpgrade),
                     ))
 
 if __name__=='__main__':


=== Zope/lib/python/Products/ZCTextIndex/tests/testLexicon.py 1.3.10.2 => 1.3.10.3 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testLexicon.py:1.3.10.2	Thu Dec 19 10:37:36 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testLexicon.py	Thu Jun  5 16:37:05 2003
@@ -12,9 +12,11 @@
 #
 ##############################################################################
 
-import sys
+import os, sys
 from unittest import TestCase, TestSuite, main, makeSuite
 
+import ZODB
+
 from Products.ZCTextIndex.Lexicon import Lexicon
 from Products.ZCTextIndex.Lexicon import Splitter, CaseNormalizer
 
@@ -134,9 +136,59 @@
         words = HTMLWordSplitter().process(words)
         self.assertEqual(words, expected)
         locale.setlocale(locale.LC_ALL, loc) # restore saved locale
+        
+    def testUpgradeLength(self):
+        from BTrees.Length import Length
+        lexicon = Lexicon(Splitter())
+        del lexicon.length # Older instances don't override length
+        lexicon.sourceToWordIds('how now brown cow')
+        self.assert_(lexicon.length.__class__ is Length)        
+        
+class TestLexiconConflict(TestCase):
+    
+    storage = None
+
+    def tearDown(self):
+        if self.storage is not None:
+            self.storage.close()
+
+    def openDB(self):
+        from ZODB.FileStorage import FileStorage
+        from ZODB.DB import DB
+        n = 'fs_tmp__%s' % os.getpid()
+        self.storage = FileStorage(n)
+        self.db = DB(self.storage)
+        
+    def testAddWordConflict(self):
+        self.l = Lexicon(Splitter())
+        self.openDB()
+        r1 = self.db.open().root()
+        r1['l'] = self.l
+        get_transaction().commit()
+        
+        r2 = self.db.open().root()
+        copy = r2['l']
+        # Make sure the data is loaded
+        list(copy._wids.items())
+        list(copy._words.items())
+        copy.length()
+        
+        self.assertEqual(self.l._p_serial, copy._p_serial)
+        
+        self.l.sourceToWordIds('mary had a little lamb')
+        get_transaction().commit()
+        
+        copy.sourceToWordIds('whose fleece was')
+        copy.sourceToWordIds('white as snow')
+        get_transaction().commit()
+        self.assertEqual(copy.length(), 11)
+        self.assertEqual(copy.length(), len(copy._words))
 
 def test_suite():
-    return makeSuite(Test)
+    suite = TestSuite()
+    suite.addTest(makeSuite(Test))
+    suite.addTest(makeSuite(TestLexiconConflict))
+    return suite
 
 if __name__=='__main__':
     main(defaultTest='test_suite')


=== Zope/lib/python/Products/ZCTextIndex/tests/testStopper.py 1.2 => 1.2.10.1 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testStopper.py:1.2	Wed May 22 12:44:54 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testStopper.py	Thu Jun  5 16:37:05 2003
@@ -1,3 +1,16 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
 """Tests for the C version of the StopWordRemover."""
 
 import unittest


=== Zope/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py 1.36 => 1.36.6.1 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py:1.36	Wed Aug 14 18:25:14 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py	Thu Jun  5 16:37:05 2003
@@ -331,7 +331,7 @@
         self._checkAbsoluteScores()
 
     def _checkAbsoluteScores(self):
-        self.assertEqual(self.index._totaldoclen, 6)
+        self.assertEqual(self.index._totaldoclen(), 6)
         # So the mean doc length is 2.  We use that later.
 
         r, num = self.zc_index.query("one")