[Zope-Checkins] CVS: Zope/lib/python/Products/ZCTextIndex/tests - mhindex.py:1.19 testIndex.py:1.12 testLexicon.py:1.6 testZCTextIndex.py:1.37

Casey Duncan casey@zope.com
Thu, 5 Jun 2003 15:43:56 -0400


Update of /cvs-repository/Zope/lib/python/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv20404/tests

Modified Files:
	mhindex.py testIndex.py testLexicon.py testZCTextIndex.py 
Log Message:
Merge casey-zctextindex-fewer-conflicts-branch:

  - Indexes and Lexicon are now much less likely to generate write
    conflicts. Previously, *any* concurrent index/unindex operation would
    conflict.

  - Performance and scalability fix for queries


=== Zope/lib/python/Products/ZCTextIndex/tests/mhindex.py 1.18 => 1.19 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/mhindex.py:1.18	Tue Dec  3 16:30:28 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/mhindex.py	Thu Jun  5 15:43:55 2003
@@ -441,8 +441,6 @@
             self.updatefolder(f, f.listmessages())
             print "Total", len(self.docpaths)
         self.commit()
-        print "Indexed", self.index.lexicon._nbytes, "bytes and",
-        print self.index.lexicon._nwords, "words;",
         print len(self.index.lexicon._words), "unique words."
 
     def updatefolder(self, f, msgs):


=== Zope/lib/python/Products/ZCTextIndex/tests/testIndex.py 1.11 => 1.12 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testIndex.py:1.11	Wed Jun 12 17:45:53 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testIndex.py	Thu Jun  5 15:43:55 2003
@@ -12,8 +12,10 @@
 #
 ##############################################################################
 
+import os
 from unittest import TestCase, TestSuite, main, makeSuite
 
+from BTrees.Length import Length
 from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
 from Products.ZCTextIndex.CosineIndex import CosineIndex
 from Products.ZCTextIndex.OkapiIndex import OkapiIndex
@@ -34,6 +36,8 @@
         self.assert_(self.index.has_doc(DOCID))
         self.assert_(self.index._docweight[DOCID])
         self.assertEqual(len(self.index._docweight), 1)
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         self.assertEqual(len(self.index._wordinfo), 5)
         self.assertEqual(len(self.index._docwords), 1)
         self.assertEqual(len(self.index.get_words(DOCID)), 5)
@@ -48,6 +52,8 @@
         self.test_index_document(DOCID)
         self.index.unindex_doc(DOCID)
         self.assertEqual(len(self.index._docweight), 0)
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         self.assertEqual(len(self.index._wordinfo), 0)
         self.assertEqual(len(self.index._docwords), 0)
         self.assertEqual(len(self.index._wordinfo),
@@ -60,6 +66,8 @@
         self.index.index_doc(DOCID, doc)
         self.assert_(self.index._docweight[DOCID])
         self.assertEqual(len(self.index._docweight), 2)
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         self.assertEqual(len(self.index._wordinfo), 8)
         self.assertEqual(len(self.index._docwords), 2)
         self.assertEqual(len(self.index.get_words(DOCID)), 4)
@@ -82,6 +90,8 @@
         self.index.unindex_doc(1)
         DOCID = 2
         self.assertEqual(len(self.index._docweight), 1)
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         self.assert_(self.index._docweight[DOCID])
         self.assertEqual(len(self.index._wordinfo), 4)
         self.assertEqual(len(self.index._docwords), 1)
@@ -101,6 +111,8 @@
         self.assertEqual(len(self.index.get_words(DOCID)), 7)
         self.assertEqual(len(self.index._wordinfo),
                          self.index.length())
+        self.assertEqual(
+            len(self.index._docweight), self.index.document_count())
         wids = self.lexicon.termToWordIds("repeat")
         self.assertEqual(len(wids), 1)
         repititive_wid = wids[0]
@@ -145,9 +157,130 @@
 class OkapiIndexTest(IndexTest):
     IndexFactory = OkapiIndex
 
+class TestIndexConflict(TestCase):
+    
+    storage = None
+
+    def tearDown(self):
+        if self.storage is not None:
+            self.storage.close()
+
+    def openDB(self):
+        from ZODB.FileStorage import FileStorage
+        from ZODB.DB import DB
+        n = 'fs_tmp__%s' % os.getpid()
+        self.storage = FileStorage(n)
+        self.db = DB(self.storage)
+        
+    def test_index_doc_conflict(self):
+        self.index = OkapiIndex(Lexicon())
+        self.openDB()
+        r1 = self.db.open().root()
+        r1['i'] = self.index
+        get_transaction().commit()
+        
+        r2 = self.db.open().root()
+        copy = r2['i']
+        # Make sure the data is loaded
+        list(copy._docweight.items())
+        list(copy._docwords.items())
+        list(copy._wordinfo.items())
+        list(copy._lexicon._wids.items())
+        list(copy._lexicon._words.items())
+        
+        self.assertEqual(self.index._p_serial, copy._p_serial)
+        
+        self.index.index_doc(0, 'The time has come')
+        get_transaction().commit()
+        
+        copy.index_doc(1, 'That time has gone')
+        get_transaction().commit()
+
+    def test_reindex_doc_conflict(self):
+        self.index = OkapiIndex(Lexicon())
+        self.index.index_doc(0, 'Sometimes change is good')
+        self.index.index_doc(1, 'Then again, who asked')
+        self.openDB()
+        r1 = self.db.open().root()
+        r1['i'] = self.index
+        get_transaction().commit()
+        
+        r2 = self.db.open().root()
+        copy = r2['i']
+        # Make sure the data is loaded
+        list(copy._docweight.items())
+        list(copy._docwords.items())
+        list(copy._wordinfo.items())
+        list(copy._lexicon._wids.items())
+        list(copy._lexicon._words.items())
+        
+        self.assertEqual(self.index._p_serial, copy._p_serial)
+        
+        self.index.index_doc(0, 'Sometimes change isn\'t bad')
+        get_transaction().commit()
+        
+        copy.index_doc(1, 'Then again, who asked you?')
+        get_transaction().commit()
+        
+class TestUpgrade(TestCase):
+
+    def test_query_before_totaldoclen_upgrade(self):
+        self.index1 = OkapiIndex(Lexicon(Splitter()))
+        self.index1.index_doc(0, 'The quiet of night')
+        # Revert index1 back to a long to simulate an older index instance
+        self.index1._totaldoclen = long(self.index1._totaldoclen())
+        self.assertEqual(len(self.index1.search('night')), 1)
+    
+    def test_upgrade_totaldoclen(self):
+        self.index1 = OkapiIndex(Lexicon())
+        self.index2 = OkapiIndex(Lexicon())
+        self.index1.index_doc(0, 'The quiet of night')
+        self.index2.index_doc(0, 'The quiet of night')
+        # Revert index1 back to a long to simulate an older index instance
+        self.index1._totaldoclen = long(self.index1._totaldoclen())
+        self.index1.index_doc(1, 'gazes upon my shadow')
+        self.index2.index_doc(1, 'gazes upon my shadow')
+        self.assertEqual(
+            self.index1._totaldoclen(), self.index2._totaldoclen())
+        self.index1._totaldoclen = long(self.index1._totaldoclen())
+        self.index1.unindex_doc(0)
+        self.index2.unindex_doc(0)
+        self.assertEqual(
+            self.index1._totaldoclen(), self.index2._totaldoclen())
+
+    def test_query_before_document_count_upgrade(self):
+        self.index1 = OkapiIndex(Lexicon(Splitter()))
+        self.index1.index_doc(0, 'The quiet of night')
+        # Delete index1's document_count to simulate an older index instance
+        del self.index1.document_count
+        self.assertEqual(len(self.index1.search('night')), 1)
+    
+    def test_upgrade_document_count(self):
+        self.index1 = OkapiIndex(Lexicon())
+        self.index2 = OkapiIndex(Lexicon())
+        self.index1.index_doc(0, 'The quiet of night')
+        self.index2.index_doc(0, 'The quiet of night')
+        # Revert index1 back to simulate an older index instance
+        del self.index1.document_count
+        self.index1.index_doc(1, 'gazes upon my shadow')
+        self.index2.index_doc(1, 'gazes upon my shadow')
+        self.assert_(self.index1.document_count.__class__ is Length)
+        self.assertEqual(
+            self.index1.document_count(), self.index2.document_count())
+        del self.index1.document_count
+        self.index1.unindex_doc(0)
+        self.index2.unindex_doc(0)
+        self.assert_(self.index1.document_count.__class__ is Length)
+        self.assertEqual(
+            self.index1.document_count(), self.index2.document_count())
+        
+        
+        
 def test_suite():
     return TestSuite((makeSuite(CosineIndexTest),
                       makeSuite(OkapiIndexTest),
+                      makeSuite(TestIndexConflict),
+                      makeSuite(TestUpgrade),
                     ))
 
 if __name__=='__main__':


=== Zope/lib/python/Products/ZCTextIndex/tests/testLexicon.py 1.5 => 1.6 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testLexicon.py:1.5	Thu Dec 19 10:39:29 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testLexicon.py	Thu Jun  5 15:43:55 2003
@@ -12,9 +12,11 @@
 #
 ##############################################################################
 
-import sys
+import os, sys
 from unittest import TestCase, TestSuite, main, makeSuite
 
+import ZODB
+
 from Products.ZCTextIndex.Lexicon import Lexicon
 from Products.ZCTextIndex.Lexicon import Splitter, CaseNormalizer
 
@@ -134,9 +136,59 @@
         words = HTMLWordSplitter().process(words)
         self.assertEqual(words, expected)
         locale.setlocale(locale.LC_ALL, loc) # restore saved locale
+        
+    def testUpgradeLength(self):
+        from BTrees.Length import Length
+        lexicon = Lexicon(Splitter())
+        del lexicon.length # Older instances don't override length
+        lexicon.sourceToWordIds('how now brown cow')
+        self.assert_(lexicon.length.__class__ is Length)        
+        
+class TestLexiconConflict(TestCase):
+    
+    storage = None
+
+    def tearDown(self):
+        if self.storage is not None:
+            self.storage.close()
+
+    def openDB(self):
+        from ZODB.FileStorage import FileStorage
+        from ZODB.DB import DB
+        n = 'fs_tmp__%s' % os.getpid()
+        self.storage = FileStorage(n)
+        self.db = DB(self.storage)
+        
+    def testAddWordConflict(self):
+        self.l = Lexicon(Splitter())
+        self.openDB()
+        r1 = self.db.open().root()
+        r1['l'] = self.l
+        get_transaction().commit()
+        
+        r2 = self.db.open().root()
+        copy = r2['l']
+        # Make sure the data is loaded
+        list(copy._wids.items())
+        list(copy._words.items())
+        copy.length()
+        
+        self.assertEqual(self.l._p_serial, copy._p_serial)
+        
+        self.l.sourceToWordIds('mary had a little lamb')
+        get_transaction().commit()
+        
+        copy.sourceToWordIds('whose fleece was')
+        copy.sourceToWordIds('white as snow')
+        get_transaction().commit()
+        self.assertEqual(copy.length(), 11)
+        self.assertEqual(copy.length(), len(copy._words))
 
 def test_suite():
-    return makeSuite(Test)
+    suite = TestSuite()
+    suite.addTest(makeSuite(Test))
+    suite.addTest(makeSuite(TestLexiconConflict))
+    return suite
 
 if __name__=='__main__':
     main(defaultTest='test_suite')


=== Zope/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py 1.36 => 1.37 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py:1.36	Wed Aug 14 18:25:14 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py	Thu Jun  5 15:43:55 2003
@@ -331,7 +331,7 @@
         self._checkAbsoluteScores()
 
     def _checkAbsoluteScores(self):
-        self.assertEqual(self.index._totaldoclen, 6)
+        self.assertEqual(self.index._totaldoclen(), 6)
         # So the mean doc length is 2.  We use that later.
 
         r, num = self.zc_index.query("one")