[Zope3-checkins] CVS: Zope3/lib/python/Zope/TextIndex/tests - testTextIndexWrapper.py:1.2

Guido van Rossum guido@python.org
Wed, 4 Dec 2002 03:32:19 -0500


Update of /cvs-repository/Zope3/lib/python/Zope/TextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv481

Modified Files:
	testTextIndexWrapper.py 
Log Message:
Add tests for Latin-1 and Unicode words.


=== Zope3/lib/python/Zope/TextIndex/tests/testTextIndexWrapper.py 1.1 => 1.2 ===
--- Zope3/lib/python/Zope/TextIndex/tests/testTextIndexWrapper.py:1.1	Tue Dec  3 11:45:23 2002
+++ Zope3/lib/python/Zope/TextIndex/tests/testTextIndexWrapper.py	Wed Dec  4 03:32:19 2002
@@ -19,15 +19,16 @@
 from unittest import TestCase, TestSuite, main, makeSuite
 
 from Zope.TextIndex.TextIndexWrapper import TextIndexWrapper
+from Zope.TextIndex import ParseTree
 
 class Test(TestCase):
 
     def setUp(self):
         w = TextIndexWrapper()
-        doc1 = u"the quick brown fox jumps over the lazy dog"
-        doc2 = u"the brown fox and the yellow fox don't need the retriever"
-        w.index_doc(1000, [doc1])
-        w.index_doc(1001, [doc2])
+        doc = u"the quick brown fox jumps over the lazy dog"
+        w.index_doc(1000, [doc])
+        doc = u"the brown fox and the yellow fox don't need the retriever"
+        w.index_doc(1001, [doc])
         self.wrapper = w
 
     def testOne(self):
@@ -35,6 +36,43 @@
         self.assertEqual(total, 1)
         [(docid, rank)] = matches # if this fails there's a problem
         self.assertEqual(docid, 1000)
+
+    def testLatin1(self):
+        w = self.wrapper
+        doc = u"Fran\xe7ois"
+        w.index_doc(1002, [doc])
+        matches, total = self.wrapper.query(doc, 0, 10)
+        self.assertEqual(total, 1)
+        [(docid, rank)] = matches # if this fails there's a problem
+        self.assertEqual(docid, 1002)
+
+    def testUnicode(self):
+        w = self.wrapper
+        # Verbose, but easy to debug
+        delta  = u"\N{GREEK SMALL LETTER DELTA}"
+        delta += u"\N{GREEK SMALL LETTER EPSILON}"
+        delta += u"\N{GREEK SMALL LETTER LAMDA}"
+        delta += u"\N{GREEK SMALL LETTER TAU}"
+        delta += u"\N{GREEK SMALL LETTER ALPHA}"
+        assert delta.islower()
+        emdash = u"\N{EM DASH}"
+        assert not emdash.isalnum()
+        alpha  = u"\N{GREEK SMALL LETTER ALPHA}"
+        assert alpha.islower()
+        lamda  = u"\N{GREEK SMALL LETTER LAMDA}"
+        lamda += u"\N{GREEK SMALL LETTER ALPHA}"
+        assert lamda.islower()
+        doc = delta + emdash + alpha
+        w.index_doc(1002, [doc])
+        for word in delta, alpha:
+            matches, total = self.wrapper.query(word, 0, 10)
+            self.assertEqual(total, 1)
+            [(docid, rank)] = matches # if this fails there's a problem
+            self.assertEqual(docid, 1002)
+        self.assertRaises(ParseTree.ParseError,
+                          self.wrapper.query, emdash, 0, 10)
+        matches, total = self.wrapper.query(lamda, 0, 10)
+        self.assertEqual(total, 0)
 
     def testNone(self):
         matches, total = self.wrapper.query(u"dalmatian", 0, 10)