[Zope3-checkins] CVS: zopeproducts/lucenetextindex/tests - __init__.py:1.1 test_lucenetextindex.py:1.1

Stephan Richter srichter@cosmos.phy.tufts.edu
Tue, 29 Jul 2003 09:04:13 -0400


Update of /cvs-repository/zopeproducts/lucenetextindex/tests
In directory cvs.zope.org:/tmp/cvs-serv15566/tests

Added Files:
	__init__.py test_lucenetextindex.py 
Log Message:
I am proud to announce the first check-in of the Lucene Text Index for 
Zope 3. The code was fully sponsored by struktur AG (www.struktur.de).

In the next week I expect to receive some changes from struktur, since they
have done some stress testing; I will maintain the code. This is a good
example of how to use the Index (especially the TextIndex) API to write
your own implementations.


=== Added File zopeproducts/lucenetextindex/tests/__init__.py ===


=== Added File zopeproducts/lucenetextindex/tests/test_lucenetextindex.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Lucene Text Index Tests

$Id: test_lucenetextindex.py,v 1.1 2003/07/29 13:04:07 srichter Exp $
"""
import unittest
from zopeproducts.lucenetextindex.index import LuceneTextIndex

class LuceneServerStub:
    """In-memory stand-in for the Lucene server used by these tests.

    Documents are kept in a plain dictionary that maps a document id to its
    text, so no network connection is involved.
    """

    def __init__(self):
        # Maps document id -> stored text.
        self._index = {}

    def isIndexed(self, id):
        """Return whether a document is stored under ``id``."""
        # ``in`` replaces ``dict.has_key()``, which no longer exists in
        # Python 3; the membership operator works on old and new versions.
        return id in self._index

    def insertDocument(self, id, text):
        """Store ``text`` under ``id``."""
        self._index[id] = text

    def updateDocument(self, id, text):
        """Replace the text stored under ``id`` (same effect as inserting)."""
        self._index[id] = text

    def deleteDocument(self, id):
        """Remove the document stored under ``id``.

        Raises ``KeyError`` if no document is stored under that id.
        """
        del self._index[id]

    def query(self, query):
        """Return a list of ``[id, score]`` pairs for matching documents.

        A document matches when ``query`` occurs in its text as a plain
        substring.  The score is the length of the query divided by the
        length of the document text.
        """
        return [[doc_id, len(query) / float(len(text))]
                for doc_id, text in self._index.items()
                if query in text]

    def getStatistics(self):
        """Return ``[number of documents, number of distinct words]``.

        Words are the whitespace-separated terms of every stored text.
        """
        # Dictionary keys serve as the collection of distinct terms, which
        # gives constant-time de-duplication instead of scanning a list.
        distinct_terms = {}
        for text in self._index.values():
            for term in text.split():
                distinct_terms[term] = True
        return [len(self._index), len(distinct_terms)]

    def _getLucene(self):
        """Return the stub itself; it plays the role of the ``lucene`` proxy."""
        return self

    lucene = property(_getLucene)


class MyLuceneTextIndex(LuceneTextIndex):
    """Lucene text index with its connection handling swapped out for tests.

    ``connect`` and ``disconnect`` are overridden so that a
    ``LuceneServerStub`` is handed out and no real XML-RPC calls are made.
    """

    def connect(self):
        # Guard against the tests being pointed at an unexpected server URL.
        assert self.url == 'http://localhost:10080/RPC2/'
        # Create the stub lazily: either the attribute was never set or it
        # was reset to None by disconnect().
        if getattr(self, '_v_connection', None) is None:
            self._v_connection = LuceneServerStub()
        return self._v_connection

    def disconnect(self):
        # Forget the cached stub; the next connect() creates a new one.
        self._v_connection = None


class LuceneTextIndexTest(unittest.TestCase):
    # Exercises document/word counts, querying and result batching of the
    # text index through MyLuceneTextIndex.  Plain comments are used rather
    # than docstrings so the test runner keeps reporting the method names.

    def setUp(self):
        # Every test starts from a fresh index holding documents 1000 and 1001.
        index = MyLuceneTextIndex('http://localhost:10080/RPC2/')
        index.index_doc(
            1000, [u"the quick brown fox jumps over the lazy dog"])
        index.index_doc(
            1001,
            [u"the brown fox and the yellow fox don't need the retriever"])
        self.wrapper = index

    def testCounts(self):
        # Counts for the two fixture documents ...
        index = self.wrapper
        self.assertEqual(index.documentCount(), 2)
        self.assertEqual(index.wordCount(), 13)
        # ... and again after adding a third document with two new words.
        index.index_doc(1002, [u"foo bar"])
        self.assertEqual(index.documentCount(), 3)
        self.assertEqual(index.wordCount(), 15)

    def testOne(self):
        # Only document 1000 contains this phrase.
        hits, count = self.wrapper.query(u"quick brown fox", 0, 10)
        self.assertEqual(count, 1)
        # The unpacking itself fails unless exactly one (docid, rank) pair
        # came back.
        [(found_id, rank)] = hits
        self.assertEqual(found_id, 1000)

    def testDefaultBatch(self):
        # Each entry: (positional query arguments, expected number of hits
        # returned).  The reported total is expected to be 2 in every case.
        # NOTE(review): the last query text carries a leading space; it is
        # unclear whether that is intentional -- confirm.
        cases = (
            ((u"fox", 0), 2),
            ((u"fox",), 2),
            ((u" fox", 1), 1),
        )
        for query_args, expected_hits in cases:
            hits, count = self.wrapper.query(*query_args)
            self.assertEqual(count, 2)
            self.assertEqual(len(hits), expected_hits)

    def testNone(self):
        # A term that occurs in neither document yields nothing.
        hits, count = self.wrapper.query(u"dalmatian", 0, 10)
        self.assertEqual(count, 0)
        self.assertEqual(len(hits), 0)

    def testAll(self):
        # Both documents contain this phrase.
        hits, count = self.wrapper.query(u"brown fox", 0, 10)
        self.assertEqual(count, 2)
        self.assertEqual(len(hits), 2)
        # Sort so the comparison does not depend on the returned order.
        hits.sort()
        for hit, expected_id in zip(hits, (1000, 1001)):
            self.assertEqual(hit[0], expected_id)

    def testBatching(self):
        # Fetch the two hits one at a time, as two batches of size one.
        first_batch, count = self.wrapper.query(u"brown fox", 0, 1)
        self.assertEqual(count, 2)
        self.assertEqual(len(first_batch), 1)
        second_batch, count = self.wrapper.query(u"brown fox", 1, 1)
        self.assertEqual(count, 2)
        self.assertEqual(len(second_batch), 1)
        # Together the batches must cover both documents.
        combined = first_batch + second_batch
        combined.sort()
        for hit, expected_id in zip(combined, (1000, 1001)):
            self.assertEqual(hit[0], expected_id)


def test_suite():
    """Return a test suite with every test method of LuceneTextIndexTest."""
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the loader method builds the same suite and has long been available.
    return unittest.TestLoader().loadTestsFromTestCase(LuceneTextIndexTest)

# Allow this test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main(defaultTest="test_suite")