[Checkins] SVN: zope.index/trunk/src/zope/index/text/ 100% test coverage for zope.index.text.lexicon module.

Tres Seaver tseaver at palladion.com
Wed Jun 10 19:37:59 EDT 2009


Log message for revision 100819:
  100% test coverage for zope.index.text.lexicon module.
  
  Add interfaces for pipeline elements.
  
  Remove broken cruft in StopWordRemover.process.
  
  

Changed:
  U   zope.index/trunk/src/zope/index/text/interfaces.py
  U   zope.index/trunk/src/zope/index/text/lexicon.py
  U   zope.index/trunk/src/zope/index/text/tests/test_htmlsplitter.py
  U   zope.index/trunk/src/zope/index/text/tests/test_lexicon.py

-=-
Modified: zope.index/trunk/src/zope/index/text/interfaces.py
===================================================================
--- zope.index/trunk/src/zope/index/text/interfaces.py	2009-06-10 22:29:44 UTC (rev 100818)
+++ zope.index/trunk/src/zope/index/text/interfaces.py	2009-06-10 23:37:59 UTC (rev 100819)
@@ -74,7 +74,7 @@
     def isGlob(word):
         """Return true if the word is a globbing pattern.
 
-        The word should be one of the words returned by parseTerm().
+        The word should be one of the words returned by parseTerms().
         """
 
 class IQueryParser(Interface):
@@ -161,13 +161,23 @@
         returning None indicates the object should not be indexed
         """
 
-class ISplitter(Interface):
-    """A splitter."""
+class IPipelineElement(Interface):
+    """ An element in a lexicon's processing pipeline.
+    """
+    def process(terms):
+        """ Transform each term in terms.
 
-    def process(text):
-        """Run the splitter over the input text, returning a list of terms."""
+        Return the sequence of transformed terms.
+        """
 
+class ISplitter(IPipelineElement):
+    """ Split text into a sequence of words.
+    """
+    def processGlob(terms):
+        """ Transform terms, leaving globbing markers in place.
+        """
 
+
 class IExtendedQuerying(Interface):
     """An index that supports advanced search setups."""
 

Modified: zope.index/trunk/src/zope/index/text/lexicon.py
===================================================================
--- zope.index/trunk/src/zope/index/text/lexicon.py	2009-06-10 22:29:44 UTC (rev 100818)
+++ zope.index/trunk/src/zope/index/text/lexicon.py	2009-06-10 23:37:59 UTC (rev 100819)
@@ -26,6 +26,8 @@
 from persistent import Persistent
 
 from zope.index.text.interfaces import ILexicon
+from zope.index.text.interfaces import IPipelineElement
+from zope.index.text.interfaces import ISplitter
 from zope.index.text.stopdict import get_stopdict
 from zope.index.text.parsetree import QueryError
 
@@ -62,7 +64,7 @@
         last = _text2list(text)
         for element in self._pipeline:
             last = element.process(last)
-        if not hasattr(self.wordCount, 'change'):
+        if not isinstance(self.wordCount, Length):
             # Make sure wordCount is overridden with a BTrees.Length.Length
             self.wordCount = Length(self.wordCount())        
         # Strategically unload the length value so that we get the most
@@ -145,12 +147,10 @@
 
     def _new_wid(self):
         count = self.wordCount
-        try:
+        count.change(1)
+        while self._words.has_key(count()):
+            # just to be safe
             count.change(1)
-        except AttributeError:
-            count = self.wordCount = Length.Length(count())
-        while self._words.has_key(count()): # just to be safe
-            count.change(1)
         return count()
 
 def _text2list(text):
@@ -165,6 +165,7 @@
 # Sample pipeline elements
 
 class Splitter(object):
+    implements(ISplitter)
 
     rx = re.compile(r"(?u)\w+")
     rxGlob = re.compile(r"(?u)\w+[\w*?]*") # See globToWordIds() above
@@ -182,23 +183,19 @@
         return result
 
 class CaseNormalizer(object):
+    implements(IPipelineElement)
 
     def process(self, lst):
         return [w.lower() for w in lst]
 
 class StopWordRemover(object):
+    implements(IPipelineElement)
 
     dict = get_stopdict().copy()
 
-    try:
-        from zope.index.text.stopper import process as _process
-    except ImportError:
-        def process(self, lst):
-            has_key = self.dict.has_key
-            return [w for w in lst if not has_key(w)]
-    else:
-        def process(self, lst):
-            return self._process(self.dict, lst)
+    def process(self, lst):
+        has_key = self.dict.has_key
+        return [w for w in lst if not has_key(w)]
 
 class StopWordAndSingleCharRemover(StopWordRemover):
 

Modified: zope.index/trunk/src/zope/index/text/tests/test_htmlsplitter.py
===================================================================
--- zope.index/trunk/src/zope/index/text/tests/test_htmlsplitter.py	2009-06-10 22:29:44 UTC (rev 100818)
+++ zope.index/trunk/src/zope/index/text/tests/test_htmlsplitter.py	2009-06-10 23:37:59 UTC (rev 100819)
@@ -16,7 +16,13 @@
 import unittest
 
 class HTMLWordSplitterTests(unittest.TestCase):
-    # Subclasses must define '_getBTreesFamily'
+    _old_locale = None
+
+    def tearDown(self):
+        if self._old_locale is not None:
+            import locale
+            locale.setlocale(locale.LC_ALL, self._old_locale)
+
     def _getTargetClass(self):
         from zope.index.text.htmlsplitter import HTMLWordSplitter
         return HTMLWordSplitter
@@ -42,6 +48,23 @@
         splitter = self._makeOne()
         self.assertEqual(splitter.process(['abc def']), ['abc', 'def'])
 
+    def test_process_w_locale_awareness(self):
+        import locale
+        import sys
+        self._old_locale = locale.setlocale(locale.LC_ALL)
+        # set German locale
+        try:
+            if sys.platform == 'win32':
+                locale.setlocale(locale.LC_ALL, 'German_Germany.1252')
+            else:
+                locale.setlocale(locale.LC_ALL, 'de_DE.ISO8859-1')
+        except locale.Error:
+            return # This test doesn't work here :-(
+        expected = ['m\xfclltonne', 'waschb\xe4r',
+                    'beh\xf6rde', '\xfcberflieger']
+        splitter = self._makeOne()
+        self.assertEqual(splitter.process(' '.join(expected)), expected)
+
     def test_process_w_markup(self):
         splitter = self._makeOne()
         self.assertEqual(splitter.process(['<h1>abc</h1> &nbsp; <p>def</p>']),
@@ -71,6 +94,26 @@
         self.assertEqual(splitter.processGlob(['abc?def hij*klm nop* qrs?']),
                          ['abc?def', 'hij*klm', 'nop*', 'qrs?'])
 
+    def test_process_w_locale_awareness(self):
+        import locale
+        import sys
+        self._old_locale = locale.setlocale(locale.LC_ALL)
+        # set German locale
+        try:
+            if sys.platform == 'win32':
+                locale.setlocale(locale.LC_ALL, 'German_Germany.1252')
+            else:
+                locale.setlocale(locale.LC_ALL, 'de_DE.ISO8859-1')
+        except locale.Error:
+            return # This test doesn't work here :-(
+        expected = ['m\xfclltonne', 'waschb\xe4r',
+                    'beh\xf6rde', '\xfcberflieger']
+        words = [" ".join(expected)]
+        words = Splitter().process(words)
+        self.assertEqual(words, expected)
+        words = HTMLWordSplitter().process(words)
+        self.assertEqual(words, expected)
+
 def test_suite():
     return unittest.TestSuite((
         unittest.makeSuite(HTMLWordSplitterTests),

Modified: zope.index/trunk/src/zope/index/text/tests/test_lexicon.py
===================================================================
--- zope.index/trunk/src/zope/index/text/tests/test_lexicon.py	2009-06-10 22:29:44 UTC (rev 100818)
+++ zope.index/trunk/src/zope/index/text/tests/test_lexicon.py	2009-06-10 23:37:59 UTC (rev 100819)
@@ -15,12 +15,346 @@
 
 $Id$
 """
-import sys
-from unittest import TestCase, main, makeSuite
+import unittest
 
 from zope.index.text.lexicon import Lexicon
-from zope.index.text.lexicon import Splitter, CaseNormalizer
 
+
+class LexiconTests(unittest.TestCase):
+
+    def _getTargetClass(self):
+        from zope.index.text.lexicon import Lexicon
+        return Lexicon
+
+    def _makeOne(self, *pipeline):
+        from zope.index.text.lexicon import Splitter
+        pipeline = (Splitter(),) + pipeline
+        return self._getTargetClass()(*pipeline)
+
+    def test_class_conforms_to_ILexicon(self):
+        from zope.interface.verify import verifyClass
+        from zope.index.text.interfaces import ILexicon
+        verifyClass(ILexicon, self._getTargetClass())
+
+    def test_instance_conforms_to_ILexicon(self):
+        from zope.interface.verify import verifyObject
+        from zope.index.text.interfaces import ILexicon
+        verifyObject(ILexicon, self._makeOne())
+
+    def test_empty(self):
+        lexicon = self._makeOne()
+        self.assertEqual(len(lexicon.words()), 0)
+        self.assertEqual(len(lexicon.wids()), 0)
+        self.assertEqual(len(lexicon.items()), 0)
+        self.assertEqual(lexicon.wordCount(), 0)
+
+    def test_wordCount_legacy_instance_no_write_on_read(self):
+        from BTrees.Length import Length
+        lexicon = self._makeOne()
+        # Simulate old instance, which didn't have Length attr
+        del lexicon.wordCount
+        self.assertEqual(lexicon.wordCount(), 0)
+        # No write-on-read!
+        self.failIf(isinstance(lexicon.wordCount, Length))
+
+    def test_sourceToWordIds(self):
+        lexicon = self._makeOne()
+        wids = lexicon.sourceToWordIds('cats and dogs')
+        self.assertEqual(wids, [1, 2, 3])
+        self.assertEqual(lexicon.get_word(1), 'cats')
+        self.assertEqual(lexicon.get_wid('cats'), 1)
+
+    def test_sourceToWordIds_promotes_wordCount_attr(self):
+        from BTrees.Length import Length
+        lexicon = self._makeOne()
+        # Simulate old instance, which didn't have Length attr
+        del lexicon.wordCount
+        wids = lexicon.sourceToWordIds('cats and dogs')
+        self.assertEqual(wids, [1, 2, 3])
+        self.assertEqual(lexicon.wordCount(), 3)
+        self.failUnless(isinstance(lexicon.wordCount, Length))
+
+    def test_termToWordIds_hit(self):
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds('cats and dogs')
+        wids = lexicon.termToWordIds('dogs')
+        self.assertEqual(wids, [3])
+
+    def test_termToWordIds_miss(self):
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds('cats and dogs')
+        wids = lexicon.termToWordIds('boxes')
+        self.assertEqual(wids, [0])
+
+    def test_termToWordIds_w_extra_pipeline_element(self):
+        lexicon = self._makeOne(StupidPipelineElement('dogs', 'fish'))
+        lexicon.sourceToWordIds('cats and dogs')
+        wids = lexicon.termToWordIds('fish')
+        self.assertEqual(wids, [3])
+
+    def test_termToWordIds_w_case_normalizer(self):
+        from zope.index.text.lexicon import CaseNormalizer
+        lexicon = self._makeOne(CaseNormalizer())
+        lexicon.sourceToWordIds('CATS and dogs')
+        wids = lexicon.termToWordIds('cats and dogs')
+        self.assertEqual(wids, [1, 2, 3])
+
+    def test_termToWordIds_wo_case_normalizer(self):
+        lexicon = self._makeOne()
+        wids = lexicon.sourceToWordIds('CATS and dogs')
+        wids = lexicon.termToWordIds('cats and dogs')
+        self.assertEqual(wids, [0, 2, 3])
+
+    def test_termToWordIds_w_two_extra_pipeline_elements(self):
+        lexicon = self._makeOne(StupidPipelineElement('cats', 'fish'),
+                                WackyReversePipelineElement('fish'),
+                               )
+        lexicon.sourceToWordIds('cats and dogs')
+        wids = lexicon.termToWordIds('hsif')
+        self.assertEqual(wids, [1])
+
+    def test_termToWordIds_w_three_extra_pipeline_elements(self):
+        lexicon = self._makeOne(StopWordPipelineElement({'and':1}),
+                                StupidPipelineElement('dogs', 'fish'),
+                                WackyReversePipelineElement('fish'),
+                               )
+        wids = lexicon.sourceToWordIds('cats and dogs')
+        wids = lexicon.termToWordIds('hsif')
+        self.assertEqual(wids, [2])
+
+    def test_parseTerms_tuple(self):
+        TERMS = ('a', 'b*c', 'de?f')
+        lexicon = self._makeOne()
+        self.assertEqual(lexicon.parseTerms(TERMS), list(TERMS))
+
+    def test_parseTerms_list(self):
+        TERMS = ['a', 'b*c', 'de?f']
+        lexicon = self._makeOne()
+        self.assertEqual(lexicon.parseTerms(TERMS), list(TERMS))
+
+    def test_parseTerms_nonempty_string(self):
+        lexicon = self._makeOne()
+        self.assertEqual(lexicon.parseTerms('a b*c de?f'),
+                         ['a', 'b*c', 'de?f'])
+
+    def test_parseTerms_empty_string(self):
+        lexicon = self._makeOne()
+        self.assertEqual(lexicon.parseTerms(''), [])
+
+    def test_isGlob_empty(self):
+        lexicon = self._makeOne()
+        self.failIf(lexicon.isGlob(''))
+
+    def test_isGlob_miss(self):
+        lexicon = self._makeOne()
+        self.failIf(lexicon.isGlob('abc'))
+
+    def test_isGlob_question_mark(self):
+        lexicon = self._makeOne()
+        self.failUnless(lexicon.isGlob('a?c'))
+
+    def test_isGlob_asterisk(self):
+        lexicon = self._makeOne()
+        self.failUnless(lexicon.isGlob('abc*'))
+
+    def test_globToWordIds_invalid_pattern(self):
+        from zope.index.text.parsetree import QueryError
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds('cats and dogs')
+        self.assertRaises(QueryError, lexicon.globToWordIds, '*s')
+
+    def test_globToWordIds_simple_pattern(self):
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds('cats and dogs are enemies')
+        self.assertEqual(lexicon.globToWordIds('a*'), [2, 4])
+
+    def test_globToWordIds_question_mark_pattern(self):
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds('cats and dogs are enemies')
+        self.assertEqual(lexicon.globToWordIds('a?e'), [4])
+
+    def test_globToWordIds_prefix(self):
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds('cats and dogs are enemies')
+        self.assertEqual(lexicon.globToWordIds('are'), [4])
+
+    def test_getWordIdCreate_new(self):
+        lexicon = self._makeOne()
+        wid = lexicon._getWordIdCreate('nonesuch')
+        self.assertEqual(wid, 1)
+        self.assertEqual(lexicon.get_word(1), 'nonesuch')
+        self.assertEqual(lexicon.get_wid('nonesuch'), 1)
+
+    def test_getWordIdCreate_extant(self):
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds('cats and dogs are enemies')
+        wid = lexicon._getWordIdCreate('cats')
+        self.assertEqual(wid, 1)
+        self.assertEqual(lexicon.get_word(1), 'cats')
+        self.assertEqual(lexicon.get_wid('cats'), 1)
+
+    def test__new_wid_recovers_from_damaged_length(self):
+        lexicon = self._makeOne()
+        lexicon.sourceToWordIds('cats and dogs')
+        lexicon.wordCount.set(0)
+        wid = lexicon._new_wid()
+        self.assertEqual(wid, 4)
+        self.assertEqual(lexicon.wordCount(), 4)
+
+class SplitterTests(unittest.TestCase):
+    _old_locale = None
+
+    def tearDown(self):
+        if self._old_locale is not None:
+            import locale
+            locale.setlocale(locale.LC_ALL, self._old_locale)
+
+    def _getTargetClass(self):
+        from zope.index.text.lexicon import Splitter
+        return Splitter
+
+    def _makeOne(self):
+        return self._getTargetClass()()
+
+    def test_class_conforms_to_ISplitter(self):
+        from zope.interface.verify import verifyClass
+        from zope.index.text.interfaces import ISplitter
+        verifyClass(ISplitter, self._getTargetClass())
+
+    def test_instance_conforms_to_ISplitter(self):
+        from zope.interface.verify import verifyObject
+        from zope.index.text.interfaces import ISplitter
+        verifyObject(ISplitter, self._makeOne())
+
+    def test_process_empty_string(self):
+        splitter = self._makeOne()
+        self.assertEqual(splitter.process(['']), [])
+
+    def test_process_simple(self):
+        splitter = self._makeOne()
+        self.assertEqual(splitter.process(['abc def']), ['abc', 'def'])
+
+    def test_process_w_locale_awareness(self):
+        import locale
+        import sys
+        self._old_locale = locale.setlocale(locale.LC_ALL)
+        # set German locale
+        try:
+            if sys.platform == 'win32':
+                locale.setlocale(locale.LC_ALL, 'German_Germany.1252')
+            else:
+                locale.setlocale(locale.LC_ALL, 'de_DE.ISO8859-1')
+        except locale.Error:
+            return # This test doesn't work here :-(
+        expected = ['m\xfclltonne', 'waschb\xe4r',
+                    'beh\xf6rde', '\xfcberflieger']
+        splitter = self._makeOne()
+        self.assertEqual(splitter.process(' '.join(expected)), expected)
+
+    def test_process_w_glob(self):
+        splitter = self._makeOne()
+        self.assertEqual(splitter.process(['abc?def hij*klm nop* qrs?']),
+                         ['abc', 'def', 'hij', 'klm', 'nop', 'qrs'])
+
+    def test_processGlob_empty_string(self):
+        splitter = self._makeOne()
+        self.assertEqual(splitter.processGlob(['']), [])
+
+    def test_processGlob_simple(self):
+        splitter = self._makeOne()
+        self.assertEqual(splitter.processGlob(['abc def']), ['abc', 'def'])
+
+    def test_processGlob_w_glob(self):
+        splitter = self._makeOne()
+        self.assertEqual(splitter.processGlob(['abc?def hij*klm nop* qrs?']),
+                         ['abc?def', 'hij*klm', 'nop*', 'qrs?'])
+
+class CaseNormalizerTests(unittest.TestCase):
+
+    def _getTargetClass(self):
+        from zope.index.text.lexicon import CaseNormalizer
+        return CaseNormalizer
+
+    def _makeOne(self):
+        return self._getTargetClass()()
+
+    def test_class_conforms_to_IPipelineElement(self):
+        from zope.interface.verify import verifyClass
+        from zope.index.text.interfaces import IPipelineElement
+        verifyClass(IPipelineElement, self._getTargetClass())
+
+    def test_instance_conforms_to_IPipelineElement(self):
+        from zope.interface.verify import verifyObject
+        from zope.index.text.interfaces import IPipelineElement
+        verifyObject(IPipelineElement, self._makeOne())
+
+    def test_process_empty(self):
+        cn = self._makeOne()
+        self.assertEqual(cn.process([]), [])
+
+    def test_process_nonempty(self):
+        cn = self._makeOne()
+        self.assertEqual(cn.process(['ABC Def']), ['abc def'])
+
+class StopWordRemoverTests(unittest.TestCase):
+
+    def _getTargetClass(self):
+        from zope.index.text.lexicon import StopWordRemover
+        return StopWordRemover
+
+    def _makeOne(self):
+        return self._getTargetClass()()
+
+    def test_class_conforms_to_IPipelineElement(self):
+        from zope.interface.verify import verifyClass
+        from zope.index.text.interfaces import IPipelineElement
+        verifyClass(IPipelineElement, self._getTargetClass())
+
+    def test_instance_conforms_to_IPipelineElement(self):
+        from zope.interface.verify import verifyObject
+        from zope.index.text.interfaces import IPipelineElement
+        verifyObject(IPipelineElement, self._makeOne())
+
+    def test_process_empty(self):
+        cn = self._makeOne()
+        self.assertEqual(cn.process([]), [])
+
+    def test_process_nonempty(self):
+        QUOTE = 'The end of government is justice'
+        cn = self._makeOne()
+        self.assertEqual(cn.process(QUOTE.lower().split()),
+                         ['end', 'government', 'justice'])
+
+class StopWordAndSingleCharRemoverTests(unittest.TestCase):
+
+    def _getTargetClass(self):
+        from zope.index.text.lexicon import StopWordAndSingleCharRemover
+        return StopWordAndSingleCharRemover
+
+    def _makeOne(self):
+        return self._getTargetClass()()
+
+    def test_class_conforms_to_IPipelineElement(self):
+        from zope.interface.verify import verifyClass
+        from zope.index.text.interfaces import IPipelineElement
+        verifyClass(IPipelineElement, self._getTargetClass())
+
+    def test_instance_conforms_to_IPipelineElement(self):
+        from zope.interface.verify import verifyObject
+        from zope.index.text.interfaces import IPipelineElement
+        verifyObject(IPipelineElement, self._makeOne())
+
+    def test_process_empty(self):
+        cn = self._makeOne()
+        self.assertEqual(cn.process([]), [])
+
+    def test_process_nonempty(self):
+        QUOTE = 'The end of government is justice z x q'
+        cn = self._makeOne()
+        self.assertEqual(cn.process(QUOTE.lower().split()),
+                         ['end', 'government', 'justice'])
+
+
 class StupidPipelineElement(object):
     def __init__(self, fromword, toword):
         self.__fromword = fromword
@@ -63,83 +397,11 @@
                 res.append(term)
         return res
 
-
-class Test(TestCase):
-    def testSourceToWordIds(self):
-        lexicon = Lexicon(Splitter())
-        wids = lexicon.sourceToWordIds('cats and dogs')
-        self.assertEqual(wids, [1, 2, 3])
-
-    def testTermToWordIds(self):
-        lexicon = Lexicon(Splitter())
-        wids = lexicon.sourceToWordIds('cats and dogs')
-        wids = lexicon.termToWordIds('dogs')
-        self.assertEqual(wids, [3])
-
-    def testMissingTermToWordIds(self):
-        lexicon = Lexicon(Splitter())
-        wids = lexicon.sourceToWordIds('cats and dogs')
-        wids = lexicon.termToWordIds('boxes')
-        self.assertEqual(wids, [0])
-
-    def testOnePipelineElement(self):
-        lexicon = Lexicon(Splitter(), StupidPipelineElement('dogs', 'fish'))
-        wids = lexicon.sourceToWordIds('cats and dogs')
-        wids = lexicon.termToWordIds('fish')
-        self.assertEqual(wids, [3])
-
-    def testSplitterAdaptorFold(self):
-        lexicon = Lexicon(Splitter(), CaseNormalizer())
-        wids = lexicon.sourceToWordIds('CATS and dogs')
-        wids = lexicon.termToWordIds('cats and dogs')
-        self.assertEqual(wids, [1, 2, 3])
-
-    def testSplitterAdaptorNofold(self):
-        lexicon = Lexicon(Splitter())
-        wids = lexicon.sourceToWordIds('CATS and dogs')
-        wids = lexicon.termToWordIds('cats and dogs')
-        self.assertEqual(wids, [0, 2, 3])
-
-    def testTwoElementPipeline(self):
-        lexicon = Lexicon(Splitter(),
-                          StupidPipelineElement('cats', 'fish'),
-                          WackyReversePipelineElement('fish'))
-        wids = lexicon.sourceToWordIds('cats and dogs')
-        wids = lexicon.termToWordIds('hsif')
-        self.assertEqual(wids, [1])
-
-    def testThreeElementPipeline(self):
-        lexicon = Lexicon(Splitter(),
-                          StopWordPipelineElement({'and':1}),
-                          StupidPipelineElement('dogs', 'fish'),
-                          WackyReversePipelineElement('fish'))
-        wids = lexicon.sourceToWordIds('cats and dogs')
-        wids = lexicon.termToWordIds('hsif')
-        self.assertEqual(wids, [2])
-
-    def testSplitterLocaleAwareness(self):
-        from zope.index.text.htmlsplitter import HTMLWordSplitter
-        import locale
-        loc = locale.setlocale(locale.LC_ALL) # get current locale
-         # set German locale
-        try:
-            if sys.platform != 'win32':
-                locale.setlocale(locale.LC_ALL, 'de_DE.ISO8859-1')
-            else:
-                locale.setlocale(locale.LC_ALL, 'German_Germany.1252')
-        except locale.Error:
-            return # This test doesn't work here :-(
-        expected = ['m\xfclltonne', 'waschb\xe4r',
-                    'beh\xf6rde', '\xfcberflieger']
-        words = [" ".join(expected)]
-        words = Splitter().process(words)
-        self.assertEqual(words, expected)
-        words = HTMLWordSplitter().process(words)
-        self.assertEqual(words, expected)
-        locale.setlocale(locale.LC_ALL, loc) # restore saved locale
-
 def test_suite():
-    return makeSuite(Test)
-
-if __name__=='__main__':
-    main(defaultTest='test_suite')
+    return unittest.TestSuite((
+        unittest.makeSuite(LexiconTests),
+        unittest.makeSuite(SplitterTests),
+        unittest.makeSuite(CaseNormalizerTests),
+        unittest.makeSuite(StopWordRemoverTests),
+        unittest.makeSuite(StopWordAndSingleCharRemoverTests),
+    ))



More information about the Checkins mailing list