[Checkins] SVN: zope.index/trunk/src/zope/index/text/tests/ Remove Zope2 cruft.

Tres Seaver tseaver at palladion.com
Wed Jun 10 23:30:10 EDT 2009


Log message for revision 100830:
  Remove Zope2 cruft.

Changed:
  D   zope.index/trunk/src/zope/index/text/tests/indexhtml.py
  D   zope.index/trunk/src/zope/index/text/tests/queryhtml.py

-=-
Deleted: zope.index/trunk/src/zope/index/text/tests/indexhtml.py
===================================================================
--- zope.index/trunk/src/zope/index/text/tests/indexhtml.py	2009-06-11 03:26:12 UTC (rev 100829)
+++ zope.index/trunk/src/zope/index/text/tests/indexhtml.py	2009-06-11 03:30:09 UTC (rev 100830)
@@ -1,166 +0,0 @@
-#! /usr/bin/env python
-##############################################################################
-#
-# Copyright (c) 2003 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Index a collection of HTML files on the filesystem.
-
-usage: indexhtml.py [options] dir
-
-Will create an index of all files in dir or its subdirectories.
-
-options:
--f data.fs  -- the path to the filestorage datafile
-
-$Id$
-"""
-import os
-from time import clock
-
-from ZODB.Storage.FileStorage import FileStorage
-from ZODB.BTrees.IOBTree import IOBTree
-import transaction
-
-from zope.index.text.htmlsplitter import HTMLWordSplitter
-from zope.index.text.lexicon import Lexicon, StopWordRemover
-
-def make_zc_index():
-    # there's an elaborate dance necessary to construct an index
-    class Struct(object):
-        pass
-    extra = Struct()
-    extra.doc_attr = "read"
-    extra.lexicon_id = "lexicon"
-    caller = Struct()
-    caller.lexicon = Lexicon(HTMLWordSplitter(), StopWordRemover())
-    return ZCTextIndex("read", extra, caller)
-
-# TODO: make a splitter more like the HTMLSplitter for TextIndex
-# signature is
-# Splitter(string, stop_words, encoding,
-#          singlechar, indexnumbers, casefolding)
-
-class MySplitter(object):
-    def __init__(self):
-        self._v_splitter = HTMLWordSplitter()
-    def __call__(self, text, stopdict, *args, **kwargs):
-        words = self._v_splitter._split(text)
-        def lookup(w):
-            return stopdict.get(w, w)
-        return filter(None, map(lookup, words))
-
-def make_old_index():
-    from Products.PluginIndexes.TextIndex.TextIndex import TextIndex
-    from Products.PluginIndexes.TextIndex.Lexicon  import Lexicon
-    from zope.index.text.stopdict import get_stopdict
-
-    l = Lexicon(get_stopdict())
-    l.SplitterFunc = MySplitter()
-    return TextIndex("read", lexicon=l)
-
-def main(db, root, dir):
-    rt["index"] = index = INDEX()
-    rt["files"] = paths = IOBTree()
-    transaction.commit()
-
-    zodb_time = 0.0
-    pack_time = 0.0
-
-    files = [os.path.join(dir, file) for file in os.listdir(dir)]
-    docid = 0
-    t0 = clock()
-    for file in files:
-        if os.path.isdir(file):
-            files += [os.path.join(file, sub) for sub in os.listdir(file)]
-        else:
-            if not file.endswith(".html"):
-                continue
-            docid += 1
-            if LIMIT is not None and docid > LIMIT:
-                break
-            if VERBOSE:
-                print "%5d" % docid, file
-            f = open(file, "rb")
-            paths[docid] = file
-            index.index_object(docid, f)
-            f.close()
-            if docid % TXN_INTERVAL == 0:
-                z0 = clock()
-                transaction.commit()
-                z1 = clock()
-                zodb_time += z1 - z0
-                if VERBOSE:
-                    print "commit took", z1 - z0, zodb_time
-            if docid % PACK_INTERVAL == 0:
-                p0 = clock()
-                db.pack()
-                p1 = clock()
-                zodb_time += p1 - p0
-                pack_time += p1 - p0
-                if VERBOSE:
-                    print "pack took", p1 - p0, pack_time
-    z0 = clock()
-    transaction.commit()
-    z1 = t1 = clock()
-    total_time = t1 - t0
-    zodb_time += z1 - z0
-    if VERBOSE:
-        print "Total index time", total_time
-        print "Non-pack time", total_time - pack_time
-        print "Non-ZODB time", total_time - zodb_time
-
-if __name__ == "__main__":
-    import sys
-    import getopt
-
-    VERBOSE = 0
-    FSPATH = "Data.fs"
-    TXN_INTERVAL = 100
-    PACK_INTERVAL = 500
-    LIMIT = None
-    INDEX = make_zc_index
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'vf:t:p:n:T')
-    except getopt.error, msg:
-        print msg
-        print __doc__
-        sys.exit(2)
-
-    for o, v in opts:
-        if o == '-v':
-            VERBOSE += 1
-        if o == '-f':
-            FSPATH = v
-        if o == '-t':
-            TXN_INTERVAL = int(v)
-        if o == '-p':
-            PACK_INTERVAL = int(v)
-        if o == '-n':
-            LIMIT = int(v)
-        if o == '-T':
-            INDEX = make_old_index
-
-    if len(args) != 1:
-        print "Expected on argument"
-        print __doc__
-        sys.exit(2)
-    dir = args[0]
-
-    fs = FileStorage(FSPATH)
-    db = ZODB.DB(fs)
-    cn = db.open()
-    rt = cn.root()
-    dir = os.path.join(os.getcwd(), dir)
-    print dir
-    main(db, rt, dir)
-    cn.close()
-    fs.close()

Deleted: zope.index/trunk/src/zope/index/text/tests/queryhtml.py
===================================================================
--- zope.index/trunk/src/zope/index/text/tests/queryhtml.py	2009-06-11 03:26:12 UTC (rev 100829)
+++ zope.index/trunk/src/zope/index/text/tests/queryhtml.py	2009-06-11 03:30:09 UTC (rev 100830)
@@ -1,130 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2003 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Query HTML tests
-
-$Id$
-"""
-from time import clock
-
-from ZODB.Storage.FileStorage import FileStorage
-
-QUERIES = ["nested recursive functions",
-           "explicit better than implicit",
-           "build hpux",
-           "cannot create 'method-wrapper' instances",
-            "extension module C++",
-           "class method",
-           "instance variable",
-           "articulate information",
-           "import default files",
-           "gopher ftp http",
-           "documentation",
-           ]
-
-def path2url(p):
-    # convert the paths to a python.org URL
-    # hack: only works for the way Jeremy indexed his copy of python.org
-    marker = "www.python.org/."
-    i = p.find(marker)
-    if i == -1:
-        return p
-    i += len(marker)
-    return "http://www.python.org" + p[i:]
-
-from Products.PluginIndexes.TextIndex.TextIndex import And, Or
-from zope.index.nbest import NBest
-
-def main(rt):
-    index = rt["index"]
-    files = rt["files"]
-    times = {}
-    ITERS = range(50)
-    for i in range(11):
-        for q in QUERIES:
-            terms = q.split()
-            for c in " OR ", " AND ":
-                query = c.join(terms)
-                t0 = clock()
-                if TEXTINDEX:
-                    if c == " OR ":
-                        op = Or
-                    else:
-                        op = And
-                    _q = " ".join(terms)
-                    for _ in ITERS:
-                        b = index.query(_q, op).bucket()
-                        num = len(b)
-                        chooser = NBest(10)
-                        chooser.addmany(b.items())
-                        results = chooser.getbest()
-
-                else:
-                    try:
-                        for _ in ITERS:
-                            results, num = index.query(query)
-                    except:
-                        continue
-                t1 = clock()
-                print "<p>Query: \"%s\"" % query
-                print "<br>Num results: %d" % num
-                print "<br>time.clock(): %s" % (t1 - t0)
-                key = query
-                if i == 0:
-                    print "<ol>"
-                    for docid, score in results:
-                        url = path2url(files[docid])
-                        fmt = '<li><a href="%s">%s</A> score = %s'
-                        print fmt % (url, url, score)
-                    print "</ol>"
-                    continue
-                l = times.setdefault(key, [])
-                l.append(t1 - t0)
-
-    l = times.keys()
-    l.sort()
-    print "<hr>"
-    for k in l:
-        v = times[k]
-        print "<p>Query: \"%s\"" % k
-        print "<br>Min time: %s" % min(v)
-        print "<br>All times: %s" % " ".join(map(str, v))
-
-if __name__ == "__main__":
-    import sys
-    import getopt
-
-    VERBOSE = 0
-    FSPATH = "Data.fs"
-    TEXTINDEX = 0
-
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'vf:T')
-    except getopt.error, msg:
-        print msg
-        print __doc__
-        sys.exit(2)
-
-    for o, v in opts:
-        if o == '-v':
-            VERBOSE += 1
-        if o == '-f':
-            FSPATH = v
-        if o == '-T':
-            TEXTINDEX = 1
-
-    fs = FileStorage(FSPATH, read_only=1)
-    db = ZODB.DB(fs, cache_size=10000)
-    cn = db.open()
-    rt = cn.root()
-    main(rt)



More information about the Checkins mailing list