[Checkins] SVN: zope.index/trunk/src/zope/index/text/tests/ Remove Zope2 cruft.
Tres Seaver
tseaver at palladion.com
Wed Jun 10 23:30:10 EDT 2009
Log message for revision 100830:
Remove Zope2 cruft.
Changed:
D zope.index/trunk/src/zope/index/text/tests/indexhtml.py
D zope.index/trunk/src/zope/index/text/tests/queryhtml.py
-=-
Deleted: zope.index/trunk/src/zope/index/text/tests/indexhtml.py
===================================================================
--- zope.index/trunk/src/zope/index/text/tests/indexhtml.py 2009-06-11 03:26:12 UTC (rev 100829)
+++ zope.index/trunk/src/zope/index/text/tests/indexhtml.py 2009-06-11 03:30:09 UTC (rev 100830)
@@ -1,166 +0,0 @@
-#! /usr/bin/env python
-##############################################################################
-#
-# Copyright (c) 2003 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Index a collection of HTML files on the filesystem.
-
-usage: indexhtml.py [options] dir
-
-Will create an index of all files in dir or its subdirectories.
-
-options:
--f data.fs -- the path to the filestorage datafile
-
-$Id$
-"""
-import os
-from time import clock
-
-from ZODB.Storage.FileStorage import FileStorage
-from ZODB.BTrees.IOBTree import IOBTree
-import transaction
-
-from zope.index.text.htmlsplitter import HTMLWordSplitter
-from zope.index.text.lexicon import Lexicon, StopWordRemover
-
-def make_zc_index():
- # there's an elaborate dance necessary to construct an index
- class Struct(object):
- pass
- extra = Struct()
- extra.doc_attr = "read"
- extra.lexicon_id = "lexicon"
- caller = Struct()
- caller.lexicon = Lexicon(HTMLWordSplitter(), StopWordRemover())
- return ZCTextIndex("read", extra, caller)
-
-# TODO: make a splitter more like the HTMLSplitter for TextIndex
-# signature is
-# Splitter(string, stop_words, encoding,
-# singlechar, indexnumbers, casefolding)
-
-class MySplitter(object):
- def __init__(self):
- self._v_splitter = HTMLWordSplitter()
- def __call__(self, text, stopdict, *args, **kwargs):
- words = self._v_splitter._split(text)
- def lookup(w):
- return stopdict.get(w, w)
- return filter(None, map(lookup, words))
-
-def make_old_index():
- from Products.PluginIndexes.TextIndex.TextIndex import TextIndex
- from Products.PluginIndexes.TextIndex.Lexicon import Lexicon
- from zope.index.text.stopdict import get_stopdict
-
- l = Lexicon(get_stopdict())
- l.SplitterFunc = MySplitter()
- return TextIndex("read", lexicon=l)
-
-def main(db, root, dir):
- rt["index"] = index = INDEX()
- rt["files"] = paths = IOBTree()
- transaction.commit()
-
- zodb_time = 0.0
- pack_time = 0.0
-
- files = [os.path.join(dir, file) for file in os.listdir(dir)]
- docid = 0
- t0 = clock()
- for file in files:
- if os.path.isdir(file):
- files += [os.path.join(file, sub) for sub in os.listdir(file)]
- else:
- if not file.endswith(".html"):
- continue
- docid += 1
- if LIMIT is not None and docid > LIMIT:
- break
- if VERBOSE:
- print "%5d" % docid, file
- f = open(file, "rb")
- paths[docid] = file
- index.index_object(docid, f)
- f.close()
- if docid % TXN_INTERVAL == 0:
- z0 = clock()
- transaction.commit()
- z1 = clock()
- zodb_time += z1 - z0
- if VERBOSE:
- print "commit took", z1 - z0, zodb_time
- if docid % PACK_INTERVAL == 0:
- p0 = clock()
- db.pack()
- p1 = clock()
- zodb_time += p1 - p0
- pack_time += p1 - p0
- if VERBOSE:
- print "pack took", p1 - p0, pack_time
- z0 = clock()
- transaction.commit()
- z1 = t1 = clock()
- total_time = t1 - t0
- zodb_time += z1 - z0
- if VERBOSE:
- print "Total index time", total_time
- print "Non-pack time", total_time - pack_time
- print "Non-ZODB time", total_time - zodb_time
-
-if __name__ == "__main__":
- import sys
- import getopt
-
- VERBOSE = 0
- FSPATH = "Data.fs"
- TXN_INTERVAL = 100
- PACK_INTERVAL = 500
- LIMIT = None
- INDEX = make_zc_index
- try:
- opts, args = getopt.getopt(sys.argv[1:], 'vf:t:p:n:T')
- except getopt.error, msg:
- print msg
- print __doc__
- sys.exit(2)
-
- for o, v in opts:
- if o == '-v':
- VERBOSE += 1
- if o == '-f':
- FSPATH = v
- if o == '-t':
- TXN_INTERVAL = int(v)
- if o == '-p':
- PACK_INTERVAL = int(v)
- if o == '-n':
- LIMIT = int(v)
- if o == '-T':
- INDEX = make_old_index
-
- if len(args) != 1:
- print "Expected on argument"
- print __doc__
- sys.exit(2)
- dir = args[0]
-
- fs = FileStorage(FSPATH)
- db = ZODB.DB(fs)
- cn = db.open()
- rt = cn.root()
- dir = os.path.join(os.getcwd(), dir)
- print dir
- main(db, rt, dir)
- cn.close()
- fs.close()
Deleted: zope.index/trunk/src/zope/index/text/tests/queryhtml.py
===================================================================
--- zope.index/trunk/src/zope/index/text/tests/queryhtml.py 2009-06-11 03:26:12 UTC (rev 100829)
+++ zope.index/trunk/src/zope/index/text/tests/queryhtml.py 2009-06-11 03:30:09 UTC (rev 100830)
@@ -1,130 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2003 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Query HTML tests
-
-$Id$
-"""
-from time import clock
-
-from ZODB.Storage.FileStorage import FileStorage
-
-QUERIES = ["nested recursive functions",
- "explicit better than implicit",
- "build hpux",
- "cannot create 'method-wrapper' instances",
- "extension module C++",
- "class method",
- "instance variable",
- "articulate information",
- "import default files",
- "gopher ftp http",
- "documentation",
- ]
-
-def path2url(p):
- # convert the paths to a python.org URL
- # hack: only works for the way Jeremy indexed his copy of python.org
- marker = "www.python.org/."
- i = p.find(marker)
- if i == -1:
- return p
- i += len(marker)
- return "http://www.python.org" + p[i:]
-
-from Products.PluginIndexes.TextIndex.TextIndex import And, Or
-from zope.index.nbest import NBest
-
-def main(rt):
- index = rt["index"]
- files = rt["files"]
- times = {}
- ITERS = range(50)
- for i in range(11):
- for q in QUERIES:
- terms = q.split()
- for c in " OR ", " AND ":
- query = c.join(terms)
- t0 = clock()
- if TEXTINDEX:
- if c == " OR ":
- op = Or
- else:
- op = And
- _q = " ".join(terms)
- for _ in ITERS:
- b = index.query(_q, op).bucket()
- num = len(b)
- chooser = NBest(10)
- chooser.addmany(b.items())
- results = chooser.getbest()
-
- else:
- try:
- for _ in ITERS:
- results, num = index.query(query)
- except:
- continue
- t1 = clock()
- print "<p>Query: \"%s\"" % query
- print "<br>Num results: %d" % num
- print "<br>time.clock(): %s" % (t1 - t0)
- key = query
- if i == 0:
- print "<ol>"
- for docid, score in results:
- url = path2url(files[docid])
- fmt = '<li><a href="%s">%s</A> score = %s'
- print fmt % (url, url, score)
- print "</ol>"
- continue
- l = times.setdefault(key, [])
- l.append(t1 - t0)
-
- l = times.keys()
- l.sort()
- print "<hr>"
- for k in l:
- v = times[k]
- print "<p>Query: \"%s\"" % k
- print "<br>Min time: %s" % min(v)
- print "<br>All times: %s" % " ".join(map(str, v))
-
-if __name__ == "__main__":
- import sys
- import getopt
-
- VERBOSE = 0
- FSPATH = "Data.fs"
- TEXTINDEX = 0
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], 'vf:T')
- except getopt.error, msg:
- print msg
- print __doc__
- sys.exit(2)
-
- for o, v in opts:
- if o == '-v':
- VERBOSE += 1
- if o == '-f':
- FSPATH = v
- if o == '-T':
- TEXTINDEX = 1
-
- fs = FileStorage(FSPATH, read_only=1)
- db = ZODB.DB(fs, cache_size=10000)
- cn = db.open()
- rt = cn.root()
- main(rt)
More information about the Checkins mailing list