[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - interactiveDemo.py:1.1.2.1

Andreas Jung andreas@digicool.com
Wed, 16 Jan 2002 20:56:13 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv17965

Added Files:
      Tag: ajung-textindexng-branch
	interactiveDemo.py 
Log Message:
added


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/interactiveDemo.py ===
#!/usr/bin/env python2.1

import Zope
import ExtensionClass
from Products.PluginIndexes.TextIndexNG import TextIndexNG
from Products.PluginIndexes.TextIndex import TextIndex
from Products.ZCatalog import Catalog
import os, sys, re,traceback, atexit
import time
import readline

# Keep the query history across sessions: load what an earlier run saved and
# arrange for the current history to be written back at interpreter exit.
histfile = os.path.expanduser('~/.pyhist')
try:
    readline.read_history_file(histfile)
except IOError:
    # The history file could not be read; carry on with an empty history.
    pass
atexit.register(readline.write_history_file, histfile)

# Directory whose entries are listed and cataloged as demo documents.
# Alternative corpus (was a dead assignment, overwritten immediately):
#datadir = '/work/html//doc/python-2.2/lib'
datadir = '/export/html//doc/ZopeBook'

# Empty attribute container; the index settings are attached to an instance
# of it as plain attributes.
# NOTE: intentionally left without a docstring -- the script later prints
# every name returned by dir() on the instance, and a docstring could change
# that output.
class extra:
    pass


# Minimal document object for the demo: it only carries the text to be
# indexed and the path that text came from.
class TO(ExtensionClass.Base):

    def __init__(self, txt, path=''):
        # Stored as 'text' and 'path'; both are read back when search
        # results are printed.
        self.text, self.path = txt, path
 

# Settings object handed to the TextIndexNG constructor.
ex = extra()
ex.lexicon = None
ex.useSplitter = 'ZopeSplitter'
ex.useOperator = 'and'
ex.useGlobbing = 1
ex.nearStorage = 'internal'
# Alternative settings kept around for experiments:
#ex.nearStorage = 'documentLookup'
#ex.useStemmer='porter'
#ex.useProximity='soundex'


# Create the catalog and register a TextIndexNG index under the id 'text'.
CAT = Catalog.Catalog("cat")
# Dummy acquisition parent -- presumably required by the catalog/index code
# when running outside a real Zope object tree (TODO confirm).
CAT.aq_parent = TO('aq_parent')

TI = TextIndexNG.TextIndexNG('text', ex, caller=CAT)
CAT.addIndex('text', TI)

# Two small hand-written documents, cataloged under the uids 't1' and 't2'.
t1 = TO('this text is a text')
t2 = TO('the quick brown fox jumps over the lazy dog because the dog is quick and jumps quick')

CAT.catalogObject(t1, 't1')
# Fixed: t1 used to be cataloged a second time under 't2', which left the
# t2 document created above completely unindexed.
CAT.catalogObject(t2, 't2')

files = os.listdir(datadir)
files.sort()

ts = time.time()
bytes = 0
print '-'*78

for i in range(len(files)):
    f = files[i]
    print >>sys.stderr,f
    fname = os.path.join(datadir,f)
    bytes+=os.stat(fname)[6]
    if not os.path.isfile(fname): continue
    data = open(fname).read()

    T = TO(data,fname)
    CAT.catalogObject(T,fname)

print "%d files, total size: %d" % (len(files), bytes)
print "Indexing time: %5.3lf" % (time.time() - ts)
for x in dir(ex):
    print "%25s = %s" % (x,getattr(ex,x))




while 1:

    line = raw_input("> ")
    
    
    try:
        res = CAT.searchResults(text={'query':line})

        print "Result:"

        for r in res:
            rid = r.getRID()
            print CAT.paths[rid]
            print  r.text,r.path
        

    except:
        traceback.print_exc()