[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - interactiveDemo.py:1.1.2.2

Andreas Jung andreas@digicool.com
Wed, 13 Feb 2002 15:51:37 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv1703

Modified Files:
      Tag: ajung-textindexng-branch
	interactiveDemo.py 
Log Message:
code cleanup


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/interactiveDemo.py 1.1.2.1 => 1.1.2.2 ===
 from Products.PluginIndexes.TextIndex import TextIndex
 from Products.ZCatalog import Catalog
-import os, sys, re,traceback, atexit
+import os, sys, re,traceback, atexit, getopt
 import time
 import readline
 
@@ -15,9 +15,6 @@
 except IOError: pass
 atexit.register(readline.write_history_file,histfile)
 
-datadir = '/work/html//doc/python-2.2/lib'
-datadir = '/export/html//doc/ZopeBook'
-
 class extra: pass
 
 
@@ -26,73 +23,103 @@
     def __init__(self,txt,path=''):
         self.text = txt
         self.path = path
- 
 
-ex = extra()
-ex.useSplitter='ZopeSplitter'
-#ex.useStemmer='porter'
-ex.useOperator='and'
-ex.lexicon = None
-ex.useGlobbing=1
-#ex.useProximity='soundex'
-#ex.nearStorage = 'documentLookup'
-ex.nearStorage = 'internal'
 
+def index_directory(dirname, verbose, timed):
 
-CAT = Catalog.Catalog("cat")
-CAT.aq_parent = TO('aq_parent')
+    """Catalog two sample texts plus every regular file in dirname; return the Catalog."""
+    if not dirname: raise RuntimeError,'no directory name'
 
-TI = TextIndexNG.TextIndexNG('text',ex,caller = CAT)
-CAT.addIndex('text',TI)
+    ex = extra()                                # option holder handed to TextIndexNG
+    ex.useSplitter          = 'ZopeSplitter'
+    ex.splitterCasefolding  = 1
+    ex.useStemmer           = None
+    ex.useOperator          = 'and'
+    ex.lexicon              = None
+    ex.useGlobbing          = 0
+    ex.nearDistance         = 5
+    ex.useSimilarity        = 0
+    ex.stopWords = None
 
-t1 = TO ('this text is a text')
-t2 = TO ('the quick brown fox jumps over the lazy dog because the dog is quick and jumps quick') 
+    CAT = Catalog.Catalog("cat")
+    CAT.aq_parent = TO('aq_parent')
 
-CAT.catalogObject(t1 , 't1')
-CAT.catalogObject(t1 , 't2')
+    TI = TextIndexNG.TextIndexNG('text',ex, caller = CAT)
+    if verbose: TI.debugOn()
+    else:       TI.debugOff()
+    TI.timed_statistics = timed
 
-files = os.listdir(datadir)
-files.sort()
+    CAT.addIndex('text',TI)
+    CAT.addColumn('text')                       # result columns printed by the query loop
+    CAT.addColumn('path')
 
-ts = time.time()
-bytes = 0
-print '-'*78
+    t1 = TO ('this text is a text')
+    t2 = TO ('the quick brown fox jumps over the lazy dog because the dog is quick and jumps quick') 
 
-for i in range(len(files)):
-    f = files[i]
-    print >>sys.stderr,f
-    fname = os.path.join(datadir,f)
-    bytes+=os.stat(fname)[6]
-    if not os.path.isfile(fname): continue
-    data = open(fname).read()
+    CAT.catalogObject(t1 , 't1')
+    CAT.catalogObject(t2 , 't2')                # was t1: t2 was built but never cataloged
 
-    T = TO(data,fname)
-    CAT.catalogObject(T,fname)
+    files = os.listdir(dirname)
+    files.sort()
 
-print "%d files, total size: %d" % (len(files), bytes)
-print "Indexing time: %5.3lf" % (time.time() - ts)
-for x in dir(ex):
-    print "%25s = %s" % (x,getattr(ex,x))
+    ts = time.time()
+    bytes = 0
+    print '-'*78
 
+    for f in files:
+        print >>sys.stderr,f
+        fname = os.path.join(dirname,f)
+        if not os.path.isfile(fname): continue  # only regular files are indexed ...
+        bytes+=os.stat(fname)[6]                # ... so only their st_size is summed
+        data = open(fname).read()
 
+        T = TO(data,fname)
+        CAT.catalogObject(T,fname)              # the file path doubles as the catalog uid
 
+    print "%d files, total size: %d" % (len(files), bytes)
+    print "Indexing time: %5.3lf" % (time.time() - ts)
+    for x in dir(ex):                           # dump the index options that were used
+        print "%25s = %s" % (x,getattr(ex,x))
 
-while 1:
+    return CAT
 
-    line = raw_input("> ")
-    
-    
-    try:
-        res = CAT.searchResults(text={'query':line})
+def interactive_mode(CAT):
 
-        print "Result:"
+    """Read queries at a '> ' prompt until EOF/interrupt; print the matching paths."""
+    while 1:
 
-        for r in res:
-            rid = r.getRID()
-            print CAT.paths[rid]
-            print  r.text,r.path
+        try: line = raw_input("> ")
+        except (EOFError, KeyboardInterrupt): break   # ^D / ^C ends the session cleanly
         
+        try:
+            res = CAT.searchResults(text={'query':line})
+
+            print "Result: %d matches" % len(res)
+
+            for i in range(len(res)):
+                print  "%-2d %s" % (i, res[i].path)
+
+        # best effort: a failing query prints its traceback and the prompt continues
+        except:
+            traceback.print_exc()
+
+
+if __name__== '__main__':
+    # Options: -h/--help, -d DIR/--directory=DIR, --verbose, --timed
+    opts,args = getopt.getopt(sys.argv[1:],'hd:',['help','directory=',\
+            'verbose','timed'])
+
+    directory = None
+    verbose = 0
+    timed   = 0 
+    for k,v in opts:
+        if k in ['-h','--help']:        # print the synopsis inline, then exit
+            print >>sys.stderr, "usage: %s [-h] [-d DIR|--directory=DIR] [--verbose] [--timed]" % sys.argv[0]; sys.exit(1)
+        if k in ['-d','--directory']:   directory = v
+        if k in ['--verbose']:          verbose = 1
+        if k in ['--timed']:            timed = 1
 
-    except:
-        traceback.print_exc()
     
+    cat = index_directory(directory, verbose, timed)
+    interactive_mode(cat)
+