[Zope-CVS] CVS: Products/ZCTextIndex/tests - mailtest.py:1.1.2.13

Jeremy Hylton jeremy@zope.com
Wed, 1 May 2002 19:15:45 -0400


Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv9229/tests

Modified Files:
      Tag: TextIndexDS9-branch
	mailtest.py 
Log Message:
More features for mailtest.py.

New option: -b NNN -- return the NNN best matches (default is 10).

If a query is run in verbose mode, print out 5 lines of context after
the docid and score.

Add the message subject and author to the text of the message. This
change makes it easier to run tests and get a rough sense for whether
the ranking makes sense.




=== Products/ZCTextIndex/tests/mailtest.py 1.1.2.12 => 1.1.2.13 ===
     -p NNN -- pack <data.fs> every NNN messages (default: 500), and at end
     -p 0 -- don't pack at all
+    -b NNN -- return the NNN best matches (default is 10)
 
 The script either indexes or queries depending on whether -q or -i is
 passed as an option.
@@ -51,7 +52,13 @@
     total_bytes = 0
 
     def __init__(self, msg):
-        self.text = msg.fp.read()
+        subject = msg.getheader('subject', '')
+        author = msg.getheader('from', '')
+        if author:
+            summary = "%s (%s)\n" % (subject, author)
+        else:
+            summary = "%s\n" % subject
+        self.text = summary + msg.fp.read()
         Message.total_bytes += len(self.text)
 
 def index(rt, mboxfile, db):
@@ -113,9 +120,24 @@
 
 def query(rt, query_str):
     idx = rt["index"]
-    results = idx.query(query_str)
-    for r in results.items():
-        print r
+    docs = rt["documents"]
+    results = idx.query(query_str, BEST)
+    print "query:", query_str
+    print "# results:", len(results)
+    for docid, score in results:
+        print "docid %4d score %2d" % (docid, score)
+        if VERBOSE:
+            msg = docs[docid]
+            # print 3 lines of context
+            CONTEXT = 5
+            ctx = msg.text.split("\n", CONTEXT)
+            del ctx[-1]
+            print "-" * 60
+            print "message:"
+            for l in ctx:
+                print l
+            print "-" * 60
+        
 
 def main(fs_path, mbox_path, query_str):
     f = ZODB.FileStorage.FileStorage(fs_path)
@@ -136,12 +158,13 @@
     import getopt
 
     NUM = 0
+    BEST = 10
     VERBOSE = 0
     PACK_INTERVAL = 500
     query_str = None
     mbox_path = None
     try:
-        opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:')
+        opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:b:')
     except getopt.error, msg:
         usage(msg)
     if len(args) != 1:
@@ -157,5 +180,7 @@
             query_str = v
         elif o == '-i':
             mbox_path = v
+        elif o == '-b':
+            BEST = int(v)
     fs_path, = args
     main(fs_path, mbox_path, query_str)