[Zope-CVS] CVS: Products/ZCTextIndex/tests - mailtest.py:1.1.2.6

Jeremy Hylton jeremy@zope.com
Wed, 1 May 2002 13:43:56 -0400


Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv6428

Modified Files:
      Tag: TextIndexDS9-branch
	mailtest.py 
Log Message:
mailtest is growing into a regular Swiss Army knife of features.

-m mailbox: index the mailbox
-q query: execute the query

Store the documents in the database, too, so we can print out useful
results one day.

Update the doc strings.


=== Products/ZCTextIndex/tests/mailtest.py 1.1.2.5 => 1.1.2.6 ===
 
-usage: python mailtest.py [options] <mailbox> <data.fs>
+usage: python mailtest.py [options] <data.fs>
 
 options:
     -v verbose
     -n NNN -- max number of messages to read from mailbox
+    -q query
+    -m mailbox
 
-The script reads mail messages from the mailbox and indexes them.  It
-indexes one message at a time, then commits the transaction.
+The script either indexes or queries depending on whether -q or -m is
+passed as an option.
 
-To interact with the index after it is completed, you can simply load
-the index from the database:
+For -m mailbox, the script reads mail messages from the mailbox and
+indexes them.  It indexes one message at a time, then commits the
+transaction.
+
+For -q query, it performs a query on an existing index.
+
+If both are specified, indexing is performed first.
+
+You can also interact with the index after it is completed. Load the
+index from the database:
 
     import ZODB
     from ZODB.FileStorage import FileStorage
@@ -23,6 +33,7 @@
 import ZODB
 import ZODB.FileStorage
 from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex
+from BTrees.IOBTree import IOBTree
 
 import sys
 import mailbox
@@ -38,28 +49,21 @@
     total_bytes = 0
     
     def __init__(self, msg):
-        self.msg = msg
-
-    def text(self):
-        buf = self.msg.fp.read()
-        Message.total_bytes += len(buf)
-        return buf
+        self.text = msg.fp.read()
+        Message.total_bytes += len(self.text)
 
-def main(inp, out):
+def index(rt, mboxfile):
     global NUM
     idx_time = 0
     pack_time = 0
     
-    f = ZODB.FileStorage.FileStorage(out)
-    db = ZODB.DB(f)
-    cn = db.open()
-    rt = cn.root()
     rt["index"] = idx = ZCTextIndex("text")
+    rt["documents"] = docs = IOBTree()
     get_transaction().commit()
 
-    mbox = mailbox.UnixMailbox(open(inp))
+    mbox = mailbox.UnixMailbox(open(mboxfile))
     if VERBOSE:
-        print "opened", inp
+        print "opened", mboxfile
     if not NUM:
         NUM = sys.maxint
     i = 0
@@ -71,6 +75,7 @@
         msg = Message(_msg)
         i0 = time.clock()
         idx.index_object(i, msg)
+        docs[i] = msg
         get_transaction().commit()
         i1 = time.clock()
         idx_time += i1 - i0
@@ -80,10 +85,37 @@
             p0 = time.clock()
             db.pack(time.time())
             p1 = time.clock()
-            print "pack took %s sec" % (p1 - p0)
+            if VERBOSE:
+                print "pack took %s sec" % (p1 - p0)
             pack_time += p1 - p0
 
-    return idx_time, pack_time
+    if VERBOSE:
+        print "Index time", idx_time
+        print "Index bytes", Message.total_bytes
+        rate = (Message.total_bytes / idx_time) / 1024
+        print "Index rate %d KB/sec" % int(rate)
+
+def query(rt, query_str):
+    idx = rt["index"]
+    results = idx.query(query_str)
+    print results
+    for r in results.items():
+        print r
+
+def main(fs_path, mbox_path, query_str):
+    f = ZODB.FileStorage.FileStorage(fs_path)
+    db = ZODB.DB(f)
+    cn = db.open()
+    rt = cn.root()
+    
+    if mbox_path is not None:
+        index(rt, mbox_path)
+    if query is not None:
+        query(rt, query_str)
+
+    cn.close()
+    db.close()
+    f.close()
 
 if __name__ == "__main__":
     import getopt
@@ -91,12 +123,14 @@
     NUM = 0
     VERBOSE = 0
     PACK_INTERVAL = 500
+    query_str = None
+    mbox_path = None
     try:
-        opts, args = getopt.getopt(sys.argv[1:], 'vn:p:')
+        opts, args = getopt.getopt(sys.argv[1:], 'vn:p:m:q:')
     except getopt.error, msg:
         usage(msg)
-    if len(args) != 2:
-        usage("exactly 2 filename arguments required")
+    if len(args) != 1:
+        usage("exactly 1 filename argument required")
     for o, v in opts:
         if o == '-n':
             NUM = int(v)
@@ -104,7 +138,10 @@
             VERBOSE += 1
         elif o == '-p':
             PACK_INTERVAL = int(v)
-    inp, out = args
-    ti, tp = main(inp, out)
-    print "Index time", ti
-    print "Index bytes", Message.total_bytes
+        elif o == '-q':
+            query_str = v
+        elif o == '-m':
+            mbox_path = v
+    fs_path, = args
+    print "main"
+    main(fs_path, mbox_path, query_str)