[Zope-CVS] CVS: Products/ZCTextIndex - IQueryParser.py:1.1.2.4 ParseTree.py:1.1.2.2 ZCTextIndex.py:1.1.2.13 IQueryEngine.py:NONE QueryEngine.py:NONE

Guido van Rossum guido@python.org
Mon, 6 May 2002 13:05:35 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv4358/lib/python/Products/ZCTextIndex

Modified Files:
      Tag: TextIndexDS9-branch
	IQueryParser.py ParseTree.py ZCTextIndex.py 
Removed Files:
      Tag: TextIndexDS9-branch
	IQueryEngine.py QueryEngine.py 
Log Message:
More refactoring: move the query engine functionality into the parse
tree node implementation.  (The query engine test remains, to test
this functionality in the parse tree.)


=== Products/ZCTextIndex/IQueryParser.py 1.1.2.3 => 1.1.2.4 ===
 
         Return a parse tree (which implements IQueryParseTree).
+
+        May raise ParseTree.ParseError.
         """
 
 class IQueryParseTree(Interface.Base):
@@ -41,5 +43,19 @@
         'AND'             a list of parse trees
         'OR'              a list of parse trees
         'NOT'             a parse tree
-        'ATOM'            a string
+        'ATOM'            a string (representing a single search term)
+        """
+
+    def terms():
+        """Return a list of all terms in this node, excluding NOT subtrees."""
+
+    def executeQuery(index):
+        """Execute the query represented by this node against the index.
+
+        The index argument must implement the IIndex interface.
+
+        Return an IIBucket or IIBTree mapping document ids to scores
+        (higher scores mean better results).
+
+        May raise ParseTree.QueryError.
         """


=== Products/ZCTextIndex/ParseTree.py 1.1.2.1 => 1.1.2.2 ===
 """Generic parser support: exception and parse tree nodes."""
 
+from BTrees.IIBTree import difference, weightedIntersection, weightedUnion
+from Products.ZCTextIndex.NBest import NBest
+
+class QueryError(Exception):
+    pass
+
 class ParseError(Exception):
     pass
 
@@ -39,6 +45,9 @@
             t.extend(v.terms())
         return t
 
+    def executeQuery(self, index):
+        raise NotImplementedError
+
 class NotNode(ParseTreeNode):
 
     _nodeType = "NOT"
@@ -46,17 +55,60 @@
     def terms(self):
         return []
 
+    def executeQuery(self, index):
+        raise QueryError, "NOT operator must occur right after AND"
+
 class AndNode(ParseTreeNode):
 
     _nodeType = "AND"
 
+    def executeQuery(self, index):
+        L = []
+        Nots = []
+        for subnode in self.getValue():
+            if subnode.nodeType() == "NOT":
+                Nots.append(subnode.getValue().executeQuery(index))
+            else:
+                L.append(subnode.executeQuery(index))
+        assert L
+        L.sort(lambda x, y: cmp(len(x), len(y)))
+        set = L[0]
+        for x in L[1:]:
+            dummy, set = weightedIntersection(set, x)
+        if Nots:
+            Nots.sort(lambda x, y: cmp(len(x), len(y)))
+            notset = Nots[0]
+            for x in Nots[1:]:
+                dummy, notset = weightedUnion(notset, x)
+            set = difference(set, notset)
+        return set
+
 class OrNode(ParseTreeNode):
 
     _nodeType = "OR"
 
+    def executeQuery(self, index):
+        # Balance unions as closely as possible, smallest to largest.
+        allofem = self.getValue()
+        merge = NBest(len(allofem))
+        for subnode in allofem:
+            result = subnode.executeQuery(index)
+            merge.add(result, len(result))
+        while len(merge) > 1:
+            # Merge the two smallest so far, and add back to the queue.
+            x, dummy = merge.pop_smallest()
+            y, dummy = merge.pop_smallest()
+            dummy, z = weightedUnion(x, y)
+            merge.add(z, len(z))
+        result, dummy = merge.pop_smallest()
+        return result
+
 class AtomNode(ParseTreeNode):
 
     _nodeType = "ATOM"
 
     def terms(self):
         return [self.getValue()]
+
+    def executeQuery(self, index):
+            return index.search(self.getValue())


=== Products/ZCTextIndex/ZCTextIndex.py 1.1.2.12 => 1.1.2.13 ===
         self._fieldname = doc_attr
         self.lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover())
-        self.engine = QueryEngine()
         self.index = Index(self.lexicon)
         self.parser = QueryParser()
 
@@ -53,7 +52,7 @@
     def query(self, query, nbest=10):
         # returns a mapping from docids to scores
         tree = self.parser.parseQuery(query)
-        results = self.engine.executeQuery(self.index, tree)
+        results = tree.executeQuery(self.index)
         chooser = NBest(nbest)
         chooser.addmany(results.items())
         return chooser.getbest()

=== Removed File Products/ZCTextIndex/IQueryEngine.py ===

=== Removed File Products/ZCTextIndex/QueryEngine.py ===