[Zope-Checkins] CVS: Zope2 - Catalog.py:1.60.2.9.2.1

chrism@serenade.digicool.com chrism@serenade.digicool.com
Tue, 17 Apr 2001 02:43:33 -0400


Update of /cvs-repository/Zope2/lib/python/Products/ZCatalog
In directory serenade.digicool.com:/home/chrism/sandboxes/CatalogForNow/lib/python/Products/ZCatalog

Modified Files:
      Tag: chrism-CatalogForNow-branch
	Catalog.py 
Log Message:
Broke out body of _indexedSearch method into several functions.  Most things that instantiate a Lazy object now try to pass in the length.  "sort_on" and "sort-on" are now removed from the query before the query is passed to indexes.  Fixed a bug in the searchResults method which may have led to people believing they could sort on a nontext or nonkeyword index.

Miserable.


--- Updated File Catalog.py in package Zope2 --
--- Catalog.py	2001/03/23 20:50:03	1.60.2.9
+++ Catalog.py	2001/04/17 06:43:33	1.60.2.9.2.1
@@ -98,7 +98,7 @@
 from Lazy import LazyMap, LazyFilter, LazyCat
 from CatalogBrains import AbstractCatalogBrain, NoBrainer
 
-from BTrees.IIBTree import intersection, weightedIntersection
+from BTrees.IIBTree import intersection, weightedIntersection, IISet
 from BTrees.OIBTree import OIBTree
 from BTrees.IOBTree import IOBTree
 import BTrees.Length
@@ -527,85 +527,127 @@
 ## Searching engine.  You don't really have to worry about what goes
 ## on below here...  Most of this stuff came from ZTables with tweaks.
 
+##  ^^^^ in some warped fantasy land, the prior comment might even be true. ;-)
+## i'm leaving it in here for entertainment value - chrism
+    
     def _indexedSearch(self, args, sort_index, append, used):
         """
         Iterate through the indexes, applying the query to each one.
         """
-
-        rs=None
-        data=self.data
-        
+        resultSet = None 
         if used is None: used={}
-        for i in self.indexes.keys():
-            index = self.indexes[i].__of__(self)
-            if hasattr(index,'_apply_index'):
-                r=index._apply_index(args)
-                if r is not None:
-                    r, u = r
-                    for name in u:
+        
+        for index in self.indexes.values():
+            if hasattr(index, '_apply_index'):
+                index = index.__of__(self)
+                indexResult=index._apply_index(args)
+                if indexResult is not None:
+                    indexResult, indexUsed = indexResult
+                    for name in indexUsed:
                         used[name]=1
-                    w, rs = weightedIntersection(rs, r)
+                    weight, resultSet=weightedIntersection(resultSet,
+                                                           indexResult)
 
-        #assert rs==None or hasattr(rs, 'values') or hasattr(rs, 'keys')
-        if rs is None:
-            # return everything
-            if sort_index is None:
-                rs=data.items()
-                append(LazyMap(self.instantiate, rs, len(self)))
-            else:
-                try:
-                    for k, intset in sort_index.items():
-                        append((k,LazyMap(self.__getitem__, intset)))
-                except AttributeError:
-                    raise ValueError, (
-                        "Incorrect index name passed as" 
-                        " 'sort_on' parameter.  Note that you may only" 
-                        " sort on values for which there is a matching" 
-                        " index available.")
-        elif rs:
-            # this is reached by having an empty result set (ie non-None)
-            if sort_index is None and hasattr(rs, 'values'):
-                # having a 'values' means we have a data structure with
-                # scores.  Build a new result set, sort it by score, reverse
-                # it, compute the normalized score, and Lazify it.
-                rset = rs.byValue(0) # sort it by score
-                max = float(rset[0][0])
-                rs = []
-                for score, key in rset:
-                    # compute normalized scores
-                    rs.append(( int((score/max)*100), score, key))
-                append(LazyMap(self.__getitem__, rs))
-                    
-            elif sort_index is None and not hasattr(rs, 'values'):
-                # no scores?  Just Lazify.
-                if hasattr(rs, 'keys'): rs=rs.keys() 
-                append(LazyMap(self.__getitem__, rs))
+        if resultSet is None:
+            # Case 1: resultSet came back as None
+            #
+            # none of the indexes were remotely interested in the args,
+            # so return everything.  we return everything instead of
+            # returning nothing for hysterical reasons.
+            self._appendAllResults(sort_index, append)
+
+        elif resultSet:
+            # Case 2: nonempty resultSet
+            #
+            # this is reached by having a non-empty, non-None result set,
+            # meaning that at least one of the indexes was interested
+            # in the args and had data that matched the query.
+            self._appendSpecifiedResults(sort_index, append, resultSet)
+
+        # Case 3: empty resultSet (implied)
+        #
+        # this is reached by having an empty result set, meaning that
+        # at least one of the indexes was interested in the args, but
+        # the query matched none of the documents in any of the indexes.
+        # We do nothing in this case.
+        
+        # return the names used by the indexes for an unknown reason ;-)
+        return used
+
+    def _appendAllResults(self, sort_index, append):
+        # this method is internal and is meant to be called only
+        # by _indexedSearch!
+        if sort_index is None:
+            # we don't have a sort index, just return everything
+            # in an undetermined order.
+            resultSet=self.data.items()
+            append(LazyMap(self.instantiate, resultSet, len(self)))
+        else:
+            # we have a sort index, return stuff sorted by sort index.
+            for k, intSet in sort_index.items():
+                append((k,LazyMap(self.__getitem__, intSet, len(intSet))))
+
+    def _appendSpecifiedResults(self, sort_index, append, resultSet):
+        # this method is internal and is meant to be called only
+        # by _indexedSearch!
+
+        haveScores = hasattr(resultSet, 'byValue')
+
+        if sort_index is None and haveScores:
+            # we have no sort_index, but we do have scores to sort by.
+            # having a 'byValue' means we have a data structure with
+            # scores.  Build a new result set using byValue, compute
+            # the normalized score, and Lazify it.
+            scoreSorted = resultSet.byValue(0)
+            # scoreSorted is now a list of two-tuples where the first
+            # element of the tuple is a score and the second is a docId.
+            # The two-tuples are sorted highest-score-first.
+            max = float(scoreSorted[0][0]) or 1.0
+            sortedSet = []
+            length = 0
+            for score, key in scoreSorted:
+                # compute normalized scores
+                normScore = int((score/max) * 100)
+                sortedSet.append((normScore, score, key))
+                length = length + 1
+            append(LazyMap(self.__getitem__, sortedSet, length))
+
+        elif sort_index is None and not haveScores:
+            # we don't have a 'byValue' which means there are no
+            # scores in the resultSet.  We want to just lazify the
+            # results and return them.  If we have a dictionaryish
+            # mapping object, get just its keys for use by LazyMap.
+            if hasattr(resultSet, 'keys'):
+                resultSet=resultSet.keys()
+            length = len(resultSet)
+            append(LazyMap(self.__getitem__, resultSet, length))
+
+        else:
+            # We have a sort_index.
+            # We do nothing with scores.
+            if ((len(resultSet) / 4) > len(sort_index)):
+                # if the sort index has fewer than a quarter as many
+                # keys as the result set
+                for k, intSet in sort_index.items():
+                    # We have an index that has a set of values for
+                    # each sort key, so we intersect with each set and
+                    # get a sorted sequence of the intersections.
+
+                    # This only makes sense if the number of
+                    # keys is much less than the number of results.
+                    intSet = intersection(resultSet, intSet)
+                    if intSet:
+                        if hasattr(intSet, 'keys'):
+                            intSet=intSet.keys()
+                        length = len(intSet)
+                        append((k, LazyMap(self.__getitem__,intSet,length)))
             else:
-                # sort.  If there are scores, then this block is not
-                # reached, therefor 'sort-on' does not happen in the
-                # context of text index query.  This should probably
-                # sort by relevance first, then the 'sort-on' attribute.
-                if ((len(rs) / 4) > len(sort_index)):
-                    # if the sorted index has a quarter as many keys as
-                    # the result set
-                    for k, intset in sort_index.items():
-                        # We have an index that has a set of values for
-                        # each sort key, so we interset with each set and
-                        # get a sorted sequence of the intersections.
-
-                        # This only makes sense if the number of
-                        # keys is much less then the number of results.
-                        intset = intersection(rs, intset)
-                        if intset:
-                            if hasattr(intset, 'keys'): intset=intset.keys() 
-                            append((k,LazyMap(self.__getitem__, intset)))
-                else:
-                    if hasattr(rs, 'keys'): rs=rs.keys()
-                    for did in rs:
-                        append((sort_index.keyForDocument(did),
-                               LazyMap(self.__getitem__,[did])))
+                if hasattr(resultSet, 'keys'):
+                    resultSet=resultSet.keys()
+                for docId in resultSet:
+                    append((sort_index.keyForDocument(docId),
+                            LazyMap(self.__getitem__,[docId])))
 
-        return used
 
     def searchResults(self, REQUEST=None, used=None,
                       query_map={
@@ -635,16 +677,24 @@
         # Compute "sort_index", which is a sort index, or none:
         if kw.has_key('sort-on'):
             sort_index=kw['sort-on']
+            del kw['sort-on']
         elif hasattr(self, 'sort-on'):
             sort_index=getattr(self, 'sort-on')
         elif kw.has_key('sort_on'):
             sort_index=kw['sort_on']
+            del kw['sort_on']
         else: sort_index=None
         sort_order=''
-        if sort_index is not None and self.indexes.has_key(sort_index):
-            sort_index=self.indexes[sort_index]
-            if not hasattr(sort_index, 'keyForDocument'):
-                raise CatalogError('Invalid sort index')
+        if sort_index is not None:
+            if self.indexes.has_key(sort_index):
+                sort_index=self.indexes[sort_index]
+                if not hasattr(sort_index, 'keyForDocument'):
+                    raise CatalogError(
+                        'The index chosen for sort_on is not capable of being'
+                        ' used as a sort index.'
+                        )
+            else:
+                raise CatalogError('Unknown sort_on index %s' % sort_index)
 
         # Perform searches with indexes and sort_index
         r=[]