[Checkins] SVN: Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/ new procedure for adding components

Andreas Gabriel gabriel at hrz.uni-marburg.de
Sun Oct 10 18:25:29 EDT 2010


Log message for revision 117419:
  new procedure for adding components 
  

Changed:
  U   Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py
  U   Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/dtml/addCompositeIndex.dtml
  U   Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/tests/testCompositeIndex.py

-=-
Modified: Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py
===================================================================
--- Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py	2010-10-10 21:15:16 UTC (rev 117418)
+++ Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py	2010-10-10 22:25:29 UTC (rev 117419)
@@ -15,34 +15,70 @@
 import logging
 
 from Acquisition import aq_parent
+from Persistence import PersistentMapping
 
+
 from App.special_dtml import DTMLFile
 
 from BTrees.IIBTree import IIBTree, IITreeSet, IISet, union, intersection, difference
 from BTrees.OOBTree import OOBTree
 from BTrees.IOBTree import IOBTree
-import BTrees.Length
+from BTrees.Length import Length
 
+
 from zope.interface import implements
 
 from ZODB.POSException import ConflictError
 
 from Products.PluginIndexes.interfaces import ITransposeQuery
 from Products.PluginIndexes.interfaces import IUniqueValueIndex
-from Products.PluginIndexes.KeywordIndex.KeywordIndex import KeywordIndex
-
+from Products.PluginIndexes.common.UnIndex import UnIndex
 from Products.PluginIndexes.common.util import parseIndexRequest
+from Products.PluginIndexes.common import safe_callable
 
 from util import PermuteKeywordList
 
+QUERY_OPTIONS = { 'FieldIndex' :  ["query","range"] ,
+                  'KeywordIndex' : ["query","operator","range"] }
 
-
 _marker = []
 
 logger = logging.getLogger('CompositeIndex')
 
-class CompositeIndex(KeywordIndex):
 
+class Component:
+
+    def __init__(self,id,type,attributes):
+        
+        self._id = id
+        self._type = type
+        
+        if isinstance(attributes, str):
+            self._attributes = attributes.split(',')
+        else:
+            self._attributes = list(attributes)
+            
+        self._attributes = [ attr.strip() for attr in self._attributes if attr ]
+        
+
+    @property
+    def id(self):
+        return self._id
+
+    @property
+    def type(self):
+        return self._type
+
+    @property
+    def attributes(self):
+        if not self._attributes:
+            return [self._id]
+        return self._attributes
+
+
+
+class CompositeIndex(UnIndex):
+
     """Index for composition of simple fields.
        or sequences of items
     """
@@ -60,8 +96,80 @@
          'help': ('CompositeIndex','CompositeIndex_Settings.stx')},
     )
 
+    query_options = ("query","operator", "range")
+
+    def __init__(
+        self, id, ignore_ex=None, call_methods=None, extra=None, caller=None):
+        """Create an unindex
+
+        UnIndexes are indexes that contain two index components, the
+        forward index (like plain index objects) and an inverted
+        index.  The inverted index is so that objects can be unindexed
+        even when the old value of the object is not known.
+
+        e.g.
+
+        self._index = {datum:[documentId1, documentId2]}
+        self._unindex = {documentId:datum}
+
+        If any item in self._index has a length-one value, the value is an
+        integer, and not a set.  There are special cases in the code to deal
+        with this.
+
+        The arguments are:
+
+          'id' -- the name of the item attribute to index.  This is
+          either an attribute name or a record key.
+
+          'ignore_ex' -- should be set to true if you want the index
+          to ignore exceptions raised while indexing instead of
+          propagating them.
+
+          'call_methods' -- should be set to true if you want the index
+          to call the attribute 'id' (note: 'id' should be callable!)
+          You will also need to pass in an object in the index and
+          unindex methods for this to work.
+
+          'extra' -- a sequence of mappings describing the
+          components of the index; each item is a dict
+          with the following keys { id, type, attributes }
+
+          'caller' -- reference to the calling object (usually
+          a (Z)Catalog instance)
+        """
+
+        def _get(o, k, default):
+            """ return a value for a given key of a dict/record 'o' """
+            if isinstance(o, dict):
+                return o.get(k, default)
+            else:
+                return getattr(o, k, default)
+
+        self.id = id
+        self.ignore_ex=ignore_ex        # currently unimplemented
+        self.call_methods=call_methods
+
+        self.operators = ('or', 'and')
+        self.useOperator = 'or'
+
+        # set components
+        self._components = PersistentMapping()
+        for cdata in extra:
+            c_id = cdata['id']
+            c_type = cdata['type']
+            c_attributes = cdata['attributes']  
+            self._components[c_id] = Component(c_id,c_type,c_attributes)
+
+        if not self._components:
+            self._components[id] = Component(id,'KeywordIndex',None)
+        
+        self._length = Length()
+        self.clear()
+
+
+
     def clear(self):
-        self._length = BTrees.Length.Length()
+        self._length = Length()
         self._index = IOBTree()
         self._unindex = IOBTree()
 
@@ -90,7 +198,6 @@
         operator = self.useOperator
 
         rank=[]
-        
         for c, rec in record.keys:
             # experimental code for specifing the operator
             if operator == self.useOperator:
@@ -98,14 +205,14 @@
                 
             if not operator in self.operators :
                 raise RuntimeError,"operator not valid: %s" % escape(operator)
-
+            
             res = self._apply_component_index(rec,c)
-
+            
             if res is None:
                 continue
                 
             res, dummy  = res 
-
+            
             rank.append((len(res),res))
 
 
@@ -113,10 +220,11 @@
         rank.sort()
 
         k = None
+        
         for l,res in rank:
 
             k = intersection(k, res)
-
+            
             if not k:
                 break
 
@@ -124,44 +232,49 @@
         # switch to intersecton mode
         
         if operator == 'or':
+            res = None
             set_func = union
         else:
+            res = resultset
             set_func = intersection
+
         
+        
         rank=[]
         if set_func == intersection:
-            res = None
             for key in k:
-                set=self._index.get(key, IISet())
-                rank.append((len(set),key))
+                
+                s=self._index.get(key, IISet())
+                if isinstance(s, int):
+                    rank.append((1,key))
+                else:
+                    rank.append((len(s),key))
         
             # sort from short to long sets
             rank.sort()
-
+            
         else:
-            res = None
             # dummy length
             if k:
                 rank = enumerate(k)
 
-
         # collect docIds
         for l,key in rank:
             
-            set=self._index.get(key, None)
-            if set is None:
-                set = IISet(())
-            elif isinstance(set, int):
-                set = IISet((set,))
-            res = set_func(res, set)
+            s=self._index.get(key, None)
+            if s is None:
+                s = IISet(())
+            elif isinstance(s, int):
+                s = IISet((s,))
+            res = set_func(res, s)
             if not res and set_func is intersection:
                 break
 
 
-        if isinstance(res, int):  r=IISet((res,))
+        if isinstance(res, int):  res = IISet((res,))
 
         if res is None:
-            return IISet(),(self.id,)
+            res = IISet(),(self.id,)
 
         return res, (self.id,)
         
@@ -201,19 +314,20 @@
             else:
                 setlist = index.items(lo)
 
-            for k, set in setlist:
-                if isinstance(set, tuple):
-                    set = IISet((set,))
+            for k, s in setlist:
+                if isinstance(s, tuple):
+                    s = IISet((s,))
                 r = union(r, set)
         else: # not a range search
             for key in record.keys:
-                set=index.get(key, None)
-                if set is None:
-                    set = IISet(())
-                elif isinstance(set, int):
-                    set = IISet((set,))
-                r = union(r, set)
+                s=index.get(key, None)
 
+                if s is None:
+                    s = IISet(())
+                elif isinstance(s, int):
+                    s = IISet((s,))
+                r = union(r, s)
+
         if isinstance(r, int):
             r=IISet((r,))
 
@@ -232,7 +346,7 @@
         return res
 
 
-    def _index_object(self, documentId, obj, threshold=None, attr=''):
+    def _index_object(self, documentId, obj, threshold=None):
         """ index an object 'obj' with integer id 'i'
 
         Ideally, we've been passed a sequence of some sort that we
@@ -246,17 +360,14 @@
         # we'll do so.
 
         # unhashed keywords
-        newUKeywords = self._get_object_keywords(obj, attr)
-
-        
+        newUKeywords = self._get_permuted_keywords(obj)
+                
         # hashed keywords
         newKeywords = map(lambda x: hash(x),newUKeywords)
         
         for i, kw in enumerate(newKeywords):
             if not self._tindex.get(kw,None):
                 self._tindex[kw]=newUKeywords[i]
-
-
             
         newKeywords = map(lambda x: hash(x),newUKeywords)
 
@@ -299,6 +410,25 @@
         return 1
 
 
+    def unindex_objectKeywords(self, documentId, keywords):
+        """ remove 'keywords' of integer id 'documentId' from the forward index """
+
+        if keywords is not None:
+            for kw in keywords:
+                self.removeForwardIndexEntry(kw, documentId)
+
+    def unindex_object(self, documentId):
+        """ carefully unindex the object with integer id 'documentId'"""
+
+        keywords = self._unindex.get(documentId, None)
+        self.unindex_objectKeywords(documentId, keywords)
+        try:
+            del self._unindex[documentId]
+        except KeyError:
+            logger.debug('Attempt to unindex nonexistent'
+                         ' document id %s' % documentId)    
+
+
     def insertForwardIndexEntry(self, entry, documentId):
         """Take the entry provided and put it in the correct place
         in the forward index.
@@ -382,49 +512,78 @@
                              'should not happen.' % (self.__class__.__name__,
                                                     repr(components),str(self.id),str(c)))
         
-    def _get_object_keywords(self, obj, attr):
-        """ composite keyword lists """    
+    def _get_permuted_keywords(self, obj):
+        """ returns permutation list of object keywords """    
 
-        fields = self.getComponentIndexAttributes()
+        components = self.getIndexComponents()
          
         kw_list = []
-
-        for attributes in fields:
-            kw = []
-            for attr in attributes:
-                kw.extend(list(super(CompositeIndex,self)._get_object_keywords(obj, attr)))
+        
+        for c in components:
+            kw=self._get_keywords(obj, c)
             kw_list.append(kw)
         
         pkl = PermuteKeywordList(kw_list)
 
         return pkl.keys
 
+
+    def _get_keywords(self,obj,component):
+
+        if component.type == 'FieldIndex':
+            attr = component.attributes[-1]
+            try:
+                datum = getattr(obj, attr)
+                if safe_callable(datum):
+                    datum = datum()
+            except (AttributeError, TypeError):
+                datum = _marker
+            if isinstance(datum,list):
+                datum = tuple(datum)
+            return (datum,)
+
+        elif component.type == 'KeywordIndex':
+            for attr in component.attributes:
+                datum = []
+                newKeywords = getattr(obj, attr, ())
+                if safe_callable(newKeywords):
+                    try:
+                        newKeywords = newKeywords()
+                    except AttributeError:
+                        continue
+                if not newKeywords and newKeywords is not False:
+                    continue
+                elif isinstance(newKeywords, basestring): #Python 2.1 compat isinstance
+                    datum.append(newKeywords)
+                else:
+                    unique = {}
+                    try:
+                        for k in newKeywords:
+                            unique[k] = None
+                    except TypeError:
+                        # Not a sequence
+                        datum.append(newKeywords)
+                    else:
+                        datum.extend(unique.keys())
+            return datum
+        else:
+            raise KeyError
+
+    def getIndexComponents(self):
+        """ return sequence of index components """
+        return self._components.values()
+
+ 
     def getComponentIndexNames(self):
         """ returns component index names to composite """
 
-        ids = []
+        return self._components.keys()
 
-        fields = self.getIndexSourceNames()
-        for attr in fields:
-            c = attr.split(':')
-            ids.append(c.pop())
-
-        return tuple(ids)
-
     def getComponentIndexAttributes(self):
         """ returns list of attributes of each component index to composite"""
 
-        attributes=[]
-        
-        fields = self.getIndexSourceNames()
-        for idx in fields:
-            attr =  idx.split(':')
-            if len(attr) == 1:
-                attributes.append(attr) 
-            else:
-                attributes.append(attr[1:])
+        return tuple([a.attributes for a in self._components.values()])
 
-        return tuple(attributes)
 
     def getEntryForObject(self, documentId, default=_marker):
         """Takes a document ID and returns all the information we have
@@ -461,8 +620,9 @@
 
         # default: return unique values from first component
 
-        if name is None:
-            name = self.getComponentIndexNames()[0]
+        if name is None: 
+            return super(CompositeIndex,self).uniqueValues( name=name, withLengths=withLengths)
+
         
         if self._cindexes.has_key(name):
             index = self._cindexes[name]
@@ -503,36 +663,26 @@
         
         cquery = query.copy()
 
-        cIdxs = self.getComponentIndexNames()
+        components = self.getIndexComponents()
 
         records=[]
-        for name in cIdxs:
-            abort = False
-                    
-            #TODO query_options
-            # if intex_type == "FieldIndex":
-            #    query_options = ["query","range"]
-            # elif intex_type == "KeywordIndex":
-            #    query_options = ["query","operator","range"]
+ 
+        for c in components:
+            query_options = QUERY_OPTIONS[c.type]
+            rec = parseIndexRequest(query, c.id, query_options)
 
-            query_options = ["query","range"]
-            rec = parseIndexRequest(query, name, query_options)
-                        
-
-
             if rec.keys is None:
                 continue
-                        
-            records.append((name, rec))
 
-                        
+            records.append((c.id, rec))
 
+        if not records:
+            return query
 
         cquery.update( { self.id: { 'query': records }} )
-
                     
         # delete obsolete query attributes from request
-        for i in cIdxs[:len(records)+1]:
+        for i in [ r[0] for r in records ]:
             if cquery.has_key(i):
                 del cquery[i]
 
@@ -540,8 +690,6 @@
         
         return cquery
 
-    
-
     manage = manage_main = DTMLFile('dtml/manageCompositeIndex', globals())
     manage_main._setName('manage_main')
     manage_browse = DTMLFile('dtml/browseIndex', globals())

Modified: Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/dtml/addCompositeIndex.dtml
===================================================================
--- Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/dtml/addCompositeIndex.dtml	2010-10-10 21:15:16 UTC (rev 117418)
+++ Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/dtml/addCompositeIndex.dtml	2010-10-10 22:25:29 UTC (rev 117419)
@@ -26,7 +26,7 @@
     </div>
     </td>
     <td align="left" valign="top">
-    <input type="text" name="id" size="40" />
+    <input type="text" name="id" size="10" />
     </td>
   </tr>
 
@@ -38,9 +38,13 @@
     </div>
     </td>
     <td align="left" valign="top">
-    <input type="text" name="extra.indexed_attrs:record:string" size="40" /><br/>
-    <em>indexId1,indexId2,...</em> or<br/>
-    <em>indexId1:attribute11:attribute12:...,indexId2:attribute21,...</em>
+    <input type="text" name="extra.id:records:string" size="10" />
+    <select name="extra.type:records:string">
+       <option value="">FieldIndex</option>
+       <option value="">KeywordIndex</option>
+    </select>
+    <input type="text" name="extra.attributes:records:string" size="40" /><br/>
+    
     </td>
   </tr>
 

Modified: Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/tests/testCompositeIndex.py
===================================================================
--- Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/tests/testCompositeIndex.py	2010-10-10 21:15:16 UTC (rev 117418)
+++ Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/tests/testCompositeIndex.py	2010-10-10 22:25:29 UTC (rev 117419)
@@ -54,7 +54,10 @@
 
     def setUp(self):
 
-        self._index = CompositeIndex('comp01',extra = {'indexed_attrs': 'is_default_page,review_state,portal_type'})
+        self._index = CompositeIndex('comp01',
+                                     extra = [ { 'id': 'is_default_page' ,'type': 'FieldIndex','attributes':''},
+                                               { 'id': 'review_state' ,'type': 'FieldIndex','attributes':''},
+                                               { 'id': 'portal_type' ,'type': 'FieldIndex','attributes':''}])
         
         self._field_indexes = ( FieldIndex('review_state'), FieldIndex('portal_type'), FieldIndex('is_default_page'))
 
@@ -67,20 +70,25 @@
             r = index._apply_index(req)
             if r is not None:
                 r, u = r
-            w, rs = weightedIntersection(rs, r)
-            if not rs:
-                break
-        return rs
+                w, rs = weightedIntersection(rs, r)
+                if not rs:
+                    break
+        if not rs:
+            return set()
+        return set(rs)
 
     
     def _compositeSearch(self, req, expectedValues=None):
+        
         query = self._index.make_query(req)
         rs = None
         r =  self._index._apply_index(query)
         if r is not None:
             r, u = r
-        w, rs = weightedIntersection(rs, r)
-        return rs
+            w, rs = weightedIntersection(rs, r)
+        if not rs:
+            return set()
+        return set(rs)
     
 
     def _populateIndexes(self, k , v):
@@ -89,6 +97,7 @@
             index.index_object( k, v )
 
 
+
     def _clearIndexes(self):
         self._index.clear()
         for index in self._field_indexes:
@@ -96,9 +105,10 @@
 
     def testPerformance(self):
 
-        lengths = [10,100,1000,10000,100000]
+        lengths = [1000,10000,100000]
 
-        queries = [{  'portal_type' : { 'query': 'Document' } , 
+        queries = [{  'portal_type' : { 'query': 'Document' }} ,
+                   {  'portal_type' : { 'query': 'Document' } , 
                       'review_state' : { 'query': 'pending' } }  ,\
                    {  'is_default_page': { 'query' : True }, 
                       'portal_type' : { 'query': 'Document' } , 
@@ -110,18 +120,16 @@
 
             st = time()
             res1 = self._defaultSearch(*args, **kw)
-            print list(res1)
             print "atomic:    %s hits in %3.2fms" % (len(res1), (time() -st)*1000)
 
             st = time()
             res2 = self._compositeSearch(*args, **kw)
-            print list(res2)
             print "composite: %s hits in %3.2fms" % (len(res2), (time() -st)*1000)
 
             self.assertEqual(len(res1),len(res2))
+            
+            self.assertEqual(res1,res2)
 
-            for i,v in enumerate(res1):
-                self.assertEqual(res1[i], res2[i])  
 
 
 
@@ -130,13 +138,11 @@
             print "************************************" 
             print "indexed objects: %s" % l
             for i  in range(l):
-                name = 'dummy%s' % i
+                name = '%s' % i
                 obj = RandomTestObject(name)
-                print obj
                 self._populateIndexes(i,obj)
 
             for query in queries:
-                print query
                 profileSearch(query)
 
 



More information about the checkins mailing list