[Checkins] SVN: zc.relation/trunk/ - Add in zc.relationship for testing. This revealed that I needed to abort my

Gary Poster gary at zope.com
Mon Aug 6 22:03:30 EDT 2007


Log message for revision 78650:
  - Add in zc.relationship for testing.  This revealed that I needed to abort my
  transactions in zc.relation but everything else seemed to be ok.
  
  - add in an additional timeit test for seeing how adding a search index
    affected searches that didn't use it.  I didn't like the results, so
    did a bit of optimization.  This means that search indexes now have a
    slightly different contract.
  
  - optimzation.txt wasn't hooked in.  Not sure how much it adds. :-/
  
  catalog test coverage slipped to 98%.  Would be really cool to get it and
  searchindex up to 100%.
  
  

Changed:
  _U  zc.relation/trunk/
  U   zc.relation/trunk/buildout.cfg
  U   zc.relation/trunk/setup.py
  U   zc.relation/trunk/src/zc/relation/catalog.py
  U   zc.relation/trunk/src/zc/relation/interfaces.py
  U   zc.relation/trunk/src/zc/relation/optimization.txt
  U   zc.relation/trunk/src/zc/relation/searchindex.py
  U   zc.relation/trunk/src/zc/relation/tests.py
  U   zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py
  U   zc.relation/trunk/src/zc/relation/tokens.txt

-=-

Property changes on: zc.relation/trunk
___________________________________________________________________
Name: svn:externals
   + zc.relationship svn+ssh://svn.zope.org/repos/main/zc.relationship/trunk


Modified: zc.relation/trunk/buildout.cfg
===================================================================
--- zc.relation/trunk/buildout.cfg	2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/buildout.cfg	2007-08-07 02:03:29 UTC (rev 78650)
@@ -1,12 +1,13 @@
 [buildout]
-develop = .
+develop = . zc.relationship
 parts = test py
 
 find-links = http://download.zope.org/distribution/
 
 [test]
 recipe = zc.recipe.testrunner
-eggs = zc.relation
+eggs = zc.relation [test]
+       zc.relationship
 defaults = "--tests-pattern [fn]?tests --exit-with-status".split()
 
 [py]

Modified: zc.relation/trunk/setup.py
===================================================================
--- zc.relation/trunk/setup.py	2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/setup.py	2007-08-07 02:03:29 UTC (rev 78650)
@@ -26,4 +26,5 @@
         
         'zope.testing',
         ],
+    extras_require={'test':'zc.relationship'},
     )

Modified: zc.relation/trunk/src/zc/relation/catalog.py
===================================================================
--- zc.relation/trunk/src/zc/relation/catalog.py	2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/catalog.py	2007-08-07 02:03:29 UTC (rev 78650)
@@ -12,8 +12,6 @@
 
 from zc.relation import interfaces
 
-# use OOBTree, not items tuple
-
 ##############################################################################
 # helpers
 #
@@ -46,26 +44,6 @@
         Mapping = BTrees.family32.OO.BTree
     return Mapping
 
-def makeCheckTargetFilter(targetQuery, targetFilter, catalog):
-    targetCache = {}
-    checkTargetFilter = None
-    if targetQuery:
-        targetData = catalog.getRelationTokens(targetQuery)
-        if not targetData:
-            return () # shortcut, indicating no values
-        else:
-            if targetFilter is not None:
-                def checkTargetFilter(relchain, query):
-                    return relchain[-1] in targetData and targetFilter(
-                        relchain, query, catalog, targetCache)
-            else:
-                def checkTargetFilter(relchain, query):
-                    return relchain[-1] in targetData
-    elif targetFilter is not None:
-        def checkTargetFilter(relchain, query):
-            return targetFilter(relchain, query, catalog, targetCache)
-    return checkTargetFilter
-
 class Ref(persistent.Persistent):
     def __init__(self, ob):
         self.ob = ob
@@ -127,6 +105,7 @@
 
     family = BTrees.family32
     _listeners = _queryFactories = _searchIndexes = ()
+    _searchIndexMatches = None
 
     def __init__(self, dumpRel, loadRel, relFamily=None, family=None):
         if family is not None:
@@ -215,7 +194,21 @@
         res._queryFactories = self._queryFactories # it's a tuple
         res._relLength = BTrees.Length.Length()
         res._relLength.set(self._relLength.value)
-        res._searchIndexes = tuple(ix.copy(self) for ix in self._searchIndexes)
+        if self._searchIndexMatches is not None:
+            indexes = []
+            res._searchIndexMatches = self.family.OO.Bucket()
+            for ix, keys in self._searchIndexes:
+                cix = ix.copy(self)
+                indexes.append((cix, keys))
+                for key in keys:
+                    dest = res._searchIndexMatches.get(key)
+                    if dest is None:
+                        dest = res._searchIndexMatches[key] = (
+                            persistent.list.PersistentList())
+                    for info in self._searchIndexMatches[key]:
+                        if info[3] is ix:
+                            dest.append(info[:3] + (cix,))
+            res._searchIndexes = tuple(indexes)
         return res
     
     # Value Indexes
@@ -357,18 +350,65 @@
     # --------------
 
     def addSearchIndex(self, ix):
-        ix.setCatalog(self)
-        self._searchIndexes += (ix,)
+        matches = tuple(ix.setCatalog(self))
+        if self._searchIndexMatches is None:
+            self._searchIndexMatches = self.family.OO.Bucket()
+        keys = set()
+        for (name, query_names, static_values, maxDepth, filter, queryFactory
+            ) in matches:
+            if name is None:
+                name = ''
+                rel_bool = True
+            else:
+                rel_bool = False
+            query_names_res = []
+            none_query = False
+            for nm in query_names:
+                if nm is None:
+                    none_query = True
+                else:
+                    query_names_res.append(nm)
+            if maxDepth is None:
+                maxDepth = 0
+            k = (rel_bool, name, none_query, tuple(query_names_res), maxDepth)
+            a = self._searchIndexMatches.get(k)
+            if a is None:
+                self._searchIndexMatches[
+                    k] = a = persistent.list.PersistentList()
+            if getattr(static_values, 'items', None) is not None:
+                static_values = static_values.items()
+            a.append((filter, queryFactory, tuple(static_values), ix))
+            keys.add(k)
+        keys = frozenset(keys)
+        self._searchIndexes += ((ix, keys),)
 
     def iterSearchIndexes(self):
-        return iter(self._searchIndexes)
+        return (data[0] for data in self._searchIndexes)
 
     def removeSearchIndex(self, ix):
-        res = tuple(i for i in self._searchIndexes if i is not ix)
-        if len(res) == len(self._searchIndexes):
+        res = []
+        keys = None
+        for data in self._searchIndexes:
+            if data[0] is ix:
+                assert keys is None, 'internal data structure error'
+                keys = data[1]
+            else:
+                res.append(data)
+        if keys is None:
             raise LookupError('index not found', ix)
-        self._searchIndexes = res
+        self._searchIndexes = tuple(res)
         ix.setCatalog(None)
+        if not res:
+            self._searchIndexMatches = None
+        else:
+            for key in keys:
+                a = self._searchIndexMatches[key]
+                res = persistent.list.PersistentList(
+                    info for info in a if info[3] is not ix)
+                if not res:
+                    del self._searchIndexMatches[key]
+                else:
+                    self._searchIndexMatches[key] = res
 
     # Indexing
     # ========
@@ -663,17 +703,38 @@
             res = self._relTools['intersection'](res, data.pop(0)[1])
         return res
 
-    def _getSearchIndexResults(self, name, query, maxDepth, filter,
+    def _getSearchIndexResults(self, key, query, maxDepth, filter,
                                targetQuery, targetFilter, queryFactory):
-        for ix in self._searchIndexes:
-            res = ix.getResults(name, query, maxDepth, filter, targetQuery,
-                                targetFilter, queryFactory)
+        # only call for relations
+        for (c_filter, c_queryFactory,
+             c_static_values, ix) in self._searchIndexMatches.get(key, ()):
+            if (c_filter != filter or
+                c_queryFactory != queryFactory):
+                continue
+            for k, v in c_static_values:
+                if query[k] != v:
+                    continue
+            res = ix.getResults(
+                None, query, maxDepth, filter, queryFactory)
             if res is not None:
+                if res:
+                    if targetQuery:
+                        targetData = self._relData(targetQuery)
+                        if not targetData:
+                            return self._relTools['Set']()
+                        res = self._relTools['intersection'](
+                            res, targetData)
+                    if targetFilter is not None:
+                        targetCache = {}
+                        res = (rel for rel in res if
+                               targetFilter(
+                                [rel], query, self,
+                                targetCache))
                 return res
 
     def _iterListeners(self):
         # fix up ourself first
-        for ix in self._searchIndexes:
+        for ix, keys in self._searchIndexes:
             yield ix
         # then tell others
         for l in self.iterListeners():
@@ -715,10 +776,23 @@
                 return filter(relchain, query, self, filterCache)
         else:
             checkFilter = None
-        checkTargetFilter = makeCheckTargetFilter(
-            targetQuery, targetFilter, self)
-        if checkTargetFilter is not None and not checkTargetFilter:
-            relData = ()
+        targetCache = {}
+        checkTargetFilter = None
+        if targetQuery:
+            targetData = self._relData(targetQuery)
+            if not targetData:
+                relData = () # shortcut
+            else:
+                if targetFilter is not None:
+                    def checkTargetFilter(relchain, query):
+                        return relchain[-1] in targetData and targetFilter(
+                            relchain, query, self, targetCache)
+                else:
+                    def checkTargetFilter(relchain, query):
+                        return relchain[-1] in targetData
+        elif targetFilter is not None:
+            def checkTargetFilter(relchain, query):
+                return targetFilter(relchain, query, self, targetCache)
         return (query, relData, maxDepth, checkFilter, checkTargetFilter,
                 getQueries)
 
@@ -825,19 +899,36 @@
                 return multiunion(
                     (self._reltoken_name_TO_objtokenset.get((r, name))
                      for r in rels), data)
-        res = self._getSearchIndexResults(
-            name, query, maxDepth, filter, targetQuery,
-            targetFilter, queryFactory)
-        if res is not None:
-            return res
-        res = self._getSearchIndexResults(
-            None, query, maxDepth, filter, targetQuery, targetFilter,
-            queryFactory)
-        if res is not None:
-            return multiunion(
-                (self._reltoken_name_TO_objtokenset.get((r, name))
-                 for r in res),
-                data)
+        if self._searchIndexMatches is not None:
+            if None in query:
+                none = True
+                query_names = tuple(nm for nm in query if nm is not None)
+            else:
+                none = False
+                query_names = tuple(query)
+            if not targetQuery and targetFilter is None:
+                key = (False, name, none, query_names, maxDepth or 0)
+                for (c_filter, c_queryFactory, c_static_values, ix
+                    ) in self._searchIndexMatches.get(key, ()):
+                    if (c_filter != filter or
+                        c_queryFactory != queryFactory):
+                        continue
+                    for k, v in c_static_values:
+                        if query[k] != v:
+                            continue
+                    res = ix.getResults(
+                        name, query, maxDepth, filter, queryFactory)
+                    if res is not None:
+                        return res
+            key = (True, '', none, query_names, maxDepth or 0)
+            res = self._getSearchIndexResults(
+                key, query, maxDepth, filter, targetQuery, targetFilter,
+                queryFactory)
+            if res is not None:
+                return multiunion(
+                    (self._reltoken_name_TO_objtokenset.get((r, name))
+                     for r in res),
+                    self._attrs[name])
         if getQueries is None:
             queryFactory, getQueries = self._getQueryFactory(
                 query, queryFactory)
@@ -899,11 +990,19 @@
             if res is None:
                 res = self._relTools['Set']()
             return res
-        res = self._getSearchIndexResults(
-            None, query, maxDepth, filter, targetQuery, targetFilter,
-            queryFactory)
-        if res is not None:
-            return res
+        if self._searchIndexMatches is not None:
+            if None in query:
+                none = True
+                query_names = tuple(nm for nm in query if nm is not None)
+            else:
+                none = False
+                query_names = tuple(query)
+            key = (True, '', none, query_names, maxDepth or 0)
+            res = self._getSearchIndexResults(
+                key, query, maxDepth, filter, targetQuery, targetFilter,
+                queryFactory)
+            if res is not None:
+                return res
         if getQueries is None:
             queryFactory, getQueries = self._getQueryFactory(
                 query, queryFactory)
@@ -926,10 +1025,6 @@
     def findRelationChains(self, query, maxDepth=None, filter=None,
                                targetQuery=(), targetFilter=None,
                                queryFactory=None):
-        """find relation tokens that match the query.
-        
-        same arguments as findValueTokens except no name.
-        """
         query = BTrees.family32.OO.Bucket(query)
         queryFactory, getQueries = self._getQueryFactory(
             query, queryFactory)
@@ -957,10 +1052,6 @@
     def findRelationTokenChains(self, query, maxDepth=None, filter=None,
                                     targetQuery=(), targetFilter=None,
                                     queryFactory=None):
-        """find relation tokens that match the query.
-        
-        same arguments as findValueTokens except no name.
-        """
         query = BTrees.family32.OO.Bucket(query)
         queryFactory, getQueries = self._getQueryFactory(
             query, queryFactory)
@@ -977,11 +1068,19 @@
             queryFactory, getQueries = self._getQueryFactory(
                 query, queryFactory)
         targetQuery = BTrees.family32.OO.Bucket(targetQuery)
-        res = self._getSearchIndexResults(
-            None, query, maxDepth, filter, targetQuery, targetFilter,
-            queryFactory)
-        if res is not None:
-            return bool(res)
+        if self._searchIndexMatches is not None:
+            if None in query:
+                none = True
+                query_names = tuple(nm for nm in query if nm is not None)
+            else:
+                none = False
+                query_names = tuple(query)
+            key = (True, '', none, query_names, maxDepth or 0)
+            res = self._getSearchIndexResults(
+                key, query, maxDepth, filter, targetQuery, targetFilter,
+                queryFactory)
+            if res is not None:
+                return bool(res)
         if getQueries is None:
             queryFactory, getQueries = self._getQueryFactory(
                 query, queryFactory)

Modified: zc.relation/trunk/src/zc/relation/interfaces.py
===================================================================
--- zc.relation/trunk/src/zc/relation/interfaces.py	2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/interfaces.py	2007-08-07 02:03:29 UTC (rev 78650)
@@ -88,17 +88,21 @@
         """return a copy of this index, bound to provided catalog if given"""
 
     def setCatalog(catalog):
-        """set the search index to be using the given catalog.
+        """set the search index to be using the given catalog, return matches.
         
         Should immediately being index up-to-date if catalog has content.
         
         if index already has a catalog, raise an error.
         
         If provided catalog is None, clear catalog and indexes.
+        
+        Returned matches should be iterable of tuples of (search name or None,
+        query names, static values, maxDepth, filter, queryFactory).  Only
+        searches matching one of these tuples will be sent to the search
+        index.
         """
 
-    def getResults(name, query, maxDepth, filter, targetQuery,
-                   targetFilter, queryFactory):
+    def getResults(name, query, maxDepth, filter, queryFactory):
         """return results for search if available, and None if not
         
         Returning a non-None value means that this search index claims the

Modified: zc.relation/trunk/src/zc/relation/optimization.txt
===================================================================
--- zc.relation/trunk/src/zc/relation/optimization.txt	2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/optimization.txt	2007-08-07 02:03:29 UTC (rev 78650)
@@ -97,16 +97,18 @@
     ...     return token
     ...
 
-    >>> import zc.relation
-    >>> factory = zc.relation.TransposingTransitiveQueriesFactory(
-    ...     'subjects', 'objects')
-    >>> catalog = zc.relation.Catalog(
-    ...     dumpRelation, loadRelation, factory)
+    >>> import zc.relation.catalog
+    >>> catalog = zc.relation.catalog.Catalog(
+    ...     dumpRelation, loadRelation)
     >>> catalog.addValueIndex(IRelation['subjects'], multiple=True)
     >>> catalog.addValueIndex(
     ...     IRelation['relationtype'], relTypeDump, relTypeLoad,
     ...     BTrees.family32.OI, name='reltype')
     >>> catalog.addValueIndex(IRelation['objects'], multiple=True)
+    >>> import zc.relation.queryfactory
+    >>> factory = zc.relation.queryfactory.TransposingTransitive(
+    ...     'subjects', 'objects')
+    >>> catalog.addDefaultQueryFactory(factory)
 
     >>> rel = Relation((1,), 'has the role of', (2,))
     >>> catalog.index(rel)

Modified: zc.relation/trunk/src/zc/relation/searchindex.py
===================================================================
--- zc.relation/trunk/src/zc/relation/searchindex.py	2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/searchindex.py	2007-08-07 02:03:29 UTC (rev 78650)
@@ -16,6 +16,20 @@
 _marker = object()
 
 class TransposingTransitive(persistent.Persistent):
+    """for searches using zc.relation.queryfactory.TransposingTransitive.
+    
+    Only indexes one direction.  Only indexes with maxDepth=None.
+    Does not support filters.
+    
+    This approach could be used for other query factories that only look
+    at the final element in the relchain.  If that were desired, I'd abstract
+    some of this code.
+    
+    while target filters are currently supported, perhaps they shouldn't be:
+    the target filter can look at the last element in the relchain, but not
+    at the true relchain itself.  That is: the relchain lies, except for the
+    last element.
+    """
     zope.interface.implements(zc.relation.interfaces.ISearchIndex)
 
     name = index = catalog = None
@@ -30,14 +44,6 @@
         self.reverse = reverse
         self.update = frozenset(
             itertools.chain((k for k, v in static), (forward, reverse)))
-        match = list(static)
-        match.append((forward, _marker))
-        match.sort()
-        self._match = tuple(match)
-        match = list(static)
-        match.append((reverse, _marker))
-        match.sort()
-        self._reverse_match = tuple(match)
         self.factory = zc.relation.queryfactory.TransposingTransitive(
             forward, reverse)
 
@@ -56,8 +62,6 @@
         new.reverse = self.reverse
         new.factory = self.factory
         new.static = self.static
-        new._match = self._match
-        new._reverse_match = self._reverse_match
         if self.index is not None:
             if catalog is None:
                 catalog = self.catalog
@@ -84,6 +88,13 @@
         for token in catalog.getRelationTokens():
             if token not in self.index:
                 self._index(token)
+        # name, query_names, static_values, maxDepth, filter, queryFactory
+        query_names = (self.forward,) + tuple(k for k, v in self.static)
+        res = [(None, query_names, self.static, None, None, self.factory)]
+        for nm in self.names:
+            res.append(
+                (nm, query_names, self.static, None, None, self.factory))
+        return res
 
     def _buildQuery(self, dynamic):
         res = BTrees.family32.OO.Bucket(self.static)
@@ -210,18 +221,10 @@
 
     # end listener interface
 
-    def getResults(self, name, query, maxDepth, filter, targetQuery,
-                   targetFilter, queryFactory):
-        if (queryFactory != self.factory or 
-            name is not None and name not in self.names or
-            maxDepth is not None or filter is not None or
-            len(query) != len(self.static) + 1 or
-            name is not None and (targetQuery or targetFilter is not None)):
-            return None
-        for given, match in itertools.izip(query.items(), self._match):
-            if (given[0] != match[0] or 
-                match[1] is not _marker and given[1] != match[1]):
-                return None
+    def getResults(self, name, query, maxDepth, filter, queryFactory):
+        for k, v in self.static:
+            if query[k] != v:
+                return
         # TODO: try to use intransitive index, if available
         rels = self.catalog.getRelationTokens(query)
         if name is None:
@@ -234,39 +237,42 @@
             return tools['Set']()
         elif not rels:
             return rels
-        res = zc.relation.catalog.multiunion(
+        return zc.relation.catalog.multiunion(
             (ix.get(rel) for rel in rels), tools)
-        if name is None:
-            checkTargetFilter = zc.relation.catalog.makeCheckTargetFilter(
-                targetQuery, targetFilter, self.catalog)
-            if checkTargetFilter is not None:
-                if not checkTargetFilter: # no results
-                    res = tools['Set']()
-                else:
-                    res = tools['Set'](
-                        rel for rel in res if checkTargetFilter([rel], query))
-        return res
 
 
 class Intransitive(persistent.Persistent):
+    """saves results for precise search.
+    
+    Could be used for transitive searches, but writes would be much more
+    expensive than the TransposingTransitive approach.
+    
+    """
+    # XXX Rename to Direct?
     zope.interface.implements(
         zc.relation.interfaces.ISearchIndex,
         zc.relation.interfaces.IListener)
 
-    index = catalog = name = queriesFactory = None
+    index = catalog = name = queryFactory = None
     update = frozenset()
 
     def __init__(self, names, name=None,
-                 queriesFactory=None, getValueTokens=None, update=None):
+                 queryFactory=None, getValueTokens=None, update=None,
+                 unlimitedDepth=False):
         self.names = tuple(sorted(names))
         self.name = name
-        self.queriesFactory = queriesFactory
+        self.queryFactory = queryFactory
         if update is None:
             update = names
             if name is not None:
                 update += (name,)
         self.update = frozenset(update)
         self.getValueTokens = getValueTokens
+        if unlimitedDepth:
+            depths = (1, None)
+        else:
+            depths = (1,)
+        self.depths = tuple(depths)
 
 
     def copy(self, catalog=None):
@@ -280,9 +286,10 @@
                 res.index[k] = copy.copy(v)
         res.names = self.names
         res.name = self.name
-        res.queriesFactory = self.queriesFactory
+        res.queryFactory = self.queryFactory
         res.update = self.update
         res.getValueTokens = self.getValueTokens
+        res.depths = self.depths
         return res
 
     def setCatalog(self, catalog):
@@ -294,6 +301,9 @@
         self.catalog = catalog
         self.index = BTrees.family32.OO.BTree()
         self.sourceAdded(catalog)
+        # name, query_names, static_values, maxDepth, filter, queryFactory
+        return [(self.name, self.names, (), depth, None, self.queryFactory)
+                for depth in self.depths]
 
     def relationAdded(self, token, catalog, additions):
         self._index(token, catalog, additions)
@@ -319,8 +329,8 @@
 
     def _indexQuery(self, query):
             dquery = dict(query)
-            if self.queriesFactory is not None:
-                getQueries = self.queriesFactory(dquery, self.catalog)
+            if self.queryFactory is not None:
+                getQueries = self.queryFactory(dquery, self.catalog)
             else:
                 getQueries = lambda empty: (query,)
             res = zc.relation.catalog.multiunion(
@@ -418,20 +428,13 @@
             else:
                 break
 
-    def getResults(self, name, query, maxDepth, filter, targetQuery,
-                   targetFilter, queriesFactory):
-        if (name != self.name or maxDepth not in (1, None) or
-            queriesFactory != self.queriesFactory or targetQuery
-            or filter is not None or targetFilter is not None):
-            return # TODO could maybe handle some later
-        names = []
+    def getResults(self, name, query, maxDepth, filter, queryFactory):
         query = tuple(query.items())
         for nm, v in query:
             if isinstance(v, zc.relation.catalog.Any):
                 return None # TODO open up
-            names.append(nm)
         res = self.index.get(query)
-        if res is None and self.names == tuple(names):
+        if res is None:
             if self.name is None:
                 res = self.catalog.getRelationModuleTools()['Set']()
             else:

Modified: zc.relation/trunk/src/zc/relation/tests.py
===================================================================
--- zc.relation/trunk/src/zc/relation/tests.py	2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/tests.py	2007-08-07 02:03:29 UTC (rev 78650)
@@ -23,6 +23,8 @@
 def tearDown(test):
     db = test.globs.get('db')
     if db is not None:
+        import transaction
+        transaction.abort()
         db.close()
     zope.testing.module.tearDown(test)
 
@@ -40,13 +42,9 @@
              'searchindex.txt',
              setUp=setUp, tearDown=tearDown,
              optionflags=doctest.INTERPRET_FOOTNOTES),
-#         doctest.DocFileSuite(
-#             'optimization.txt',
-#             setUp=setUp, tearDown=tearDown,
-#             optionflags=doctest.INTERPRET_FOOTNOTES),
-#         doctest.DocFileSuite(
-#             'administration.txt',
-#             setUp=setUp, tearDown=tearDown,
-#             optionflags=doctest.INTERPRET_FOOTNOTES),
+         doctest.DocFileSuite(
+             'optimization.txt',
+             setUp=setUp, tearDown=tearDown,
+             optionflags=doctest.INTERPRET_FOOTNOTES),
         ))
     return res

Modified: zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py
===================================================================
--- zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py	2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py	2007-08-07 02:03:29 UTC (rev 78650)
@@ -1,127 +1,158 @@
 """
+The goal of this file is to determine how well the transposing transitive
+index helps searches.
 
-The result has three big sections, one for `findRelationTokens`, one for
-`findValueTokens`, and one for `canFind`.  Within each section, we have six
-searches, each broader than the last.  A 'brute' result is a result without
-a search index.  A 'relation' result uses a search index without a configured
-value index.  A 'value' result uses a search index with a configured value
-index.  'relation' and 'value' results should really only differ materially
-for `findValueTokens`.  A 'brute_generate' result shows how fast it takes to
-get the generator back from a brute search, without actually iterating over
-it, and is not pertinent for `canFind`.
+The result of the first test has three big sections, one for
+`findRelationTokens`, one for `findValueTokens`, and one for `canFind`. 
+Within each section, we have six searches, each broader than the last. 
+A 'brute' result is a result without a search index.  A 'relation'
+result uses a search index without a configured value index.  A 'value'
+result uses a search index with a configured value index.  'relation'
+and 'value' results should really only differ materially for
+`findValueTokens`.  A 'brute_generate' result shows how fast it takes to
+get the generator back from a brute search, without actually iterating
+over it, and is not pertinent for `canFind`.
 
+The result of the second test shows how expensive it is to install a search
+index that is not used.
+
 Example result:
 
-[('control_result', 0.0094759464263916016),
+                               Test 1
+
+
+[('control_result', 0.0091979503631591797),
  "**** res = list(catalog.findRelationTokens({'token': 9})) ****",
  '**** [109] ****',
- ('brute_generate', 0.2457740306854248),
- ('brute', 0.81798601150512695),
- ('relation', 0.52101397514343262),
- ('value', 0.51753401756286621),
+ ('brute_generate', 0.230133056640625),
+ ('brute', 0.79251599311828613),
+ ('relation', 0.52498507499694824),
+ ('value', 0.52437424659729004),
  "**** res = list(catalog.findRelationTokens({'token': 7})) ****",
  '**** [107] ****',
- ('brute_generate', 0.24491000175476074),
- ('brute', 0.83113908767700195),
- ('relation', 0.53361701965332031),
- ('value', 0.52347803115844727),
+ ('brute_generate', 0.2291419506072998),
+ ('brute', 0.80537819862365723),
+ ('relation', 0.52660107612609863),
+ ('value', 0.53035998344421387),
  "**** res = list(catalog.findRelationTokens({'token': 5})) ****",
  '**** [105, 107, 108, 109] ****',
- ('brute_generate', 0.24980616569519043),
- ('brute', 1.9965240955352783),
- ('relation', 0.5299079418182373),
- ('value', 0.53121399879455566),
+ ('brute_generate', 0.23081111907958984),
+ ('brute', 1.9628522396087646),
+ ('relation', 0.53455114364624023),
+ ('value', 0.53245711326599121),
  "**** res = list(catalog.findRelationTokens({'token': 3})) ****",
  '**** [103, 105, 106, 107, 108, 109] ****',
- ('brute_generate', 0.24709606170654297),
- ('brute', 2.8365921974182129),
- ('relation', 0.54007720947265625),
- ('value', 0.53515410423278809),
+ ('brute_generate', 0.23008418083190918),
+ ('brute', 2.7950401306152344),
+ ('relation', 0.53938508033752441),
+ ('value', 0.53650403022766113),
  "**** res = list(catalog.findRelationTokens({'token': 1})) ****",
  '**** [101, 103, 104, 105, 106, 107, 108, 109] ****',
- ('brute_generate', 0.24880099296569824),
- ('brute', 3.6703901290893555),
- ('relation', 0.53357386589050293),
- ('value', 0.53384017944335938),
+ ('brute_generate', 0.23291897773742676),
+ ('brute', 3.614861011505127),
+ ('relation', 0.53824424743652344),
+ ('value', 0.53764486312866211),
  "**** res = list(catalog.findRelationTokens({'token': 0})) ****",
  '**** [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111] ****',
- ('brute_generate', 0.25098800659179688),
- ('brute', 5.3719911575317383),
- ('relation', 0.53967404365539551),
- ('value', 0.53260326385498047),
+ ('brute_generate', 0.23366808891296387),
+ ('brute', 5.3421971797943115),
+ ('relation', 0.54594111442565918),
+ ('value', 0.54389309883117676),
  '------------------',
  '------------------',
  "**** res = list(catalog.findValueTokens('children', {'token': 9})) ****",
  '**** [23, 24] ****',
- ('brute_generate', 0.25597715377807617),
- ('brute', 0.87791824340820312),
- ('relation', 0.71430587768554688),
- ('value', 0.53523707389831543),
+ ('brute_generate', 0.2296299934387207),
+ ('brute', 0.84845805168151855),
+ ('relation', 0.68227314949035645),
+ ('value', 0.54161596298217773),
  "**** res = list(catalog.findValueTokens('children', {'token': 7})) ****",
  '**** [17, 18, 19] ****',
- ('brute_generate', 0.25435924530029297),
- ('brute', 0.9038851261138916),
- ('relation', 0.73513698577880859),
- ('value', 0.53882598876953125),
+ ('brute_generate', 0.22747707366943359),
+ ('brute', 0.87759518623352051),
+ ('relation', 0.69233298301696777),
+ ('value', 0.53438305854797363),
  "**** res = list(catalog.findValueTokens('children', {'token': 5})) ****",
  '**** [7, 8, 9, 17, 18, 19, 20, 21, 22, 23, 24] ****',
- ('brute_generate', 0.2563321590423584),
- ('brute', 2.2175660133361816),
- ('relation', 0.8459780216217041),
- ('value', 0.54215097427368164),
+ ('brute_generate', 0.22690701484680176),
+ ('brute', 2.1761610507965088),
+ ('relation', 0.80766010284423828),
+ ('value', 0.54901003837585449),
  "**** res = list(catalog.findValueTokens('children', {'token': 3})) ****",
  '**** [5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] ****',
- ('brute_generate', 0.25664114952087402),
- ('brute', 3.1539299488067627),
- ('relation', 0.91316413879394531),
- ('value', 0.55379605293273926),
+ ('brute_generate', 0.22738194465637207),
+ ('brute', 3.0915610790252686),
+ ('relation', 0.86617612838745117),
+ ('value', 0.55521416664123535),
  "**** res = list(catalog.findValueTokens('children', {'token': 1})) ****",
  '**** [3, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] ****',
- ('brute_generate', 0.25446605682373047),
- ('brute', 4.058326244354248),
- ('relation', 0.98118209838867188),
- ('value', 0.54912614822387695),
+ ('brute_generate', 0.22893619537353516),
+ ('brute', 4.0192921161651611),
+ ('relation', 0.94522690773010254),
+ ('value', 0.55083107948303223),
  "**** res = list(catalog.findValueTokens('children', {'token': 0})) ****",
  '**** [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32] ****',
- ('brute_generate', 0.25460124015808105),
- ('brute', 6.0014550685882568),
- ('relation', 1.1424670219421387),
- ('value', 0.55950188636779785),
+ ('brute_generate', 0.22567915916442871),
+ ('brute', 5.9501399993896484),
+ ('relation', 1.1025910377502441),
+ ('value', 0.56526517868041992),
  '------------------',
  '------------------',
  "**** res = catalog.canFind({'token': 9}, targetQuery={'children': 23}) ****",
  '**** True ****',
- ('brute', 0.81314396858215332),
- ('relation', 0.68167209625244141),
- ('value', 0.68214011192321777),
+ ('brute', 0.77447414398193359),
+ ('relation', 0.66926908493041992),
+ ('value', 0.66884613037109375),
  "**** res = catalog.canFind({'token': 7}, targetQuery={'children': 23}) ****",
  '**** False ****',
- ('brute', 0.98219418525695801),
- ('relation', 0.67153406143188477),
- ('value', 0.6687159538269043),
+ ('brute', 0.93198609352111816),
+ ('relation', 0.6589500904083252),
+ ('value', 0.6538691520690918),
  "**** res = catalog.canFind({'token': 5}, targetQuery={'children': 23}) ****",
  '**** True ****',
- ('brute', 2.0033540725708008),
- ('relation', 0.70535111427307129),
- ('value', 0.70386195182800293),
+ ('brute', 1.9603328704833984),
+ ('relation', 0.67744708061218262),
+ ('value', 0.67296600341796875),
  "**** res = catalog.canFind({'token': 3}, targetQuery={'children': 23}) ****",
  '**** True ****',
- ('brute', 2.8076090812683105),
- ('relation', 0.72887015342712402),
- ('value', 0.71839404106140137),
+ ('brute', 2.7534151077270508),
+ ('relation', 0.67701125144958496),
+ ('value', 0.67615604400634766),
  "**** res = catalog.canFind({'token': 1}, targetQuery={'children': 23}) ****",
  '**** True ****',
- ('brute', 3.6096680164337158),
- ('relation', 0.73476409912109375),
- ('value', 0.73782920837402344),
+ ('brute', 3.5522449016571045),
+ ('relation', 0.67667698860168457),
+ ('value', 0.67722702026367188),
  "**** res = catalog.canFind({'token': 0}, targetQuery={'children': 23}) ****",
  '**** True ****',
- ('brute', 5.2542290687561035),
- ('relation', 0.75870609283447266),
- ('value', 0.75950288772583008),
+ ('brute', 5.198289155960083),
+ ('relation', 0.67714405059814453),
+ ('value', 0.67705321311950684),
  '------------------',
  '------------------']
 
+
+                               Test 2
+
+
+[('control_result', 0.0068368911743164062),
+ "**** res = catalog.findRelationTokens({'token': 9}) ****",
+ ('brute', 0.23646903038024902),
+ ('index', 0.29944419860839844),
+ "**** res = catalog.findRelationTokens({'token': 0}) ****",
+ ('brute', 0.23561906814575195),
+ ('index', 0.30117917060852051),
+ '------------------',
+ '------------------',
+ "**** res = catalog.findValueTokens('children', {'token': 9}) ****",
+ ('brute', 0.2293241024017334),
+ ('index', 0.30806207656860352),
+ "**** res = catalog.findValueTokens('children', {'token': 0}) ****",
+ ('brute', 0.23177909851074219),
+ ('index', 0.30793118476867676),
+ '------------------',
+ '------------------']
+
 Notes:
 
 - While simply creating a generator is unsurprisingly the least work, if you
@@ -132,6 +163,12 @@
 - The relation index, without the additional value index, still does a pretty
   good job on value searches, as hoped.
 
+- In the second test we are only creating the generator each time.  We see
+  what the cost is to install a search index.  This is small--as we see
+  in the first test, getting the results takes much more time--and is
+  hopefully a largely non-increasing cost once you add your first search
+  index.
+
 It might be nice to be able to demand a generator even when you have a search
 index, in case you only want a result or two for a given call.  Probably
 should be YAGNI for now.
@@ -285,4 +322,45 @@
     d.append('------------------')
     d.append('------------------')
 
+
+print '                               Test 1\n\n'
 pprint.pprint(d)
+
+reverse_setup = brute_setup + """
+import zc.relation.searchindex
+catalog.addSearchIndex(
+    zc.relation.searchindex.TransposingTransitive('children', 'token'))
+"""
+
+relations_run_template = '''
+res = catalog.findRelationTokens({'token': %d})
+'''
+
+value_run_template = '''
+res = catalog.findValueTokens('children', {'token': %d})
+'''
+
+print '\n\n                               Test 2\n\n'
+
+control = timeit.Timer('res = catalog.__len__()', brute_setup)
+control_result = min(control.repeat(3, runs))
+d = [('control_result', control_result)]
+
+for template in (relations_run_template, value_run_template):
+    for o in (9,0):
+        run = template % (o,)
+        d.append('**** %s ****' % (run.strip(),))
+        d.append((
+            'brute',
+            min(timeit.Timer(
+                run, brute_setup).repeat(3, runs)) - control_result))
+        d.append((
+            'index',
+            min(timeit.Timer(
+                run, reverse_setup).repeat(3, runs)) - control_result))
+        
+    d.append('------------------')
+    d.append('------------------')
+
+pprint.pprint(d)
+        

Modified: zc.relation/trunk/src/zc/relation/tokens.txt
===================================================================
--- zc.relation/trunk/src/zc/relation/tokens.txt	2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/tokens.txt	2007-08-07 02:03:29 UTC (rev 78650)
@@ -684,7 +684,8 @@
     >>> index = zc.relation.searchindex.Intransitive(
     ...     ('organization', 'principal_id'), 'role_id', factory2,
     ...     getValueTokens,
-    ...     ('organization', 'principal_id', 'part', 'role_id'))
+    ...     ('organization', 'principal_id', 'part', 'role_id'),
+    ...     unlimitedDepth=True)
     >>> catalog.addSearchIndex(index)
 
     >>> res = catalog.findValueTokens(
@@ -866,7 +867,8 @@
     >>> root['index2'] = index2 = zc.relation.searchindex.Intransitive(
     ...     ('organization', 'principal_id'), 'role_id', factory3,
     ...     getValueTokens2,
-    ...     ('organization', 'principal_id', 'part', 'role_id'))
+    ...     ('organization', 'principal_id', 'part', 'role_id'),
+    ...     unlimitedDepth=True)
     >>> role_catalog.addSearchIndex(index2)
 
 The new role_catalog index needs to be updated from the org_catalog.



More information about the Checkins mailing list