[Checkins] SVN: zc.relation/trunk/ - Add in zc.relationship for
testing. This revealed that I needed to abort my
Gary Poster
gary at zope.com
Mon Aug 6 22:03:30 EDT 2007
Log message for revision 78650:
- Add in zc.relationship for testing. This revealed that I needed to abort my
transactions in zc.relation but everything else seemed to be ok.
- add in an additional timeit test for seeing how adding a search index
affected searches that didn't use it. I didn't like the results, so
did a bit of optimization. This means that search indexes now have a
slightly different contract.
- optimzation.txt wasn't hooked in. Not sure how much it adds. :-/
catalog test coverage slipped to 98%. Would be really cool to get it and
searchindex up to 100%.
Changed:
_U zc.relation/trunk/
U zc.relation/trunk/buildout.cfg
U zc.relation/trunk/setup.py
U zc.relation/trunk/src/zc/relation/catalog.py
U zc.relation/trunk/src/zc/relation/interfaces.py
U zc.relation/trunk/src/zc/relation/optimization.txt
U zc.relation/trunk/src/zc/relation/searchindex.py
U zc.relation/trunk/src/zc/relation/tests.py
U zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py
U zc.relation/trunk/src/zc/relation/tokens.txt
-=-
Property changes on: zc.relation/trunk
___________________________________________________________________
Name: svn:externals
+ zc.relationship svn+ssh://svn.zope.org/repos/main/zc.relationship/trunk
Modified: zc.relation/trunk/buildout.cfg
===================================================================
--- zc.relation/trunk/buildout.cfg 2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/buildout.cfg 2007-08-07 02:03:29 UTC (rev 78650)
@@ -1,12 +1,13 @@
[buildout]
-develop = .
+develop = . zc.relationship
parts = test py
find-links = http://download.zope.org/distribution/
[test]
recipe = zc.recipe.testrunner
-eggs = zc.relation
+eggs = zc.relation [test]
+ zc.relationship
defaults = "--tests-pattern [fn]?tests --exit-with-status".split()
[py]
Modified: zc.relation/trunk/setup.py
===================================================================
--- zc.relation/trunk/setup.py 2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/setup.py 2007-08-07 02:03:29 UTC (rev 78650)
@@ -26,4 +26,5 @@
'zope.testing',
],
+ extras_require={'test':'zc.relationship'},
)
Modified: zc.relation/trunk/src/zc/relation/catalog.py
===================================================================
--- zc.relation/trunk/src/zc/relation/catalog.py 2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/catalog.py 2007-08-07 02:03:29 UTC (rev 78650)
@@ -12,8 +12,6 @@
from zc.relation import interfaces
-# use OOBTree, not items tuple
-
##############################################################################
# helpers
#
@@ -46,26 +44,6 @@
Mapping = BTrees.family32.OO.BTree
return Mapping
-def makeCheckTargetFilter(targetQuery, targetFilter, catalog):
- targetCache = {}
- checkTargetFilter = None
- if targetQuery:
- targetData = catalog.getRelationTokens(targetQuery)
- if not targetData:
- return () # shortcut, indicating no values
- else:
- if targetFilter is not None:
- def checkTargetFilter(relchain, query):
- return relchain[-1] in targetData and targetFilter(
- relchain, query, catalog, targetCache)
- else:
- def checkTargetFilter(relchain, query):
- return relchain[-1] in targetData
- elif targetFilter is not None:
- def checkTargetFilter(relchain, query):
- return targetFilter(relchain, query, catalog, targetCache)
- return checkTargetFilter
-
class Ref(persistent.Persistent):
def __init__(self, ob):
self.ob = ob
@@ -127,6 +105,7 @@
family = BTrees.family32
_listeners = _queryFactories = _searchIndexes = ()
+ _searchIndexMatches = None
def __init__(self, dumpRel, loadRel, relFamily=None, family=None):
if family is not None:
@@ -215,7 +194,21 @@
res._queryFactories = self._queryFactories # it's a tuple
res._relLength = BTrees.Length.Length()
res._relLength.set(self._relLength.value)
- res._searchIndexes = tuple(ix.copy(self) for ix in self._searchIndexes)
+ if self._searchIndexMatches is not None:
+ indexes = []
+ res._searchIndexMatches = self.family.OO.Bucket()
+ for ix, keys in self._searchIndexes:
+ cix = ix.copy(self)
+ indexes.append((cix, keys))
+ for key in keys:
+ dest = res._searchIndexMatches.get(key)
+ if dest is None:
+ dest = res._searchIndexMatches[key] = (
+ persistent.list.PersistentList())
+ for info in self._searchIndexMatches[key]:
+ if info[3] is ix:
+ dest.append(info[:3] + (cix,))
+ res._searchIndexes = tuple(indexes)
return res
# Value Indexes
@@ -357,18 +350,65 @@
# --------------
def addSearchIndex(self, ix):
- ix.setCatalog(self)
- self._searchIndexes += (ix,)
+ matches = tuple(ix.setCatalog(self))
+ if self._searchIndexMatches is None:
+ self._searchIndexMatches = self.family.OO.Bucket()
+ keys = set()
+ for (name, query_names, static_values, maxDepth, filter, queryFactory
+ ) in matches:
+ if name is None:
+ name = ''
+ rel_bool = True
+ else:
+ rel_bool = False
+ query_names_res = []
+ none_query = False
+ for nm in query_names:
+ if nm is None:
+ none_query = True
+ else:
+ query_names_res.append(nm)
+ if maxDepth is None:
+ maxDepth = 0
+ k = (rel_bool, name, none_query, tuple(query_names_res), maxDepth)
+ a = self._searchIndexMatches.get(k)
+ if a is None:
+ self._searchIndexMatches[
+ k] = a = persistent.list.PersistentList()
+ if getattr(static_values, 'items', None) is not None:
+ static_values = static_values.items()
+ a.append((filter, queryFactory, tuple(static_values), ix))
+ keys.add(k)
+ keys = frozenset(keys)
+ self._searchIndexes += ((ix, keys),)
def iterSearchIndexes(self):
- return iter(self._searchIndexes)
+ return (data[0] for data in self._searchIndexes)
def removeSearchIndex(self, ix):
- res = tuple(i for i in self._searchIndexes if i is not ix)
- if len(res) == len(self._searchIndexes):
+ res = []
+ keys = None
+ for data in self._searchIndexes:
+ if data[0] is ix:
+ assert keys is None, 'internal data structure error'
+ keys = data[1]
+ else:
+ res.append(data)
+ if keys is None:
raise LookupError('index not found', ix)
- self._searchIndexes = res
+ self._searchIndexes = tuple(res)
ix.setCatalog(None)
+ if not res:
+ self._searchIndexMatches = None
+ else:
+ for key in keys:
+ a = self._searchIndexMatches[key]
+ res = persistent.list.PersistentList(
+ info for info in a if info[3] is not ix)
+ if not res:
+ del self._searchIndexMatches[key]
+ else:
+ self._searchIndexMatches[key] = res
# Indexing
# ========
@@ -663,17 +703,38 @@
res = self._relTools['intersection'](res, data.pop(0)[1])
return res
- def _getSearchIndexResults(self, name, query, maxDepth, filter,
+ def _getSearchIndexResults(self, key, query, maxDepth, filter,
targetQuery, targetFilter, queryFactory):
- for ix in self._searchIndexes:
- res = ix.getResults(name, query, maxDepth, filter, targetQuery,
- targetFilter, queryFactory)
+ # only call for relations
+ for (c_filter, c_queryFactory,
+ c_static_values, ix) in self._searchIndexMatches.get(key, ()):
+ if (c_filter != filter or
+ c_queryFactory != queryFactory):
+ continue
+ for k, v in c_static_values:
+ if query[k] != v:
+ continue
+ res = ix.getResults(
+ None, query, maxDepth, filter, queryFactory)
if res is not None:
+ if res:
+ if targetQuery:
+ targetData = self._relData(targetQuery)
+ if not targetData:
+ return self._relTools['Set']()
+ res = self._relTools['intersection'](
+ res, targetData)
+ if targetFilter is not None:
+ targetCache = {}
+ res = (rel for rel in res if
+ targetFilter(
+ [rel], query, self,
+ targetCache))
return res
def _iterListeners(self):
# fix up ourself first
- for ix in self._searchIndexes:
+ for ix, keys in self._searchIndexes:
yield ix
# then tell others
for l in self.iterListeners():
@@ -715,10 +776,23 @@
return filter(relchain, query, self, filterCache)
else:
checkFilter = None
- checkTargetFilter = makeCheckTargetFilter(
- targetQuery, targetFilter, self)
- if checkTargetFilter is not None and not checkTargetFilter:
- relData = ()
+ targetCache = {}
+ checkTargetFilter = None
+ if targetQuery:
+ targetData = self._relData(targetQuery)
+ if not targetData:
+ relData = () # shortcut
+ else:
+ if targetFilter is not None:
+ def checkTargetFilter(relchain, query):
+ return relchain[-1] in targetData and targetFilter(
+ relchain, query, self, targetCache)
+ else:
+ def checkTargetFilter(relchain, query):
+ return relchain[-1] in targetData
+ elif targetFilter is not None:
+ def checkTargetFilter(relchain, query):
+ return targetFilter(relchain, query, self, targetCache)
return (query, relData, maxDepth, checkFilter, checkTargetFilter,
getQueries)
@@ -825,19 +899,36 @@
return multiunion(
(self._reltoken_name_TO_objtokenset.get((r, name))
for r in rels), data)
- res = self._getSearchIndexResults(
- name, query, maxDepth, filter, targetQuery,
- targetFilter, queryFactory)
- if res is not None:
- return res
- res = self._getSearchIndexResults(
- None, query, maxDepth, filter, targetQuery, targetFilter,
- queryFactory)
- if res is not None:
- return multiunion(
- (self._reltoken_name_TO_objtokenset.get((r, name))
- for r in res),
- data)
+ if self._searchIndexMatches is not None:
+ if None in query:
+ none = True
+ query_names = tuple(nm for nm in query if nm is not None)
+ else:
+ none = False
+ query_names = tuple(query)
+ if not targetQuery and targetFilter is None:
+ key = (False, name, none, query_names, maxDepth or 0)
+ for (c_filter, c_queryFactory, c_static_values, ix
+ ) in self._searchIndexMatches.get(key, ()):
+ if (c_filter != filter or
+ c_queryFactory != queryFactory):
+ continue
+ for k, v in c_static_values:
+ if query[k] != v:
+ continue
+ res = ix.getResults(
+ name, query, maxDepth, filter, queryFactory)
+ if res is not None:
+ return res
+ key = (True, '', none, query_names, maxDepth or 0)
+ res = self._getSearchIndexResults(
+ key, query, maxDepth, filter, targetQuery, targetFilter,
+ queryFactory)
+ if res is not None:
+ return multiunion(
+ (self._reltoken_name_TO_objtokenset.get((r, name))
+ for r in res),
+ self._attrs[name])
if getQueries is None:
queryFactory, getQueries = self._getQueryFactory(
query, queryFactory)
@@ -899,11 +990,19 @@
if res is None:
res = self._relTools['Set']()
return res
- res = self._getSearchIndexResults(
- None, query, maxDepth, filter, targetQuery, targetFilter,
- queryFactory)
- if res is not None:
- return res
+ if self._searchIndexMatches is not None:
+ if None in query:
+ none = True
+ query_names = tuple(nm for nm in query if nm is not None)
+ else:
+ none = False
+ query_names = tuple(query)
+ key = (True, '', none, query_names, maxDepth or 0)
+ res = self._getSearchIndexResults(
+ key, query, maxDepth, filter, targetQuery, targetFilter,
+ queryFactory)
+ if res is not None:
+ return res
if getQueries is None:
queryFactory, getQueries = self._getQueryFactory(
query, queryFactory)
@@ -926,10 +1025,6 @@
def findRelationChains(self, query, maxDepth=None, filter=None,
targetQuery=(), targetFilter=None,
queryFactory=None):
- """find relation tokens that match the query.
-
- same arguments as findValueTokens except no name.
- """
query = BTrees.family32.OO.Bucket(query)
queryFactory, getQueries = self._getQueryFactory(
query, queryFactory)
@@ -957,10 +1052,6 @@
def findRelationTokenChains(self, query, maxDepth=None, filter=None,
targetQuery=(), targetFilter=None,
queryFactory=None):
- """find relation tokens that match the query.
-
- same arguments as findValueTokens except no name.
- """
query = BTrees.family32.OO.Bucket(query)
queryFactory, getQueries = self._getQueryFactory(
query, queryFactory)
@@ -977,11 +1068,19 @@
queryFactory, getQueries = self._getQueryFactory(
query, queryFactory)
targetQuery = BTrees.family32.OO.Bucket(targetQuery)
- res = self._getSearchIndexResults(
- None, query, maxDepth, filter, targetQuery, targetFilter,
- queryFactory)
- if res is not None:
- return bool(res)
+ if self._searchIndexMatches is not None:
+ if None in query:
+ none = True
+ query_names = tuple(nm for nm in query if nm is not None)
+ else:
+ none = False
+ query_names = tuple(query)
+ key = (True, '', none, query_names, maxDepth or 0)
+ res = self._getSearchIndexResults(
+ key, query, maxDepth, filter, targetQuery, targetFilter,
+ queryFactory)
+ if res is not None:
+ return bool(res)
if getQueries is None:
queryFactory, getQueries = self._getQueryFactory(
query, queryFactory)
Modified: zc.relation/trunk/src/zc/relation/interfaces.py
===================================================================
--- zc.relation/trunk/src/zc/relation/interfaces.py 2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/interfaces.py 2007-08-07 02:03:29 UTC (rev 78650)
@@ -88,17 +88,21 @@
"""return a copy of this index, bound to provided catalog if given"""
def setCatalog(catalog):
- """set the search index to be using the given catalog.
+ """set the search index to be using the given catalog, return matches.
Should immediately being index up-to-date if catalog has content.
if index already has a catalog, raise an error.
If provided catalog is None, clear catalog and indexes.
+
+ Returned matches should be iterable of tuples of (search name or None,
+ query names, static values, maxDepth, filter, queryFactory). Only
+ searches matching one of these tuples will be sent to the search
+ index.
"""
- def getResults(name, query, maxDepth, filter, targetQuery,
- targetFilter, queryFactory):
+ def getResults(name, query, maxDepth, filter, queryFactory):
"""return results for search if available, and None if not
Returning a non-None value means that this search index claims the
Modified: zc.relation/trunk/src/zc/relation/optimization.txt
===================================================================
--- zc.relation/trunk/src/zc/relation/optimization.txt 2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/optimization.txt 2007-08-07 02:03:29 UTC (rev 78650)
@@ -97,16 +97,18 @@
... return token
...
- >>> import zc.relation
- >>> factory = zc.relation.TransposingTransitiveQueriesFactory(
- ... 'subjects', 'objects')
- >>> catalog = zc.relation.Catalog(
- ... dumpRelation, loadRelation, factory)
+ >>> import zc.relation.catalog
+ >>> catalog = zc.relation.catalog.Catalog(
+ ... dumpRelation, loadRelation)
>>> catalog.addValueIndex(IRelation['subjects'], multiple=True)
>>> catalog.addValueIndex(
... IRelation['relationtype'], relTypeDump, relTypeLoad,
... BTrees.family32.OI, name='reltype')
>>> catalog.addValueIndex(IRelation['objects'], multiple=True)
+ >>> import zc.relation.queryfactory
+ >>> factory = zc.relation.queryfactory.TransposingTransitive(
+ ... 'subjects', 'objects')
+ >>> catalog.addDefaultQueryFactory(factory)
>>> rel = Relation((1,), 'has the role of', (2,))
>>> catalog.index(rel)
Modified: zc.relation/trunk/src/zc/relation/searchindex.py
===================================================================
--- zc.relation/trunk/src/zc/relation/searchindex.py 2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/searchindex.py 2007-08-07 02:03:29 UTC (rev 78650)
@@ -16,6 +16,20 @@
_marker = object()
class TransposingTransitive(persistent.Persistent):
+ """for searches using zc.relation.queryfactory.TransposingTransitive.
+
+ Only indexes one direction. Only indexes with maxDepth=None.
+ Does not support filters.
+
+ This approach could be used for other query factories that only look
+ at the final element in the relchain. If that were desired, I'd abstract
+ some of this code.
+
+ while target filters are currently supported, perhaps they shouldn't be:
+ the target filter can look at the last element in the relchain, but not
+ at the true relchain itself. That is: the relchain lies, except for the
+ last element.
+ """
zope.interface.implements(zc.relation.interfaces.ISearchIndex)
name = index = catalog = None
@@ -30,14 +44,6 @@
self.reverse = reverse
self.update = frozenset(
itertools.chain((k for k, v in static), (forward, reverse)))
- match = list(static)
- match.append((forward, _marker))
- match.sort()
- self._match = tuple(match)
- match = list(static)
- match.append((reverse, _marker))
- match.sort()
- self._reverse_match = tuple(match)
self.factory = zc.relation.queryfactory.TransposingTransitive(
forward, reverse)
@@ -56,8 +62,6 @@
new.reverse = self.reverse
new.factory = self.factory
new.static = self.static
- new._match = self._match
- new._reverse_match = self._reverse_match
if self.index is not None:
if catalog is None:
catalog = self.catalog
@@ -84,6 +88,13 @@
for token in catalog.getRelationTokens():
if token not in self.index:
self._index(token)
+ # name, query_names, static_values, maxDepth, filter, queryFactory
+ query_names = (self.forward,) + tuple(k for k, v in self.static)
+ res = [(None, query_names, self.static, None, None, self.factory)]
+ for nm in self.names:
+ res.append(
+ (nm, query_names, self.static, None, None, self.factory))
+ return res
def _buildQuery(self, dynamic):
res = BTrees.family32.OO.Bucket(self.static)
@@ -210,18 +221,10 @@
# end listener interface
- def getResults(self, name, query, maxDepth, filter, targetQuery,
- targetFilter, queryFactory):
- if (queryFactory != self.factory or
- name is not None and name not in self.names or
- maxDepth is not None or filter is not None or
- len(query) != len(self.static) + 1 or
- name is not None and (targetQuery or targetFilter is not None)):
- return None
- for given, match in itertools.izip(query.items(), self._match):
- if (given[0] != match[0] or
- match[1] is not _marker and given[1] != match[1]):
- return None
+ def getResults(self, name, query, maxDepth, filter, queryFactory):
+ for k, v in self.static:
+ if query[k] != v:
+ return
# TODO: try to use intransitive index, if available
rels = self.catalog.getRelationTokens(query)
if name is None:
@@ -234,39 +237,42 @@
return tools['Set']()
elif not rels:
return rels
- res = zc.relation.catalog.multiunion(
+ return zc.relation.catalog.multiunion(
(ix.get(rel) for rel in rels), tools)
- if name is None:
- checkTargetFilter = zc.relation.catalog.makeCheckTargetFilter(
- targetQuery, targetFilter, self.catalog)
- if checkTargetFilter is not None:
- if not checkTargetFilter: # no results
- res = tools['Set']()
- else:
- res = tools['Set'](
- rel for rel in res if checkTargetFilter([rel], query))
- return res
class Intransitive(persistent.Persistent):
+ """saves results for precise search.
+
+ Could be used for transitive searches, but writes would be much more
+ expensive than the TransposingTransitive approach.
+
+ """
+ # XXX Rename to Direct?
zope.interface.implements(
zc.relation.interfaces.ISearchIndex,
zc.relation.interfaces.IListener)
- index = catalog = name = queriesFactory = None
+ index = catalog = name = queryFactory = None
update = frozenset()
def __init__(self, names, name=None,
- queriesFactory=None, getValueTokens=None, update=None):
+ queryFactory=None, getValueTokens=None, update=None,
+ unlimitedDepth=False):
self.names = tuple(sorted(names))
self.name = name
- self.queriesFactory = queriesFactory
+ self.queryFactory = queryFactory
if update is None:
update = names
if name is not None:
update += (name,)
self.update = frozenset(update)
self.getValueTokens = getValueTokens
+ if unlimitedDepth:
+ depths = (1, None)
+ else:
+ depths = (1,)
+ self.depths = tuple(depths)
def copy(self, catalog=None):
@@ -280,9 +286,10 @@
res.index[k] = copy.copy(v)
res.names = self.names
res.name = self.name
- res.queriesFactory = self.queriesFactory
+ res.queryFactory = self.queryFactory
res.update = self.update
res.getValueTokens = self.getValueTokens
+ res.depths = self.depths
return res
def setCatalog(self, catalog):
@@ -294,6 +301,9 @@
self.catalog = catalog
self.index = BTrees.family32.OO.BTree()
self.sourceAdded(catalog)
+ # name, query_names, static_values, maxDepth, filter, queryFactory
+ return [(self.name, self.names, (), depth, None, self.queryFactory)
+ for depth in self.depths]
def relationAdded(self, token, catalog, additions):
self._index(token, catalog, additions)
@@ -319,8 +329,8 @@
def _indexQuery(self, query):
dquery = dict(query)
- if self.queriesFactory is not None:
- getQueries = self.queriesFactory(dquery, self.catalog)
+ if self.queryFactory is not None:
+ getQueries = self.queryFactory(dquery, self.catalog)
else:
getQueries = lambda empty: (query,)
res = zc.relation.catalog.multiunion(
@@ -418,20 +428,13 @@
else:
break
- def getResults(self, name, query, maxDepth, filter, targetQuery,
- targetFilter, queriesFactory):
- if (name != self.name or maxDepth not in (1, None) or
- queriesFactory != self.queriesFactory or targetQuery
- or filter is not None or targetFilter is not None):
- return # TODO could maybe handle some later
- names = []
+ def getResults(self, name, query, maxDepth, filter, queryFactory):
query = tuple(query.items())
for nm, v in query:
if isinstance(v, zc.relation.catalog.Any):
return None # TODO open up
- names.append(nm)
res = self.index.get(query)
- if res is None and self.names == tuple(names):
+ if res is None:
if self.name is None:
res = self.catalog.getRelationModuleTools()['Set']()
else:
Modified: zc.relation/trunk/src/zc/relation/tests.py
===================================================================
--- zc.relation/trunk/src/zc/relation/tests.py 2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/tests.py 2007-08-07 02:03:29 UTC (rev 78650)
@@ -23,6 +23,8 @@
def tearDown(test):
db = test.globs.get('db')
if db is not None:
+ import transaction
+ transaction.abort()
db.close()
zope.testing.module.tearDown(test)
@@ -40,13 +42,9 @@
'searchindex.txt',
setUp=setUp, tearDown=tearDown,
optionflags=doctest.INTERPRET_FOOTNOTES),
-# doctest.DocFileSuite(
-# 'optimization.txt',
-# setUp=setUp, tearDown=tearDown,
-# optionflags=doctest.INTERPRET_FOOTNOTES),
-# doctest.DocFileSuite(
-# 'administration.txt',
-# setUp=setUp, tearDown=tearDown,
-# optionflags=doctest.INTERPRET_FOOTNOTES),
+ doctest.DocFileSuite(
+ 'optimization.txt',
+ setUp=setUp, tearDown=tearDown,
+ optionflags=doctest.INTERPRET_FOOTNOTES),
))
return res
Modified: zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py
===================================================================
--- zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py 2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py 2007-08-07 02:03:29 UTC (rev 78650)
@@ -1,127 +1,158 @@
"""
+The goal of this file is to determine how well the transposing transitive
+index helps searches.
-The result has three big sections, one for `findRelationTokens`, one for
-`findValueTokens`, and one for `canFind`. Within each section, we have six
-searches, each broader than the last. A 'brute' result is a result without
-a search index. A 'relation' result uses a search index without a configured
-value index. A 'value' result uses a search index with a configured value
-index. 'relation' and 'value' results should really only differ materially
-for `findValueTokens`. A 'brute_generate' result shows how fast it takes to
-get the generator back from a brute search, without actually iterating over
-it, and is not pertinent for `canFind`.
+The result of the first test has three big sections, one for
+`findRelationTokens`, one for `findValueTokens`, and one for `canFind`.
+Within each section, we have six searches, each broader than the last.
+A 'brute' result is a result without a search index. A 'relation'
+result uses a search index without a configured value index. A 'value'
+result uses a search index with a configured value index. 'relation'
+and 'value' results should really only differ materially for
+`findValueTokens`. A 'brute_generate' result shows how fast it takes to
+get the generator back from a brute search, without actually iterating
+over it, and is not pertinent for `canFind`.
+The result of the second test shows how expensive it is to install a search
+index that is not used.
+
Example result:
-[('control_result', 0.0094759464263916016),
+ Test 1
+
+
+[('control_result', 0.0091979503631591797),
"**** res = list(catalog.findRelationTokens({'token': 9})) ****",
'**** [109] ****',
- ('brute_generate', 0.2457740306854248),
- ('brute', 0.81798601150512695),
- ('relation', 0.52101397514343262),
- ('value', 0.51753401756286621),
+ ('brute_generate', 0.230133056640625),
+ ('brute', 0.79251599311828613),
+ ('relation', 0.52498507499694824),
+ ('value', 0.52437424659729004),
"**** res = list(catalog.findRelationTokens({'token': 7})) ****",
'**** [107] ****',
- ('brute_generate', 0.24491000175476074),
- ('brute', 0.83113908767700195),
- ('relation', 0.53361701965332031),
- ('value', 0.52347803115844727),
+ ('brute_generate', 0.2291419506072998),
+ ('brute', 0.80537819862365723),
+ ('relation', 0.52660107612609863),
+ ('value', 0.53035998344421387),
"**** res = list(catalog.findRelationTokens({'token': 5})) ****",
'**** [105, 107, 108, 109] ****',
- ('brute_generate', 0.24980616569519043),
- ('brute', 1.9965240955352783),
- ('relation', 0.5299079418182373),
- ('value', 0.53121399879455566),
+ ('brute_generate', 0.23081111907958984),
+ ('brute', 1.9628522396087646),
+ ('relation', 0.53455114364624023),
+ ('value', 0.53245711326599121),
"**** res = list(catalog.findRelationTokens({'token': 3})) ****",
'**** [103, 105, 106, 107, 108, 109] ****',
- ('brute_generate', 0.24709606170654297),
- ('brute', 2.8365921974182129),
- ('relation', 0.54007720947265625),
- ('value', 0.53515410423278809),
+ ('brute_generate', 0.23008418083190918),
+ ('brute', 2.7950401306152344),
+ ('relation', 0.53938508033752441),
+ ('value', 0.53650403022766113),
"**** res = list(catalog.findRelationTokens({'token': 1})) ****",
'**** [101, 103, 104, 105, 106, 107, 108, 109] ****',
- ('brute_generate', 0.24880099296569824),
- ('brute', 3.6703901290893555),
- ('relation', 0.53357386589050293),
- ('value', 0.53384017944335938),
+ ('brute_generate', 0.23291897773742676),
+ ('brute', 3.614861011505127),
+ ('relation', 0.53824424743652344),
+ ('value', 0.53764486312866211),
"**** res = list(catalog.findRelationTokens({'token': 0})) ****",
'**** [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111] ****',
- ('brute_generate', 0.25098800659179688),
- ('brute', 5.3719911575317383),
- ('relation', 0.53967404365539551),
- ('value', 0.53260326385498047),
+ ('brute_generate', 0.23366808891296387),
+ ('brute', 5.3421971797943115),
+ ('relation', 0.54594111442565918),
+ ('value', 0.54389309883117676),
'------------------',
'------------------',
"**** res = list(catalog.findValueTokens('children', {'token': 9})) ****",
'**** [23, 24] ****',
- ('brute_generate', 0.25597715377807617),
- ('brute', 0.87791824340820312),
- ('relation', 0.71430587768554688),
- ('value', 0.53523707389831543),
+ ('brute_generate', 0.2296299934387207),
+ ('brute', 0.84845805168151855),
+ ('relation', 0.68227314949035645),
+ ('value', 0.54161596298217773),
"**** res = list(catalog.findValueTokens('children', {'token': 7})) ****",
'**** [17, 18, 19] ****',
- ('brute_generate', 0.25435924530029297),
- ('brute', 0.9038851261138916),
- ('relation', 0.73513698577880859),
- ('value', 0.53882598876953125),
+ ('brute_generate', 0.22747707366943359),
+ ('brute', 0.87759518623352051),
+ ('relation', 0.69233298301696777),
+ ('value', 0.53438305854797363),
"**** res = list(catalog.findValueTokens('children', {'token': 5})) ****",
'**** [7, 8, 9, 17, 18, 19, 20, 21, 22, 23, 24] ****',
- ('brute_generate', 0.2563321590423584),
- ('brute', 2.2175660133361816),
- ('relation', 0.8459780216217041),
- ('value', 0.54215097427368164),
+ ('brute_generate', 0.22690701484680176),
+ ('brute', 2.1761610507965088),
+ ('relation', 0.80766010284423828),
+ ('value', 0.54901003837585449),
"**** res = list(catalog.findValueTokens('children', {'token': 3})) ****",
'**** [5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] ****',
- ('brute_generate', 0.25664114952087402),
- ('brute', 3.1539299488067627),
- ('relation', 0.91316413879394531),
- ('value', 0.55379605293273926),
+ ('brute_generate', 0.22738194465637207),
+ ('brute', 3.0915610790252686),
+ ('relation', 0.86617612838745117),
+ ('value', 0.55521416664123535),
"**** res = list(catalog.findValueTokens('children', {'token': 1})) ****",
'**** [3, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] ****',
- ('brute_generate', 0.25446605682373047),
- ('brute', 4.058326244354248),
- ('relation', 0.98118209838867188),
- ('value', 0.54912614822387695),
+ ('brute_generate', 0.22893619537353516),
+ ('brute', 4.0192921161651611),
+ ('relation', 0.94522690773010254),
+ ('value', 0.55083107948303223),
"**** res = list(catalog.findValueTokens('children', {'token': 0})) ****",
'**** [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32] ****',
- ('brute_generate', 0.25460124015808105),
- ('brute', 6.0014550685882568),
- ('relation', 1.1424670219421387),
- ('value', 0.55950188636779785),
+ ('brute_generate', 0.22567915916442871),
+ ('brute', 5.9501399993896484),
+ ('relation', 1.1025910377502441),
+ ('value', 0.56526517868041992),
'------------------',
'------------------',
"**** res = catalog.canFind({'token': 9}, targetQuery={'children': 23}) ****",
'**** True ****',
- ('brute', 0.81314396858215332),
- ('relation', 0.68167209625244141),
- ('value', 0.68214011192321777),
+ ('brute', 0.77447414398193359),
+ ('relation', 0.66926908493041992),
+ ('value', 0.66884613037109375),
"**** res = catalog.canFind({'token': 7}, targetQuery={'children': 23}) ****",
'**** False ****',
- ('brute', 0.98219418525695801),
- ('relation', 0.67153406143188477),
- ('value', 0.6687159538269043),
+ ('brute', 0.93198609352111816),
+ ('relation', 0.6589500904083252),
+ ('value', 0.6538691520690918),
"**** res = catalog.canFind({'token': 5}, targetQuery={'children': 23}) ****",
'**** True ****',
- ('brute', 2.0033540725708008),
- ('relation', 0.70535111427307129),
- ('value', 0.70386195182800293),
+ ('brute', 1.9603328704833984),
+ ('relation', 0.67744708061218262),
+ ('value', 0.67296600341796875),
"**** res = catalog.canFind({'token': 3}, targetQuery={'children': 23}) ****",
'**** True ****',
- ('brute', 2.8076090812683105),
- ('relation', 0.72887015342712402),
- ('value', 0.71839404106140137),
+ ('brute', 2.7534151077270508),
+ ('relation', 0.67701125144958496),
+ ('value', 0.67615604400634766),
"**** res = catalog.canFind({'token': 1}, targetQuery={'children': 23}) ****",
'**** True ****',
- ('brute', 3.6096680164337158),
- ('relation', 0.73476409912109375),
- ('value', 0.73782920837402344),
+ ('brute', 3.5522449016571045),
+ ('relation', 0.67667698860168457),
+ ('value', 0.67722702026367188),
"**** res = catalog.canFind({'token': 0}, targetQuery={'children': 23}) ****",
'**** True ****',
- ('brute', 5.2542290687561035),
- ('relation', 0.75870609283447266),
- ('value', 0.75950288772583008),
+ ('brute', 5.198289155960083),
+ ('relation', 0.67714405059814453),
+ ('value', 0.67705321311950684),
'------------------',
'------------------']
+
+ Test 2
+
+
+[('control_result', 0.0068368911743164062),
+ "**** res = catalog.findRelationTokens({'token': 9}) ****",
+ ('brute', 0.23646903038024902),
+ ('index', 0.29944419860839844),
+ "**** res = catalog.findRelationTokens({'token': 0}) ****",
+ ('brute', 0.23561906814575195),
+ ('index', 0.30117917060852051),
+ '------------------',
+ '------------------',
+ "**** res = catalog.findValueTokens('children', {'token': 9}) ****",
+ ('brute', 0.2293241024017334),
+ ('index', 0.30806207656860352),
+ "**** res = catalog.findValueTokens('children', {'token': 0}) ****",
+ ('brute', 0.23177909851074219),
+ ('index', 0.30793118476867676),
+ '------------------',
+ '------------------']
+
Notes:
- While simply creating a generator is unsurprisingly the least work, if you
@@ -132,6 +163,12 @@
- The relation index, without the additional value index, still does a pretty
good job on value searches, as hoped.
+- In the second test we are only creating the generator each time. We see
+ what the cost is to install a search index. This is small--as we see
+ in the first test, getting the results takes much more time--and is
+ hopefully a largely non-increasing cost once you add your first search
+ index.
+
It might be nice to be able to demand a generator even when you have a search
index, in case you only want a result or two for a given call. Probably
should be YAGNI for now.
@@ -285,4 +322,45 @@
d.append('------------------')
d.append('------------------')
+
+print ' Test 1\n\n'
pprint.pprint(d)
+
+reverse_setup = brute_setup + """
+import zc.relation.searchindex
+catalog.addSearchIndex(
+ zc.relation.searchindex.TransposingTransitive('children', 'token'))
+"""
+
+relations_run_template = '''
+res = catalog.findRelationTokens({'token': %d})
+'''
+
+value_run_template = '''
+res = catalog.findValueTokens('children', {'token': %d})
+'''
+
+print '\n\n Test 2\n\n'
+
+control = timeit.Timer('res = catalog.__len__()', brute_setup)
+control_result = min(control.repeat(3, runs))
+d = [('control_result', control_result)]
+
+for template in (relations_run_template, value_run_template):
+ for o in (9,0):
+ run = template % (o,)
+ d.append('**** %s ****' % (run.strip(),))
+ d.append((
+ 'brute',
+ min(timeit.Timer(
+ run, brute_setup).repeat(3, runs)) - control_result))
+ d.append((
+ 'index',
+ min(timeit.Timer(
+ run, reverse_setup).repeat(3, runs)) - control_result))
+
+ d.append('------------------')
+ d.append('------------------')
+
+pprint.pprint(d)
+
Modified: zc.relation/trunk/src/zc/relation/tokens.txt
===================================================================
--- zc.relation/trunk/src/zc/relation/tokens.txt 2007-08-07 01:45:50 UTC (rev 78649)
+++ zc.relation/trunk/src/zc/relation/tokens.txt 2007-08-07 02:03:29 UTC (rev 78650)
@@ -684,7 +684,8 @@
>>> index = zc.relation.searchindex.Intransitive(
... ('organization', 'principal_id'), 'role_id', factory2,
... getValueTokens,
- ... ('organization', 'principal_id', 'part', 'role_id'))
+ ... ('organization', 'principal_id', 'part', 'role_id'),
+ ... unlimitedDepth=True)
>>> catalog.addSearchIndex(index)
>>> res = catalog.findValueTokens(
@@ -866,7 +867,8 @@
>>> root['index2'] = index2 = zc.relation.searchindex.Intransitive(
... ('organization', 'principal_id'), 'role_id', factory3,
... getValueTokens2,
- ... ('organization', 'principal_id', 'part', 'role_id'))
+ ... ('organization', 'principal_id', 'part', 'role_id'),
+ ... unlimitedDepth=True)
>>> role_catalog.addSearchIndex(index2)
The new role_catalog index needs to be updated from the org_catalog.
More information about the Checkins
mailing list