[Checkins] SVN: zc.relation/trunk/src/zc/relation/timeit/ add a new timeit for the transitive search index. slightly clean up an older one

Sat Aug 4 07:52:55 EDT 2007

Log message for revision 78580:
  add a new timeit for the transitive search index.  slightly clean up an older one

Changed:
  U   zc.relation/trunk/src/zc/relation/timeit/manual_intersection.py
  A   zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py

-=-
Modified: zc.relation/trunk/src/zc/relation/timeit/manual_intersection.py
===================================================================

--- zc.relation/trunk/src/zc/relation/timeit/manual_intersection.py	2007-08-04 10:32:02 UTC (rev 78579)
+++ zc.relation/trunk/src/zc/relation/timeit/manual_intersection.py	2007-08-04 11:52:55 UTC (rev 78580)
@@ -64,15 +64,12 @@
 
 intersect_template = 'BTrees.family32.IO.intersection(%s, %s)'
 
-
-control = timeit.Timer('pass', setup)
-
 options = ('one', 'ten', 'hundred', 'thousand', 'tenthousand')
 
 runs = 10000
 
-control_result = min(timeit.Timer('pass', setup).repeat(3, runs))
-
+control = timeit.Timer('pass', setup)
+control_result = min(control.repeat(3, runs))
 d = [('control_result', control_result)]
 
 for i, big in enumerate(options):

Added: zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py
===================================================================
--- zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py	                        (rev 0)
+++ zc.relation/trunk/src/zc/relation/timeit/transitive_search_index.py	2007-08-04 11:52:55 UTC (rev 78580)
@@ -0,0 +1,288 @@
+"""
+
+The result has three big sections, one for `findRelationTokens`, one for
+`findValueTokens`, and one for `canFind`.  Within each section, we have six
+searches, each broader than the last.  A 'brute' result is a result without
+a search index.  A 'relation' result uses a search index without a configured
+value index.  A 'value' result uses a search index with a configured value
+index.  'relation' and 'value' results should really only differ materially
+for `findValueTokens`.  A 'brute_generate' result shows how fast it takes to
+get the generator back from a brute search, without actually iterating over
+it, and is not pertinent for `canFind`.
+
+Example result:
+
+[('control_result', 0.0094759464263916016),
+ "**** res = list(catalog.findRelationTokens({'token': 9})) ****",
+ '**** [109] ****',
+ ('brute_generate', 0.2457740306854248),
+ ('brute', 0.81798601150512695),
+ ('relation', 0.52101397514343262),
+ ('value', 0.51753401756286621),
+ "**** res = list(catalog.findRelationTokens({'token': 7})) ****",
+ '**** [107] ****',
+ ('brute_generate', 0.24491000175476074),
+ ('brute', 0.83113908767700195),
+ ('relation', 0.53361701965332031),
+ ('value', 0.52347803115844727),
+ "**** res = list(catalog.findRelationTokens({'token': 5})) ****",
+ '**** [105, 107, 108, 109] ****',
+ ('brute_generate', 0.24980616569519043),
+ ('brute', 1.9965240955352783),
+ ('relation', 0.5299079418182373),
+ ('value', 0.53121399879455566),
+ "**** res = list(catalog.findRelationTokens({'token': 3})) ****",
+ '**** [103, 105, 106, 107, 108, 109] ****',
+ ('brute_generate', 0.24709606170654297),
+ ('brute', 2.8365921974182129),
+ ('relation', 0.54007720947265625),
+ ('value', 0.53515410423278809),
+ "**** res = list(catalog.findRelationTokens({'token': 1})) ****",
+ '**** [101, 103, 104, 105, 106, 107, 108, 109] ****',
+ ('brute_generate', 0.24880099296569824),
+ ('brute', 3.6703901290893555),
+ ('relation', 0.53357386589050293),
+ ('value', 0.53384017944335938),
+ "**** res = list(catalog.findRelationTokens({'token': 0})) ****",
+ '**** [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111] ****',
+ ('brute_generate', 0.25098800659179688),
+ ('brute', 5.3719911575317383),
+ ('relation', 0.53967404365539551),
+ ('value', 0.53260326385498047),
+ '------------------',
+ '------------------',
+ "**** res = list(catalog.findValueTokens('children', {'token': 9})) ****",
+ '**** [23, 24] ****',
+ ('brute_generate', 0.25597715377807617),
+ ('brute', 0.87791824340820312),
+ ('relation', 0.71430587768554688),
+ ('value', 0.53523707389831543),
+ "**** res = list(catalog.findValueTokens('children', {'token': 7})) ****",
+ '**** [17, 18, 19] ****',
+ ('brute_generate', 0.25435924530029297),
+ ('brute', 0.9038851261138916),
+ ('relation', 0.73513698577880859),
+ ('value', 0.53882598876953125),
+ "**** res = list(catalog.findValueTokens('children', {'token': 5})) ****",
+ '**** [7, 8, 9, 17, 18, 19, 20, 21, 22, 23, 24] ****',
+ ('brute_generate', 0.2563321590423584),
+ ('brute', 2.2175660133361816),
+ ('relation', 0.8459780216217041),
+ ('value', 0.54215097427368164),
+ "**** res = list(catalog.findValueTokens('children', {'token': 3})) ****",
+ '**** [5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] ****',
+ ('brute_generate', 0.25664114952087402),
+ ('brute', 3.1539299488067627),
+ ('relation', 0.91316413879394531),
+ ('value', 0.55379605293273926),
+ "**** res = list(catalog.findValueTokens('children', {'token': 1})) ****",
+ '**** [3, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] ****',
+ ('brute_generate', 0.25446605682373047),
+ ('brute', 4.058326244354248),
+ ('relation', 0.98118209838867188),
+ ('value', 0.54912614822387695),
+ "**** res = list(catalog.findValueTokens('children', {'token': 0})) ****",
+ '**** [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32] ****',
+ ('brute_generate', 0.25460124015808105),
+ ('brute', 6.0014550685882568),
+ ('relation', 1.1424670219421387),
+ ('value', 0.55950188636779785),
+ '------------------',
+ '------------------',
+ "**** res = catalog.canFind({'token': 9}, targetQuery={'children': 23}) ****",
+ '**** True ****',
+ ('brute', 0.81314396858215332),
+ ('relation', 0.68167209625244141),
+ ('value', 0.68214011192321777),
+ "**** res = catalog.canFind({'token': 7}, targetQuery={'children': 23}) ****",
+ '**** False ****',
+ ('brute', 0.98219418525695801),
+ ('relation', 0.67153406143188477),
+ ('value', 0.6687159538269043),
+ "**** res = catalog.canFind({'token': 5}, targetQuery={'children': 23}) ****",
+ '**** True ****',
+ ('brute', 2.0033540725708008),
+ ('relation', 0.70535111427307129),
+ ('value', 0.70386195182800293),
+ "**** res = catalog.canFind({'token': 3}, targetQuery={'children': 23}) ****",
+ '**** True ****',
+ ('brute', 2.8076090812683105),
+ ('relation', 0.72887015342712402),
+ ('value', 0.71839404106140137),
+ "**** res = catalog.canFind({'token': 1}, targetQuery={'children': 23}) ****",
+ '**** True ****',
+ ('brute', 3.6096680164337158),
+ ('relation', 0.73476409912109375),
+ ('value', 0.73782920837402344),
+ "**** res = catalog.canFind({'token': 0}, targetQuery={'children': 23}) ****",
+ '**** True ****',
+ ('brute', 5.2542290687561035),
+ ('relation', 0.75870609283447266),
+ ('value', 0.75950288772583008),
+ '------------------',
+ '------------------']
+
+Notes:
+
+- While simply creating a generator is unsurprisingly the least work, if you
+  want all the results then the index is always a win (on read!), even in the
+  smallest search.  Even in this small graph it can give us factor-of-10
+  results at the broadest search.
+
+- The relation index, without the additional value index, still does a pretty
+  good job on value searches, as hoped.
+
+It might be nice to be able to demand a generator even when you have a search
+index, in case you only want a result or two for a given call.  Probably
+should be YAGNI for now.
+
+It would be nice to add some further tests later to see how much worse
+the write performance is when you have these indexes.
+
+(I want to write look at the intransitive search too: is it really only
+useful when you have a query factory that mutates the initial search, as
+in tokens.txt?)
+
+"""
+
+import timeit
+import pprint
+
+# see zc/relation/searchindex.txt
+
+brute_setup = '''
+import BTrees
+relations = BTrees.family64.IO.BTree()
+relations[99] = None # just to give us a start
+
+class Relation(object):
+    def __init__(self, token, children=()):
+        self.token = token
+        self.children = BTrees.family64.IF.TreeSet(children)
+        self.id = relations.maxKey() + 1
+        relations[self.id] = self
+
+
+def token(rel, self):
+    return rel.token
+
+def children(rel, self):
+    return rel.children
+
+def dumpRelation(obj, index, cache):
+    return obj.id
+
+def loadRelation(token, index, cache):
+    return relations[token]
+
+import zc.relation.queryfactory
+factory = zc.relation.queryfactory.TransposingTransitive(
+    'token', 'children')
+import zc.relation.catalog
+catalog = zc.relation.catalog.Catalog(
+    dumpRelation, loadRelation, BTrees.family64.IO, BTrees.family64)
+catalog.addValueIndex(token)
+catalog.addValueIndex(children, multiple=True)
+catalog.addDefaultQueryFactory(factory)
+
+for token, children in (
+    (0, (1, 2)), (1, (3, 4)), (2, (10, 11, 12)), (3, (5, 6)),
+    (4, (13, 14)), (5, (7, 8, 9)), (6, (15, 16)), (7, (17, 18, 19)),
+    (8, (20, 21, 22)), (9, (23, 24)), (10, (25, 26)),
+    (11, (27, 28, 29, 30, 31, 32))):
+    catalog.index(Relation(token, children))
+
+'''
+
+#                                  _____________0_____________
+#                                 /                           \
+#                        ________1_______                ______2____________
+#                       /                \              /          |        \
+#                ______3_____            _4_          10       ____11_____   12
+#               /            \          /   \         / \     / /  |  \ \ \
+#       _______5_______       6       13     14     25  26  27 28 29 30 31 32
+#      /       |       \     / \
+#    _7_      _8_       9   15 16
+#   / | \    / | \     / \
+# 17 18 19  20 21 22  23 24
+
+relation_index_setup = brute_setup + '''
+import zc.relation.searchindex
+catalog.addSearchIndex(
+    zc.relation.searchindex.TransposingTransitive('token', 'children'))
+'''
+
+value_index_setup = brute_setup + '''
+import zc.relation.searchindex
+catalog.addSearchIndex(
+    zc.relation.searchindex.TransposingTransitive(
+        'token', 'children', names=('children',)))
+'''
+
+relations_run_template = '''
+res = list(catalog.findRelationTokens({'token': %d}))
+'''
+
+value_run_template = '''
+res = list(catalog.findValueTokens('children', {'token': %d}))
+'''
+
+canfind_run_template = '''
+res = catalog.canFind({'token': %d}, targetQuery={'children': 23})
+'''
+
+options = (9,7,5,3,1,0)
+
+runs = 10000
+
+control = timeit.Timer('res = catalog.__len__()\nlist()', brute_setup)
+control_result = min(control.repeat(3, runs))
+d = [('control_result', control_result)]
+
+for template in (relations_run_template, value_run_template,
+                 canfind_run_template):
+    for o in options:
+        run = template % (o,)
+        # verify we get the same results
+        brute_globs = {}
+        relation_globs = {}
+        value_globs = {}
+        exec brute_setup + run in brute_globs
+        exec relation_index_setup + run in relation_globs
+        exec value_index_setup + run in value_globs
+        brute = brute_globs['res']
+        relation = relation_globs['res']
+        value = value_globs['res']
+        canfind = template == canfind_run_template
+        if not canfind:
+            brute.sort()
+        assert brute == relation == value, '%s: %r, %r, %r' % (
+            run, brute, relation, value)
+        # end verify
+        d.append('**** %s ****' % (run.strip(),))
+        d.append('**** %s ****' % (brute,))
+        if not canfind:
+            # show how long it takes to make the generator
+            altered = run.replace('list(', '', 1)
+            altered = altered.replace(')', '', 1)
+            d.append((
+                'brute_generate',
+                min(timeit.Timer(
+                    altered, brute_setup).repeat(3, runs)) - control_result))
+        d.append((
+            'brute',
+            min(timeit.Timer(
+                run, brute_setup).repeat(3, runs)) - control_result))
+        d.append((
+            'relation',
+            min(timeit.Timer(
+                run, relation_index_setup).repeat(3, runs)) - control_result))
+        d.append((
+            'value',
+            min(timeit.Timer(
+                run, value_index_setup).repeat(3, runs)) - control_result))
+        
+    d.append('------------------')
+    d.append('------------------')
+
+pprint.pprint(d)