[Checkins] SVN: Sandbox/poster/zc.relation/trunk/ initial commit
Gary Poster
gary at zope.com
Thu Aug 2 21:34:35 EDT 2007
Log message for revision 78553:
initial commit
Changed:
_U Sandbox/poster/zc.relation/trunk/
A Sandbox/poster/zc.relation/trunk/README.txt
A Sandbox/poster/zc.relation/trunk/bootstrap.py
A Sandbox/poster/zc.relation/trunk/buildout.cfg
A Sandbox/poster/zc.relation/trunk/setup.py
A Sandbox/poster/zc.relation/trunk/src/
A Sandbox/poster/zc.relation/trunk/src/zc/
A Sandbox/poster/zc.relation/trunk/src/zc/__init__.py
A Sandbox/poster/zc.relation/trunk/src/zc/relation/
A Sandbox/poster/zc.relation/trunk/src/zc/relation/CHANGES.txt
A Sandbox/poster/zc.relation/trunk/src/zc/relation/README.txt
A Sandbox/poster/zc.relation/trunk/src/zc/relation/__init__.py
A Sandbox/poster/zc.relation/trunk/src/zc/relation/catalog.py
A Sandbox/poster/zc.relation/trunk/src/zc/relation/example.txt
A Sandbox/poster/zc.relation/trunk/src/zc/relation/interfaces.py
A Sandbox/poster/zc.relation/trunk/src/zc/relation/optimization.txt
A Sandbox/poster/zc.relation/trunk/src/zc/relation/queryfactory.py
A Sandbox/poster/zc.relation/trunk/src/zc/relation/searchindex.py
A Sandbox/poster/zc.relation/trunk/src/zc/relation/searchindex.txt
A Sandbox/poster/zc.relation/trunk/src/zc/relation/tests.py
A Sandbox/poster/zc.relation/trunk/src/zc/relation/tokens.txt
-=-
Property changes on: Sandbox/poster/zc.relation/trunk
___________________________________________________________________
Name: svn:ignore
+ develop-eggs
bin
parts
.installed.cfg
build
dist
eggs
Added: Sandbox/poster/zc.relation/trunk/README.txt
===================================================================
--- Sandbox/poster/zc.relation/trunk/README.txt (rev 0)
+++ Sandbox/poster/zc.relation/trunk/README.txt 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1 @@
+Index intransitive and transitive n-ary relationships.
Property changes on: Sandbox/poster/zc.relation/trunk/README.txt
___________________________________________________________________
Name: svn:eol-style
+ native
Added: Sandbox/poster/zc.relation/trunk/bootstrap.py
===================================================================
--- Sandbox/poster/zc.relation/trunk/bootstrap.py (rev 0)
+++ Sandbox/poster/zc.relation/trunk/bootstrap.py 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,52 @@
+##############################################################################
+#
+# Copyright (c) 2006 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Bootstrap a buildout-based project
+
+Simply run this script in a directory containing a buildout.cfg.
+The script accepts buildout command-line options, so you can
+use the -c option to specify an alternate configuration file.
+
+$Id: bootstrap.py 69908 2006-08-31 21:53:00Z jim $
+"""
+
+import os, shutil, sys, tempfile, urllib2
+
+tmpeggs = tempfile.mkdtemp()
+
+ez = {}
+exec urllib2.urlopen('http://peak.telecommunity.com/dist/ez_setup.py'
+ ).read() in ez
+ez['use_setuptools'](to_dir=tmpeggs, download_delay=0)
+
+import pkg_resources
+
+cmd = 'from setuptools.command.easy_install import main; main()'
+if sys.platform == 'win32':
+ cmd = '"%s"' % cmd # work around spawn lamosity on windows
+
+ws = pkg_resources.working_set
+assert os.spawnle(
+ os.P_WAIT, sys.executable, sys.executable,
+ '-c', cmd, '-mqNxd', tmpeggs, 'zc.buildout',
+ dict(os.environ,
+ PYTHONPATH=
+ ws.find(pkg_resources.Requirement.parse('setuptools')).location
+ ),
+ ) == 0
+
+ws.add_entry(tmpeggs)
+ws.require('zc.buildout')
+import zc.buildout.buildout
+zc.buildout.buildout.main(sys.argv[1:] + ['bootstrap'])
+shutil.rmtree(tmpeggs)
Added: Sandbox/poster/zc.relation/trunk/buildout.cfg
===================================================================
--- Sandbox/poster/zc.relation/trunk/buildout.cfg (rev 0)
+++ Sandbox/poster/zc.relation/trunk/buildout.cfg 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,15 @@
+[buildout]
+develop = .
+parts = test py
+
+find-links = http://download.zope.org/distribution/
+
+[test]
+recipe = zc.recipe.testrunner
+eggs = zc.relation
+defaults = "--tests-pattern [fn]?tests --exit-with-status".split()
+
+[py]
+recipe = zc.recipe.egg
+eggs = zc.relation
+interpreter = py
Property changes on: Sandbox/poster/zc.relation/trunk/buildout.cfg
___________________________________________________________________
Name: svn:eol-style
+ native
Added: Sandbox/poster/zc.relation/trunk/setup.py
===================================================================
--- Sandbox/poster/zc.relation/trunk/setup.py (rev 0)
+++ Sandbox/poster/zc.relation/trunk/setup.py 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,29 @@
+from setuptools import setup, find_packages
+
+setup(
+ name="zc.relation",
+ version="1.0",
+ packages=find_packages('src'),
+ include_package_data=True,
+ package_dir= {'':'src'},
+
+ namespace_packages=['zc'],
+
+ zip_safe=False,
+ author='Zope Project',
+ author_email='zope3-dev at zope.org',
+ description=open("README.txt").read(),
+ long_description=(
+ open('src/zc/relation/CHANGES.txt').read() +
+ '\n========\nOverview\n========\n\n' +
+ open("src/zc/relation/README.txt").read()),
+ license='ZPL 2.1',
+ keywords="zope zope3",
+ install_requires=[
+ 'ZODB3 >= 3.8dev',
+ 'zope.interface',
+ 'setuptools',
+
+ 'zope.testing',
+ ],
+ )
Added: Sandbox/poster/zc.relation/trunk/src/zc/__init__.py
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/__init__.py (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/__init__.py 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,7 @@
+# this is a namespace package
+try:
+ import pkg_resources
+ pkg_resources.declare_namespace(__name__)
+except ImportError:
+ import pkgutil
+ __path__ = pkgutil.extend_path(__path__, __name__)
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/CHANGES.txt
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/CHANGES.txt (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/CHANGES.txt 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,154 @@
+=======
+Changes
+=======
+
+1.0
+===
+
+NOTE: The following is science fiction in the zc.relationship support so far.
+The change is upcoming and will be part of zc.relationship 2.0 (which is in
+fact intended to be essentially-to-completely backward compatible with
+zc.relationship 1.x).
+
+(dev version; supports Zope 3.4/Zope 2.11/ZODB 3.8)
+
+This is the initial release of the zc.relation package. However, it
+represents a refactoring of another package, zc.relationship. This
+package contains only a modified version of the relation(ship) index,
+now called a catalog. The refactored version of zc.relationship index
+relies on (subclasses) this catalog. zc.relationship also maintains a
+backwards-compatible subclass.
+
+This package only relies on the ZODB, zope.interface, and zope.testing
+software, and can be used inside or outside of a standard ZODB database.
+The software does have to be there, though (the package relies heavily
+on the ZODB BTrees package).
+
+If you would like to switch a legacy zc.relationship index to a
+zc.relation catalog, try this trick in your generations script.
+Assuming the old index is `old`, the following line should create
+a new zc.relation catalog with your legacy data:
+
+ >>> new = old.copy(zc.relation.Catalog)
+
+Why is the same basic data structure called a catalog now? Because I
+exposed the ability to mutate the data structure, and what you are really
+adding and removing are indexes. It didn't make sense to put an index in
+an index, but it does make sense to put an index in a catalog. Thus, a
+name change was born.
+
+The catalog in this package has several incompatibilities from the earlier
+zc.relationship index, and many new features. The zc.relationship package
+maintains a backwards-compatible subclass. The following discussion
+compares the zc.relation catalog with the zc.relationship 1.x index.
+
+Incompatibilities with zc.relationship 1.x index
+------------------------------------------------
+
+The two big changes are that method names now refer to `Relation` rather
+than `Relationship`; and the catalog is instantiated slightly differently
+from the index. A few other changes are worth your attention. The
+following list attempts to highlight all incompatibilities.
+
+:Big incompatibilities:
+
+ - `findRelationshipTokenSet` and `findValueTokenSet` are renamed, with
+ some slightly different semantics, as `getRelationTokens` and
+ `getValueTokens`. The exact same result as
+ `findRelationTokenSet(query)` can be obtained with
+ `findRelationTokens(query, 1)` (where 1 is maxDepth). The same
+ result as `findValueTokenSet(reltoken, name)` can be obtained with
+ `findValueTokens(name, {None: reltoken}, 1)`.
+
+ - `findRelations` replaces `findRelatonships`. The new method will use
+ the defaultTransitiveQueriesFactory if it is set and maxDepth is not 1.
+ It shares the call signature of `findRelationChains`.
+
+ - `isLinked` is now `canFind`.
+
+ - The catalog instantiation arguments have changed from the old index.
+
+ * `loadRel` and `dumpRel` are now required arguments for instantiation.
+
+ * The only other optional arguments are relFamily and family.
+ Specify what elements to index with `addValueIndex`
+
+ * Note also that `addValueIndex` defaults to no load and dump function,
+ unlike the old instantiation options.
+
+ - query factories are different. See IQueryFactory in the interfaces.
+
+ * they first get (query, catalog, cache) and then return a getQueries
+ callable that gets relchains and yields queries; OR None if they
+ don't match.
+
+ * They must also handle an empty relchain. Typically this should
+ return the original query, but may also be used to mutate the
+ original query.
+
+ * They are no longer thought of as transitive query factories, but as
+ general query mutators.
+
+:Medium:
+
+ - The catalog no longer inherits from
+ zope.app.container.contained.Contained.
+
+ - The index requires ZODB 3.8 or higher.
+
+:Small:
+
+ - `deactivateSets` is no longer an instantiation option (it was broken
+ because of a ZODB bug anyway, as had been described in the
+ documentation).
+
+Changes and new features
+------------------------
+
+- The catalog now offers the ability to index certain
+ searches. The indexes must be explicitly instantiated and registered
+ you want to optimize. This can be used when searching for values, when
+ searching for relations, or when determining if two objects are
+ linked. It cannot be used for relation chains. Requesting an index
+ has the usual trade-offs of greater storage space and slower write
+ speed for faster search speed. Registering a search index is done
+ after instantiation time; you can iteratate over the current settings
+ used, and remove them. (The code path expects to support legacy
+ zc.relationship index instances for all of these APIs.)
+
+- You can now specify new values after the catalog has been created, iterate
+ over the settings used, and remove values.
+
+- The catalog has a copy method, to quickly make new copies without actually
+ having to reindex the relations.
+
+- query arguments can now specify multiple values for a given name by
+ using zc.relation.catalog.any(1, 2, 3, 4) or
+ zc.relation.catalog.Any((1, 2, 3, 4)).
+
+- The catalog supports specifying indexed values by passing callables rather
+ than interface elements (which are also still supported).
+
+- `findRelations` and new method `findRelationTokens` can find
+ relations transitively and intransitively. `findRelationTokens`
+ when used intransitively repeats the legacy zc.relationship index
+ behavior of `findRelationTokenSet`.
+ (`findRelationTokenSet` remains in the API, not deprecated, a companion
+ to `findValueTokenSet`.)
+
+- in findValues and findValueTokens, `query` argument is now optional. If
+ the query evaluates to False in a boolean context, all values, or value
+ tokens, are returned. Value tokens are explicitly returned using the
+ underlying BTree storage. This can then be used directly for other BTree
+ operations.
+
+- Completely new docs. Unfortunately, still really not good enough.
+
+- The package has drastically reduced direct dependecies from zc.relationship:
+ it is now more clearly a ZODB tool, with no other Zope dependencies than
+ zope.testing and zope.interface.
+
+- Listeners allow objects to listen to messages from the catalog (which can
+ be used directly or, for instance, to fire off events).
+
+- You can provide
Property changes on: Sandbox/poster/zc.relation/trunk/src/zc/relation/CHANGES.txt
___________________________________________________________________
Name: svn:eol-style
+ native
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/README.txt
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/README.txt (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/README.txt 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,1045 @@
+================
+Relation Catalog
+================
+
+.. contents::
+ :local:
+
+Overview
+========
+
+The relation catalog can be used to optimize intransitive and transitive
+searches for N-ary relations of finite, preset dimensions; can be used
+in the ZODB or standalone (though it uses ZODB classes--BTrees and
+persistent.Persistent--so the ZODB software must be present); and can be
+used with variable definitions of transitive behavior. It is a generic,
+relatively policy-free tool. It is expected to be used primarily as an
+engine for more specialized and constrained tools and APIs. Three such
+tools are zc.relationship containers, plone.relations containers, and
+zc.vault. The documents in the package, including this one, describe
+other possible uses.
+
+The relation catalog uses the model that relations are full-fledged
+objects that are indexed for optimized searches. It takes a very
+precise view of the world: instantiation requires multiple arguments
+specifying the configuration; and using the catalog requires that you
+acknowledge that the relations and their associated indexed values are
+usually tokenized within the catalog, via tokenizers and resolvers you
+specify. This precision trades some ease-of-use for the possibility of
+flexibility, power, and efficiency. That said, the catalog's API is
+intended to be consistent, and to largely adhere to "there's only one
+way to do it".
+
+Setting Up a Relation Catalog
+=============================
+
+Creating the Catalog
+--------------------
+
+Imagine a two way relation from one value to another. Let's say that we
+are modeling a relation of people to their supervisors: an employee may
+have a single supervisor. For this first example, the relation between
+employee and supervisor will be intrinsic: the employee has a pointer to
+the supervisor, and the employee object itself represents the relation.
+
+Let's say further, for simplicity, that employee names are unique and
+can be used to represent employees. We can use names as our "tokens".
+Tokens are similar to the primary key in a relational database, or in
+intid or keyreference in Zope 3--some way to uniquely identify an
+object, which sorts reliably and can be resolved to the object given the
+right context. For speed, integers make the best tokens; followed by other
+immutables like strings; followed by non-persistent objects; followed by
+persistent objects.
+
+Here is our toy `Employee` example class.
+
+ >>> employees = {} # we'll use this to resolve the "name" tokens
+ >>> class Employee(object):
+ ... def __init__(self, name, supervisor=None):
+ ... if name in employees:
+ ... raise ValueError('employee with same name already exists')
+ ... self.name = name # expect this to be readonly
+ ... self.supervisor = supervisor
+ ... employees[name] = self
+ ... # the next parts just make the tests prettier
+ ... def __repr__(self):
+ ... return '<Employee instance "' + self.name + '">'
+ ... def __cmp__(self, other):
+ ... # pukes if other doesn't have name
+ ... return cmp(self.name, other.name)
+ ...
+
+So, we need to define how to turn employees into their tokens.
+
+The function to tokenize, or "dump", gets the object to be tokenized;
+and, because it helps sometimes to provide context, the catalog; and a
+dictionary that will be shared for a given search. The dictionary can
+be used as a cache for optimizations (for instance, to stash a utility
+that you looked up).
+
+For this example, our function is trivial: we said the token would be
+the employee's name.
+
+ >>> def dumpEmployees(emp, catalog, cache):
+ ... return emp.name
+ ...
+
+If you store the relation catalog persistently (e.g., in the ZODB) be aware
+that the callables you provide must be picklable--a module-level function,
+for instance.
+
+We also need a way to turn tokens into employees, or "load".
+
+The "load" functions get the token to be resolved; the catalog, for
+context; and a dict cache, for optimizations of subsequent calls.
+
+You might have noticed in our Employee __init__ that we keep a mapping
+of name to object in the `employees` global dict (defined right above
+the class definition). We'll use that for resolving the tokens.
+
+ >>> def loadEmployees(token, catalog, cache):
+ ... return employees[token]
+ ...
+
+Now we know enough to get started with a catalog. We'll instantiate it
+by specifying how to tokenize relations, and what kind of BTree modules
+should be used to hold the tokens.
+
+ >>> import zc.relation.catalog
+ >>> import BTrees
+ >>> catalog = zc.relation.catalog.Catalog(dumpEmployees, loadEmployees,
+ ... relFamily=BTrees.family32.OI)
+
+[#verifyObjectICatalog]_ [#legacy]_ We can't do very much searching with it so far though,
+because the catalog doesn't have any indexes. In this example, the
+relationship itself represents the employee, so we won't need to index
+that separately. But we do need a way to tell the catalog how to find
+the other end of the relationship, the supervisor.
+
+You can specify this to the catalog with a zope.interface attribute or
+method, or with a callable. We'll use a callable for now. It takes the
+indexed relationship and the catalog for context.
+
+ >>> def supervisor(emp, catalog):
+ ... return emp.supervisor # None or another employee
+ ...
+
+Then we'll use that to tell the catalog to add an index for
+`supervisor`. We'll also specify how to tokenize and store those
+values: in this case, the same way as the relations themselves. We
+could also specify the name to call the index, but it will default to
+the __name__ of the function (or interface element), which will work
+just fine for us now.
+
+ >>> catalog.addValueIndex(supervisor, dumpEmployees, loadEmployees,
+ ... btree=BTrees.family32.OI)
+
+Now we have an index [#addValueIndexExceptions]_.
+
+ >>> [info['name'] for info in catalog.iterValueIndexInfo()]
+ ['supervisor']
+
+Adding Relations
+----------------
+
+Now let's create a few employees. All but one will have supervisors.
+If you recall our toy Employee class, the first argument to the
+constructor is the employee name (and therefore the token), and the
+optional second argument is the supervisor.
+
+ >>> a = Employee('Alice')
+ >>> b = Employee('Betty', a)
+ >>> c = Employee('Chuck', a)
+ >>> d = Employee('Diane', b)
+ >>> e = Employee('Edgar', b)
+ >>> f = Employee('Frank', c)
+ >>> g = Employee('Galyn', c)
+ >>> h = Employee('Howie', d)
+
+Here is a diagram of the hierarchy.
+
+::
+
+ Alice
+ __/ \__
+ Betty Chuck
+ / \ / \
+ Diane Edgar Frank Galyn
+ |
+ Howie
+
+Let's tell the catalog about the relations.
+
+ >>> for emp in (a,b,c,d,e,f,g,h):
+ ... catalog.index(emp)
+ ...
+
+Searching
+=========
+
+Queries, `findRelations`, and special query values
+--------------------------------------------------
+
+So who works for Alice? That means we want to get the relations--the
+employees--with a `supervisor` of Alice.
+
+The heart of a question to the catalog is a query. A query is spelled
+as a dictionary. The main idea is simply that keys in a dictionary
+specify index names, and the values specify the constraints.
+
+The values in a query are always expressed with tokens. The catalog has
+several helpers to make this less onerous, but for now let's take
+advantage of the fact that our tokens are easily comprehensible.
+
+ >>> sorted(catalog.findRelations({'supervisor': 'Alice'}))
+ [<Employee instance "Betty">, <Employee instance "Chuck">]
+
+Alice is the direct (intransitive) boss of Betty and Chuck.
+
+What if you want to ask "who doesn't report to anyone?" Then you want to
+ask for a relation in which the supervisor is None.
+
+ >>> list(catalog.findRelations({'supervisor': None}))
+ [<Employee instance "Alice">]
+
+Alice is the only employee who doesn't report to anyone.
+
+What if you want to ask "who reports to Diane or Chuck?" Then you use the
+zc.relation `Any` class or `any` function to pass the multiple values.
+
+ >>> sorted(catalog.findRelations(
+ ... {'supervisor': zc.relation.catalog.any('Diane', 'Chuck')}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Employee instance "Frank">, <Employee instance "Galyn">,
+ <Employee instance "Howie">]
+
+Frank, Galyn, and Howie each report to either Diane or Chuck.[#any]_
+
+It is worth a quick mention here that the catalog always has parallel
+search methods, one for finding objects, as seen above, and one for
+finding tokens. Finding tokens can be much more efficient, especially if
+the result from the relation catalog is just one step along the
+path of finding your desired result. But finding objects is simpler for
+some common cases. Here's a quick example of the same query, getting
+tokens rather than objects.
+
+ >>> sorted(catalog.findRelationTokens({'supervisor': 'Alice'}))
+ ['Betty', 'Chuck']
+ >>> sorted(catalog.findRelationTokens({'supervisor': None}))
+ ['Alice']
+ >>> sorted(catalog.findRelationTokens(
+ ... {'supervisor': zc.relation.catalog.any('Diane', 'Chuck')}))
+ ['Frank', 'Galyn', 'Howie']
+
+`findValues` and the `None` query key
+-------------------------------------
+
+So how do we find who an employee's supervisor is? Well, in this case,
+look at the attribute on the employee! If you can use an attribute that
+will usually be a win in the ZODB.
+
+ >>> h.supervisor
+ <Employee instance "Diane">
+
+Again, as we mentioned at the start of this first example, the knowledge
+of a supervisor is "intrinsic" to the employee instance. It is
+possible, and even easy, to ask the catalog this kind of question, but
+the catalog syntax is more geared to "extrinsic" relations, such as the
+one from the supervisor to the employee: the connection between a
+supervisor object and its employees is extrinsic to the supervisor, so
+you actually might want a catalog to find it!
+
+However, we will explore the syntax very briefly, because it introduces an
+important pair of search methods, and because it is a a stepping stone
+to our first transitive search.
+
+So, o relaton catalog, who is Howie's supervisor?
+
+To ask this question we want to get the indexed values off of the
+relations: `findValues`. In its simplest form, the arguments are the
+index name of the values you want, and a query to find the relationships
+that have the desired values.
+
+What about the query? Above, we noted that the keys in a query are the
+names of the indexes to search. However, in this case, we don't want to
+search one or more indexes for matching relationships, as usual, but
+actually specify a relationship: Howie.
+
+We do not have an index name. The key, then, is `None`: no index name,
+just relationship tokens. For our current example, that would mean the
+query is `{None: 'Howie'}`.
+
+ >>> list(catalog.findValues('supervisor', {None: 'Howie'}))[0]
+ <Employee instance "Diane">
+
+Congratulations, you just found an obfuscated and comparitively
+inefficient way to write `howie.supervisor`! [#intrinsic_search]_
+[#findValuesExceptions]_
+
+Slightly more usefully, you can use other query keys along with None.
+
+ >>> sorted(catalog.findRelationTokens(
+ ... {None: zc.relation.catalog.any('Betty', 'Alice', 'Chuck'),
+ ... 'supervisor': 'Alice'}))
+ ['Betty', 'Chuck']
+
+Transitive Searching, Query Factories, and `maxDepth`
+----------------------------------------------------------------
+
+What about transitive searching? Well, you need to tell the catalog how to
+walk the tree. In simple (and very common) cases like this, the
+zc.relation.queryfactory.TransposingTransitive will do the trick.
+
+A transitive query factory is just a callable that the catalog uses to
+ask "I got this query, and here are the results I found. I'm supposed to
+walk another step transitively, so what query should I search for next?"
+Writing a factory is more complex than we want to talk about right now,
+but using the TransposingTransitiveQueryFactory is easy. You just tell
+it the two query names it should transpose for walking in either
+direction.
+
+For instance, here we just want to tell the factory to transpose the two
+keys we've used, None and 'supervisor'. Let's make a factory, use it
+in a query for a couple of transitive searches, and then, if you want,
+you can read through a footnote to talk through what is happening.
+
+Here's the factory.
+
+ >>> import zc.relation.queryfactory
+ >>> factory = zc.relation.queryfactory.TransposingTransitive(
+ ... None, 'supervisor')
+
+Now `factory` is just a callable. Let's let it help answer a couple of
+questions.
+
+Who are all of Howie's supervisors transitively (this looks up in the
+diagram)?
+
+ >>> list(catalog.findValues('supervisor', {None: 'Howie'},
+ ... queryFactory=factory))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Employee instance "Diane">, <Employee instance "Betty">,
+ <Employee instance "Alice">]
+
+Who are all of the people Betty supervises transitively, breadth first (this
+looks down in the diagram)?
+
+ >>> people = list(catalog.findRelations(
+ ... {'supervisor': 'Betty'}, queryFactory=factory))
+ >>> sorted(people[:2])
+ [<Employee instance "Diane">, <Employee instance "Edgar">]
+ >>> people[2]
+ <Employee instance "Howie">
+
+Yup, that looks right. So how did that work? If you care, read this
+footnote. [#I_care]_
+
+This transitive factory is really the only transitive factory you would
+want for this particular catalog, so it probably is safe to wire it in
+as a default. You can add multiple query factories to match different
+queries using `addDefaultQueryFactory`.
+
+ >>> catalog.addDefaultQueryFactory(factory)
+
+Now all searches are transitive by default.
+
+ >>> list(catalog.findValues('supervisor', {None: 'Howie'}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Employee instance "Diane">, <Employee instance "Betty">,
+ <Employee instance "Alice">]
+ >>> people = list(catalog.findRelations({'supervisor': 'Betty'}))
+ >>> sorted(people[:2])
+ [<Employee instance "Diane">, <Employee instance "Edgar">]
+ >>> people[2]
+ <Employee instance "Howie">
+
+We can force a non-transitive search, or a specific search depth, with
+maxDepth [#needs_a_transitive_queries_factory]_.
+
+ >>> list(catalog.findValues('supervisor', {None: 'Howie'}, maxDepth=1))
+ [<Employee instance "Diane">]
+ >>> sorted(catalog.findRelations({'supervisor': 'Betty'}, maxDepth=1))
+ [<Employee instance "Diane">, <Employee instance "Edgar">]
+
+[#maxDepthExceptions]_ We'll introduce some other available search
+arguments later in this document and in other documents. It's important
+to note that *all search methods share the same arguments as
+`findRelations`*. `findValues` and `findValueTokens` only add the
+initial argument of specifying the desired value.
+
+We've looked at two search methods so far: the `findValues` and
+`findRelations` methods help you ask what is related. But what if you
+want to know *how* things are transitively related?
+
+`findRelationshipChains` and `targetQuery`
+------------------------------------------
+
+Another search method, `findRelationChains`, helps you discover how
+things are transitively related.
+
+The method name says "find relation chains". But what is a "relation
+chain"? In this API, it is a transitive path of relations. For
+instance, what's the chain of command above Howie? `findRelationChains`
+will return each unique path.
+
+ >>> list(catalog.findRelationChains({None: 'Howie'}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [(<Employee instance "Howie">,),
+ (<Employee instance "Howie">, <Employee instance "Diane">),
+ (<Employee instance "Howie">, <Employee instance "Diane">,
+ <Employee instance "Betty">),
+ (<Employee instance "Howie">, <Employee instance "Diane">,
+ <Employee instance "Betty">, <Employee instance "Alice">)]
+
+Look at that result carefully. Notice that the result is an iterable of
+tuples. Each tuple is a unique chain, which may be a part of a
+subsequent chain. In this case, the last chain is the longest and the
+most comprehensive.
+
+What if we wanted to see all the paths from Alice? That will be one
+chain for each supervised employee, because it shows all possible paths.
+
+ >>> sorted(catalog.findRelationChains(
+ ... {'supervisor': 'Alice'}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [(<Employee instance "Betty">,),
+ (<Employee instance "Betty">, <Employee instance "Diane">),
+ (<Employee instance "Betty">, <Employee instance "Diane">,
+ <Employee instance "Howie">),
+ (<Employee instance "Betty">, <Employee instance "Edgar">),
+ (<Employee instance "Chuck">,),
+ (<Employee instance "Chuck">, <Employee instance "Frank">),
+ (<Employee instance "Chuck">, <Employee instance "Galyn">)]
+
+That's all the paths--all the chains--from Alice. We sorted the results,
+but normaly they would be breadth first.
+
+But what if we wanted to just find the paths from one query result to
+another query result--say, we wanted to know the chain of command from Alice
+down to Howie? Then we can specify a `targetQuery` that specifies the
+characteristics of our desired end point (or points).
+
+ >>> list(catalog.findRelationChains(
+ ... {'supervisor': 'Alice'}, targetQuery={None: 'Howie'}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [(<Employee instance "Betty">, <Employee instance "Diane">,
+ <Employee instance "Howie">)]
+
+So, Betty supervises Diane, who supervises Howie.
+
+Note that `targetQuery` now joins `maxDepth` in our collection of shared
+search arguments that we have introduced.
+
+`filter` and `targetFilter`
+---------------------------
+
+We can take a quick look now at the last of the two shared search arguments:
+filter and targetFilter. These two are similar in that they both are
+callables that can approve or reject given relations in a search based on
+whatever logic you can code. They differ in that `filter` stops any further
+transitive searches from the relation, while `targetFilter` merely omits the
+given result but allows further search from it. Like `targetQuery`, then,
+`targetFilter` is good when you want to specify the other end of a path.
+
+As an example, let's say we only want to return female employees.
+
+ >>> female_employees = ('Alice', 'Betty', 'Diane', 'Galyn')
+ >>> def female_filter(relchain, query, catalog, cache):
+ ... return relchain[-1] in female_employees
+ ...
+
+Here are all the female employees supervised by Alice transitively, using
+`targetFilter`.
+
+ >>> list(catalog.findRelations({'supervisor': 'Alice'},
+ ... targetFilter=female_filter))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Employee instance "Betty">, <Employee instance "Diane">,
+ <Employee instance "Galyn">]
+
+Here are all the female employees supervised by Chuck.
+
+ >>> list(catalog.findRelations({'supervisor': 'Chuck'},
+ ... targetFilter=female_filter))
+ [<Employee instance "Galyn">]
+
+The same method used as a filter will only return females directly
+supervised by other females--not Galyn, in this case.
+
+ >>> list(catalog.findRelations({'supervisor': 'Alice'},
+ ... filter=female_filter))
+ [<Employee instance "Betty">, <Employee instance "Diane">]
+
+These can be combined with one another, and with the other search
+arguments [#filter]_.
+
+Search indexes
+--------------
+
+Without setting up any additional indexes, the transitive behavior of
+the `findRelations` and `findValues` methods essentially relies on the
+brute force searches of `findRelationChains`. Results are iterables
+that are gradually computed. For instance, let's repeat the question
+"Whom does Betty supervise?". Notice that `res` first populates a list
+with three members, but then does not populate a second list. The
+iterator has been exhausted.
+
+ >>> res = catalog.findRelationTokens({'supervisor': 'Betty'})
+ >>> unindexed = sorted(res)
+ >>> len(unindexed)
+ 3
+ >>> len(list(res)) # iterator is exhausted
+ 0
+
+The brute force of this aproach can be sufficient in many cases, but
+sometimes speed for these searches is critical. In these cases, you can
+add a "search index". A search index speeds up the result of one or
+more precise searches by indexing the results. Search indexes can
+affect the results of searches with a queryFactory in `findRelations`,
+`findValues`, and the soon-to-be-introduced `canFind`, but they do not
+affect `findRelationChains`.
+
+The zc.relation package currently includes two kinds of search indexes,
+one for indexing relation searches and one for indexing value searches.
+They only help for searches with an unlimited maxDepth (the default).
+Other search index implementations and approaches may be added in the
+future.
+
+Here's a very brief example of adding a search index for the transitive
+searches seen above that specify a 'supervisor'.
+
+ >>> import zc.relation.searchindex
+ >>> catalog.addSearchIndex(
+ ... zc.relation.searchindex.TransposingTransitive(
+ ... 'supervisor', None))
+
+The `None` describes how to walk back up the chain. Search indexes are
+explained in reasonable detail in searchindex.txt.
+
+Now that we have added the index, we can search again. The result this
+time is already computed, so, at least when you ask for tokens, it
+is repeatable.
+
+ >>> res = catalog.findRelationTokens({'supervisor': 'Betty'})
+ >>> len(list(res))
+ 3
+ >>> len(list(res))
+ 3
+ >>> sorted(res) == unindexed
+ True
+
+Note that the breadth-first sorting is lost when an index is used [#updates]_.
+
+Transitive cycles (and updating and removing relations)
+-------------------------------------------------------
+
+The transitive searches and the provided search indexes can handle
+cycles. Cycles are less likely in the current example than some others,
+but we can stretch the case a bit: imagine a "king in disguise", in
+which someone at the top works lower in the hierarchy. Perhaps Alice
+works for Zane, who works for Betty, who works for Alice. Artificial,
+but easy enough to draw::
+
+ ______
+ / \
+ / Zane
+ / |
+ / Alice
+ / __/ \__
+ / Betty__ Chuck
+ \-/ / \ / \
+ Diane Edgar Frank Galyn
+ |
+ Howie
+
+Easy to create too.
+
+ >>> z = Employee('Zane', b)
+ >>> a.supervisor = z
+
+Now we have a cycle. Of course, we have not yet told the catalog about it.
+`index` can be used both to reindex Alice and index Zane.
+
+ >>> catalog.index(a)
+ >>> catalog.index(z)
+
+Now, if we ask who works for Betty, we get the entire tree. (We'll ask
+for tokens, just so that the result is smaller to look at.) [#same_set]_
+
+ >>> sorted(catalog.findRelationTokens({'supervisor': 'Betty'}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ ['Alice', 'Betty', 'Chuck', 'Diane', 'Edgar', 'Frank', 'Galyn', 'Howie',
+ 'Zane']
+
+If we ask for the supervisors of Frank, it will include Betty.
+
+ >>> list(catalog.findValueTokens('supervisor', {None: 'Frank'}))
+ ['Chuck', 'Alice', 'Zane', 'Betty']
+
+Paths returned by `findRelationChains` are marked with special interfaces, and
+special metadata, to show the chain.
+
+ >>> res = list(catalog.findRelationChains({None: 'Frank'}))
+ >>> len(res)
+ 5
+ >>> import zc.relation.interfaces
+ >>> [zc.relation.interfaces.ICircularRelationPath.providedBy(r)
+ ... for r in res]
+ [False, False, False, False, True]
+
+Here's the last chain:
+
+ >>> res[-1] # doctest: +NORMALIZE_WHITESPACE
+ cycle(<Employee instance "Frank">, <Employee instance "Chuck">,
+ <Employee instance "Alice">, <Employee instance "Zane">,
+ <Employee instance "Betty">)
+
+The chain's 'cycled' attribute has a list of queries that create a cycle.
+If you run the query, or queries, you see where the cycle would
+restart--where the path would have started to overlap. Sometimes the query
+results will include multiple cycles, and some paths that are not cycles.
+In this case, there's only a single cycled query, which results in a single
+cycled relation.
+
+ >>> len(res[4].cycled)
+ 1
+
+ >>> list(catalog.findRelations(res[4].cycled[0], maxDepth=1))
+ [<Employee instance "Alice">]
+
+
+To remove this craziness [#reverse_lookup]_, we can unindex Zane, and change
+and reindex Alice.
+
+ >>> a.supervisor = None
+ >>> catalog.index(a)
+
+ >>> list(catalog.findValueTokens('supervisor', {None: 'Frank'}))
+ ['Chuck', 'Alice']
+
+ >>> catalog.unindex(z)
+
+ >>> sorted(catalog.findRelationTokens({'supervisor': 'Betty'}))
+ ['Diane', 'Edgar', 'Howie']
+
+`canFind`
+---------
+
+We're to the last search method: `canFind`. We've gotten values and
+relations, but what if you simply want to know if there is any
+connection at all? For instance, is Alice a supervisor of Howie? Is
+Chuck? To answer these questions, you can use the `canFind` method
+combined with the `targetQuery` search argument.
+
+The `canFind` method takes the same arguments as findRelations. However,
+it simply returns a boolean about whether the search has any results. This
+is a convenience that also allows some extra optimizations.
+
+Does Betty supervise anyone?
+
+ >>> catalog.canFind({'supervisor': 'Betty'})
+ True
+
+What about Howie?
+
+ >>> catalog.canFind({'supervisor': 'Howie'})
+ False
+
+What about...Zane (not an employee)?
+
+ >>> catalog.canFind({'supervisor': 'Zane'})
+ False
+
+If we want to know if Alice or Chuck supervise Howie, then we want to specify
+characteristics of two points on a path. To ask a question about the other
+end of a path, use `targetQuery`.
+
+Is Alice a supervisor of Howie?
+
+ >>> catalog.canFind({'supervisor': 'Alice'}, targetQuery={None: 'Howie'})
+ True
+
+Is Chuck a supervisor of Howie?
+
+ >>> catalog.canFind({'supervisor': 'Chuck'}, targetQuery={None: 'Howie'})
+ False
+
+Is Howie Alice's employee?
+
+ >>> catalog.canFind({None: 'Howie'}, targetQuery={'supervisor': 'Alice'})
+ True
+
+Is Howie Chuck's employee?
+
+ >>> catalog.canFind({None: 'Howie'}, targetQuery={'supervisor': 'Chuck'})
+ False
+
+(Note that, if your relations describe a hierarchy, searching up a
+hierarchy is usually more efficient, so the second pair of questions is
+generally preferable to the first in that case. However, if you install
+a search index for either direction of the query, `canFind` can use
+it efficiently either way)
+
+Conclusion
+==========
+
+Review
+------
+
+That brings us to the end of our introductory example. Let's review, and
+then look at where you can go from here.
+
+- The relation catalog indexes relations. The relations can be one-way,
+ as we've seen here, with the employee relation pointing to the supervisor.
+ They can also be two-way, three-way, or N-way, as long as you tell the
+ catalog to index the different values.
+
+- Relations and their values are stored as tokens. Integers are the most
+ efficient tokens, but others can work find too. The index has methods to
+ help you work with tokens, but we did not explore them here.
+
+- Relations are indexed with `index`. We didn't look at this, but relations
+ do not have to have all indexed values, which means they can be a
+ heterogeneous set of relations, allowing indexing of interesting data
+ structures.
+
+- You add value indexes to relation catalogs to be able to search. Values
+ can be identified with callables (which we saw) or interface elements
+ (which we did not see).
+
+- As we've seen here, relations are assumed to be between single values.
+ However, they do not have to be, as can be seen elsewhere (hint: use
+ the `multiple` argument in `addValueIndex`).
+
+- You search transitively by using a query factory. The
+ zc.relation.queryfactory.TransposingTransitive is a good common case
+ factory that lets you walk up and down a hierarchy. Query factories can
+ do other tricks too, which we did not see.
+
+- You can set up searches indexes to speed up specific transitive searches.
+
+- We looked at the primary search methods that return objects as opposed to
+ tokens.
+
+ * `findRelations` returns relations that match the search.
+
+ * `findValues` returns values for the relations that match the search.
+
+ * `findRelationChains` returns the transitive paths that match the search.
+
+ * `canFind` returns a boolean about whether anything matches the search.
+
+ We also discussed the fact that users who want to get tokens back from
+ searches can. We did not give much of an example of this. The parallel
+ methods are `findRelationTokens`, `findValueTokens`, and
+ `findRelationTokenChains`.
+
+- Queries are formed with dicts. The keys are the names of the indexes
+you want to search, or, for the special case of precise relationships,
+None. The values are the tokens of the results you want to match; or
+None, indicating relations that have None as a value (or no values, if
+it is a multiple). Search values can use zc.relation.catalog.any or
+zc.relation.catalog.Any to specify multiple (non-None) results to match
+for a given key.
+
+As you can tell by the holes we mentioned in the overview, there's more
+to cover. Hopefully, this will be enough to get your feet wet, though,
+and maybe start to use the catalog.
+
+Next Steps
+----------
+
+If you want to read more, next steps depend on how you like to learn. Here
+are some of the other documents in the zc.relation package.
+
+:tokens.txt:
+ This document explores the details of tokens. All God's chillun
+ love tokens, at least if God's chillun are writing non-toy apps
+ using zc.relation. It includes discussion of the token helpers that
+ the catalog provides, how to use zope.app.intid-like registries with
+ zc.relation, how to use tokens to "join" query results reasonably
+ efficiently, and how to index joins.
+
+:searchindex.txt:
+ Queries factories and search indexes: from basics to nitty gritty details.
+
+:optimization.txt:
+ Best practices for optimizing your use of the relation catalog.
+
+:administration.txt:
+ Managing indexes and listeners.
+
+:interfaces.py:
+ The contract, for nuts and bolts.
+
+Finally, the truly die-hard might also be interested in the timeit
+directory, which holds scripts I ran to test assumptions and learn.
+
+.. ......... ..
+.. FOOTNOTES ..
+.. ......... ..
+
+.. [#verifyObjectICatalog] The catalog provides ICatalog.
+
+ >>> from zope.interface.verify import verifyObject
+ >>> import zc.relation.interfaces
+ >>> verifyObject(zc.relation.interfaces.ICatalog, catalog)
+ True
+
+.. [#legacy] Old instances of zc.relationship indexes, which in the newest
+ version subclass a zc.relationship Catalog, used to have a dict in an
+ internal data structure. We specify that here so that the code that
+ converts the dict to an OOBTree can have a chance to run.
+
+ >>> catalog._attrs = dict(catalog._attrs)
+
+.. [#addValueIndexExceptions] Adding a value index can generate several
+ exceptions.
+
+ You must supply both of dump and load or neither.
+
+ >>> catalog.addValueIndex(supervisor, dumpEmployees, None,
+ ... btree=BTrees.family32.OI, name='supervisor2')
+ Traceback (most recent call last):
+ ...
+ ValueError: either both of 'dump' and 'load' must be None, or neither
+
+ In this example, even if we fix it, we'll get an error, because we have
+ already indexed the supervisor function.
+
+ >>> catalog.addValueIndex(supervisor, dumpEmployees, loadEmployees,
+ ... btree=BTrees.family32.OI, name='supervisor2')
+ ... # doctest: +ELLIPSIS
+ Traceback (most recent call last):
+ ...
+ ValueError: ('element already indexed', <function supervisor at ...>)
+
+ You also can't add a different function under the same name.
+
+ >>> def supervisor2(emp, catalog):
+ ... return emp.supervisor # None or another employee
+ ...
+ >>> catalog.addValueIndex(supervisor2, dumpEmployees, loadEmployees,
+ ... btree=BTrees.family32.OI, name='supervisor')
+ ... # doctest: +ELLIPSIS
+ Traceback (most recent call last):
+ ...
+ ValueError: ('name already used', 'supervisor')
+
+ Finally, if your function does not have a __name__ and you do not provide
+ one, you may not add an index.
+
+ >>> class Supervisor3(object):
+ ... __name__ = None
+ ... def __call__(klass, emp, catalog):
+ ... return emp.supervisor
+ ...
+ >>> supervisor3 = Supervisor3()
+ >>> supervisor3.__name__
+ >>> catalog.addValueIndex(supervisor3, dumpEmployees, loadEmployees,
+ ... btree=BTrees.family32.OI)
+ ... # doctest: +ELLIPSIS
+ Traceback (most recent call last):
+ ...
+ ValueError: no name specified
+
+.. [#any] Any can be compared.
+
+ >>> zc.relation.catalog.any('foo', 'bar', 'baz')
+ <zc.relation.catalog.Any instance ('bar', 'baz', 'foo')>
+ >>> (zc.relation.catalog.any('foo', 'bar', 'baz') ==
+ ... zc.relation.catalog.any('bar', 'foo', 'baz'))
+ True
+ >>> (zc.relation.catalog.any('foo', 'bar', 'baz') !=
+ ... zc.relation.catalog.any('bar', 'foo', 'baz'))
+ False
+ >>> (zc.relation.catalog.any('foo', 'bar', 'baz') ==
+ ... zc.relation.catalog.any('foo', 'baz'))
+ False
+ >>> (zc.relation.catalog.any('foo', 'bar', 'baz') !=
+ ... zc.relation.catalog.any('foo', 'baz'))
+ True
+
+.. [#intrinsic_search] Here's the same with token results.
+
+ >>> list(catalog.findValueTokens('supervisor', {None: 'Howie'}))
+ ['Diane']
+
+ While we're down here in the footnotes, I'll mention that you can
+ search for relations that haven't been indexed.
+
+ >>> list(catalog.findRelationTokens({None: 'Ygritte'}))
+ []
+ >>> list(catalog.findRelations({None: 'Ygritte'}))
+ []
+
+.. [#findValuesExceptions] If you use findValues or findValueTokens and try
+ to specify a value name that is not indexed, you get a ValueError.
+
+ >>> catalog.findValues('foo')
+ Traceback (most recent call last):
+ ...
+ ValueError: ('name not indexed', 'foo')
+
+.. [#I_care] OK, you care about how that query factory worked, so
+ we will look into it a bit. Let's talk through two steps of the
+ transitive search in the second question. The catalog initially
+ performs the initial intransitive search requested: find relations
+ for which Betty is the supervisor. That's Diane and Edgar.
+
+ Now, for each of the results, the catalog asks the query factory for
+ next steps. Let's take Diane. The catalog says to the factory,
+ "Given this query for relations where Betty is supervisor, I got
+ this result of Diane. Do you have any other queries I should try to
+ look further?". The factory also gets the catalog instance so it
+ can use it to answer the question if it needs to.
+
+ OK, the next part is where your brain hurts. Hang on.
+
+ In our case, the factory sees that the query was for supervisor. Its
+ other key, the one it transposes with, is None. *The factory gets
+ the transposing key's result for the current token.* So, for us, a
+ key of None is actually a no-op: the result *is* the current
+ token, Diane. Then, the factory has its answer: replace the old value
+ of supervisor in the query, Betty, with the result, Diane. The next
+ transitive query should be {'supervisor', 'Diane'}. Ta-da.
+
+.. [#needs_a_transitive_queries_factory] A search with a maxDepth > 1 but
+ no queryFactory raises an error.
+
+ >>> catalog.removeDefaultQueryFactory(factory)
+ >>> catalog.findRelationTokens({'supervisor': 'Diane'}, maxDepth=3)
+ Traceback (most recent call last):
+ ...
+ ValueError: if maxDepth not in (None, 1), queryFactory must be available
+
+ >>> catalog.addDefaultQueryFactory(factory)
+
+.. [#maxDepthExceptions] maxDepth must be None or a positive integer, or
+ else you'll get a value error.
+
+ >>> catalog.findRelations({'supervisor': 'Betty'}, maxDepth=0)
+ Traceback (most recent call last):
+ ...
+ ValueError: maxDepth must be None or a positive integer
+
+ >>> catalog.findRelations({'supervisor': 'Betty'}, maxDepth=-1)
+ Traceback (most recent call last):
+ ...
+ ValueError: maxDepth must be None or a positive integer
+
+.. [#filter] For instance:
+
+ >>> list(catalog.findRelationTokens({'supervisor': 'Alice'},
+ ... targetFilter=female_filter,
+ ... targetQuery={None: 'Galyn'}))
+ ['Galyn']
+ >>> list(catalog.findRelationTokens({'supervisor': 'Alice'},
+ ... targetFilter=female_filter,
+ ... targetQuery={None: 'Not known'}))
+ []
+ >>> arbitrary = ['Alice', 'Chuck', 'Betty', 'Galyn']
+ >>> def arbitrary_filter(relchain, query, catalog, cache):
+ ... return relchain[-1] in arbitrary
+ >>> list(catalog.findRelationTokens({'supervisor': 'Alice'},
+ ... filter=arbitrary_filter,
+ ... targetFilter=female_filter))
+ ['Betty', 'Galyn']
+
+
+.. [#updates] The scenario we are looking at in this document shows a case
+ in which special logic in the search index needs to address updates.
+ For example, if we move Howie from Diane
+
+ ::
+
+ Alice
+ __/ \__
+ Betty Chuck
+ / \ / \
+ Diane Edgar Frank Galyn
+ |
+ Howie
+
+ to Galyn
+
+ ::
+
+ Alice
+ __/ \__
+ Betty Chuck
+ / \ / \
+ Diane Edgar Frank Galyn
+ |
+ Howie
+
+ then the search index is correct both for the new location and the old.
+
+ >>> h.supervisor = g
+ >>> catalog.index(h)
+ >>> list(catalog.findRelationTokens({'supervisor': 'Diane'}))
+ []
+ >>> list(catalog.findRelationTokens({'supervisor': 'Betty'}))
+ ['Diane', 'Edgar']
+ >>> list(catalog.findRelationTokens({'supervisor': 'Chuck'}))
+ ['Frank', 'Galyn', 'Howie']
+ >>> list(catalog.findRelationTokens({'supervisor': 'Galyn'}))
+ ['Howie']
+ >>> h.supervisor = d
+ >>> catalog.index(h) # move him back
+ >>> list(catalog.findRelationTokens({'supervisor': 'Galyn'}))
+ []
+ >>> list(catalog.findRelationTokens({'supervisor': 'Diane'}))
+ ['Howie']
+
+.. [#same_set] The result of the query for Betty, Alice, and Zane are all the
+ same.
+
+ >>> res1 = catalog.findRelationTokens({'supervisor': 'Betty'})
+ >>> res2 = catalog.findRelationTokens({'supervisor': 'Alice'})
+ >>> res3 = catalog.findRelationTokens({'supervisor': 'Zane'})
+ >>> list(res1) == list(res2) == list(res3)
+ True
+
+ The cycle doesn't pollute the index outside of the cycle.
+
+ >>> res = catalog.findRelationTokens({'supervisor': 'Diane'})
+ >>> list(res)
+ ['Howie']
+ >>> list(res) # it isn't lazy, it is precalculated
+ ['Howie']
+
+.. [#reverse_lookup] If you want to, look what happens when you go the
+ other way:
+
+ >>> res = list(catalog.findRelationChains({'supervisor': 'Zane'}))
+ >>> def sortEqualLenByName(one, two):
+ ... if len(one) == len(two):
+ ... return cmp(one, two)
+ ... return 0
+ ...
+ >>> res.sort(sortEqualLenByName) # normalizes for test stability
+ >>> print res # doctest: +NORMALIZE_WHITESPACE
+ [(<Employee instance "Alice">,),
+ (<Employee instance "Alice">, <Employee instance "Betty">),
+ (<Employee instance "Alice">, <Employee instance "Chuck">),
+ (<Employee instance "Alice">, <Employee instance "Betty">,
+ <Employee instance "Diane">),
+ (<Employee instance "Alice">, <Employee instance "Betty">,
+ <Employee instance "Edgar">),
+ cycle(<Employee instance "Alice">, <Employee instance "Betty">,
+ <Employee instance "Zane">),
+ (<Employee instance "Alice">, <Employee instance "Chuck">,
+ <Employee instance "Frank">),
+ (<Employee instance "Alice">, <Employee instance "Chuck">,
+ <Employee instance "Galyn">),
+ (<Employee instance "Alice">, <Employee instance "Betty">,
+ <Employee instance "Diane">, <Employee instance "Howie">)]
+
+ >>> [zc.relation.interfaces.ICircularRelationPath.providedBy(r)
+ ... for r in res]
+ [False, False, False, False, False, True, False, False, False]
+ >>> len(res[5].cycled)
+ 1
+ >>> list(catalog.findRelations(res[5].cycled[0], maxDepth=1))
+ [<Employee instance "Alice">]
Property changes on: Sandbox/poster/zc.relation/trunk/src/zc/relation/README.txt
___________________________________________________________________
Name: svn:eol-style
+ native
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/__init__.py
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/__init__.py (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/__init__.py 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1 @@
+#
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/catalog.py
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/catalog.py (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/catalog.py 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,999 @@
+import sys
+import copy
+
+import persistent
+import persistent.list
+import persistent.wref
+import BTrees
+import BTrees.Length
+
+import zope.interface
+import zope.interface.interfaces
+
+from zc.relation import interfaces
+
+# use OOBTree, not items tuple
+
+##############################################################################
+# helpers
+#
+
+def multiunion(sets, data):
+ sets = tuple(s for s in sets if s) # bool is appropriate here
+ if not sets:
+ res = data['Set']()
+ elif data['multiunion'] is not None:
+ res = data['multiunion'](sets)
+ else:
+ res = sets[0]
+ for s in sets[1:]:
+ res = data['union'](res, s)
+ return res
+
+def getModuleTools(module):
+ return dict(
+ (nm, getattr(module, nm, None)) for nm in
+ ('BTree', 'TreeSet', 'Bucket', 'Set',
+ 'intersection', 'multiunion', 'union', 'difference'))
+
+def getMapping(tools):
+ if tools['TreeSet'].__name__[0] == 'I':
+ Mapping = BTrees.family32.IO.BTree
+ elif tools['TreeSet'].__name__[0] == 'L':
+ Mapping = BTrees.family64.IO.BTree
+ else:
+ assert tools['TreeSet'].__name__.startswith('O')
+ Mapping = BTrees.family32.OO.BTree
+ return Mapping
+
+def makeCheckTargetFilter(targetQuery, targetFilter, catalog):
+ targetCache = {}
+ checkTargetFilter = None
+ if targetQuery:
+ targetData = catalog.getRelationTokens(targetQuery)
+ if not targetData:
+ return () # shortcut, indicating no values
+ else:
+ if targetFilter is not None:
+ def checkTargetFilter(relchain, query):
+ return relchain[-1] in targetData and targetFilter(
+ relchain, query, catalog, targetCache)
+ else:
+ def checkTargetFilter(relchain, query):
+ return relchain[-1] in targetData
+ elif targetFilter is not None:
+ def checkTargetFilter(relchain, query):
+ return targetFilter(relchain, query, catalog, targetCache)
+ return checkTargetFilter
+
+class Ref(persistent.Persistent):
+ def __init__(self, ob):
+ self.ob = ob
+
+ def __call__(self):
+ return self.ob
+
+def createRef(ob):
+ if isinstance(ob, persistent.Persistent):
+ return persistent.wref.WeakRef(ob)
+ else:
+ return Ref(ob)
+
+##############################################################################
+# Any and any
+#
+
+class Any(object):
+ def __init__(self, source):
+ self.source = frozenset(source)
+
+ def __iter__(self):
+ return iter(self.source)
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__) and
+ self.source == other.source)
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def __repr__(self):
+ return '<%s.%s instance %r>' % (
+ self.__class__.__module__, self.__class__.__name__,
+ tuple(sorted(self.source)))
+
+def any(*args):
+ return Any(args)
+
+##############################################################################
+# the marker that shows that a path is circular
+#
+
+class CircularRelationPath(tuple):
+ zope.interface.implements(interfaces.ICircularRelationPath)
+
+ def __new__(kls, elements, cycled):
+ res = super(CircularRelationPath, kls).__new__(kls, elements)
+ res.cycled = cycled
+ return res
+ def __repr__(self):
+ return 'cycle%s' % super(CircularRelationPath, self).__repr__()
+
+##############################################################################
+# the relation catalog
+
+class Catalog(persistent.Persistent):
+ zope.interface.implements(interfaces.ICatalog)
+
+ family = BTrees.family32
+ _listeners = _queryFactories = _searchIndexes = ()
+
+ def __init__(self, dumpRel, loadRel, relFamily=None, family=None):
+ if family is not None:
+ self.family = family
+ else:
+ family = self.family
+ self._name_TO_mapping = family.OO.BTree()
+ # held mappings are objtoken to (relcount, relset)
+ self._EMPTY_name_TO_relcount_relset = family.OO.BTree()
+ self._reltoken_name_TO_objtokenset = family.OO.BTree()
+ if relFamily is None:
+ relFamily = family.IF
+ self._relTools = getModuleTools(relFamily)
+ self._relTools['load'] = loadRel
+ self._relTools['dump'] = dumpRel
+ self._relLength = BTrees.Length.Length()
+ self._relTokens = self._relTools['TreeSet']()
+ # private; only mutate via indexValue and unindexValue
+ self._attrs = _attrs = self.family.OO.Bucket() # _attrs name is legacy
+
+ # The code is divided into the following sections by ReST-like headers:
+ #
+ # Administration and Introspection
+ # General
+ # Value Indexes
+ # Listeners
+ # DefaultQueryFactories
+ # Search Indexes
+ # Indexing
+ # Top-Level
+ # Indexing Values
+ # Tokenization
+ # Searching
+
+ # Administration and Introspection
+ # ================================
+
+ # General
+ # -------
+
+ def __contains__(self, rel):
+ return self.tokenizeRelation(rel) in self._relTokens
+
+ def __len__(self):
+ return self._relLength.value
+
+ def __iter__(self):
+ cache = {}
+ for token in self._relTokens:
+ yield self._relTools['load'](token, self, cache)
+
+ def clear(self):
+ for v in self._name_TO_mapping.values():
+ v.clear()
+ self._EMPTY_name_TO_relcount_relset.clear()
+ self._reltoken_name_TO_objtokenset.clear()
+ self._relTokens.clear()
+ self._relLength.set(0)
+ for l in self._iterListeners():
+ l.sourceCleared(self)
+
+ def copy(self, klass=None):
+ if klass is None:
+ klass = self.__class__
+ res = klass.__new__(klass)
+ res._relTokens = self._relTools['TreeSet']()
+ res._relTokens.update(self._relTokens)
+ res.family = self.family
+ res._name_TO_mapping = self.family.OO.BTree()
+ for name, mapping in self._name_TO_mapping.items():
+ new = mapping.__class__()
+ for k, (l, s) in mapping.items():
+ new[k] = (copy.copy(l), copy.copy(s))
+ res._name_TO_mapping[name] = new
+ res._EMPTY_name_TO_relcount_relset = self.family.OO.BTree()
+ for k, (l, s) in self._EMPTY_name_TO_relcount_relset.items():
+ res._EMPTY_name_TO_relcount_relset[k] = (
+ copy.copy(l), copy.copy(s))
+ res._reltoken_name_TO_objtokenset = self.family.OO.BTree()
+ for k, s in self._reltoken_name_TO_objtokenset.items():
+ res._reltoken_name_TO_objtokenset[k] = copy.copy(s)
+ res._attrs = self.family.OO.Bucket(
+ [(k, self.family.OO.Bucket(v)) for k, v in self._attrs.items()])
+ res._relTools = dict(self._relTools)
+ res._listeners = self._listeners # it's a tuple
+ res._queryFactories = self._queryFactories # it's a tuple
+ res._relLength = BTrees.Length.Length()
+ res._relLength.set(self._relLength.value)
+ res._searchIndexes = tuple(ix.copy(self) for ix in self._searchIndexes)
+ return res
+
+ # Value Indexes
+ # -------------
+
+ def _fixLegacyAttrs(self):
+ if isinstance(self._attrs, dict): # legacy
+ # because _attrs used to be a normal dict
+ self._attrs = self.family.OO.Bucket(self._attrs)
+
+ def addValueIndex(self, element, dump=None, load=None, btree=None,
+ multiple=False, name=None):
+ if btree is None:
+ btree = self.family.IF
+ res = self.family.OO.Bucket(getModuleTools(btree))
+ res['dump'] = dump
+ res['load'] = load
+ res['multiple'] = multiple
+ if (res['dump'] is None) ^ (res['load'] is None):
+ raise ValueError(
+ "either both of 'dump' and 'load' must be None, or "
+ "neither")
+ # when both load and dump are None, this is a small
+ # optimization that can be a large optimization if the returned
+ # value is one of the main four options of the selected btree
+ # family (BTree, TreeSet, Set, Bucket).
+ if zope.interface.interfaces.IElement.providedBy(element):
+ key = 'element'
+ res['attrname'] = defaultname = element.__name__
+ res['interface'] = element.interface
+ res['call'] = zope.interface.interfaces.IMethod.providedBy(element)
+ else:
+ key = 'callable'
+ defaultname = getattr(element, '__name__', None)
+ if [d for d in self._attrs.values() if d.get(key) == element]:
+ raise ValueError('element already indexed', element)
+ res[key] = element
+ if name is None:
+ if defaultname is None:
+ raise ValueError('no name specified')
+ name = defaultname
+ if name in self._attrs:
+ raise ValueError('name already used', name)
+ res['name'] = name
+ self._name_TO_mapping[name] = getMapping(res)()
+ # these are objtoken to (relcount, relset)
+ self._attrs[name] = res
+ load = self._relTools['load']
+ cache = {}
+ for token in self._relTokens:
+ additions = {}
+ additions[name] = (None, self._indexNew(
+ token, load(token, self, cache), res))
+ for l in self._iterListeners():
+ l.relationModified(token, self, additions, {})
+ self._fixLegacyAttrs()
+
+ def iterValueIndexInfo(self):
+ for d in self._attrs.values():
+ res = {}
+ res['name'] = d['name']
+ res['element'] = d.get('element', d.get('callable'))
+ res['multiple'] = d['multiple']
+ res['dump'] = d['dump']
+ res['load'] = d['load']
+ res['btree'] = sys.modules[d['TreeSet'].__module__]
+ yield res
+
+ def removeValueIndex(self, name):
+ del self._attrs[name]
+ self._fixLegacyAttrs()
+ del self._name_TO_mapping[name]
+ if name in self._EMPTY_name_TO_relcount_relset:
+ del self._EMPTY_name_TO_relcount_relset[name]
+
+ # Listeners
+ # -----------
+
+ def addListener(self, listener):
+ res = [ref for ref in self._listeners if ref() is not None]
+ res.append(createRef(listener))
+ self._listeners = tuple(res)
+ listener.sourceAdded(self)
+
+ def iterListeners(self):
+ for ref in self._listeners:
+ l = ref()
+ if l is not None:
+ yield l
+
+ def removeListener(self, listener):
+ if listener is None:
+ raise LookupError('listener not found', listener)
+ res = []
+ found = False
+ for ref in reversed(self._listeners):
+ l = ref()
+ if l is listener and not found:
+ found = True
+ continue
+ res.append(ref)
+ if not found:
+ raise LookupError('listener not found', listener)
+ res.reverse()
+ self._listeners = tuple(res)
+ listener.sourceRemoved(self)
+
+ # DefaultQueryFactories
+ # -----------------------
+
+ def addDefaultQueryFactory(self, factory):
+ res = [ref for ref in self._queryFactories if ref() is not None]
+ res.append(createRef(factory))
+ self._queryFactories = tuple(res)
+
+ def iterDefaultQueryFactories(self):
+ for ref in self._queryFactories:
+ factory = ref()
+ if factory is not None:
+ yield factory
+
+ def removeDefaultQueryFactory(self, factory):
+ if factory is None:
+ raise LookupError('factory not found', factory)
+ res = []
+ found = False
+ for ref in reversed(self._queryFactories):
+ l = ref()
+ if l is factory and not found:
+ found = True
+ continue
+ res.append(ref)
+ if not found:
+ raise LookupError('factory not found', factory)
+ res.reverse()
+ self._queryFactories = tuple(res)
+
+ # Search Indexes
+ # --------------
+
+ def addSearchIndex(self, ix):
+ ix.setCatalog(self)
+ self._searchIndexes += (ix,)
+
+ def iterSearchIndexes(self):
+ return iter(self._searchIndexes)
+
+ def removeSearchIndex(self, ix):
+ res = tuple(i for i in self._searchIndexes if i is not ix)
+ if len(res) == len(self._searchIndexes):
+ raise LookupError('index not found', ix)
+ self._searchIndexes = res
+ ix.setCatalog(None)
+
+ # Indexing
+ # ========
+
+ # Top-Level
+ # ---------
+
+ def _indexNew(self, token, rel, data):
+ assert self._reltoken_name_TO_objtokenset.get(
+ (token, data['name']), self) is self
+ values, tokens, optimization = self._getValuesAndTokens(
+ rel, data)
+ if optimization and tokens is not None:
+ tokens = data['TreeSet'](tokens)
+ self._add(token, tokens, data['name'], tokens)
+ return tokens
+
+ def index(self, rel):
+ self.index_doc(self._relTools['dump'](rel, self, {}), rel)
+
+ def index_doc(self, relToken, rel):
+ additions = {}
+ removals = {}
+ if relToken in self._relTokens:
+ # reindex
+ for data in self._attrs.values():
+ values, newTokens, optimization = self._getValuesAndTokens(
+ rel, data)
+ oldTokens = self._reltoken_name_TO_objtokenset[
+ (relToken, data['name'])]
+ if newTokens != oldTokens:
+ if newTokens is not None and oldTokens is not None:
+ added = data['difference'](newTokens, oldTokens)
+ removed = data['difference'](oldTokens, newTokens)
+ if optimization:
+ # the goal of this optimization is to not have to
+ # recreate a TreeSet (which can be large and
+ # relatively timeconsuming) when only small changes
+ # have been made. We ballpark this by saying
+ # "if there are only a few removals, do them, and
+ # then do an update: it's almost certainly a win
+ # over essentially generating a new TreeSet and
+ # updating it with *all* values. On the other
+ # hand, if there are a lot of removals, it's
+ # probably quicker just to make a new one." See
+ # timeit/set_creation_vs_removal.py for details.
+ # A len is pretty cheap--`removed` is a single
+ # bucket, and `oldTokens` should have all of its
+ # buckets in memory already, and adding up bucket
+ # lens in C is pretty fast.
+ len_removed = len(removed)
+ if len_removed < 5:
+ recycle = True
+ else:
+ len_old = len(oldTokens)
+ ratio = float(len_old)/len_removed
+ recycle = (ratio <= 0.1 or len_old > 500
+ and ratio < 0.2)
+ if recycle:
+ for t in removed:
+ oldTokens.remove(t)
+ oldTokens.update(added)
+ newTokens = oldTokens
+ else:
+ newTokens = data['TreeSet'](newTokens)
+ else:
+ if optimization and newTokens is not None:
+ newTokens = data['TreeSet'](newTokens)
+ removed = oldTokens
+ added = newTokens
+ self._remove(relToken, removed, data['name'])
+ if removed:
+ removals[data['name']] = removed
+ self._add(relToken, added, data['name'], newTokens)
+ if added:
+ additions[data['name']] = added
+ for l in self._iterListeners():
+ l.relationModified(relToken, self, additions, removals)
+ else:
+ # new token
+ for data in self._attrs.values():
+ additions[data['name']] = self._indexNew(relToken, rel, data)
+ self._relTokens.insert(relToken)
+ self._relLength.change(1)
+ for l in self._iterListeners():
+ l.relationAdded(relToken, self, additions)
+
+ def unindex(self, rel):
+ self.unindex_doc(self._relTools['dump'](rel, self, {}))
+
+ def unindex_doc(self, relToken):
+ removals = {}
+ if relToken in self._relTokens:
+ for data in self._attrs.values():
+ tokens = self._reltoken_name_TO_objtokenset.pop(
+ (relToken, data['name']))
+ if tokens:
+ removals[data['name']] = tokens
+ self._remove(relToken, tokens, data['name'])
+ self._relTokens.remove(relToken)
+ self._relLength.change(-1)
+ for l in self._iterListeners():
+ l.relationRemoved(relToken, self, removals)
+
+ # Indexing Values
+ # ---------------
+
+ def _getValuesAndTokens(self, rel, data):
+ values = None
+ if 'interface' in data:
+ valueSource = data['interface'](rel, None)
+ if valueSource is not None:
+ values = getattr(valueSource, data['attrname'])
+ if data['call']:
+ values = values()
+ else:
+ values = data['callable'](rel, self)
+ if not data['multiple'] and values is not None:
+ # None is a marker for no value
+ values = (values,)
+ optimization = data['dump'] is None and (
+ values is None or
+ isinstance(values, (
+ data['TreeSet'], data['BTree'], data['Bucket'], data['Set'])))
+ if not values:
+ return None, None, optimization
+ elif optimization:
+ # this is the optimization story (see _add)
+ return values, values, optimization
+ else:
+ cache = {}
+ if data['dump'] is None:
+ tokens = data['TreeSet'](values)
+ else:
+ tokens = data['TreeSet'](
+ data['dump'](o, self, cache) for o in values)
+ return values, tokens, False
+
+ def _add(self, relToken, tokens, name, fullTokens):
+ self._reltoken_name_TO_objtokenset[(relToken, name)] = fullTokens
+ if tokens is None:
+ dataset = self._EMPTY_name_TO_relcount_relset
+ keys = (name,)
+ else:
+ dataset = self._name_TO_mapping[name]
+ keys = tokens
+ for key in keys:
+ data = dataset.get(key)
+ if data is None:
+ data = dataset[key] = (
+ BTrees.Length.Length(), self._relTools['TreeSet']())
+ res = data[1].insert(relToken)
+ assert res, 'Internal error: relToken existed in data'
+ data[0].change(1)
+
+ def _remove(self, relToken, tokens, name):
+ if tokens is None:
+ dataset = self._EMPTY_name_TO_relcount_relset
+ keys = (name,)
+ else:
+ dataset = self._name_TO_mapping[name]
+ keys = tokens
+ for key in keys:
+ data = dataset[key]
+ data[1].remove(relToken)
+ data[0].change(-1)
+ if not data[0].value:
+ del dataset[key]
+ else:
+ assert data[0].value > 0
+
+ # Tokenization
+ # ============
+
+ def tokenizeQuery(self, query):
+ res = {}
+ for k, v in query.items():
+ if k is None:
+ tools = self._relTools
+ else:
+ tools = self._attrs[k]
+ if isinstance(v, Any):
+ if tools['dump'] is not None:
+ cache = {}
+ v = Any(tools['dump'](val, self, cache) for val in v)
+ else:
+ if v is not None and tools['dump'] is not None:
+ v = tools['dump'](v, self, {})
+ res[k] = v
+ return res
+
+ def resolveQuery(self, query):
+ res = {}
+ for k, v in query.items():
+ if k is None:
+ tools = self._relTools
+ else:
+ tools = self._attrs[k]
+ if isinstance(v, Any):
+ if tools['load'] is not None:
+ cache = {}
+ v = Any(tools['load'](val, self, cache) for val in v)
+ else:
+ if v is not None and tools['load'] is not None:
+ v = tools['load'](v, self, {})
+ res[k] = v
+ return res
+
+ def tokenizeValues(self, values, name):
+ dump = self._attrs[name]['dump']
+ if dump is None:
+ return values
+ cache = {}
+ return (dump(v, self, cache) for v in values)
+
+ def resolveValueTokens(self, tokens, name):
+ load = self._attrs[name]['load']
+ if load is None:
+ return tokens
+ cache = {}
+ return (load(t, self, cache) for t in tokens)
+
+ def tokenizeRelation(self, rel):
+ return self._relTools['dump'](rel, self, {})
+
+ def resolveRelationToken(self, token):
+ return self._relTools['load'](token, self, {})
+
+ def tokenizeRelations(self, rels):
+ cache = {}
+ return (self._relTools['dump'](r, self, cache) for r in rels)
+
+ def resolveRelationTokens(self, tokens):
+ cache = {}
+ return (self._relTools['load'](t, self, cache) for t in tokens)
+
+ # Searching
+ # =========
+
+ # Internal Helpers
+ # ----------------
+
+ def _relData(self, query):
+ # query must be BTrees.family32.OO.Bucket. The key may be
+ # a value index name or None, indicating one or more relations. The
+ # val may be token, None, or iterator (object with a `next` method)
+ # of tokens (may not include None).
+ if not query:
+ return self._relTokens
+ data = []
+ tools = self._relTools
+ explicit_relations = False
+ for name, value in query.items():
+ if name is None:
+ explicit_relations = True
+ if not isinstance(value, Any):
+ value = (value,)
+ rels = tools['Set'](value)
+ length = len(rels)
+ else:
+ if isinstance(value, Any):
+ get = self._name_TO_mapping[name].get
+ rels = multiunion(
+ (get(token, (None, None))[1] for token in value),
+ self._relTools)
+ length = len(rels)
+ else:
+ if value is None:
+ relData = self._EMPTY_name_TO_relcount_relset.get(name)
+ else:
+ relData = self._name_TO_mapping[name].get(value)
+ if relData is None:
+ return None
+ length = relData[0].value
+ rels = relData[1]
+ if not length:
+ return None
+ data.append((length, rels))
+ # we don't want to sort on the set values!! just the lengths.
+ data.sort(key=lambda i: i[0])
+ if explicit_relations and len(data) == 1:
+ # we'll need to intersect with our set of relations to make
+ # sure the relations are actually members. This set should
+ # be the biggest possible set, so we put it at the end after
+ # sorting.
+ data.append((self._relLength.value, self._relTokens))
+ # we know we have at least one result now. intersect all. Work
+ # from smallest to largest, until we're done or we don't have any
+ # more results.
+ res = data.pop(0)[1]
+ while res and data:
+ res = self._relTools['intersection'](res, data.pop(0)[1])
+ return res
+
+ def _getSearchIndexResults(self, name, query, maxDepth, filter,
+ targetQuery, targetFilter, queryFactory):
+ for ix in self._searchIndexes:
+ res = ix.getResults(name, query, maxDepth, filter, targetQuery,
+ targetFilter, queryFactory)
+ if res is not None:
+ return res
+
+ def _iterListeners(self):
+ # fix up ourself first
+ for ix in self._searchIndexes:
+ yield ix
+ # then tell others
+ for l in self.iterListeners():
+ yield l
+
+ def _getQueryFactory(self, query, queryFactory):
+ res = None
+ if queryFactory is not None:
+ res = queryFactory(query, self)
+ else:
+ for queryFactory in self.iterDefaultQueryFactories():
+ res = queryFactory(query, self)
+ if res is not None:
+ break
+ if res is None:
+ queryFactory = None
+ return queryFactory, res
+
+ def _parse(self, query, maxDepth, filter, targetQuery, targetFilter,
+ getQueries):
+ assert (isinstance(query, BTrees.family32.OO.Bucket) and
+ isinstance(targetQuery, BTrees.family32.OO.Bucket)), (
+ 'internal error: parse expects query and targetQuery '
+ 'to already be normalized (to OO.Bucket.')
+ if maxDepth is not None and (
+ not isinstance(maxDepth, (int, long)) or maxDepth < 1):
+ raise ValueError('maxDepth must be None or a positive integer')
+ if getQueries is not None:
+ queries = getQueries(())
+ else:
+ queries = (query,)
+ if maxDepth == 1:
+ getQueries = None
+ elif getQueries is None and maxDepth is not None:
+ raise ValueError(
+ 'if maxDepth not in (None, 1), queryFactory must be available')
+ relData = (r for r in (self._relData(q) for q in queries) if r)
+ if filter is not None:
+ filterCache = {}
+ def checkFilter(relchain, query):
+ return filter(relchain, query, self, filterCache)
+ else:
+ checkFilter = None
+ checkTargetFilter = makeCheckTargetFilter(
+ targetQuery, targetFilter, self)
+ if checkTargetFilter is not None and not checkTargetFilter:
+ relData = ()
+ return (query, relData, maxDepth, checkFilter, checkTargetFilter,
+ getQueries)
+
+ # API to help plugin writers
+ # --------------------------
+
+ def getRelationModuleTools(self):
+ return self._relTools
+
+ def getValueModuleTools(self, name):
+ return self._attrs[name]
+
+ def getRelationTokens(self, query=None):
+ if query is None:
+ return self._relTokens
+ else:
+ if not isinstance(query, BTrees.family32.OO.Bucket):
+ query = BTrees.family32.OO.Bucket(query)
+ return self._relData(query)
+
+ def getValueTokens(self, name, reltoken=None):
+ if reltoken is None:
+ return self._name_TO_mapping[name]
+ else:
+ return self._reltoken_name_TO_objtokenset.get(
+ (reltoken, name))
+
+ def yieldRelationTokenChains(self, query, relData, maxDepth, checkFilter,
+ checkTargetFilter, getQueries,
+ findCycles=True):
+ stack = []
+ for d in relData:
+ stack.append(((), iter(d)))
+ while stack:
+ tokenChain, relDataIter = stack[0]
+ try:
+ relToken = relDataIter.next()
+ except StopIteration:
+ stack.pop(0)
+ else:
+ tokenChain += (relToken,)
+ if checkFilter is not None and not checkFilter(
+ tokenChain, query):
+ continue
+ walkFurther = maxDepth is None or len(tokenChain) < maxDepth
+ if getQueries is not None and (walkFurther or findCycles):
+ oldInputs = frozenset(tokenChain)
+ next = set()
+ cycled = []
+ for q in getQueries(tokenChain):
+ relData = self._relData(q)
+ if relData:
+ intersection = oldInputs.intersection(relData)
+ if intersection:
+ # it's a cycle
+ cycled.append(q)
+ elif walkFurther:
+ next.update(relData)
+ if walkFurther and next:
+ stack.append((tokenChain, iter(next)))
+ if cycled:
+ tokenChain = CircularRelationPath(
+ tokenChain, cycled)
+ if (checkTargetFilter is None or
+ checkTargetFilter(tokenChain, query)):
+ yield tokenChain
+
+ # Main search API
+ # ---------------
+
+ def findValueTokens(self, name, query=(), maxDepth=None,
+ filter=None, targetQuery=(), targetFilter=None,
+ queryFactory=None):
+ data = self._attrs.get(name)
+ if data is None:
+ raise ValueError('name not indexed', name)
+ query = BTrees.family32.OO.Bucket(query)
+ getQueries = None
+ if queryFactory is None:
+ queryFactory, getQueries = self._getQueryFactory(
+ query, queryFactory)
+ targetQuery = BTrees.family32.OO.Bucket(targetQuery)
+ if (((maxDepth is None and queryFactory is None)
+ or maxDepth==1) and filter is None and targetFilter is None):
+ # return a set
+ if not query and not targetQuery:
+ return self._name_TO_mapping[name]
+ rels = self._relData(query)
+ if targetQuery and rels:
+ # well, it's kind of odd to have specified query and
+ # targetQuery without a transitive search, but hey, this
+ # should be the result.
+ rels = self._relTools['intersection'](
+ rels, self._relData(targetQuery))
+ if not rels:
+ return data['Set']()
+ elif len(rels) == 1:
+ res = self._reltoken_name_TO_objtokenset.get(
+ (rels.maxKey(), name))
+ if res is None:
+ res = self._attrs[name]['Set']()
+ return res
+ else:
+ return multiunion(
+ (self._reltoken_name_TO_objtokenset.get((r, name))
+ for r in rels), data)
+ res = self._getSearchIndexResults(
+ name, query, maxDepth, filter, targetQuery,
+ targetFilter, queryFactory)
+ if res is not None:
+ return res
+ res = self._getSearchIndexResults(
+ None, query, maxDepth, filter, targetQuery, targetFilter,
+ queryFactory)
+ if res is not None:
+ return multiunion(
+ (self._reltoken_name_TO_objtokenset.get((r, name))
+ for r in res),
+ data)
+ if getQueries is None:
+ queryFactory, getQueries = self._getQueryFactory(
+ query, queryFactory)
+ return self._yieldValueTokens(
+ name, *self._parse( # query and targetQuery normalized above
+ query, maxDepth, filter, targetQuery, targetFilter,
+ getQueries))
+
+ def findValues(self, name, query=(), maxDepth=None, filter=None,
+ targetQuery=(), targetFilter=None,
+ queryFactory=None):
+ res = self.findValueTokens(name, query, maxDepth, filter,
+ targetQuery, targetFilter,
+ queryFactory)
+ resolve = self._attrs[name]['load']
+ if resolve is None:
+ return res
+ else:
+ cache = {}
+ return (resolve(t, self, cache) for t in res)
+
+ def _yieldValueTokens(
+ self, name, query, relData, maxDepth, checkFilter,
+ checkTargetFilter, getQueries, yieldSets=False):
+ # this is really an internal bit of findValueTokens, and is only
+ # used there.
+ relSeen = set()
+ objSeen = set()
+ for path in self.yieldRelationTokenChains(
+ query, relData, maxDepth, checkFilter, checkTargetFilter,
+ getQueries, findCycles=False):
+ relToken = path[-1]
+ if relToken not in relSeen:
+ relSeen.add(relToken)
+ outputSet = self._reltoken_name_TO_objtokenset.get(
+ (relToken, name))
+ if outputSet:
+ if yieldSets: # this is needed for zc.relationship!!!
+ yield outputSet
+ else:
+ for token in outputSet:
+ if token not in objSeen:
+ yield token
+ objSeen.add(token)
+
+ def findRelationTokens(self, query=(), maxDepth=None, filter=None,
+ targetQuery=(), targetFilter=None,
+ queryFactory=None):
+ query = BTrees.family32.OO.Bucket(query)
+ getQueries = None
+ if queryFactory is None:
+ queryFactory, getQueries = self._getQueryFactory(
+ query, queryFactory)
+ targetQuery = BTrees.family32.OO.Bucket(targetQuery)
+ if (((maxDepth is None and queryFactory is None)
+ or maxDepth==1)
+ and filter is None and not targetQuery and targetFilter is None):
+ res = self._relData(query)
+ if res is None:
+ res = self._relTools['Set']()
+ return res
+ res = self._getSearchIndexResults(
+ None, query, maxDepth, filter, targetQuery, targetFilter,
+ queryFactory)
+ if res is not None:
+ return res
+ if getQueries is None:
+ queryFactory, getQueries = self._getQueryFactory(
+ query, queryFactory)
+ seen = self._relTools['Set']()
+ return (res[-1] for res in self.yieldRelationTokenChains(
+ *self._parse(
+ query, maxDepth, filter, targetQuery,
+ targetFilter, getQueries) +
+ (False,))
+ if seen.insert(res[-1]))
+
+ def findRelations(self, query=(), maxDepth=None, filter=None,
+ targetQuery=(), targetFilter=None,
+ queryFactory=None):
+ return self.resolveRelationTokens(
+ self.findRelationTokens(
+ query, maxDepth, filter, targetQuery, targetFilter,
+ queryFactory))
+
+ def findRelationChains(self, query, maxDepth=None, filter=None,
+ targetQuery=(), targetFilter=None,
+ queryFactory=None):
+ """find relation tokens that match the query.
+
+ same arguments as findValueTokens except no name.
+ """
+ query = BTrees.family32.OO.Bucket(query)
+ queryFactory, getQueries = self._getQueryFactory(
+ query, queryFactory)
+ return self._yieldRelationChains(*self._parse(
+ query, maxDepth, filter, BTrees.family32.OO.Bucket(targetQuery),
+ targetFilter, getQueries))
+
+ def _yieldRelationChains(self, query, relData, maxDepth, checkFilter,
+ checkTargetFilter, getQueries,
+ findCycles=True):
+ # this is really an internal bit of findRelationChains, and is only
+ # used there.
+ resolve = self._relTools['load']
+ cache = {}
+ for p in self.yieldRelationTokenChains(
+ query, relData, maxDepth, checkFilter, checkTargetFilter,
+ getQueries, findCycles):
+ t = (resolve(t, self, cache) for t in p)
+ if interfaces.ICircularRelationPath.providedBy(p):
+ res = CircularRelationPath(t, p.cycled)
+ else:
+ res = tuple(t)
+ yield res
+
+ def findRelationTokenChains(self, query, maxDepth=None, filter=None,
+ targetQuery=(), targetFilter=None,
+ queryFactory=None):
+ """find relation tokens that match the query.
+
+ same arguments as findValueTokens except no name.
+ """
+ query = BTrees.family32.OO.Bucket(query)
+ queryFactory, getQueries = self._getQueryFactory(
+ query, queryFactory)
+ return self.yieldRelationTokenChains(*self._parse(
+ query, maxDepth, filter, BTrees.family32.OO.Bucket(targetQuery),
+ targetFilter, getQueries))
+
+ def canFind(self, query, maxDepth=None, filter=None,
+ targetQuery=(), targetFilter=None,
+ queryFactory=None):
+ query = BTrees.family32.OO.Bucket(query)
+ getQueries = None
+ if queryFactory is None:
+ queryFactory, getQueries = self._getQueryFactory(
+ query, queryFactory)
+ targetQuery = BTrees.family32.OO.Bucket(targetQuery)
+ res = self._getSearchIndexResults(
+ None, query, maxDepth, filter, targetQuery, targetFilter,
+ queryFactory)
+ if res is not None:
+ return bool(res)
+ if getQueries is None:
+ queryFactory, getQueries = self._getQueryFactory(
+ query, queryFactory)
+ try:
+ self.yieldRelationTokenChains(
+ *self._parse(
+ query, maxDepth, filter, targetQuery,
+ targetFilter, getQueries) +
+ (False,)).next()
+ except StopIteration:
+ return False
+ else:
+ return True
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/example.txt
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/example.txt (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/example.txt 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,658 @@
+====================================
+zc.relation.Catalog Extended Example
+====================================
+
+.. contents::
+ :local:
+
+Introduction and Set Up
+=======================
+
+This document assumes you have read the introductory README.txt and want
+to learn a bit more by example. In it, we will explore a more
+complicated set of relations, and will not explain the basics that the
+README already addressed.
+
+Imagine we are indexing security assertions in a system. In this
+system, users may have roles within an organization. Each organization
+may have multiple child organizations and may have a single parent
+organization. A user with a role in a parent organization will have the
+same role in all transitively connected child relations.
+
+This catalog, then, will be indexing a heterogeneous collection of
+relations. One kind of relation will model the hierarchy of
+organizations. We'll do it with an intrinsic relation of
+organizations to their children: that reflects the fact that parent
+organizations choose and are comprised of their children; children do not
+choose their parents.
+
+The other relation will model the (multiple) roles a (single) user has
+in a (single) organization. This relation will be entirely extrinsic.
+
+Let's define this with interfaces.
+
+ >>> import zope.interface
+ >>> class IOrganization(zope.interface.Interface):
+ ... title = zope.interface.Attribute('the title')
+ ... parts = zope.interface.Attribute(
+ ... 'the organizations that make up this one')
+ ...
+ >>> class IRoles(zope.interface.Interface):
+ ... organization = zope.interface.Attribute(
+ ... 'the organization in which this relation operates')
+ ... principal_id = zope.interface.Attribute(
+ ... 'the pricipal id whose roles this relation lists')
+ ... role_ids = zope.interface.Attribute(
+ ... 'the role ids that the principal explicitly has in the '
+ ... 'organization. The principal may have other roles via '
+ ... 'roles in parent organizations.')
+ ...
+
+Now we can create some classes. In the first example, the setup was a bit
+of a toy. This time we will be just a bit more practical. We'll also expect
+to be operating within the ZODB, with a root and transactions. [#ZODB]_
+
+Here's how we will dump and load our relations: use a "registry"
+object, similar to an intid utility. [#faux_intid]_
+
+We use the cache just to show you how you might use it. It probably is
+overkill for this job, and maybe even a loss, but you can see the idea.
+
+ >>> def dump(obj, catalog, cache):
+ ... reg = cache.get('registry')
+ ... if reg is None:
+ ... reg = cache['registry'] = catalog._p_jar.root()['registry']
+ ... return reg.getId(obj)
+ ...
+ >>> def load(token, catalog, cache):
+ ... reg = cache.get('registry')
+ ... if reg is None:
+ ... reg = cache['registry'] = catalog._p_jar.root()['registry']
+ ... return reg.getObject(token)
+ ...
+
+Now we can create a relation catalog to hold these items.
+
+ >>> import zc.relation
+ >>> catalog = root['catalog'] = zc.relation.Catalog(dump, load)
+ >>> transaction.commit()
+
+Now we set up our indexes. We'll start with just the organizations, and
+set up the catalog with them. This part will be similar to the example
+in README.txt, but will introduce more discussions of optimizations and
+tokens. Then we'll add in the part about roles, and explore transitive
+query factories and transitive indexes some more.
+
+Optimizations and Tokens: Setting Up the Organizations
+======================================================
+
+The organization will hold a set of organizations. This is actually not
+inherently easy in the ZODB because this means that we need to compare
+or hash persistent objects, which does not work reliably over time and
+across machines out-of-the-box. To side-step the issue for this example,
+and still do something a bit interesting and real-world, we'll the
+registry tokens introduced above. This will also give us a chance to
+talk a bit more about optimizations and tokens. (If you would like
+to sanely and transparently hold a set of persistent objects, try the
+zc.set package.)
+
+ >>> import BTrees
+ >>> class Organization(persistent.Persistent):
+ ... zope.interface.implements(IOrganization)
+ ... def __init__(self, title):
+ ... self.title = title
+ ... self.parts = BTrees.family32.IF.TreeSet()
+ ... # the next parts just make the tests prettier
+ ... def __repr__(self):
+ ... return '<Organization instance "' + self.title + '">'
+ ... def __cmp__(self, other):
+ ... # pukes if other doesn't have name
+ ... return cmp(self.title, other.title)
+ ...
+
+Now we can add the `parts` index to the catalog. This will do a few
+new things from how we added indexes in the README. We'll add the index,
+then talk about what we did.
+
+ >>> catalog.addValueIndex(IOrganization['parts'], multiple=True,
+ ... name="part")
+
+So, what's different?
+
+First, we are using an interface element to define the value to be indexed.
+It provides an interface to which objects will be adapted, a default name
+for the index, and information as to whether the attribute should be used
+directly or called.
+
+Second, we are not specifying a dump or load. They are None. This
+means that the indexed value can already be treated as a token. This
+can allow a very significant optimization for reindexing if the indexed
+value is a large collection using the same BTree family as the
+index--which leads us to the next difference.
+
+Third, we are specifying that `multiple=True`. This means that the value
+on a given relation that provides or can be adapted to IOrganization will
+have a collection of `parts`. These will always be regarded as a set,
+whether the actual colection is a BTrees set or the keys of a BTree.
+
+Last, we are specifying a name to be used for queries. I find that queries
+read more easily when the query keys are singular, so I often rename plurals.
+
+We are just working with organizations at the moment, so we can add
+another simple transposing transitive query factory, switching between
+'part' and `None`.
+
+ >>> factory = zc.relation.TransposingTransitiveQueriesFactory(
+ ... 'part', None)
+ >>> catalog.defaultTransitiveQueriesFactory = factory
+
+Let's add a transitive index in too, of the hierarchy looking down.
+
+ >>> catalog.addTransitiveIndex('part', None)
+
+Let's create and add a few organizations. We'll make a structure like this::
+
+ Ynod Corp Mangement Zookd Corp Management
+ / | \ / | \
+ Ynod Devs Ynod SAs Ynod Admins Zookd Admins Zookd SAs Zookd Devs
+ / \ \ / / \
+Y3L4 Proj Bet Proj Ynod Zookd Task Force Zookd hOgnmd Zookd Nbd
+
+ >>> orgs = root['organizations'] = BTrees.family32.OO.BTree()
+ >>> for nm, parts in (
+ ... ('Y3L4 Proj', ()),
+ ... ('Bet Proj', ()),
+ ... ('Ynod Zookd Task Force', ()),
+ ... ('Zookd hOgnmd', ()),
+ ... ('Zookd Nbd', ()),
+ ... ('Ynod Devs', ('Y3L4 Proj', 'Bet Proj')),
+ ... ('Ynod SAs', ()),
+ ... ('Ynod Admins', ('Ynod Zookd Task Force',)),
+ ... ('Zookd Admins', ('Ynod Zookd Task Force',)),
+ ... ('Zookd SAs', ()),
+ ... ('Zookd Devs', ('Zookd hOgnmd', 'Zookd Nbd')),
+ ... ('Ynod Corp Management', ('Ynod Devs', 'Ynod SAs', 'Ynod Admins')),
+ ... ('Zookd Corp Management', ('Zookd Devs', 'Zookd SAs',
+ ... 'Zookd Admins'))):
+ ... org = Organization(nm)
+ ... for part in parts:
+ ... ignore = org.parts.insert(registry.getId(orgs[part]))
+ ... orgs[nm] = org
+ ... catalog.index(org)
+ ...
+
+Now we can search. To do this, we can use some of the token methods that
+the catalog provides. The most commonly used is `tokenizeQuery`. It takes a
+query with values that are not tokenized and converts them to values that are
+tokenized.
+
+ >>> Ynod_SAs_id = registry.getId(orgs['Ynod SAs'])
+ >>> catalog.tokenizeQuery({None: orgs['Ynod SAs']}) == {
+ ... None: Ynod_SAs_id}
+ True
+ >>> Zookd_SAs_id = registry.getId(orgs['Zookd SAs'])
+ >>> Zookd_Devs_id = registry.getId(orgs['Zookd Devs'])
+ >>> catalog.tokenizeQuery(
+ ... {None: zc.relation.any(orgs['Zookd SAs'], orgs['Zookd Devs'])}
+ ... ) == {
+ ... None: zc.relation.any(Zookd_SAs_id, Zookd_Devs_id)}
+ True
+
+Of course, right now doing this with 'part' alone is kind of silly, since it
+does not change within the relation catalog (because we said that dump and
+load were `None`, as discussed above).
+
+ >>> catalog.tokenizeQuery({'part': Ynod_SAs_id}) == {
+ ... 'part': Ynod_SAs_id}
+ True
+ >>> catalog.tokenizeQuery(
+ ... {'part': zc.relation.any(Zookd_SAs_id, Zookd_Devs_id)}
+ ... ) == {'part': zc.relation.any(Zookd_SAs_id, Zookd_Devs_id)}
+ True
+
+It is so common that we're going to assign it to a variable in our example.
+Then we'll do a search or two.
+
+So...find the relations that Ynod Devs supervise.
+
+ >>> t = catalog.tokenizeQuery
+ >>> res = list(catalog.findRelationTokens(t({None: orgs['Ynod Devs']})))
+
+OK...we used `findRelationTokens`, as opposed to `findRelations`, so res
+is a couple of numbers now. How do we convert them back?
+`resolveRelationTokens` will do the trick.
+
+ >>> len(res)
+ 3
+ >>> sorted(catalog.resolveRelationTokens(res))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Organization instance "Bet Proj">, <Organization instance "Y3L4 Proj">,
+ <Organization instance "Ynod Devs">]
+
+`resolveQuery` is the mirror image of `tokenizeQuery`: it converts
+tokenized queries to queries with "loaded" values.
+
+ >>> original = {'part': zc.relation.any(Zookd_SAs_id, Zookd_Devs_id),
+ ... None: orgs['Zookd Devs']}
+ >>> tokenized = catalog.tokenizeQuery(original)
+ >>> original == catalog.resolveQuery(tokenized)
+ True
+
+Likewise, `tokenizeRelations` is the mirror image of `resolveRelationTokens`.
+
+ >>> sorted(catalog.tokenizeRelations(
+ ... [orgs["Bet Proj"], orgs["Y3L4 Proj"]])) == sorted(
+ ... registry.getId(o) for o in
+ ... [orgs["Bet Proj"], orgs["Y3L4 Proj"]])
+ True
+
+The other token-related methods are as follows:
+
+- `tokenizeValues`, which returns an iterable of tokens for the values
+ of the given index name;
+- `resolveValueTokens`, which returns an iterable of values for the tokens of
+ the given index name;
+- `tokenizeRelation`, which returns a token for the given relation; and
+- `resolveRelationToken`, which returns a relation for the given token.
+
+Why do we bother with these tokens, instead of hiding them away and making
+the API prettier? [#show_remaining_token_methods]_ By exposing them, we
+enable efficient joining, and efficient use in other contexts. For instance,
+if you use the same intid utility to tokenize in other catalogs, our results
+can be merged with the results of other catalogs. Similarly, you can use
+the results of queries to other catalogs--or even "joins" from earlier
+results of querying this catalog--as query values here.
+
+Custom Transitive Query Factories and Transitive Indexes: Roles
+===============================================================
+
+We have set up the Organization relations. Now let's set up the roles, and
+actually be able to answer the questions that we described at the beginning
+of the document.
+
+In our Roles object, roles and principals will simply be strings--ids, if
+this were a real system. The organization will be a direct object reference.
+
+ >>> class Roles(persistent.Persistent):
+ ... zope.interface.implements(IRoles)
+ ... def __init__(self, principal_id, role_ids, organization):
+ ... self.principal_id = principal_id
+ ... self.role_ids = BTrees.family32.OI.TreeSet(role_ids)
+ ... self.organization = organization
+ ... # the rest is for prettier/easier tests
+ ... def __repr__(self):
+ ... return "<Roles instance (%s has %s in %s)>" % (
+ ... self.principal_id, ', '.join(self.role_ids),
+ ... self.organization.title)
+ ... def __cmp__(self, other):
+ ... return cmp(
+ ... (self.principal_id, tuple(self.role_ids),
+ ... self.organization.title),
+ ... (other.principal_id, tuple(other.role_ids),
+ ... other.organization.title))
+ ...
+
+Now let's add add the value indexes to the relation catalog.
+
+ >>> catalog.addValueIndex(IRoles['principal_id'], btree=BTrees.family32.OI)
+ >>> catalog.addValueIndex(IRoles['role_ids'], btree=BTrees.family32.OI,
+ ... multiple=True, name='role_id')
+ >>> catalog.addValueIndex(IRoles['organization'], dump, load)
+
+Those are some slightly new variations of what we've seen in `addValueIndex`
+before, but all mixing and matching on the same ingredients.
+
+As a reminder, here is our data structure::
+
+ Ynod Corp Mangement Zookd Corp Management
+ / | \ / | \
+ Ynod Devs Ynod SAs Ynod Admins Zookd Admins Zookd SAs Zookd Devs
+ / \ \ / / \
+Y3L4 Proj Bet Proj Ynod Zookd Task Force Zookd hOgnmd Zookd Nbd
+
+Now let's create and add some roles.
+
+ >>> principal_ids = [
+ ... 'abe', 'bran', 'cathy', 'david', 'edgar', 'frank', 'gertrude',
+ ... 'harriet', 'ignas', 'jacob', 'karyn', 'lettie', 'molly', 'nancy',
+ ... 'ophelia', 'pat']
+ >>> role_ids = ['user manager', 'writer', 'reviewer', 'publisher']
+ >>> get_role = dict((v[0], v) for v in role_ids).__getitem__
+ >>> roles = root['roles'] = BTrees.family32.IO.BTree()
+ >>> next = 0
+ >>> for prin, org, role_ids in (
+ ... ('abe', orgs['Zookd Corp Management'], 'uwrp'),
+ ... ('bran', orgs['Ynod Corp Management'], 'uwrp'),
+ ... ('cathy', orgs['Ynod Devs'], 'w'),
+ ... ('cathy', orgs['Y3L4 Proj'], 'r'),
+ ... ('david', orgs['Bet Proj'], 'wrp'),
+ ... ('edgar', orgs['Ynod Devs'], 'up'),
+ ... ('frank', orgs['Ynod SAs'], 'uwrp'),
+ ... ('frank', orgs['Ynod Admins'], 'w'),
+ ... ('gertrude', orgs['Ynod Zookd Task Force'], 'uwrp'),
+ ... ('harriet', orgs['Ynod Zookd Task Force'], 'w'),
+ ... ('harriet', orgs['Ynod Admins'], 'r'),
+ ... ('ignas', orgs['Zookd Admins'], 'r'),
+ ... ('ignas', orgs['Zookd Corp Management'], 'w'),
+ ... ('karyn', orgs['Zookd Corp Management'], 'uwrp'),
+ ... ('karyn', orgs['Ynod Corp Management'], 'uwrp'),
+ ... ('lettie', orgs['Zookd Corp Management'], 'u'),
+ ... ('lettie', orgs['Ynod Zookd Task Force'], 'w'),
+ ... ('lettie', orgs['Zookd SAs'], 'w'),
+ ... ('molly', orgs['Zookd SAs'], 'uwrp'),
+ ... ('nancy', orgs['Zookd Devs'], 'wrp'),
+ ... ('nancy', orgs['Zookd hOgnmd'], 'u'),
+ ... ('ophelia', orgs['Zookd Corp Management'], 'w'),
+ ... ('ophelia', orgs['Zookd Devs'], 'r'),
+ ... ('ophelia', orgs['Zookd Nbd'], 'p'),
+ ... ('pat', orgs['Zookd Nbd'], 'wrp')):
+ ... assert prin in principal_ids
+ ... role_ids = [get_role(l) for l in role_ids]
+ ... role = roles[next] = Roles(prin, role_ids, org)
+ ... role.key = next
+ ... next += 1
+ ... catalog.index(role)
+ ...
+
+Now we can begin to do searches [#real_value_tokens]_. They are not
+transitive yet--we have not yet defined how to deal with these
+transitively, and the default transitive queries factory doesn't know
+how to interpret them.
+
+What are all the role settings for ophelia?
+
+ >>> sorted(catalog.findRelations({'principal_id': 'ophelia'}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Roles instance (ophelia has publisher in Zookd Nbd)>,
+ <Roles instance (ophelia has reviewer in Zookd Devs)>,
+ <Roles instance (ophelia has writer in Zookd Corp Management)>]
+
+That answer does not need to be transitive.
+
+Next question. Where does ophelia have the 'writer' role?
+
+ >>> list(catalog.findValues(
+ ... 'organization', {'principal_id': 'ophelia',
+ ... 'role_id': 'writer'}))
+ [<Organization instance "Zookd Corp Management">]
+
+Well, that's correct intransitively. Do we need a transitive queries
+factory? No! This is a great chance to look at the token join we talked
+about in the previous section. This should actually be a two-step
+operation: find all of the organizations in which ophelia has writer,
+and then find all of the transitive parts to that organization.
+
+ >>> sorted(catalog.findRelations({None: zc.relation.Any(
+ ... catalog.findValueTokens('organization',
+ ... {'principal_id': 'ophelia',
+ ... 'role_id': 'writer'}))}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Organization instance "Ynod Zookd Task Force">,
+ <Organization instance "Zookd Admins">,
+ <Organization instance "Zookd Corp Management">,
+ <Organization instance "Zookd Devs">,
+ <Organization instance "Zookd Nbd">,
+ <Organization instance "Zookd SAs">,
+ <Organization instance "Zookd hOgnmd">]
+
+That's more like it.
+
+Next question. What users have roles in the 'Zookd Devs' organization?
+Intransitively, that's pretty easy.
+
+ >>> sorted(catalog.findValueTokens(
+ ... 'principal_id', t({'organization': orgs['Zookd Devs']})))
+ ['nancy', 'ophelia']
+
+Transitively, we should do another join.
+
+ >>> org_id = registry.getId(orgs['Zookd Devs'])
+ >>> sorted(catalog.findValueTokens(
+ ... 'principal_id', {
+ ... 'organization': zc.relation.any(
+ ... org_id, *catalog.findRelationTokens({'part': org_id}))}))
+ ['abe', 'ignas', 'karyn', 'lettie', 'nancy', 'ophelia']
+
+That's a little awkward, but it does the trick.
+
+Last question, and the kind of question that started the entire example.
+ What roles does ophelia have in the "Zookd Nbd" organization?
+
+ >>> list(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'ophelia'})))
+ ['publisher']
+
+Intransitively, that's correct. But, transitively, ophelia also has
+reviewer and writer, and that's the answer we want to be able to get quickly.
+
+We could ask the question a different way, then, again leveraging a join.
+
+ >>> org_id = registry.getId(orgs['Zookd Nbd'])
+ >>> sorted(catalog.findValueTokens(
+ ... 'role_id', {
+ ... 'organization': zc.relation.any(
+ ... org_id,
+ ... *catalog.findRelationTokens({'part': org_id})),
+ ... 'principal_id': 'ophelia'}))
+ ['publisher', 'reviewer', 'writer']
+
+It has the same annoyance factor as the previous join, but, again, it
+does the trick. Let's make a function that makes that call so we can
+compare its results with some others.
+
+ >>> def getRolesInOrganization(principal_id, org):
+ ... org_id = registry.getId(org)
+ ... return sorted(catalog.findValueTokens(
+ ... 'role_id', {
+ ... 'organization': zc.relation.any(
+ ... org_id,
+ ... *catalog.findRelationTokens({'part': org_id})),
+ ... 'principal_id': principal_id}))
+ ...
+ >>> getRolesInOrganization('ophelia', orgs['Zookd Nbd'])
+ ['publisher', 'reviewer', 'writer']
+
+But what if we want to define a custom transitive queries factory to
+make the query transitive in the way we expect?
+
+A transitive query factory is a callable that takes four arguments: a
+chain of relations, a query, the catalog, and a dict cache. The last
+token in the relation chain is the most recent. The query is the one
+that *started* the search (with some exceptions for transitive indexes
+that we'll describe later). The output is expected to be an iterable of
+queries to search further from the given chain of relations.
+
+Here's a flawed approach to this problem.
+
+ >>> def flawed_factory(relchain, query, catalog, cache):
+ ... if (len(query) != 2 or
+ ... 'organization' not in query or
+ ... 'principal_id' not in query):
+ ... return
+ ... current = catalog.findValueTokens( # don't mutate this result!!
+ ... 'organization', {None: relchain[-1]}, 1)
+ ... if current:
+ ... organizations = catalog.findRelationTokens(
+ ... {'part': zc.relation.Any(current)})
+ ... query['organization'] = zc.relation.Any(organizations)
+ ... yield query
+ ...
+
+That works for our current example.
+
+ >>> sorted(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'ophelia'}),
+ ... transitiveQueriesFactory=flawed_factory))
+ ['publisher', 'reviewer', 'writer']
+
+However, it won't work for other similar queries.
+
+ >>> getRolesInOrganization('abe', orgs['Zookd Nbd'])
+ ['publisher', 'reviewer', 'user manager', 'writer']
+ >>> sorted(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'abe'}),
+ ... transitiveQueriesFactory=flawed_factory))
+ []
+
+oops.
+
+The flawed_factory is actually a useful pattern for more typical relation
+traversal. It goes from relation to relation to relation, and ophelia has
+connected relations all the way to the top. However, abe only has them at
+the top, so nothing is traversed.
+
+We only need a slight change, then. We need to be able to traverse
+"through thin air".
+
+We need a new transitive queries factory, and we'll index it as well.
+
+First, we want to define our transitive query factory. As described in the
+introduction to the problem, the question to ask is to find
+the roles a user has in one organization and above. Because of the
+arrangement of these relationships, this is not a simple transposition of
+value names, as we saw in the README example (and as is common).
+
+We want a search that, given an organization and a principal_id, searches
+up for the same principal_id in parent organizations. As a new wrinkle,
+we need to be able to search up parent organizations even if there is not a
+match for a given organization.
+
+ ##>>> def transitiveFactory(relchain, query, index, cache):
+
+
+XXX make default transitive queries factory able to be multiple
+
+XXX indexing funky transitive query factory will not update correctly
+ because the search charcteristics are different than the update
+ characteristics
+
+
+
+.. ......... ..
+.. Footnotes ..
+.. ......... ..
+
+.. [#ZODB] Here we will set up a ZODB instance for us to use.
+
+ >>> from ZODB.tests.util import DB
+ >>> db = DB()
+ >>> conn = db.open()
+ >>> root = conn.root()
+
+.. [#faux_intid] Here's a simple persistent keyreference. Notice that it is
+ not persistent itself: this is important for conflict resolution to be
+ able to work (which we don't show here, but we're trying to lean more
+ towards real usage for this example).
+
+ >>> class Reference(object): # see zope.app.keyreference
+ ... def __init__(self, obj):
+ ... self.object = obj
+ ... def __cmp__(self, other):
+ ... # this doesn't work during conflict resolution. See
+ ... # zope.app.keyreference.persistent, 3.5 release, for current
+ ... # best practice.
+ ... if not isinstance(other, Reference):
+ ... raise ValueError('can only compare with Reference objects')
+ ... if self.object._p_jar is None or other.object._p_jar is None:
+ ... raise ValueError(
+ ... 'can only compare when both objects have connections')
+ ... return cmp(
+ ... (self.object._p_jar.db().database_name, self.object._p_oid),
+ ... (other.object._p_jar.db().database_name, other.object._p_oid),
+ ... )
+ ...
+
+ Here's a simple integer identifier tool.
+
+ >>> import persistent
+ >>> import BTrees
+ >>> class Registry(persistent.Persistent): # see zope.app.intid
+ ... def __init__(self, family=BTrees.family32):
+ ... self.family = family
+ ... self.ids = self.family.IO.BTree()
+ ... self.refs = self.family.OI.BTree()
+ ... def getId(self, obj):
+ ... if not isinstance(obj, persistent.Persistent):
+ ... raise ValueError('not a persistent object', obj)
+ ... if obj._p_jar is None:
+ ... self._p_jar.add(obj)
+ ... ref = Reference(obj)
+ ... id = self.refs.get(ref)
+ ... if id is None:
+ ... # naive for conflict resolution; see zope.app.intid
+ ... if self.ids:
+ ... id = self.ids.maxKey() + 1
+ ... else:
+ ... id = self.family.minint
+ ... self.ids[id] = ref
+ ... self.refs[ref] = id
+ ... return id
+ ... def __contains__(self, obj):
+ ... if (not isinstance(obj, persistent.Persistent) or
+ ... obj._p_oid is None):
+ ... return False
+ ... return Reference(obj) in self.refs
+ ... def getObject(self, id, default=None):
+ ... res = self.ids.get(id, None)
+ ... if res is None:
+ ... return default
+ ... else:
+ ... return res.object
+ ... def remove(self, r):
+ ... if isinstance(r, (int, long)):
+ ... self.refs.pop(self.ids.pop(r))
+ ... elif (not isinstance(r, persistent.Persistent) or
+ ... r._p_oid is None):
+ ... raise LookupError(r)
+ ... else:
+ ... self.ids.pop(self.refs.pop(Reference(r)))
+ ...
+ >>> registry = root['registry'] = Registry()
+
+ >>> import transaction
+ >>> transaction.commit()
+
+.. [#show_remaining_token_methods] For what it's worth, here are some small
+ examples of the remaining token-related methods.
+
+ These two are the singular versions of `tokenizeRelations` and
+ `resolveRelationTokens`.
+
+ `tokenizeRelation` returns a token for the given relation.
+
+ >>> catalog.tokenizeRelation(orgs['Zookd Corp Management']) == (
+ ... registry.getId(orgs['Zookd Corp Management']))
+ True
+
+ `resolveRelationToken` returns a relation for the given token.
+
+ >>> catalog.resolveRelationToken(registry.getId(
+ ... orgs['Zookd Corp Management'])) is orgs['Zookd Corp Management']
+ True
+
+ The "values" ones are a bit lame to show now, since the only value
+ we have right now is not tokenized but used straight up. But here
+ goes, showing some fascinating no-ops.
+
+ `tokenizeValues`, returns an iterable of tokens for the values of
+ the given index name.
+
+ >>> list(catalog.tokenizeValues((1,2,3), 'part'))
+ [1, 2, 3]
+
+ `resolveValueTokens` returns an iterable of values for the tokens of
+ the given index name.
+
+ >>> list(catalog.resolveValueTokens((1,2,3), 'part'))
+ [1, 2, 3]
+
+.. [#real_value_tokens] We can also show the values token methods more
+ sanely now.
+
+ >>> original = sorted((orgs['Zookd Devs'], orgs['Ynod SAs']))
+ >>> tokens = list(catalog.tokenizeValues(original, 'organization'))
+ >>> original == sorted(catalog.resolveValueTokens(tokens, 'organization'))
+ True
Property changes on: Sandbox/poster/zc.relation/trunk/src/zc/relation/example.txt
___________________________________________________________________
Name: svn:eol-style
+ native
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/interfaces.py
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/interfaces.py (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/interfaces.py 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,332 @@
+##############################################################################
+#
+# Copyright (c) 2004 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""interfaces
+"""
+import zope.interface
+
+class ICircularRelationPath(zope.interface.Interface):
+ """A tuple that has a circular relation in the very final element of
+ the path."""
+
+ cycled = zope.interface.Attribute(
+ """a list of the searches needed to continue the cycle""")
+
+class IQueryFactory(zope.interface.Interface):
+ def __call__(query, catalog, cache):
+ """if query matches, return `getQueries` callable; else return None.
+
+ A getQueries callable receives a relchain. The last relation token in
+ relchain is the most recent, and if you are using search indexes may be
+ the only reliable one. Return an iterable of queries to search
+ further from given relchain.
+
+ IMPORTANT: the getQueries is first called with an empty tuple. This
+ shou normally yield the original query, but can yield one or more
+ arbitrary queries as desired.
+ """
+
+class IFilter(zope.interface.Interface):
+ def __call__(relchain, query, index, cache):
+ """return boolean: whether to accept the given relchain.
+ last relation token in relchain is the most recent.
+ query is original query that started the search.
+ Used for the filter and targetFilter arguments of the IIndex query
+ methods. Cache is a dictionary that will be used throughout a given
+ search."""
+
+class IMessageListener(zope.interface.Interface):
+
+ def relationAdded(token, catalog, additions):
+ """message: relation token has been added to catalog.
+
+ additions is a dictionary of {value name : iterable of added value
+ tokens}.
+ """
+
+ def relationModified(token, catalog, additions, removals):
+ """message: relation token has been updated in catalog.
+
+ additions is a dictionary of {value name : iterable of added value
+ tokens}.
+ removals is a dictionary of {value name : iterable of removed value
+ tokens}.
+ """
+
+ def relationRemoved(token, catalog, removals):
+ """message: relation token has been removed from catalog.
+
+ removals is a dictionary of {value name : iterable of removed value
+ tokens}.
+ """
+
+ def sourceCleared(catalog):
+ """message: the catalog has been cleared."""
+
+class IListener(IMessageListener):
+
+ def sourceAdded(catalog):
+ """message: you've been added as a listener to the given catalog."""
+
+ def sourceRemoved(catalog):
+ """message: you've been removed as a listener from the given catalog.
+ """
+
+class ISearchIndex(IMessageListener):
+
+ def copy(catalog=None):
+ """return a copy of this index, bound to provided catalog if given"""
+
+ def setCatalog(catalog):
+ """set the search index to be using the given catalog.
+
+ Should immediately being index up-to-date if catalog has content.
+
+ if index already has a catalog, raise an error.
+
+ If provided catalog is None, clear catalog and indexes.
+ """
+
+ def getResults(name, query, maxDepth, filter, targetQuery,
+ targetFilter, queryFactory):
+ """return results for search if available, and None if not
+
+ Returning a non-None value means that this search index claims the
+ search. No other search indexes will be consulted, and the given
+ results are believed to be accurate.
+ """
+
+class ICatalog(zope.interface.Interface):
+
+ family = zope.interface.Attribute(
+ """BTrees.family32 or BTrees.family64. Influences defaults.""")
+
+ def index(relation):
+ """obtains the token for the relation and indexes"""
+
+ def index_doc(token, relation):
+ """indexes relation as given token"""
+
+ def unindex(relation):
+ """obtains the token for the relation and unindexes"""
+
+ def unindex_doc(relation):
+ """unindexes relation for given token"""
+
+ def __contains__(relation):
+ """returns whether the relation is in the index"""
+
+ def __len__():
+ """returns number of relations in catalog."""
+
+ def __iter__():
+ """return iterator of relations in catalog"""
+
+ def clear():
+ """clean catalog to index no relations"""
+
+ def copy(klass=None):
+ """return a copy of index, using klass (__new__) if given."""
+
+ def addValueIndex(element, dump=None, load=None, btree=None,
+ multiple=False, name=None):
+ """add a value index for given element.
+
+ element may be interface element or callable. Here are the other
+ arguments.
+
+ - `dump`, the tokenizer is a callable taking (obj, index, cache)
+ and returning a token. If it is None, that means that the value
+ is already a sufficient token.
+
+ - `load` is the token resolver, a callable taking (token, index, cache)
+ to return the object which the token represents. If it is None,
+ that means that the token is the value. If you specify None
+ for `dump` or `load`, you must also specify None for the other.
+
+ - `btree` is the btree module to use to store and process the tokens,
+ such as BTrees.OOBTree. Defaults to catalog.family.IFBTree.
+
+ - `multiple` is a boolean indicating whether the value is a
+ collection.
+
+ - `name` is the name of the index in the catalog. If this is not
+ supplied, the element's `__name__` is used.
+ """
+
+ def iterValueIndexInfo():
+ """return iterable of dicts, each with data for added value indexes.
+
+ See arguments to addValueIndex for keys in dicts."""
+
+ def removeValueIndex(name):
+ """remove value index of given name"""
+
+ def addListener(listener):
+ """add a listener.
+
+ Listener is expected to fulfill IListener.
+
+ If listener is Persistent, make a weak reference to it."""
+
+ def iterListeners():
+ """return iterator of all available listeners"""
+
+ def removeListener(listener):
+ """remove listener"""
+
+ def addDefaultQueryFactory(factory):
+ """add a default query factory."""
+
+ def iterDefaultQueryFactories():
+ """return iterator of all available factories"""
+
+ def removeDefaultQueryFactory(factory):
+ """remove factory"""
+
+ def addSearchIndex(ix):
+ """add a search index"""
+
+ def iterSearchIndexes():
+ """return iterator of all search indexes"""
+
+ def removeSearchIndex(ix):
+ """remove search index"""
+
+ def getRelationModuleTools():
+ """return dict with useful BTree tools.
+
+ keys will include 'BTree', 'Bucket', 'Set', 'TreeSet', 'difference',
+ 'dump', 'intersection', 'load', and 'union'. may also include
+ 'multiunion'.
+ """
+
+ def getValueModuleTools(name):
+ """return dict with useful BTree tools for named value index.
+
+ keys will include 'BTree', 'Bucket', 'Set', 'TreeSet', 'difference',
+ 'dump', 'intersection', 'load', and 'union'. may also include
+ 'multiunion' and other keys for internal use.
+ """
+
+ def getRelationTokens(query=None):
+ """return tokens for given intransitive query, or all relation tokens.
+
+ Returns a None if no Tokens for query.
+
+ This also happens to be equivalent to `findRelationTokens` with
+ a maxDepth of 1, and no other arguments other than the optional
+ query, except that if there are no matches, `findRelationTokens`
+ returns an empty set (so it *always* returns an iterable). """
+
+ def getValueTokens(name, reltoken=None):
+ """return value tokens for name, limited to relation token if given.
+
+ returns a none if no tokens.
+
+ This is identical to `findValueTokens`except that if there are
+ no matches, `findValueTokens` returns an empty set (so it
+ *always* returns an iterable) """
+
+ def yieldRelationTokenChains(query, relData, maxDepth, checkFilter,
+ checkTargetFilter, getQueries,
+ findCycles=True):
+ """a search workhorse for searches that use a query factory
+
+ TODO: explain. :-/"""
+
+ def findValueTokens(name, query=None, maxDepth=None, filter=None,
+ targetQuery=None, targetFilter=None,
+ transitiveQueriesFactory=None):
+ """find token results for searchTerms.
+ - name is the index name wanted for results.
+ - if query is None (or evaluates to boolean False), returns the
+ underlying btree data structure; which is an iterable result but
+ can also be used with BTree operations
+ Otherwise, same arguments as findRelationChains.
+ """
+
+ def findValues(name, query=None, maxDepth=None, filter=None,
+ targetQuery=None, targetFilter=None,
+ transitiveQueriesFactory=None):
+ """Like findValueTokens, but resolves value tokens"""
+
+ def findRelations(query): # XXX
+ """Given a single dictionary of {indexName: token}, return an iterable
+ of relations that match the query intransitively"""
+
+ def findRelationTokenChains(query, maxDepth=None, filter=None,
+ targetQuery=None, targetFilter=None,
+ transitiveQueriesFactory=None):
+ """find tuples of relation tokens for searchTerms.
+ - query is a dictionary of {indexName: token}
+ - maxDepth is None or a positive integer that specifies maximum depth
+ for transitive results. None means that the transitiveMap will be
+ followed until a cycle is detected. It is a ValueError to provide a
+ non-None depth if transitiveQueriesFactory is None and
+ index.defaultTransitiveQueriesFactory is None.
+ - filter is a an optional callable providing IFilter that determines
+ whether relations will be traversed at all.
+ - targetQuery is an optional query that specifies that only paths with
+ final relations that match the targetQuery should be returned.
+ It represents a useful subset of the jobs that can be done with the
+ targetFilter.
+ - targetFilter is an optional callable providing IFilter that
+ determines whether a given path will be included in results (it will
+ still be traversed)
+ - optional transitiveQueriesFactory takes the place of the index's
+ defaultTransitiveQueriesFactory
+ """
+
+ def findRelationChains(query, maxDepth=None, filter=None,
+ targetQuery=None, targetFilter=None,
+ transitiveQueriesFactory=None):
+ "Like findRelationTokenChains, but resolves relation tokens"
+
+ def canFind(query, maxDepth=None, filter=None, targetQuery=None,
+ targetFilter=None, transitiveQueriesFactory=None):
+ """boolean if there is any result for the given search.
+
+ Same arguments as findRelationChains.
+
+ The general algorithm for using the arguments is this:
+ try to yield a single chain from findRelationTokenChains with the
+ given arguments. If one can be found, return True, else False."""
+
+ def tokenizeQuery(query):
+ '''Given a dictionary of {indexName: value} returns a dictionary of
+ {indexname: token} appropriate for the search methods'''
+
+ def resolveQuery(query):
+ '''Given a dictionary of {indexName: token} returns a dictionary of
+ {indexname: value}'''
+
+ def tokenizeValues(values, name):
+ """Returns an iterable of tokens for the values of the given index
+ name"""
+
+ def resolveValueTokens(tokens, name):
+ """Returns an iterable of values for the tokens of the given index
+ name"""
+
+ def tokenizeRelation(rel):
+ """Returns a token for the given relation"""
+
+ def resolveRelationToken(token):
+ """Returns a relation for the given token"""
+
+ def tokenizeRelations(rels):
+ """Returns an iterable of tokens for the relations given"""
+
+ def resolveRelationTokens(tokens):
+ """Returns an iterable of relations for the tokens given"""
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/optimization.txt
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/optimization.txt (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/optimization.txt 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,178 @@
+Optimizing Relation Catalog Use
+===============================
+
+There are several best practices and optimization opportunities in regards to
+the catalog.
+
+- Use integer-keyed BTree sets when possible. They can use the BTrees'
+ `multiunion` for a speed boost. Integers' __cmp__ is reliable, and in C.
+
+- Never use persistent objects as keys. They will cause a database load every
+ time you need to look at them, they take up memory and object caches, and
+ they (as of this writing) disable conflict resolution. Intids (or similar)
+ are your best bet for representing objects, and some other immutable such as
+ strings are the next-best bet, and zope.app.keyreferences (or similar) are
+ after that.
+
+- Use multiple-token values in your queries when possible, especially in your
+ transitive query factories.
+
+- Use the cache when you are loading and dumping tokens, and in your
+ transitive query factories.
+
+- When possible, don't load or dump tokens (the values themselves may be used
+ as tokens). This is especially important when you have multiple tokens:
+ store them in a BTree structure in the same module as the zc.relation module
+ for the value.
+
+For some operations, particularly with hundreds or thousands of members in a
+single relation value, some of these optimizations can speed up some
+common-case reindexing work by around 100 times.
+
+The easiest (and perhaps least useful) optimization is that all dump
+calls and all load calls generated by a single operation share a cache
+dictionary per call type (dump/load), per indexed relation value.
+Therefore, for instance, we could stash an intids utility, so that we
+only had to do a utility lookup once, and thereafter it was only a
+single dictionary lookup. This is what the default `generateToken` and
+`resolveToken` functions in zc.relationship's index.py do: look at them
+for an example.
+
+A further optimization is to not load or dump tokens at all, but use values
+that may be tokens. This will be particularly useful if the tokens have
+__cmp__ (or equivalent) in C, such as built-in types like ints. To specify
+this behavior, you create an index with the 'load' and 'dump' values for the
+indexed attribute descriptions explicitly set to None.
+
+
+ >>> import zope.interface
+ >>> class IRelation(zope.interface.Interface):
+ ... subjects = zope.interface.Attribute(
+ ... 'The sources of the relation; the subject of the sentence')
+ ... relationtype = zope.interface.Attribute(
+ ... '''unicode: the single relation type of this relation;
+ ... usually contains the verb of the sentence.''')
+ ... objects = zope.interface.Attribute(
+ ... '''the targets of the relation; usually a direct or
+ ... indirect object in the sentence''')
+ ...
+
+ >>> import BTrees
+ >>> relations = BTrees.family32.IO.BTree()
+ >>> relations[99] = None # just to give us a start
+
+ >>> class Relation(object):
+ ... zope.interface.implements(IRelation)
+ ... def __init__(self, subjects, relationtype, objects):
+ ... self.subjects = subjects
+ ... assert relationtype in relTypes
+ ... self.relationtype = relationtype
+ ... self.objects = objects
+ ... self.id = relations.maxKey() + 1
+ ... relations[self.id] = self
+ ... def __repr__(self):
+ ... return '<%r %s %r>' % (
+ ... self.subjects, self.relationtype, self.objects)
+
+ >>> def token(rel, self):
+ ... return rel.token
+ ...
+ >>> def children(rel, self):
+ ... return rel.children
+ ...
+ >>> def dumpRelation(obj, index, cache):
+ ... return obj.id
+ ...
+ >>> def loadRelation(token, index, cache):
+ ... return relations[token]
+ ...
+
+ >>> relTypes = ['has the role of']
+ >>> def relTypeDump(obj, index, cache):
+ ... assert obj in relTypes, 'unknown relationtype'
+ ... return obj
+ ...
+ >>> def relTypeLoad(token, index, cache):
+ ... assert token in relTypes, 'unknown relationtype'
+ ... return token
+ ...
+
+ >>> import zc.relation
+ >>> factory = zc.relation.TransposingTransitiveQueriesFactory(
+ ... 'subjects', 'objects')
+ >>> catalog = zc.relation.Catalog(
+ ... dumpRelation, loadRelation, factory)
+ >>> catalog.addValueIndex(IRelation['subjects'], multiple=True)
+ >>> catalog.addValueIndex(
+ ... IRelation['relationtype'], relTypeDump, relTypeLoad,
+ ... BTrees.family32.OI, name='reltype')
+ >>> catalog.addValueIndex(IRelation['objects'], multiple=True)
+
+ >>> rel = Relation((1,), 'has the role of', (2,))
+ >>> catalog.index(rel)
+ >>> list(catalog.findValueTokens('objects', {'subjects': 1}))
+ [2]
+
+If you have single relations that relate hundreds or thousands of
+objects, it can be a huge win if the value is a 'multiple' of the same
+type as the stored BTree for the given attribute. The default BTree
+family for attributes is IFBTree; IOBTree is also a good choice, and may
+be preferrable for some applications.
+
+ >>> catalog.unindex(rel)
+ >>> rel = Relation(
+ ... BTrees.family32.IF.TreeSet((1,)), 'has the role of',
+ ... BTrees.family32.IF.TreeSet())
+ >>> catalog.index(rel)
+ >>> list(catalog.findValueTokens('objects', {'subjects': 1}))
+ []
+ >>> list(catalog.findValueTokens('subjects', {'objects': None}))
+ [1]
+
+Reindexing is where some of the big improvements can happen. The following
+gyrations exercise the optimization code.
+
+ >>> rel.objects.insert(2)
+ 1
+ >>> catalog.index(rel)
+ >>> list(catalog.findValueTokens('objects', {'subjects': 1}))
+ [2]
+ >>> rel.subjects = BTrees.family32.IF.TreeSet((3,4,5))
+ >>> catalog.index(rel)
+ >>> list(catalog.findValueTokens('objects', {'subjects': 3}))
+ [2]
+
+ >>> rel.subjects.insert(6)
+ 1
+ >>> catalog.index(rel)
+ >>> list(catalog.findValueTokens('objects', {'subjects': 6}))
+ [2]
+
+ >>> rel.subjects.update(range(100, 200))
+ 100
+ >>> catalog.index(rel)
+ >>> list(catalog.findValueTokens('objects', {'subjects': 100}))
+ [2]
+
+ >>> rel.subjects = BTrees.family32.IF.TreeSet((3,4,5,6))
+ >>> catalog.index(rel)
+ >>> list(catalog.findValueTokens('objects', {'subjects': 3}))
+ [2]
+
+ >>> rel.subjects = BTrees.family32.IF.TreeSet(())
+ >>> catalog.index(rel)
+ >>> list(catalog.findValueTokens('objects', {'subjects': 3}))
+ []
+
+ >>> rel.subjects = BTrees.family32.IF.TreeSet((3,4,5))
+ >>> catalog.index(rel)
+ >>> list(catalog.findValueTokens('objects', {'subjects': 3}))
+ [2]
+
+tokenizeValues and resolveValueTokens work correctly without loaders and
+dumpers--that is, they do nothing.
+
+ >>> catalog.tokenizeValues((3,4,5), 'subjects')
+ (3, 4, 5)
+ >>> catalog.resolveValueTokens((3,4,5), 'subjects')
+ (3, 4, 5)
Property changes on: Sandbox/poster/zc.relation/trunk/src/zc/relation/optimization.txt
___________________________________________________________________
Name: svn:eol-style
+ native
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/queryfactory.py
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/queryfactory.py (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/queryfactory.py 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,56 @@
+import persistent
+import BTrees
+import zope.interface
+
+import zc.relation.interfaces
+import zc.relation.catalog
+
+##############################################################################
+# a common case transitive queries factory
+
+_marker = object()
+
+class TransposingTransitive(persistent.Persistent):
+ zope.interface.implements(zc.relation.interfaces.IQueryFactory)
+
+ def __init__(self, name1, name2):
+ self.names = [name1, name2] # a list so we can use index
+
+ def __call__(self, query, catalog):
+ static = []
+ name = other = _marker
+ for nm, val in query.items():
+ try:
+ ix = self.names.index(nm)
+ except ValueError:
+ static.append((nm, val))
+ else:
+ if name is not _marker:
+ # both were specified: no transitive search known.
+ return None
+ else:
+ name = nm
+ other = self.names[not ix]
+ if name is not _marker:
+ def getQueries(relchain):
+ if not relchain:
+ yield query
+ return
+ if other is None:
+ rels = relchain[-1]
+ else:
+ tokens = catalog.getValueTokens(other, relchain[-1])
+ if not tokens:
+ return
+ rels = zc.relation.catalog.Any(tokens)
+ res = BTrees.family32.OO.Bucket(static)
+ res[name] = rels
+ yield res
+ return getQueries
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__) and
+ set(self.names) == set(other.names))
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/searchindex.py
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/searchindex.py (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/searchindex.py 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,439 @@
+import copy
+import itertools
+
+import persistent
+import BTrees
+import zope.interface
+
+import zc.relation.interfaces
+import zc.relation.queryfactory
+import zc.relation.catalog
+import zc.relation.searchindex
+
+##############################################################################
+# common case search indexes
+
+_marker = object()
+
+class TransposingTransitive(persistent.Persistent):
+ zope.interface.implements(zc.relation.interfaces.ISearchIndex)
+
+ name = index = catalog = None
+
+ def __init__(self, forward, reverse, static=(), names=()):
+ # normalize
+ if getattr(static, 'items', None) is not None:
+ static = static.items()
+ self.static = tuple(sorted(static))
+ self.names = BTrees.family32.OO.Bucket([(nm, None) for nm in names])
+ self.forward = forward
+ self.reverse = reverse
+ self.update = frozenset(
+ itertools.chain((k for k, v in static), (forward, reverse)))
+ match = list(static)
+ match.append((forward, _marker))
+ match.sort()
+ self._match = tuple(match)
+ match = list(static)
+ match.append((reverse, _marker))
+ match.sort()
+ self._reverse_match = tuple(match)
+ self.factory = zc.relation.queryfactory.TransposingTransitive(
+ forward, reverse)
+
+ def copy(self, catalog=None):
+ new = self.__class__.__new__(self.__class__)
+ new.names = BTrees.family32.OO.Bucket()
+ for nm, val in self.names.items():
+ if val is not None:
+ new_val = zc.relation.catalog.getMapping(
+ self.catalog.getValueModuleTools(nm))()
+ for k, v in val.items():
+ new_val[k] = copy.copy(v)
+ val = new_val
+ new.names[nm] = val
+ new.forward = self.forward
+ new.reverse = self.reverse
+ new.factory = self.factory
+ new.static = self.static
+ new._match = self._match
+ new._reverse_match = self._reverse_match
+ if self.index is not None:
+ if catalog is None:
+ catalog = self.catalog
+ new.catalog = catalog
+ new.index = zc.relation.catalog.getMapping(
+ self.catalog.getRelationModuleTools())()
+ for k, v in self.index.items():
+ new.index[k] = copy.copy(v)
+ new.factory = self.factory
+ return new
+
+ def setCatalog(self, catalog):
+ if catalog is None:
+ self.index = self.catalog = None
+ return
+ elif self.catalog is not None:
+ raise ValueError('catalog already set')
+ self.catalog = catalog
+ self.index = zc.relation.catalog.getMapping(
+ self.catalog.getRelationModuleTools())()
+ for nm in self.names.keys():
+ self.names[nm] = zc.relation.catalog.getMapping(
+ self.catalog.getValueModuleTools(nm))()
+ for token in catalog.getRelationTokens():
+ if token not in self.index:
+ self._index(token)
+
+ def _buildQuery(self, dynamic):
+ res = BTrees.family32.OO.Bucket(self.static)
+ res[dynamic] = None
+ return res
+
+ def _index(self, token, removals=None, remove=False):
+ starts = set((token,))
+ if removals and self.forward in removals:
+ starts.update(t for t in removals[self.forward] if t is not None)
+ tokens = set()
+ reverseQuery = self._buildQuery(self.reverse)
+ for token in starts:
+ getQueries = self.factory(dict(reverseQuery), self.catalog)
+ tokens.update(chain[-1] for chain in
+ self.catalog.yieldRelationTokenChains(
+ reverseQuery, ((token,),), None, None, None,
+ getQueries))
+ if remove:
+ tokens.remove(token)
+ self.index.pop(token, None)
+ for ix in self.names.values():
+ ix.pop(token, None)
+ # because of the possibilty of cycles involving this token in the
+ # previous state, we first clean out all of the items "above"
+ for token in tokens:
+ self.index.pop(token, None)
+ # now we go back and try to fill them back in again. If there had
+ # been a cycle, we can see now that we have to work down.
+ relTools = self.catalog.getRelationModuleTools()
+ query = self._buildQuery(self.forward)
+ getQueries = self.factory(query, self.catalog)
+ for token in tokens:
+ if token in self.index: # must have filled it in during a cycle
+ continue
+ stack = [[token, None, set(), [], set((token,)), False]]
+ while stack:
+ token, child, sets, empty, traversed_tokens, cycled = stack[-1]
+ if not sets:
+ rels = zc.relation.catalog.multiunion(
+ (self.catalog.getRelationTokens(q) for q in
+ getQueries([token])), relTools)
+ for rel in rels:
+ if rel == token:
+ # cycles on itself.
+ sets.add(relTools['Set']((token,)))
+ continue
+ indexed = self.index.get(rel)
+ if indexed is None:
+ iterator = reversed(stack)
+ traversed = [iterator.next()]
+ for info in iterator:
+ if rel == info[0]:
+ sets = info[2]
+ traversed_tokens = info[4]
+ cycled = True
+ for trav in traversed:
+ sets.update(trav[2])
+ trav[2] = sets
+ traversed_tokens.update(trav[4])
+ trav[4] = traversed_tokens
+ trav[5] = True
+ break
+ traversed.append(info)
+ else:
+ empty.append(rel)
+ else:
+ sets.add(indexed)
+ sets.add(rels)
+ if child is not None:
+ sets.add(child)
+ # clear it out
+ child = stack[-1][1] = None
+ if empty:
+ # We have one of two classes of situations. Either this
+ # *is* currently a cycle, and the result for this and all
+ # children will be the same set; or this *may* be
+ # a cycle, because this is an initial indexing.
+ # Walk down, passing token.
+ next = empty.pop()
+ stack.append(
+ [next, None, set(), [], set((next,)), False])
+ else:
+ stack.pop()
+ assert stack or not cycled, (
+ 'top level should never consider itself cycled')
+ if not cycled:
+ rels = zc.relation.catalog.multiunion(
+ sets, relTools)
+ rels.insert(token)
+ names = {}
+ for nm in self.names.keys():
+ names[nm] = zc.relation.catalog.multiunion(
+ (self.catalog.getValueTokens(nm, rel)
+ for rel in rels),
+ self.catalog.getValueModuleTools(nm))
+ for token in traversed_tokens:
+ self.index[token] = rels
+ for nm, ix in self.names.items():
+ ix[token] = names[nm]
+ if stack:
+ stack[-1][1] = rels
+
+ # listener interface
+
+ def relationAdded(self, token, catalog, additions):
+ if token in self.index and not self.update.intersection(additions):
+ return # no changes; don't do work
+ self._index(token)
+
+ def relationModified(self, token, catalog, additions, removals):
+ if (token in self.index and not self.update.intersection(additions) and
+ not self.update.intersection(removals)):
+ return # no changes; don't do work
+ self._index(token, removals)
+
+ def relationRemoved(self, token, catalog, removals):
+ self._index(token, removals, remove=True)
+
+ def sourceCleared(self, catalog):
+ if self.catalog is catalog:
+ self.setCatalog(None)
+ self.setCatalog(catalog)
+
+ # end listener interface
+
+ def getResults(self, name, query, maxDepth, filter, targetQuery,
+ targetFilter, queryFactory):
+ if (queryFactory != self.factory or
+ name is not None and name not in self.names or
+ maxDepth is not None or filter is not None or
+ len(query) != len(self.static) + 1 or
+ name is not None and (targetQuery or targetFilter is not None)):
+ return None
+ for given, match in itertools.izip(query.items(), self._match):
+ if (given[0] != match[0] or
+ match[1] is not _marker and given[1] != match[1]):
+ return None
+ # TODO: try to use intransitive index, if available
+ rels = self.catalog.getRelationTokens(query)
+ if name is None:
+ tools = self.catalog.getRelationModuleTools()
+ ix = self.index
+ else:
+ tools = self.catalog.getValueModuleTools(name)
+ ix = self.names[name]
+ if rels is None:
+ return tools['Set']()
+ elif not rels:
+ return rels
+ res = zc.relation.catalog.multiunion(
+ (ix.get(rel) for rel in rels), tools)
+ if name is None:
+ checkTargetFilter = zc.relation.catalog.makeCheckTargetFilter(
+ targetQuery, targetFilter, self.catalog)
+ if checkTargetFilter is not None:
+ if not checkTargetFilter: # no results
+ res = tools['Set']()
+ else:
+ res = tools['Set'](
+ rel for rel in res if checkTargetFilter([rel], query))
+ return res
+
+
+class Intransitive(persistent.Persistent):
+ zope.interface.implements(
+ zc.relation.interfaces.ISearchIndex,
+ zc.relation.interfaces.IListener)
+
+ index = catalog = name = queriesFactory = None
+ update = frozenset()
+
+ def __init__(self, names, name=None,
+ queriesFactory=None, getValueTokens=None, update=None):
+ self.names = tuple(sorted(names))
+ self.name = name
+ self.queriesFactory = queriesFactory
+ if update is None:
+ update = names
+ if name is not None:
+ update += (name,)
+ self.update = frozenset(update)
+ self.getValueTokens = getValueTokens
+
+
+ def copy(self, catalog=None):
+ res = self.__class__.__new__(self.__class__)
+ if self.index is not None:
+ if catalog is None:
+ catalog = self.catalog
+ res.catalog = catalog
+ res.index = BTrees.family32.OO.BTree()
+ for k, v in self.index.items():
+ res.index[k] = copy.copy(v)
+ res.names = self.names
+ res.name = self.name
+ res.queriesFactory = self.queriesFactory
+ res.update = self.update
+ res.getValueTokens = self.getValueTokens
+ return res
+
+ def setCatalog(self, catalog):
+ if catalog is None:
+ self.index = self.catalog = None
+ return
+ elif self.catalog is not None:
+ raise ValueError('catalog already set')
+ self.catalog = catalog
+ self.index = BTrees.family32.OO.BTree()
+ self.sourceAdded(catalog)
+
+ def relationAdded(self, token, catalog, additions):
+ self._index(token, catalog, additions)
+
+ def relationModified(self, token, catalog, additions, removals):
+ self._index(token, catalog, additions, removals)
+
+ def relationRemoved(self, token, catalog, removals):
+ self._index(token, catalog, removals=removals, removed=True)
+
+ def _index(self, token, catalog, additions=None, removals=None,
+ removed=False):
+ if ((not additions or not self.update.intersection(additions)) and
+ (not removals or not self.update.intersection(removals))):
+ return
+ if additions is None:
+ additions = {}
+ if removals is None:
+ removals = {}
+ for query in self.getQueries(token, catalog, additions, removals,
+ removed):
+ self._indexQuery(tuple(query.items()))
+
+ def _indexQuery(self, query):
+ dquery = dict(query)
+ if self.queriesFactory is not None:
+ getQueries = self.queriesFactory(dquery, self.catalog)
+ else:
+ getQueries = lambda empty: (query,)
+ res = zc.relation.catalog.multiunion(
+ (self.catalog.getRelationTokens(q) for q in getQueries(())),
+ self.catalog.getRelationModuleTools())
+ if not res:
+ self.index.pop(query, None)
+ else:
+ if self.name is not None:
+ res = zc.relation.catalog.multiunion(
+ (self.catalog.getValueTokens(self.name, r)
+ for r in res),
+ self.catalog.getValueModuleTools(self.name))
+ self.index[query] = res
+
+ def sourceAdded(self, catalog):
+ queries = set()
+ for token in catalog.getRelationTokens():
+ additions = dict(
+ (info['name'], catalog.getValueTokens(info['name'], token))
+ for info in catalog.iterValueIndexInfo())
+ queries.update(
+ tuple(q.items()) for q in
+ self.getQueries(token, catalog, additions, {}, False))
+ for q in queries:
+ self._indexQuery(q)
+
+ def sourceRemoved(self, catalog):
+ # this only really makes sense if the getQueries/getValueTokens was
+ # changed
+ queries = set()
+ for token in catalog.getRelationTokens():
+ removals = dict(
+ (info['name'], catalog.getValueTokens(info['name'], token))
+ for info in catalog.iterValueIndexInfo())
+ queries.update(
+ tuple(q.items()) for q in
+ self.getQueries(token, catalog, {}, removals, True))
+ for q in queries:
+ self._indexQuery(q)
+
+ def sourceCleared(self, catalog):
+ if self.catalog is catalog:
+ self.setCatalog(None)
+ self.setCatalog(catalog)
+
+ def getQueries(self, token, catalog, additions, removals, removed):
+ source = {}
+ for name in self.names:
+ values = set()
+ for changes in (additions, removals):
+ value = changes.get(name, _marker)
+ if value is None:
+ values.add(value)
+ elif value is not _marker:
+ values.update(value)
+ if values:
+ if not removed and source:
+ source.clear()
+ break
+ source[name] = values
+ if removed and not source:
+ return
+ for name in self.names:
+ res = None
+ if self.getValueTokens is not None:
+ res = self.getValueTokens(self, name, token, catalog, source,
+ additions, removals, removed)
+ if res is None:
+ if name in source:
+ continue
+ res = set((None,))
+ current = self.catalog.getValueTokens(name, token)
+ if current:
+ res.update(current)
+ source[name] = res
+ vals = []
+ for name in self.names:
+ src = source[name]
+ iterator = iter(src)
+ value = iterator.next() # should always have at least one
+ vals.append([name, value, iterator, src])
+ while 1:
+ yield BTrees.family32.OO.Bucket(
+ [(name, value) for name, value, iterator, src in vals])
+ for s in vals:
+ name, value, iterator, src = s
+ try:
+ s[1] = iterator.next()
+ except StopIteration:
+ iterator = s[2] = iter(src)
+ s[1] = iterator.next()
+ else:
+ break
+ else:
+ break
+
+ def getResults(self, name, query, maxDepth, filter, targetQuery,
+ targetFilter, queriesFactory):
+ if (name != self.name or maxDepth not in (1, None) or
+ queriesFactory != self.queriesFactory or targetQuery
+ or filter is not None or targetFilter is not None):
+ return # TODO could maybe handle some later
+ names = []
+ query = tuple(query.items())
+ for nm, v in query:
+ if isinstance(v, zc.relation.catalog.Any):
+ return None # TODO open up
+ names.append(nm)
+ res = self.index.get(query)
+ if res is None and self.names == tuple(names):
+ if self.name is None:
+ res = self.catalog.getRelationModuleTools()['Set']()
+ else:
+ res = self.catalog.getValueModuleTools(self.name)['Set']()
+ return res
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/searchindex.txt
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/searchindex.txt (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/searchindex.txt 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,466 @@
+=================================================================
+Working with Search Indexes: zc.relation Catalog Extended Example
+=================================================================
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+This document assumes you have read the README.txt and the tokens.txt
+documents, and want to learn a bit more by example. In it, we will
+explore a set of relations that demonstrates most of the aspects of
+working with search indexes and listeners. It will not explain the
+topics that the other documents already addressed. It also describes
+an advanced use case.
+
+As we have seen in the other documents, the relation catalog supports
+search indexes. These can return the results of any search, as desired.
+ Of course, the intent is that you supply an index that optimizes the
+particular searches it claims.
+
+The searchindex module supplies a few search indexes, optimizing
+specified transitive and intransitive searches. We have seen them working
+in other docuements. We will examine them more in depth in this document.
+
+Search indexes update themselves by receiving messages via a "listener"
+interface. We will also look at how this works.
+
+The example described in this file examines a use case similar to that in
+the zc.revision or zc.vault packages: a relation describes a graph of
+other objects. Therefore, this is our first concrete example of purely
+extrinsic relations.
+
+Let's build the example story a bit. Imagine we have a graph, often a
+hierarchy, of tokens--integers. Relations specify that a given integer
+token relates to other integer tokens, with a containment denotation or
+other meaning.
+
+The integers may also have relations that specify that they represent an
+object or objects.
+
+This allows us to have a graph of objects in which changing one part of the
+graph does not require changing the rest. zc.revision and zc.vault thus
+are able to model graphs that can have multiple revisions efficiently and
+with quite a bit of metadata to support merges.
+
+Let's imagine a simple hierarchy. The relation has a `token` attribute
+and a `children` attribute; children point to tokens. Relations will
+identify themselves with ids.
+
+ >>> import BTrees
+ >>> relations = BTrees.family64.IO.BTree()
+ >>> relations[99] = None # just to give us a start
+
+ >>> class Relation(object):
+ ... def __init__(self, token, children=()):
+ ... self.token = token
+ ... self.children = BTrees.family64.IF.TreeSet(children)
+ ... self.id = relations.maxKey() + 1
+ ... relations[self.id] = self
+ ...
+
+ >>> def token(rel, self):
+ ... return rel.token
+ ...
+ >>> def children(rel, self):
+ ... return rel.children
+ ...
+ >>> def dumpRelation(obj, index, cache):
+ ... return obj.id
+ ...
+ >>> def loadRelation(token, index, cache):
+ ... return relations[token]
+ ...
+
+The standard TransposingTransitiveQueriesFactory will be able to handle this
+quite well, so we'll use that for our index.
+
+ >>> import zc.relation.queryfactory
+ >>> factory = zc.relation.queryfactory.TransposingTransitive(
+ ... 'token', 'children')
+ >>> import zc.relation.catalog
+ >>> catalog = zc.relation.catalog.Catalog(
+ ... dumpRelation, loadRelation, BTrees.family64.IO, BTrees.family64)
+ >>> catalog.addValueIndex(token)
+ >>> catalog.addValueIndex(children, multiple=True)
+ >>> catalog.addDefaultQueryFactory(factory)
+
+Now let's quickly create a hierarchy and index it.
+
+ >>> for token, children in (
+ ... (0, (1, 2)), (1, (3, 4)), (2, (10, 11, 12)), (3, (5, 6)),
+ ... (4, (13, 14)), (5, (7, 8, 9)), (6, (15, 16)), (7, (17, 18, 19)),
+ ... (8, (20, 21, 22)), (9, (23, 24)), (10, (25, 26)),
+ ... (11, (27, 28, 29, 30, 31, 32))):
+ ... catalog.index(Relation(token, children))
+ ...
+
+[#queryFactory]_ That hierarchy is arbitrary. Here's what we have, in terms of tokens
+pointing to children::
+
+ _____________0_____________
+ / \
+ ________1_______ ______2____________
+ / \ / | \
+ ______3_____ _4_ 10 ____11_____ 12
+ / \ / \ / \ / / | \ \ \
+ _______5_______ 6 13 14 25 26 27 28 29 30 31 32
+ / | \ / \
+ _7_ _8_ 9 15 16
+ / | \ / | \ / \
+ 17 18 19 20 21 22 23 24
+
+Twelve relations, with tokens 0 through 11, and a total of 33 tokens,
+including children. The ids for the 12 relations are 100 through 111,
+corresponding with the tokens of 0 through 11.
+
+Without a transitive search index, we can get all transitive results.
+The results are iterators.
+
+ >>> res = catalog.findRelationTokens({'token': 0})
+ >>> getattr(res, 'next') is None
+ False
+ >>> print getattr(res, '__len__', None)
+ None
+ >>> sorted(res)
+ [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111]
+ >>> list(res)
+ []
+
+ >>> res = catalog.findValueTokens('children', {'token': 0})
+ >>> sorted(res) == range(1, 33)
+ True
+ >>> list(res)
+ []
+
+[#findValuesUnindexed]_ `canFind` also can work transitively, and will
+use transitive search indexes, as we'll see below.
+
+ >>> catalog.canFind({'token': 1}, targetQuery={'children': 23})
+ True
+ >>> catalog.canFind({'token': 2}, targetQuery={'children': 23})
+ False
+ >>> catalog.canFind({'children': 23}, targetQuery={'token': 1})
+ True
+ >>> catalog.canFind({'children': 23}, targetQuery={'token': 2})
+ False
+
+`findRelationTokenChains` won't change, but we'll include it in the
+discussion and examples to show that.
+
+ >>> res = catalog.findRelationTokenChains({'token': 2})
+ >>> chains = list(res)
+ >>> len(chains)
+ 3
+ >>> len(list(res))
+ 0
+
+Transitive Search Indexes
+=========================
+
+Now we can add a couple of transitive search index. We'll talk about
+them a bit first.
+
+There is currently one variety of transitive index, which indexes
+relation and value searches for the transposing transitive query
+factory.
+
+The index can only be used under certain conditions.
+
+ - The search is not a request for a relation chain.
+
+ - It does not specify a maximum depth.
+
+ - Filters are not used.
+
+If it is a value search, then specific value indexes cannot be used if a
+target filter or target query are used, but the basic relation index can
+still be used in that case.
+
+The usage of the search indexes is largely transparent: set them up, and
+the relation catalog will use them for the same API calls that used more
+brute force previously. The only difference from external uses is that
+results that use an index will usually be a BTree structure, rather than
+an iterator.
+
+When you add a transitive index for a relation, you must specify the
+transitive name (or names) of the query, and the same for the reverse.
+That's all we'll do now.
+
+ >>> import zc.relation.searchindex
+ >>> catalog.addSearchIndex(
+ ... zc.relation.searchindex.TransposingTransitive(
+ ... 'token', 'children', names=('children',)))
+
+Now we should have a search index installed.
+
+There's just a bit more you can specify here: static fields for a query
+to do a bit of filtering. We don't need any of that for this example.
+
+Now how does the catalog use this index for searches? Three basic ways,
+depending on the kind of search, relations, values, or `canFind`.
+Before we start looking into the internals, let's verify that we're getting
+what we expect: correct answers, and not iterators, but BTree structures.
+
+ >>> res = catalog.findRelationTokens({'token': 0})
+ >>> list(res)
+ [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111]
+ >>> list(res)
+ [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111]
+
+ >>> res = catalog.findValueTokens('children', {'token': 0})
+ >>> list(res) == range(1, 33)
+ True
+ >>> list(res) == range(1, 33)
+ True
+
+ >>> catalog.canFind({'token': 1}, targetQuery={'children': 23})
+ True
+ >>> catalog.canFind({'token': 2}, targetQuery={'children': 23})
+ False
+
+[#findValuesIndexed]_ Note that the last two `canFind` examples from
+when we went through these examples without an index do not use the
+index, so we don't show them here: they look the wrong direction for
+this index.
+
+So how do these results happen?
+
+The first, `findRelationTokens`, and the last, `canFind`, are the most
+straightforward. The index finds all relations that match the given
+query, intransitively. Then for each relation, it looks up the indexed
+transitive results for that token. The end result is the union of all
+indexed results found from the intransitive search. `canFind` simply
+casts the result into a boolean.
+
+`findValueTokens` is the same story as above with only one more step. After
+the union of relations is calculated, the method returns the union of the
+sets of the requested value for all found relations.
+
+It will maintain itself when relations are reindexed.
+
+ >>> rel = list(catalog.findRelations({'token': 11}))[0]
+ >>> for t in (27, 28, 29, 30, 31):
+ ... rel.children.remove(t)
+ ...
+ >>> catalog.index(rel)
+
+ >>> catalog.findValueTokens('children', {'token': 0})
+ ... # doctest: +NORMALIZE_WHITESPACE
+ LFSet([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 32])
+ >>> catalog.findValueTokens('children', {'token': 2})
+ LFSet([10, 11, 12, 25, 26, 32])
+ >>> catalog.findValueTokens('children', {'token': 11})
+ LFSet([32])
+
+ >>> rel.children.remove(32)
+ >>> catalog.index(rel)
+
+ >>> catalog.findValueTokens('children', {'token': 0})
+ ... # doctest: +NORMALIZE_WHITESPACE
+ LFSet([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26])
+ >>> catalog.findValueTokens('children', {'token': 2})
+ LFSet([10, 11, 12, 25, 26])
+ >>> catalog.findValueTokens('children', {'token': 11})
+ LFSet([])
+
+ >>> rel.children.insert(27)
+ 1
+ >>> catalog.index(rel)
+
+ >>> catalog.findValueTokens('children', {'token': 0})
+ ... # doctest: +NORMALIZE_WHITESPACE
+ LFSet([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27])
+ >>> catalog.findValueTokens('children', {'token': 2})
+ LFSet([10, 11, 12, 25, 26, 27])
+ >>> catalog.findValueTokens('children', {'token': 11})
+ LFSet([27])
+
+When the index is copied, the search index is copied.
+
+ >>> new = catalog.copy()
+ >>> res = list(new.iterSearchIndexes())
+ >>> len(res)
+ 1
+ >>> new_index = res[0]
+ >>> res = list(catalog.iterSearchIndexes())
+ >>> len(res)
+ 1
+ >>> old_index = res[0]
+ >>> new_index is old_index
+ False
+ >>> old_index.index is new_index.index
+ False
+ >>> list(old_index.index.keys()) == list(new_index.index.keys())
+ True
+ >>> for key, value in old_index.index.items():
+ ... v = new_index.index[key]
+ ... if v is value or list(v) != list(value):
+ ... print 'oops', key, value, v
+ ... break
+ ... else:
+ ... print 'good'
+ ...
+ good
+ >>> old_index.names is not new_index.names
+ True
+ >>> list(old_index.names) == list(new_index.names)
+ True
+ >>> for name, old_ix in old_index.names.items():
+ ... new_ix = new_index.names[name]
+ ... if new_ix is old_ix or list(new_ix.keys()) != list(old_ix.keys()):
+ ... print 'oops'
+ ... break
+ ... for key, value in old_ix.items():
+ ... v = new_ix[key]
+ ... if v is value or list(v) != list(value):
+ ... print 'oops', name, key, value, v
+ ... break
+ ... else:
+ ... continue
+ ... break
+ ... else:
+ ... print 'good'
+ ...
+ good
+
+Helpers
+=======
+
+When writing search indexes and query factories, you often want complete
+access to relation catalog data. We've seen a number of these tools already:
+
+- `getRelationModuleTools` gets a dictionary of the BTree tools needed to
+ work with relations.
+
+ >>> sorted(catalog.getRelationModuleTools().keys())
+ ... # doctest: +NORMALIZE_WHITESPACE
+ ['BTree', 'Bucket', 'Set', 'TreeSet', 'difference', 'dump',
+ 'intersection', 'load', 'multiunion', 'union']
+
+ 'multiunion' is only there if the BTree is an I* or L* module.
+ Use the zc.relation.catalog.multiunion helper function to do the
+ best union you can for a given set of tools.
+
+- `getValueModuleTools` does the same for indexed values.
+
+ >>> tools = set(('BTree', 'Bucket', 'Set', 'TreeSet', 'difference',
+ ... 'dump', 'intersection', 'load', 'multiunion', 'union'))
+ >>> tools.difference(catalog.getValueModuleTools('children').keys())
+ set([])
+
+ >>> tools.difference(catalog.getValueModuleTools('token').keys())
+ set([])
+
+- `getRelationTokens` can return all of the tokens in the catalog.
+
+ >>> len(catalog.getRelationTokens()) == len(catalog)
+ True
+
+ This also happens to be equivalent to `findRelationTokens` with an empty
+ query.
+
+ >>> catalog.getRelationTokens() is catalog.findRelationTokens({})
+ True
+
+ It also can return all the tokens that match a given query, or None if
+ there are no matches.
+
+ >>> catalog.getRelationTokens({'token': 0}) # doctest: +ELLIPSIS
+ <BTrees.LOBTree.LOTreeSet object at ...>
+ >>> list(catalog.getRelationTokens({'token': 0}))
+ [100]
+
+ This also happens to be equivalent to `findRelationTokens` with a query,
+ a maxDepth of 1, and no other arguments.
+
+ >>> catalog.findRelationTokens({'token': 0}, maxDepth=1) is (
+ ... catalog.getRelationTokens({'token': 0}))
+ True
+
+ Except that if there are no matches, `findRelationTokens` returns an empty
+ set (so it *always* returns an iterable).
+
+ >>> catalog.findRelationTokens({'token': 50}, maxDepth=1)
+ LOSet([])
+ >>> print catalog.getRelationTokens({'token': 50})
+ None
+
+- `getValueTokens` can return all of the tokens for a given value name in
+ the catalog.
+
+ >>> list(catalog.getValueTokens('token')) == range(12)
+ True
+
+ This is identical to catalog.findValueTokens with a name only (or with
+ an empty query, and a maxDepth of 1).
+
+ >>> list(catalog.findValueTokens('token')) == range(12)
+ True
+ >>> catalog.findValueTokens('token') is catalog.getValueTokens('token')
+ True
+
+ It can also return the values for a given token.
+
+ >>> list(catalog.getValueTokens('children', 100))
+ [1, 2]
+
+ This is identical to catalog.findValueTokens with a name and a query of
+ {None: token}.
+
+ >>> list(catalog.findValueTokens('children', {None: 100}))
+ [1, 2]
+ >>> catalog.getValueTokens('children', 100) is (
+ ... catalog.findValueTokens('children', {None: 100}))
+ True
+
+ Except that if there are no matches, `findValueTokens` returns an empty
+ set (so it *always* returns an iterable); while getValueTokens will
+ return None if the relation has no values (or the relation is unknown).
+
+ >>> catalog.findValueTokens('children', {None: 50}, maxDepth=1)
+ LFSet([])
+ >>> print catalog.getValueTokens('children', 50)
+ None
+
+ >>> rel.children.remove(27)
+ >>> catalog.index(rel)
+ >>> catalog.findValueTokens('children', {None: rel.id}, maxDepth=1)
+ LFSet([])
+ >>> print catalog.getValueTokens('children', rel.id)
+ None
+
+- `yieldRelationTokenChains` is a search workhorse for searches that use a
+ query factory. TODO: describe.
+
+.. ......... ..
+.. Footnotes ..
+.. ......... ..
+
+.. [#queryFactory] The query factory knows when it is not needed--not only
+ when neither of its names are used, but also when both of its names are
+ used.
+
+ >>> list(catalog.findRelationTokens({'token': 0, 'children': 1}))
+ [100]
+
+.. [#findValuesUnindexed] When values are the same as their tokens,
+ `findValues` returns the same result as `findValueTokens`. Here
+ we see this without indexes.
+
+ >>> list(catalog.findValueTokens('children', {'token': 0})) == list(
+ ... catalog.findValues('children', {'token': 0}))
+ True
+
+.. [#findValuesIndexed] Again, when values are the same as their tokens,
+ `findValues` returns the same result as `findValueTokens`. Here
+ we see this with indexes.
+
+ >>> list(catalog.findValueTokens('children', {'token': 0})) == list(
+ ... catalog.findValues('children', {'token': 0}))
+ True
Property changes on: Sandbox/poster/zc.relation/trunk/src/zc/relation/searchindex.txt
___________________________________________________________________
Name: svn:eol-style
+ native
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/tests.py
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/tests.py (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/tests.py 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,52 @@
+##############################################################################
+#
+# Copyright (c) 2004 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Relation index tests
+"""
+import unittest
+from zope.testing import doctest
+import zope.testing.module
+
+def setUp(test):
+ zope.testing.module.setUp(test, 'zc.relation.DOCTESTS')
+
+def tearDown(test):
+ db = test.globs.get('db')
+ if db is not None:
+ db.close()
+ zope.testing.module.tearDown(test)
+
+def test_suite():
+ res = unittest.TestSuite((
+ doctest.DocFileSuite(
+ 'README.txt',
+ setUp=setUp, tearDown=tearDown,
+ optionflags=doctest.INTERPRET_FOOTNOTES),
+ doctest.DocFileSuite(
+ 'tokens.txt',
+ setUp=setUp, tearDown=tearDown,
+ optionflags=doctest.INTERPRET_FOOTNOTES),
+ doctest.DocFileSuite(
+ 'searchindex.txt',
+ setUp=setUp, tearDown=tearDown,
+ optionflags=doctest.INTERPRET_FOOTNOTES),
+# doctest.DocFileSuite(
+# 'optimization.txt',
+# setUp=setUp, tearDown=tearDown,
+# optionflags=doctest.INTERPRET_FOOTNOTES),
+# doctest.DocFileSuite(
+# 'administration.txt',
+# setUp=setUp, tearDown=tearDown,
+# optionflags=doctest.INTERPRET_FOOTNOTES),
+ ))
+ return res
Added: Sandbox/poster/zc.relation/trunk/src/zc/relation/tokens.txt
===================================================================
--- Sandbox/poster/zc.relation/trunk/src/zc/relation/tokens.txt (rev 0)
+++ Sandbox/poster/zc.relation/trunk/src/zc/relation/tokens.txt 2007-08-03 01:34:35 UTC (rev 78553)
@@ -0,0 +1,1275 @@
+======================================================
+Tokens and Joins: zc.relation Catalog Extended Example
+======================================================
+
+.. contents::
+ :local:
+
+Introduction and Set Up
+=======================
+
+This document assumes you have read the introductory README.txt and want
+to learn a bit more by example. In it, we will explore a more
+complicated set of relations that demonstrates most of the aspects of
+working with tokens. In particular, we will look at joins, which will
+also give us a chance to look more in depth at query factories and
+search indexes, and introduce the idea of listeners. It will not explain
+the basics that the README already addressed.
+
+Imagine we are indexing security assertions in a system. In this
+system, users may have roles within an organization. Each organization
+may have multiple child organizations and may have a single parent
+organization. A user with a role in a parent organization will have the
+same role in all transitively connected child relations.
+
+We have two kinds of relations, then. One kind of relation will model
+the hierarchy of organizations. We'll do it with an intrinsic relation
+of organizations to their children: that reflects the fact that parent
+organizations choose and are comprised of their children; children do
+not choose their parents.
+
+The other relation will model the (multiple) roles a (single) user has
+in a (single) organization. This relation will be entirely extrinsic.
+
+We could create two catalogs, one for each type. Or we could put them
+both in the same catalog. Initially, we'll go with the single-catalog
+approach for our examples. This single catalog, then, will be indexing
+a heterogeneous collection of relations.
+
+Let's define the two relations with interfaces. We'll include one
+accessor, getOrganization, largely to show how to handle methods.
+
+ >>> import zope.interface
+ >>> class IOrganization(zope.interface.Interface):
+ ... title = zope.interface.Attribute('the title')
+ ... parts = zope.interface.Attribute(
+ ... 'the organizations that make up this one')
+ ...
+ >>> class IRoles(zope.interface.Interface):
+ ... def getOrganization():
+ ... 'return the organization in which this relation operates'
+ ... principal_id = zope.interface.Attribute(
+ ... 'the pricipal id whose roles this relation lists')
+ ... role_ids = zope.interface.Attribute(
+ ... 'the role ids that the principal explicitly has in the '
+ ... 'organization. The principal may have other roles via '
+ ... 'roles in parent organizations.')
+ ...
+
+Now we can create some classes. In the README example, the setup was a bit
+of a toy. This time we will be just a bit more practical. We'll also expect
+to be operating within the ZODB, with a root and transactions. [#ZODB]_
+
+Here's how we will dump and load our relations: use a "registry"
+object, similar to an intid utility. [#faux_intid]_
+
+In this implementation of the "dump" method, we use the cache just to
+show you how you might use it. It probably is overkill for this job,
+and maybe even a speed loss, but you can see the idea.
+
+ >>> def dump(obj, catalog, cache):
+ ... reg = cache.get('registry')
+ ... if reg is None:
+ ... reg = cache['registry'] = catalog._p_jar.root()['registry']
+ ... return reg.getId(obj)
+ ...
+ >>> def load(token, catalog, cache):
+ ... reg = cache.get('registry')
+ ... if reg is None:
+ ... reg = cache['registry'] = catalog._p_jar.root()['registry']
+ ... return reg.getObject(token)
+ ...
+
+Now we can create a relation catalog to hold these items.
+
+ >>> import zc.relation.catalog
+ >>> catalog = root['catalog'] = zc.relation.catalog.Catalog(dump, load)
+ >>> transaction.commit()
+
+Now we set up our indexes. We'll start with just the organizations, and
+set up the catalog with them. This part will be similar to the example
+in README.txt, but will introduce more discussions of optimizations and
+tokens. Then we'll add in the part about roles, and explore queries and
+token-based "joins".
+
+Organizations
+=============
+
+The organization will hold a set of organizations. This is actually not
+inherently easy in the ZODB because this means that we need to compare
+or hash persistent objects, which does not work reliably over time and
+across machines out-of-the-box. To side-step the issue for this example,
+and still do something a bit interesting and real-world, we'll use the
+registry tokens introduced above. This will also give us a chance to
+talk a bit more about optimizations and tokens. (If you would like
+to sanely and transparently hold a set of persistent objects, try the
+zc.set package XXX not yet.)
+
+ >>> import BTrees
+ >>> class Organization(persistent.Persistent):
+ ... zope.interface.implements(IOrganization)
+ ... def __init__(self, title):
+ ... self.title = title
+ ... self.parts = BTrees.family32.IF.TreeSet()
+ ... # the next parts just make the tests prettier
+ ... def __repr__(self):
+ ... return '<Organization instance "' + self.title + '">'
+ ... def __cmp__(self, other):
+ ... # pukes if other doesn't have name
+ ... return cmp(self.title, other.title)
+ ...
+
+OK, now we know how organizations will work. Now we can add the `parts`
+index to the catalog. This will do a few new things from how we added
+indexes in the README.
+
+ >>> catalog.addValueIndex(IOrganization['parts'], multiple=True,
+ ... name="part")
+
+So, what's different from the README examples?
+
+First, we are using an interface element to define the value to be indexed.
+It provides an interface to which objects will be adapted, a default name
+for the index, and information as to whether the attribute should be used
+directly or called.
+
+Second, we are not specifying a dump or load. They are None. This
+means that the indexed value can already be treated as a token. This
+can allow a very significant optimization for reindexing if the indexed
+value is a large collection using the same BTree family as the
+index--which leads us to the next difference.
+
+Third, we are specifying that `multiple=True`. This means that the value
+on a given relation that provides or can be adapted to IOrganization will
+have a collection of `parts`. These will always be regarded as a set,
+whether the actual colection is a BTrees set or the keys of a BTree.
+
+Last, we are specifying a name to be used for queries. I find that queries
+read more easily when the query keys are singular, so I often rename plurals.
+
+As in the README, We can add another simple transposing transitive query
+factory, switching between 'part' and `None`.
+
+ >>> import zc.relation.queryfactory
+ >>> factory1 = zc.relation.queryfactory.TransposingTransitive(
+ ... 'part', None)
+ >>> catalog.addDefaultQueryFactory(factory1)
+
+Let's add a couple of search indexes in too, of the hierarchy looking up...
+
+ >>> import zc.relation.searchindex
+ >>> catalog.addSearchIndex(
+ ... zc.relation.searchindex.TransposingTransitive(
+ ... 'part', None))
+
+...and down.
+
+ >>> catalog.addSearchIndex(
+ ... zc.relation.searchindex.TransposingTransitive(
+ ... None, 'part'))
+
+(These search indexes are a bit gratuitous, especially looking up, but they
+are examples [#verifyObjectTransitive]_.)
+
+Let's create and add a few organizations.
+
+We'll make a structure like this [#silliness]_::
+
+ Ynod Corp Mangement Zookd Corp Management
+ / | \ / | \
+ Ynod Devs Ynod SAs Ynod Admins Zookd Admins Zookd SAs Zookd Devs
+ / \ \ / / \
+ Y3L4 Proj Bet Proj Ynod Zookd Task Force Zookd hOgnmd Zookd Nbd
+
+Here's the Python.
+
+
+ >>> orgs = root['organizations'] = BTrees.family32.OO.BTree()
+ >>> for nm, parts in (
+ ... ('Y3L4 Proj', ()),
+ ... ('Bet Proj', ()),
+ ... ('Ynod Zookd Task Force', ()),
+ ... ('Zookd hOgnmd', ()),
+ ... ('Zookd Nbd', ()),
+ ... ('Ynod Devs', ('Y3L4 Proj', 'Bet Proj')),
+ ... ('Ynod SAs', ()),
+ ... ('Ynod Admins', ('Ynod Zookd Task Force',)),
+ ... ('Zookd Admins', ('Ynod Zookd Task Force',)),
+ ... ('Zookd SAs', ()),
+ ... ('Zookd Devs', ('Zookd hOgnmd', 'Zookd Nbd')),
+ ... ('Ynod Corp Management', ('Ynod Devs', 'Ynod SAs', 'Ynod Admins')),
+ ... ('Zookd Corp Management', ('Zookd Devs', 'Zookd SAs',
+ ... 'Zookd Admins'))):
+ ... org = Organization(nm)
+ ... for part in parts:
+ ... ignore = org.parts.insert(registry.getId(orgs[part]))
+ ... orgs[nm] = org
+ ... catalog.index(org)
+ ...
+
+Now the catalog knows about the relatons.
+
+ >>> len(catalog)
+ 13
+ >>> root['dummy'] = Organization('Foo')
+ >>> root['dummy'] in catalog
+ False
+ >>> orgs['Y3L4 Proj'] in catalog
+ True
+
+Also, now we can search. To do this, we can use some of the token methods that
+the catalog provides. The most commonly used is `tokenizeQuery`. It takes a
+query with values that are not tokenized and converts them to values that are
+tokenized.
+
+ >>> Ynod_SAs_id = registry.getId(orgs['Ynod SAs'])
+ >>> catalog.tokenizeQuery({None: orgs['Ynod SAs']}) == {
+ ... None: Ynod_SAs_id}
+ True
+ >>> Zookd_SAs_id = registry.getId(orgs['Zookd SAs'])
+ >>> Zookd_Devs_id = registry.getId(orgs['Zookd Devs'])
+ >>> catalog.tokenizeQuery(
+ ... {None: zc.relation.catalog.any(
+ ... orgs['Zookd SAs'], orgs['Zookd Devs'])}) == {
+ ... None: zc.relation.catalog.any(Zookd_SAs_id, Zookd_Devs_id)}
+ True
+
+Of course, right now doing this with 'part' alone is kind of silly, since it
+does not change within the relation catalog (because we said that dump and
+load were `None`, as discussed above).
+
+ >>> catalog.tokenizeQuery({'part': Ynod_SAs_id}) == {
+ ... 'part': Ynod_SAs_id}
+ True
+ >>> catalog.tokenizeQuery(
+ ... {'part': zc.relation.catalog.any(Zookd_SAs_id, Zookd_Devs_id)}
+ ... ) == {'part': zc.relation.catalog.any(Zookd_SAs_id, Zookd_Devs_id)}
+ True
+
+The `tokenizeQuery` method is so common that we're going to assign it to
+a variable in our example. Then we'll do a search or two.
+
+So...find the relations that Ynod Devs supervise.
+
+ >>> t = catalog.tokenizeQuery
+ >>> res = list(catalog.findRelationTokens(t({None: orgs['Ynod Devs']})))
+
+OK...we used `findRelationTokens`, as opposed to `findRelations`, so res
+is a couple of numbers now. How do we convert them back?
+`resolveRelationTokens` will do the trick.
+
+ >>> len(res)
+ 3
+ >>> sorted(catalog.resolveRelationTokens(res))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Organization instance "Bet Proj">, <Organization instance "Y3L4 Proj">,
+ <Organization instance "Ynod Devs">]
+
+`resolveQuery` is the mirror image of `tokenizeQuery`: it converts
+tokenized queries to queries with "loaded" values.
+
+ >>> original = {'part': zc.relation.catalog.any(
+ ... Zookd_SAs_id, Zookd_Devs_id),
+ ... None: orgs['Zookd Devs']}
+ >>> tokenized = catalog.tokenizeQuery(original)
+ >>> original == catalog.resolveQuery(tokenized)
+ True
+
+ >>> original = {None: zc.relation.catalog.any(
+ ... orgs['Zookd SAs'], orgs['Zookd Devs']),
+ ... 'part': Zookd_Devs_id}
+ >>> tokenized = catalog.tokenizeQuery(original)
+ >>> original == catalog.resolveQuery(tokenized)
+ True
+
+Likewise, `tokenizeRelations` is the mirror image of `resolveRelationTokens`.
+
+ >>> sorted(catalog.tokenizeRelations(
+ ... [orgs["Bet Proj"], orgs["Y3L4 Proj"]])) == sorted(
+ ... registry.getId(o) for o in
+ ... [orgs["Bet Proj"], orgs["Y3L4 Proj"]])
+ True
+
+The other token-related methods are as follows
+[#show_remaining_token_methods]_:
+
+- `tokenizeValues`, which returns an iterable of tokens for the values
+ of the given index name;
+- `resolveValueTokens`, which returns an iterable of values for the tokens of
+ the given index name;
+- `tokenizeRelation`, which returns a token for the given relation; and
+- `resolveRelationToken`, which returns a relation for the given token.
+
+Why do we bother with these tokens, instead of hiding them away and
+making the API prettier? By exposing them, we enable efficient joining,
+and efficient use in other contexts. For instance, if you use the same
+intid utility to tokenize in other catalogs, our results can be merged
+with the results of other catalogs. Similarly, you can use the results
+of queries to other catalogs--or even "joins" from earlier results of
+querying this catalog--as query values here. We'll explore this in the
+next section.
+
+Roles
+=====
+
+We have set up the Organization relations. Now let's set up the roles, and
+actually be able to answer the questions that we described at the beginning
+of the document.
+
+In our Roles object, roles and principals will simply be strings--ids, if
+this were a real system. The organization will be a direct object reference.
+
+ >>> class Roles(persistent.Persistent):
+ ... zope.interface.implements(IRoles)
+ ... def __init__(self, principal_id, role_ids, organization):
+ ... self.principal_id = principal_id
+ ... self.role_ids = BTrees.family32.OI.TreeSet(role_ids)
+ ... self._organization = organization
+ ... def getOrganization(self):
+ ... return self._organization
+ ... # the rest is for prettier/easier tests
+ ... def __repr__(self):
+ ... return "<Roles instance (%s has %s in %s)>" % (
+ ... self.principal_id, ', '.join(self.role_ids),
+ ... self._organization.title)
+ ... def __cmp__(self, other):
+ ... return cmp(
+ ... (self.principal_id, tuple(self.role_ids),
+ ... self._organization.title),
+ ... (other.principal_id, tuple(other.role_ids),
+ ... other._organization.title))
+ ...
+
+Now let's add add the value indexes to the relation catalog.
+
+ >>> catalog.addValueIndex(IRoles['principal_id'], btree=BTrees.family32.OI)
+ >>> catalog.addValueIndex(IRoles['role_ids'], btree=BTrees.family32.OI,
+ ... multiple=True, name='role_id')
+ >>> catalog.addValueIndex(IRoles['getOrganization'], dump, load,
+ ... name='organization')
+
+Those are some slightly new variations of what we've seen in `addValueIndex`
+before, but all mixing and matching on the same ingredients.
+
+As a reminder, here is our organization structure::
+
+ Ynod Corp Mangement Zookd Corp Management
+ / | \ / | \
+ Ynod Devs Ynod SAs Ynod Admins Zookd Admins Zookd SAs Zookd Devs
+ / \ \ / / \
+ Y3L4 Proj Bet Proj Ynod Zookd Task Force Zookd hOgnmd Zookd Nbd
+
+Now let's create and add some roles.
+
+ >>> principal_ids = [
+ ... 'abe', 'bran', 'cathy', 'david', 'edgar', 'frank', 'gertrude',
+ ... 'harriet', 'ignas', 'jacob', 'karyn', 'lettie', 'molly', 'nancy',
+ ... 'ophelia', 'pat']
+ >>> role_ids = ['user manager', 'writer', 'reviewer', 'publisher']
+ >>> get_role = dict((v[0], v) for v in role_ids).__getitem__
+ >>> roles = root['roles'] = BTrees.family32.IO.BTree()
+ >>> next = 0
+ >>> for prin, org, role_ids in (
+ ... ('abe', orgs['Zookd Corp Management'], 'uwrp'),
+ ... ('bran', orgs['Ynod Corp Management'], 'uwrp'),
+ ... ('cathy', orgs['Ynod Devs'], 'w'),
+ ... ('cathy', orgs['Y3L4 Proj'], 'r'),
+ ... ('david', orgs['Bet Proj'], 'wrp'),
+ ... ('edgar', orgs['Ynod Devs'], 'up'),
+ ... ('frank', orgs['Ynod SAs'], 'uwrp'),
+ ... ('frank', orgs['Ynod Admins'], 'w'),
+ ... ('gertrude', orgs['Ynod Zookd Task Force'], 'uwrp'),
+ ... ('harriet', orgs['Ynod Zookd Task Force'], 'w'),
+ ... ('harriet', orgs['Ynod Admins'], 'r'),
+ ... ('ignas', orgs['Zookd Admins'], 'r'),
+ ... ('ignas', orgs['Zookd Corp Management'], 'w'),
+ ... ('karyn', orgs['Zookd Corp Management'], 'uwrp'),
+ ... ('karyn', orgs['Ynod Corp Management'], 'uwrp'),
+ ... ('lettie', orgs['Zookd Corp Management'], 'u'),
+ ... ('lettie', orgs['Ynod Zookd Task Force'], 'w'),
+ ... ('lettie', orgs['Zookd SAs'], 'w'),
+ ... ('molly', orgs['Zookd SAs'], 'uwrp'),
+ ... ('nancy', orgs['Zookd Devs'], 'wrp'),
+ ... ('nancy', orgs['Zookd hOgnmd'], 'u'),
+ ... ('ophelia', orgs['Zookd Corp Management'], 'w'),
+ ... ('ophelia', orgs['Zookd Devs'], 'r'),
+ ... ('ophelia', orgs['Zookd Nbd'], 'p'),
+ ... ('pat', orgs['Zookd Nbd'], 'wrp')):
+ ... assert prin in principal_ids
+ ... role_ids = [get_role(l) for l in role_ids]
+ ... role = roles[next] = Roles(prin, role_ids, org)
+ ... role.key = next
+ ... next += 1
+ ... catalog.index(role)
+ ...
+
+Now we can begin to do searches [#real_value_tokens]_.
+
+What are all the role settings for ophelia?
+
+ >>> sorted(catalog.findRelations({'principal_id': 'ophelia'}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Roles instance (ophelia has publisher in Zookd Nbd)>,
+ <Roles instance (ophelia has reviewer in Zookd Devs)>,
+ <Roles instance (ophelia has writer in Zookd Corp Management)>]
+
+That answer does not need to be transitive: we're done.
+
+Next question. Where does ophelia have the 'writer' role?
+
+ >>> list(catalog.findValues(
+ ... 'organization', {'principal_id': 'ophelia',
+ ... 'role_id': 'writer'}))
+ [<Organization instance "Zookd Corp Management">]
+
+Well, that's correct intransitively. Do we need a transitive queries
+factory? No! This is a great chance to look at the token join we talked
+about in the previous section. This should actually be a two-step
+operation: find all of the organizations in which ophelia has writer,
+and then find all of the transitive parts to that organization.
+
+ >>> sorted(catalog.findRelations({None: zc.relation.catalog.Any(
+ ... catalog.findValueTokens('organization',
+ ... {'principal_id': 'ophelia',
+ ... 'role_id': 'writer'}))}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Organization instance "Ynod Zookd Task Force">,
+ <Organization instance "Zookd Admins">,
+ <Organization instance "Zookd Corp Management">,
+ <Organization instance "Zookd Devs">,
+ <Organization instance "Zookd Nbd">,
+ <Organization instance "Zookd SAs">,
+ <Organization instance "Zookd hOgnmd">]
+
+That's more like it.
+
+Next question. What users have roles in the 'Zookd Devs' organization?
+Intransitively, that's pretty easy.
+
+ >>> sorted(catalog.findValueTokens(
+ ... 'principal_id', t({'organization': orgs['Zookd Devs']})))
+ ['nancy', 'ophelia']
+
+Transitively, we should do another join.
+
+ >>> org_id = registry.getId(orgs['Zookd Devs'])
+ >>> sorted(catalog.findValueTokens(
+ ... 'principal_id', {
+ ... 'organization': zc.relation.catalog.any(
+ ... org_id, *catalog.findRelationTokens({'part': org_id}))}))
+ ['abe', 'ignas', 'karyn', 'lettie', 'nancy', 'ophelia']
+
+That's a little awkward, but it does the trick.
+
+Last question, and the kind of question that started the entire example.
+ What roles does ophelia have in the "Zookd Nbd" organization?
+
+ >>> list(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'ophelia'})))
+ ['publisher']
+
+Intransitively, that's correct. But, transitively, ophelia also has
+reviewer and writer, and that's the answer we want to be able to get quickly.
+
+We could ask the question a different way, then, again leveraging a join.
+We'll set it up as a function, because we will want to use it a little later
+without repeating the code.
+
+ >>> def getRolesInOrganization(principal_id, org):
+ ... org_id = registry.getId(org)
+ ... return sorted(catalog.findValueTokens(
+ ... 'role_id', {
+ ... 'organization': zc.relation.catalog.any(
+ ... org_id,
+ ... *catalog.findRelationTokens({'part': org_id})),
+ ... 'principal_id': principal_id}))
+ ...
+ >>> getRolesInOrganization('ophelia', orgs['Zookd Nbd'])
+ ['publisher', 'reviewer', 'writer']
+
+As you can see, then, working with tokens makes interesting joins possible,
+as long as the tokens are the same across the two queries.
+
+We have examined tokens methods and token techniques like joins. The example
+story we have told can let us get into a few more advanced topics, such as
+query factory joins and search indexes that can increase their read speed.
+
+Query Factory Joins
+===================
+
+We can build a query factory that makes the join automatic. A query
+factory is a callable that takes two arguments: a query (the one that
+starts the search) and the catalog. The factory either returns None,
+indicating that the query factory cannot be used for this query, or it
+returns another callable that takes a chain of relations. The last
+token in the relation chain is the most recent. The output of this
+inner callable is expected to be an iterable of
+BTrees.family32.OO.Bucket queries to search further from the given chain
+of relations.
+
+Here's a flawed approach to this problem.
+
+ >>> def flawed_factory(query, catalog):
+ ... if (len(query) == 2 and
+ ... 'organization' in query and
+ ... 'principal_id' in query):
+ ... def getQueries(relchain):
+ ... if not relchain:
+ ... yield query
+ ... return
+ ... current = catalog.getValueTokens(
+ ... 'organization', relchain[-1])
+ ... if current:
+ ... organizations = catalog.getRelationTokens(
+ ... {'part': zc.relation.catalog.Any(current)})
+ ... if organizations:
+ ... res = BTrees.family32.OO.Bucket(query)
+ ... res['organization'] = zc.relation.catalog.Any(
+ ... organizations)
+ ... yield res
+ ... return getQueries
+ ...
+
+That works for our current example.
+
+ >>> sorted(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'ophelia'}),
+ ... queryFactory=flawed_factory))
+ ['publisher', 'reviewer', 'writer']
+
+However, it won't work for other similar queries.
+
+ >>> getRolesInOrganization('abe', orgs['Zookd Nbd'])
+ ['publisher', 'reviewer', 'user manager', 'writer']
+ >>> sorted(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'abe'}),
+ ... queryFactory=flawed_factory))
+ []
+
+oops.
+
+The flawed_factory is actually a useful pattern for more typical relation
+traversal. It goes from relation to relation to relation, and ophelia has
+connected relations all the way to the top. However, abe only has them at
+the top, so nothing is traversed.
+
+Instead, we can make a query factory that modifies the initial query.
+
+ >>> def factory2(query, catalog):
+ ... if (len(query) == 2 and
+ ... 'organization' in query and
+ ... 'principal_id' in query):
+ ... def getQueries(relchain):
+ ... if not relchain:
+ ... res = BTrees.family32.OO.Bucket(query)
+ ... org_id = query['organization']
+ ... if org_id is not None:
+ ... res['organization'] = zc.relation.catalog.any(
+ ... org_id,
+ ... *catalog.findRelationTokens({'part': org_id}))
+ ... yield res
+ ... return getQueries
+ ...
+
+ >>> sorted(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'ophelia'}),
+ ... queryFactory=factory2))
+ ['publisher', 'reviewer', 'writer']
+
+ >>> sorted(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'abe'}),
+ ... queryFactory=factory2))
+ ['publisher', 'reviewer', 'user manager', 'writer']
+
+A difference between this and the other approach is that it is essentially
+intransitive: this query factory modifies the initial query, and then does
+not give further queries. The catalog currently always stops calling the
+query factory if the queries do not return any results, so an approach like
+the flawed_factory simply won't work for this kind of problem.
+
+We could add this query factory as another default.
+
+ >>> catalog.addDefaultQueryFactory(factory2)
+
+ >>> sorted(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'ophelia'})))
+ ['publisher', 'reviewer', 'writer']
+
+ >>> sorted(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'abe'})))
+ ['publisher', 'reviewer', 'user manager', 'writer']
+
+The previously installed query factory is still available.
+
+ >>> list(catalog.iterDefaultQueryFactories()) == [factory1, factory2]
+ True
+
+ >>> list(catalog.findRelations(
+ ... {'part': registry.getId(orgs['Y3L4 Proj'])}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Organization instance "Ynod Devs">,
+ <Organization instance "Ynod Corp Management">]
+
+ >>> sorted(catalog.findRelations(
+ ... {None: registry.getId(orgs['Ynod Corp Management'])}))
+ ... # doctest: +NORMALIZE_WHITESPACE
+ [<Organization instance "Bet Proj">, <Organization instance "Y3L4 Proj">,
+ <Organization instance "Ynod Admins">,
+ <Organization instance "Ynod Corp Management">,
+ <Organization instance "Ynod Devs">, <Organization instance "Ynod SAs">,
+ <Organization instance "Ynod Zookd Task Force">]
+
+Search Index for Query Factory Joins
+====================================
+
+Now that we have written a query factory that encapsulates the join, we can
+use a search index that speeds it up. We've only used transitive search
+indexes so far. Now we will add an intransitive search index.
+
+The intransitive search index generally just needs the search value
+names it should be indexing, optionally the result name (defaulting to
+relations), and optionally the query factory to be used.
+
+We need to use two additional options because of the odd join trick we're
+doing. We need to specify what organization and principal_id values need
+to be changed when an object is indexed, and we need to indicate that we
+should update when organization, principal_id, *or* parts changes.
+
+`getValueTokens` specifies the values that need to be indexed. It gets
+the index, the name for the tokens desired, the token, the catalog that
+generated the token change (it may not be the same as the index's
+catalog, the source dictionary that contains a dictionary of the values
+that will be used for tokens if you do not override them, a dict of the
+added values for this token (keys are value names), a dict of the
+removed values for this token, and whether the token has been removed.
+The method can return None, which will leave the index to its default
+behavior that should work if no query factory is used; or an iterable of
+values.
+
+ >>> def getValueTokens(index, name, token, catalog, source,
+ ... additions, removals, removed):
+ ... if name == 'organization':
+ ... orgs = source.get('organization')
+ ... if not removed or not orgs:
+ ... orgs = index.catalog.getValueTokens(
+ ... 'organization', token)
+ ... if not orgs:
+ ... orgs = [token]
+ ... orgs.extend(removals.get('part', ()))
+ ... orgs = set(orgs)
+ ... orgs.update(index.catalog.findValueTokens(
+ ... 'part',
+ ... {None: zc.relation.catalog.Any(
+ ... t for t in orgs if t is not None)}))
+ ... return orgs
+ ... elif name == 'principal_id':
+ ... # we only want custom behavior if this is an organization
+ ... if 'principal_id' in source or index.catalog.getValueTokens(
+ ... 'principal_id', token):
+ ... return
+ ... orgs = set((token,))
+ ... orgs.update(index.catalog.findRelationTokens(
+ ... {'part': token}))
+ ... return set(index.catalog.findValueTokens(
+ ... 'principal_id', {
+ ... 'organization': zc.relation.catalog.Any(orgs)}))
+ ...
+ >>> index = zc.relation.searchindex.Intransitive(
+ ... ('organization', 'principal_id'), 'role_id', factory2,
+ ... getValueTokens,
+ ... ('organization', 'principal_id', 'part', 'role_id'))
+ >>> catalog.addSearchIndex(index)
+
+ >>> res = catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'ophelia'}))
+ >>> list(res)
+ ['publisher', 'reviewer', 'writer']
+ >>> list(res)
+ ['publisher', 'reviewer', 'writer']
+
+ >>> res = catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'abe'}))
+ >>> list(res)
+ ['publisher', 'reviewer', 'user manager', 'writer']
+ >>> list(res)
+ ['publisher', 'reviewer', 'user manager', 'writer']
+
+[#verifyObjectIntransitive]_ Now we can change and remove relations--both organizations and roles--and
+have the index maintain correct state. Given the current state of
+organizations--
+
+::
+
+ Ynod Corp Mangement Zookd Corp Management
+ / | \ / | \
+ Ynod Devs Ynod SAs Ynod Admins Zookd Admins Zookd SAs Zookd Devs
+ / \ \ / / \
+ Y3L4 Proj Bet Proj Ynod Zookd Task Force Zookd hOgnmd Zookd Nbd
+
+--first we will move Ynod Devs to beneath Zookd Devs, and back out. This will
+briefly give abe full privileges to Y3L4 Proj., among others.
+
+ >>> list(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Y3L4 Proj'],
+ ... 'principal_id': 'abe'})))
+ []
+ >>> orgs['Zookd Devs'].parts.insert(registry.getId(orgs['Ynod Devs']))
+ 1
+ >>> catalog.index(orgs['Zookd Devs'])
+ >>> res = catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Y3L4 Proj'],
+ ... 'principal_id': 'abe'}))
+ >>> list(res)
+ ['publisher', 'reviewer', 'user manager', 'writer']
+ >>> list(res)
+ ['publisher', 'reviewer', 'user manager', 'writer']
+ >>> orgs['Zookd Devs'].parts.remove(registry.getId(orgs['Ynod Devs']))
+ >>> catalog.index(orgs['Zookd Devs'])
+ >>> list(catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Y3L4 Proj'],
+ ... 'principal_id': 'abe'})))
+ []
+
+As another example, we will change the roles abe has, and see that it is
+propagated down to Zookd Nbd.
+
+ >>> rels = list(catalog.findRelations(t(
+ ... {'principal_id': 'abe',
+ ... 'organization': orgs['Zookd Corp Management']})))
+ >>> len(rels)
+ 1
+ >>> rels[0].role_ids.remove('reviewer')
+ >>> catalog.index(rels[0])
+
+ >>> res = catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'abe'}))
+ >>> list(res)
+ ['publisher', 'user manager', 'writer']
+ >>> list(res)
+ ['publisher', 'user manager', 'writer']
+
+Note that search index order matters. In our case, our intransitive search
+index is relying on our transitive index, so the transitive index needs to
+come first. You want transitive relation indexes before name. Right now,
+you are in charge of this order: it will be difficult to come up with a
+reliable algorithm for guessing this.
+
+Listeners, Catalog Administration, and Joining Across Relation Catalogs
+=======================================================================
+
+We've done all of our examples so far with a single catalog that indexes
+both kinds of relations. What if we want to have two catalogs with
+homogenous collections of relations? That can feel cleaner, but it also
+introduces some new wrinkles.
+
+Let's use our current catalog for organizations, removing the extra
+information; and create a new one for roles.
+
+ >>> role_catalog = root['role_catalog'] = catalog.copy()
+ >>> transaction.commit()
+ >>> org_catalog = catalog
+ >>> del catalog
+
+We'll need a slightly different query factory and a slightly different
+search index `getValueTokens` function. We'll write those, then modify the
+configuration of our two catalogs for the new world.
+
+The transitive factory we write here is for the role catalog. It needs
+access to the organzation catalog. We could do this a variety of
+ways--relying on a utility, or finding the catalog from context. We will
+make the role_catalog have a .org_catalog attribute, and rely on that.
+
+ >>> role_catalog.org_catalog = org_catalog
+ >>> def factory3(query, catalog):
+ ... if (len(query) == 2 and
+ ... 'organization' in query and
+ ... 'principal_id' in query):
+ ... def getQueries(relchain):
+ ... if not relchain:
+ ... res = BTrees.family32.OO.Bucket(query)
+ ... org_id = query['organization']
+ ... if org_id is not None:
+ ... res['organization'] = zc.relation.catalog.any(
+ ... org_id,
+ ... *catalog.org_catalog.findRelationTokens(
+ ... {'part': org_id}))
+ ... yield res
+ ... return getQueries
+ ...
+
+ >>> def getValueTokens2(index, name, token, catalog, source,
+ ... additions, removals, removed):
+ ... is_role_catalog = catalog is index.catalog # role_catalog
+ ... if name == 'organization':
+ ... if is_role_catalog:
+ ... orgs = set(source.get('organization') or
+ ... index.catalog.getValueTokens(
+ ... 'organization', token) or ())
+ ... else:
+ ... orgs = set((token,))
+ ... orgs.update(removals.get('part', ()))
+ ... orgs.update(index.catalog.org_catalog.findValueTokens(
+ ... 'part',
+ ... {None: zc.relation.catalog.Any(
+ ... t for t in orgs if t is not None)}))
+ ... return orgs
+ ... elif name == 'principal_id':
+ ... # we only want custom behavior if this is an organization
+ ... if not is_role_catalog:
+ ... orgs = set((token,))
+ ... orgs.update(index.catalog.org_catalog.findRelationTokens(
+ ... {'part': token}))
+ ... return set(index.catalog.findValueTokens(
+ ... 'principal_id', {
+ ... 'organization': zc.relation.catalog.Any(orgs)}))
+ ...
+
+If you are following along in the code and comparing to the originals, you may
+see that this approach is a bit cleaner than the one when the relations were
+in the same catalog.
+
+Now we will fix up the the organization catalog [#compare_copy]_.
+
+ >>> org_catalog.removeValueIndex('organization')
+ >>> org_catalog.removeValueIndex('role_id')
+ >>> org_catalog.removeValueIndex('principal_id')
+ >>> org_catalog.removeDefaultQueryFactory(factory2)
+ >>> org_catalog.removeSearchIndex(index)
+ >>> org_catalog.clear()
+ >>> len(org_catalog)
+ 0
+ >>> for v in orgs.values():
+ ... org_catalog.index(v)
+
+This also shows using the `removeDefaultQueryFactory` and `removeSearchIndex`
+methods [#removeDefaultQueryFactoryExceptions]_.
+
+Now we will set up the role catalog [#copy_unchanged]_.
+
+ >>> role_catalog.removeValueIndex('part')
+ >>> for ix in list(role_catalog.iterSearchIndexes()):
+ ... role_catalog.removeSearchIndex(ix)
+ ...
+ >>> role_catalog.removeDefaultQueryFactory(factory1)
+ >>> role_catalog.removeDefaultQueryFactory(factory2)
+ >>> role_catalog.addDefaultQueryFactory(factory3)
+ >>> root['index2'] = index2 = zc.relation.searchindex.Intransitive(
+ ... ('organization', 'principal_id'), 'role_id', factory3,
+ ... getValueTokens2,
+ ... ('organization', 'principal_id', 'part', 'role_id'))
+ >>> role_catalog.addSearchIndex(index2)
+
+The new role_catalog index needs to be updated from the org_catalog.
+We'll set that up using listeners, a new concept.
+
+ >>> org_catalog.addListener(index2)
+ >>> list(org_catalog.iterListeners()) == [index2]
+ True
+
+Now the role_catalog should be able to answer the same questions as the old
+single catalog approach.
+
+ >>> t = role_catalog.tokenizeQuery
+ >>> list(role_catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'abe'})))
+ ['publisher', 'user manager', 'writer']
+
+ >>> list(role_catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'ophelia'})))
+ ['publisher', 'reviewer', 'writer']
+
+We can also make changes to both catalogs and the search indexes are
+maintained.
+
+ >>> list(role_catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Y3L4 Proj'],
+ ... 'principal_id': 'abe'})))
+ []
+ >>> orgs['Zookd Devs'].parts.insert(registry.getId(orgs['Ynod Devs']))
+ 1
+ >>> org_catalog.index(orgs['Zookd Devs'])
+ >>> list(role_catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Y3L4 Proj'],
+ ... 'principal_id': 'abe'})))
+ ['publisher', 'user manager', 'writer']
+ >>> orgs['Zookd Devs'].parts.remove(registry.getId(orgs['Ynod Devs']))
+ >>> org_catalog.index(orgs['Zookd Devs'])
+ >>> list(role_catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Y3L4 Proj'],
+ ... 'principal_id': 'abe'})))
+ []
+
+ >>> rels = list(role_catalog.findRelations(t(
+ ... {'principal_id': 'abe',
+ ... 'organization': orgs['Zookd Corp Management']})))
+ >>> len(rels)
+ 1
+ >>> rels[0].role_ids.insert('reviewer')
+ 1
+ >>> role_catalog.index(rels[0])
+
+ >>> res = role_catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd Nbd'],
+ ... 'principal_id': 'abe'}))
+ >>> list(res)
+ ['publisher', 'reviewer', 'user manager', 'writer']
+
+Here we add a new organization.
+
+ >>> orgs['Zookd hOnc'] = org = Organization('Zookd hOnc')
+ >>> orgs['Zookd Devs'].parts.insert(registry.getId(org))
+ 1
+ >>> org_catalog.index(orgs['Zookd hOnc'])
+ >>> org_catalog.index(orgs['Zookd Devs'])
+
+ >>> list(role_catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd hOnc'],
+ ... 'principal_id': 'abe'})))
+ ['publisher', 'reviewer', 'user manager', 'writer']
+
+ >>> list(role_catalog.findValueTokens(
+ ... 'role_id', t({'organization': orgs['Zookd hOnc'],
+ ... 'principal_id': 'ophelia'})))
+ ['reviewer', 'writer']
+
+Now we'll remove it.
+
+ >>> orgs['Zookd Devs'].parts.remove(registry.getId(org))
+ >>> org_catalog.index(orgs['Zookd Devs'])
+ >>> org_catalog.unindex(orgs['Zookd hOnc'])
+
+TODO make sure that intransitive copy looks the way we expect
+
+[#administrivia]_
+
+.. ......... ..
+.. Footnotes ..
+.. ......... ..
+
+.. [#ZODB] Here we will set up a ZODB instance for us to use.
+
+ >>> from ZODB.tests.util import DB
+ >>> db = DB()
+ >>> conn = db.open()
+ >>> root = conn.root()
+
+.. [#faux_intid] Here's a simple persistent keyreference. Notice that it is
+ not persistent itself: this is important for conflict resolution to be
+ able to work (which we don't show here, but we're trying to lean more
+ towards real usage for this example).
+
+ >>> class Reference(object): # see zope.app.keyreference
+ ... def __init__(self, obj):
+ ... self.object = obj
+ ... def __cmp__(self, other):
+ ... # this doesn't work during conflict resolution. See
+ ... # zope.app.keyreference.persistent, 3.5 release, for current
+ ... # best practice.
+ ... if not isinstance(other, Reference):
+ ... raise ValueError('can only compare with Reference objects')
+ ... if self.object._p_jar is None or other.object._p_jar is None:
+ ... raise ValueError(
+ ... 'can only compare when both objects have connections')
+ ... return cmp(
+ ... (self.object._p_jar.db().database_name, self.object._p_oid),
+ ... (other.object._p_jar.db().database_name, other.object._p_oid),
+ ... )
+ ...
+
+ Here's a simple integer identifier tool.
+
+ >>> import persistent
+ >>> import BTrees
+ >>> class Registry(persistent.Persistent): # see zope.app.intid
+ ... def __init__(self, family=BTrees.family32):
+ ... self.family = family
+ ... self.ids = self.family.IO.BTree()
+ ... self.refs = self.family.OI.BTree()
+ ... def getId(self, obj):
+ ... if not isinstance(obj, persistent.Persistent):
+ ... raise ValueError('not a persistent object', obj)
+ ... if obj._p_jar is None:
+ ... self._p_jar.add(obj)
+ ... ref = Reference(obj)
+ ... id = self.refs.get(ref)
+ ... if id is None:
+ ... # naive for conflict resolution; see zope.app.intid
+ ... if self.ids:
+ ... id = self.ids.maxKey() + 1
+ ... else:
+ ... id = self.family.minint
+ ... self.ids[id] = ref
+ ... self.refs[ref] = id
+ ... return id
+ ... def __contains__(self, obj):
+ ... if (not isinstance(obj, persistent.Persistent) or
+ ... obj._p_oid is None):
+ ... return False
+ ... return Reference(obj) in self.refs
+ ... def getObject(self, id, default=None):
+ ... res = self.ids.get(id, None)
+ ... if res is None:
+ ... return default
+ ... else:
+ ... return res.object
+ ... def remove(self, r):
+ ... if isinstance(r, (int, long)):
+ ... self.refs.pop(self.ids.pop(r))
+ ... elif (not isinstance(r, persistent.Persistent) or
+ ... r._p_oid is None):
+ ... raise LookupError(r)
+ ... else:
+ ... self.ids.pop(self.refs.pop(Reference(r)))
+ ...
+ >>> registry = root['registry'] = Registry()
+
+ >>> import transaction
+ >>> transaction.commit()
+
+.. [#verifyObjectTransitive] The TransposingTransitive indexes provide
+ ISearchIndex.
+
+ >>> from zope.interface.verify import verifyObject
+ >>> import zc.relation.interfaces
+ >>> index = list(catalog.iterSearchIndexes())[0]
+ >>> verifyObject(zc.relation.interfaces.ISearchIndex, index)
+ True
+
+.. [#silliness] In _2001: A Space Odyssey_, many people believe the name HAL
+ was chosen because it was ROT25 of IBM.... I cheat a bit sometimes and
+ use ROT1 because the result sounds better.
+
+.. [#show_remaining_token_methods] For what it's worth, here are some small
+ examples of the remaining token-related methods.
+
+ These two are the singular versions of `tokenizeRelations` and
+ `resolveRelationTokens`.
+
+ `tokenizeRelation` returns a token for the given relation.
+
+ >>> catalog.tokenizeRelation(orgs['Zookd Corp Management']) == (
+ ... registry.getId(orgs['Zookd Corp Management']))
+ True
+
+ `resolveRelationToken` returns a relation for the given token.
+
+ >>> catalog.resolveRelationToken(registry.getId(
+ ... orgs['Zookd Corp Management'])) is orgs['Zookd Corp Management']
+ True
+
+ The "values" ones are a bit lame to show now, since the only value
+ we have right now is not tokenized but used straight up. But here
+ goes, showing some fascinating no-ops.
+
+ `tokenizeValues`, returns an iterable of tokens for the values of
+ the given index name.
+
+ >>> list(catalog.tokenizeValues((1,2,3), 'part'))
+ [1, 2, 3]
+
+ `resolveValueTokens` returns an iterable of values for the tokens of
+ the given index name.
+
+ >>> list(catalog.resolveValueTokens((1,2,3), 'part'))
+ [1, 2, 3]
+
+.. [#real_value_tokens] We can also show the values token methods more
+ sanely now.
+
+ >>> original = sorted((orgs['Zookd Devs'], orgs['Ynod SAs']))
+ >>> tokens = list(catalog.tokenizeValues(original, 'organization'))
+ >>> original == sorted(catalog.resolveValueTokens(tokens, 'organization'))
+ True
+
+.. [#verifyObjectIntransitive] The Intransitive search index provides
+ ISearchIndex and IListener.
+
+ >>> from zope.interface.verify import verifyObject
+ >>> import zc.relation.interfaces
+ >>> verifyObject(zc.relation.interfaces.ISearchIndex, index)
+ True
+ >>> verifyObject(zc.relation.interfaces.IListener, index)
+ True
+
+.. [#compare_copy] Before we modify them, let's look at the copy we made.
+ The copy should currently behave identically to the original.
+
+ >>> len(org_catalog)
+ 38
+ >>> len(role_catalog)
+ 38
+ >>> indexed = list(org_catalog)
+ >>> len(indexed)
+ 38
+ >>> orgs['Zookd Devs'] in indexed
+ True
+ >>> for r in indexed:
+ ... if r not in role_catalog:
+ ... print 'bad'
+ ... break
+ ... else:
+ ... print 'good'
+ ...
+ good
+ >>> org_names = set(dir(org_catalog))
+ >>> role_names = set(dir(role_catalog))
+ >>> org_names - role_names
+ set([])
+ >>> role_names - org_names
+ set(['org_catalog'])
+
+ >>> def checkYnodDevsParts(catalog):
+ ... res = sorted(catalog.findRelations(t({None: orgs['Ynod Devs']})))
+ ... if res != [
+ ... orgs["Bet Proj"], orgs["Y3L4 Proj"], orgs["Ynod Devs"]]:
+ ... print "bad", res
+ ...
+ >>> checkYnodDevsParts(org_catalog)
+ >>> checkYnodDevsParts(role_catalog)
+
+ >>> def checkOpheliaRoles(catalog):
+ ... res = sorted(catalog.findRelations({'principal_id': 'ophelia'}))
+ ... if repr(res) != (
+ ... "[<Roles instance (ophelia has publisher in Zookd Nbd)>, " +
+ ... "<Roles instance (ophelia has reviewer in Zookd Devs)>, " +
+ ... "<Roles instance (ophelia has writer in " +
+ ... "Zookd Corp Management)>]"):
+ ... print "bad", res
+ ...
+ >>> checkOpheliaRoles(org_catalog)
+ >>> checkOpheliaRoles(role_catalog)
+
+ >>> def checkOpheliaWriterOrganizations(catalog):
+ ... res = sorted(catalog.findRelations({None: zc.relation.catalog.Any(
+ ... catalog.findValueTokens(
+ ... 'organization', {'principal_id': 'ophelia',
+ ... 'role_id': 'writer'}))}))
+ ... if repr(res) != (
+ ... '[<Organization instance "Ynod Zookd Task Force">, ' +
+ ... '<Organization instance "Zookd Admins">, ' +
+ ... '<Organization instance "Zookd Corp Management">, ' +
+ ... '<Organization instance "Zookd Devs">, ' +
+ ... '<Organization instance "Zookd Nbd">, ' +
+ ... '<Organization instance "Zookd SAs">, ' +
+ ... '<Organization instance "Zookd hOgnmd">]'):
+ ... print "bad", res
+ ...
+ >>> checkOpheliaWriterOrganizations(org_catalog)
+ >>> checkOpheliaWriterOrganizations(role_catalog)
+
+ >>> def checkPrincipalsWithRolesInZookdDevs(catalog):
+ ... org_id = registry.getId(orgs['Zookd Devs'])
+ ... res = sorted(catalog.findValueTokens(
+ ... 'principal_id',
+ ... {'organization': zc.relation.catalog.any(
+ ... org_id, *catalog.findRelationTokens({'part': org_id}))}))
+ ... if res != ['abe', 'ignas', 'karyn', 'lettie', 'nancy', 'ophelia']:
+ ... print "bad", res
+ ...
+ >>> checkPrincipalsWithRolesInZookdDevs(org_catalog)
+ >>> checkPrincipalsWithRolesInZookdDevs(role_catalog)
+
+ >>> def checkOpheliaRolesInZookdNbd(catalog):
+ ... res = sorted(catalog.findValueTokens(
+ ... 'role_id', {
+ ... 'organization': registry.getId(orgs['Zookd Nbd']),
+ ... 'principal_id': 'ophelia'}))
+ ... if res != ['publisher', 'reviewer', 'writer']:
+ ... print "bad", res
+ ...
+ >>> checkOpheliaRolesInZookdNbd(org_catalog)
+ >>> checkOpheliaRolesInZookdNbd(role_catalog)
+
+ >>> def checkAbeRolesInZookdNbd(catalog):
+ ... res = sorted(catalog.findValueTokens(
+ ... 'role_id', {
+ ... 'organization': registry.getId(orgs['Zookd Nbd']),
+ ... 'principal_id': 'abe'}))
+ ... if res != ['publisher', 'user manager', 'writer']:
+ ... print "bad", res
+ ...
+ >>> checkAbeRolesInZookdNbd(org_catalog)
+ >>> checkAbeRolesInZookdNbd(role_catalog)
+
+.. [#removeDefaultQueryFactoryExceptions] You get errors by removing query
+ factories that are not registered.
+
+ >>> org_catalog.removeDefaultQueryFactory(factory2) # doctest: +ELLIPSIS
+ Traceback (most recent call last):
+ ...
+ LookupError: ('factory not found', <function factory2 at ...>)
+
+ >>> org_catalog.removeDefaultQueryFactory(None) # doctest: +ELLIPSIS
+ Traceback (most recent call last):
+ ...
+ LookupError: ('factory not found', None)
+
+.. [#copy_unchanged] Changes to one copy should not affect the other. That
+ means the role_catalog should still work as before.
+
+ >>> len(org_catalog)
+ 13
+ >>> len(list(org_catalog))
+ 13
+
+ >>> len(role_catalog)
+ 38
+ >>> indexed = list(role_catalog)
+ >>> len(indexed)
+ 38
+ >>> orgs['Zookd Devs'] in indexed
+ True
+ >>> orgs['Zookd Devs'] in role_catalog
+ True
+
+ >>> checkYnodDevsParts(role_catalog)
+ >>> checkOpheliaRoles(role_catalog)
+ >>> checkOpheliaWriterOrganizations(role_catalog)
+ >>> checkPrincipalsWithRolesInZookdDevs(role_catalog)
+ >>> checkOpheliaRolesInZookdNbd(role_catalog)
+ >>> checkAbeRolesInZookdNbd(role_catalog)
+
+.. [#administrivia]
+
+ You can add listeners multiple times.
+
+ >>> org_catalog.addListener(index2)
+ >>> list(org_catalog.iterListeners()) == [index2, index2]
+ True
+
+ Now we will remove the listeners, to show we can.
+
+ >>> org_catalog.removeListener(index2)
+ >>> org_catalog.removeListener(index2)
+ >>> org_catalog.removeListener(index2)
+ ... # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+ Traceback (most recent call last):
+ ...
+ LookupError: ('listener not found',
+ <zc.relation.searchindex.Intransitive object at ...>)
+ >>> org_catalog.removeListener(None)
+ ... # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+ Traceback (most recent call last):
+ ...
+ LookupError: ('listener not found', None)
+
+ Here's the same for removing a search index we don't have
+
+ >>> org_catalog.removeSearchIndex(index2)
+ ... # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+ Traceback (most recent call last):
+ ...
+ LookupError: ('index not found',
+ <zc.relation.searchindex.Intransitive object at ...>)
Property changes on: Sandbox/poster/zc.relation/trunk/src/zc/relation/tokens.txt
___________________________________________________________________
Name: svn:eol-style
+ native
More information about the Checkins
mailing list