[Checkins] SVN: zc.catalog/trunk/src/zc/catalog/ add support for
extents that determine their own initial population
Fred L. Drake, Jr.
fdrake at gmail.com
Mon Dec 18 14:30:16 EST 2006
Log message for revision 71596:
add support for extents that determine their own initial population
Changed:
U zc.catalog/trunk/src/zc/catalog/extentcatalog.py
U zc.catalog/trunk/src/zc/catalog/extentcatalog.txt
U zc.catalog/trunk/src/zc/catalog/interfaces.py
-=-
Modified: zc.catalog/trunk/src/zc/catalog/extentcatalog.py
===================================================================
--- zc.catalog/trunk/src/zc/catalog/extentcatalog.py 2006-12-18 19:26:07 UTC (rev 71595)
+++ zc.catalog/trunk/src/zc/catalog/extentcatalog.py 2006-12-18 19:30:16 UTC (rev 71596)
@@ -188,16 +188,45 @@
super(Catalog, self).updateIndex(index)
else:
uidutil = zapi.getUtility(IIntIds)
- for uid in uidutil:
- obj = uidutil.getObject(uid)
- try:
- self.extent.add(uid, obj)
- except ValueError:
- self.unindex_doc(uid)
- else:
+
+ if interfaces.ISelfPopulatingExtent.providedBy(self.extent):
+ if not self.extent.populated:
+ self.extent.populate()
+ assert self.extent.populated
+
+ for uid in self.extent:
+ obj = uidutil.getObject(uid)
index.index_doc(uid, obj)
+ else:
+ for uid in uidutil:
+ obj = uidutil.getObject(uid)
+ try:
+ self.extent.add(uid, obj)
+ except ValueError:
+ self.unindex_doc(uid)
+ else:
+ index.index_doc(uid, obj)
+
def updateIndexes(self):
uidutil = zapi.getUtility(IIntIds)
- for uid in uidutil:
- self.index_doc(uid, uidutil.getObject(uid))
+
+ if interfaces.ISelfPopulatingExtent.providedBy(self.extent):
+ if not self.extent.populated:
+ self.extent.populate()
+ assert self.extent.populated
+
+ site = zope.app.component.hooks.getSite()
+ registered = True
+ if not self.queue:
+ registered = self._register()
+
+ for uid in self.extent:
+ self.queue[uid] = (uidutil.getObject(uid), site)
+
+ if not registered:
+ self._process()
+
+ else:
+ for uid in uidutil:
+ self.index_doc(uid, uidutil.getObject(uid))
Modified: zc.catalog/trunk/src/zc/catalog/extentcatalog.txt
===================================================================
--- zc.catalog/trunk/src/zc/catalog/extentcatalog.txt 2006-12-18 19:26:07 UTC (rev 71595)
+++ zc.catalog/trunk/src/zc/catalog/extentcatalog.txt 2006-12-18 19:30:16 UTC (rev 71596)
@@ -75,9 +75,9 @@
>>> for c in content.values():
... catalog.index_doc(intid.register(c), c)
...
- >>> matches = list(sorted(
- ... [id for id, ob in content.items() if filter(extent, id, ob)]))
- >>> list(sorted(extent)) == list(sorted(index.uids)) == matches
+ >>> matches = sorted(
+ ... [id for id, ob in content.items() if filter(extent, id, ob)])
+ >>> sorted(extent) == sorted(index.uids) == matches
True
If a content object is indexed that used to match the filter but no longer
@@ -90,7 +90,7 @@
>>> 5 in catalog.extent
False
>>> matches.remove(5)
- >>> list(sorted(extent)) == list(sorted(index.uids)) == matches
+ >>> sorted(extent) == sorted(index.uids) == matches
True
Unindexing an object that is in the catalog should simply remove it from the
@@ -106,7 +106,7 @@
>>> 99 in catalog['index'].uids
False
>>> matches.remove(99)
- >>> list(sorted(extent)) == list(sorted(index.uids)) == matches
+ >>> sorted(extent) == sorted(index.uids) == matches
True
And similarly, unindexing an object that is not in the catalog should be a
@@ -117,7 +117,7 @@
>>> catalog.unindex_doc(0)
>>> 0 in catalog.extent
False
- >>> list(sorted(extent)) == list(sorted(index.uids)) == matches
+ >>> sorted(extent) == sorted(index.uids) == matches
True
Clearing the catalog clears both the extent and the contained indexes.
@@ -130,13 +130,13 @@
>>> catalog.updateIndexes()
>>> matches.append(99)
- >>> list(sorted(extent)) == list(sorted(index.uids)) == matches
+ >>> sorted(extent) == sorted(index.uids) == matches
True
>>> index2 = DummyIndex()
>>> catalog['index2'] = index2
>>> index.uids.remove(1) # to confirm that only index 2 is touched
>>> catalog.updateIndex(index2)
- >>> list(sorted(extent)) == list(sorted(index2.uids)) == matches
+ >>> sorted(extent) == sorted(index2.uids) == matches
True
>>> 1 in index.uids
False
@@ -162,7 +162,7 @@
>>> 1 in index2.uids
False
>>> matches.remove(1)
- >>> matches == list(sorted(catalog.extent))
+ >>> matches == sorted(catalog.extent)
True
The extent itself provides a number of merging features to allow its values to
@@ -178,32 +178,152 @@
>>> alt_set = IFBTree.IFTreeSet()
>>> alt_set.update(range(0, 166, 33)) # return value is unimportant here
6
- >>> list(sorted(alt_set))
+ >>> sorted(alt_set)
[0, 33, 66, 99, 132, 165]
- >>> list(sorted(catalog.extent & alt_set))
+ >>> sorted(catalog.extent & alt_set)
[33, 99]
- >>> list(sorted(alt_set & catalog.extent))
+ >>> sorted(alt_set & catalog.extent)
[33, 99]
- >>> list(sorted(catalog.extent.intersection(alt_set)))
+ >>> sorted(catalog.extent.intersection(alt_set))
[33, 99]
>>> union_matches = sets.Set(matches)
>>> union_matches.union_update(alt_set)
- >>> union_matches = list(sorted(union_matches))
- >>> list(sorted(alt_set | catalog.extent)) == union_matches
+ >>> union_matches = sorted(union_matches)
+ >>> sorted(alt_set | catalog.extent) == union_matches
True
- >>> list(sorted(catalog.extent | alt_set)) == union_matches
+ >>> sorted(catalog.extent | alt_set) == union_matches
True
- >>> list(sorted(catalog.extent.union(alt_set))) == union_matches
+ >>> sorted(catalog.extent.union(alt_set)) == union_matches
True
- >>> list(sorted(alt_set - catalog.extent))
+ >>> sorted(alt_set - catalog.extent)
[0, 66, 132, 165]
- >>> list(sorted(catalog.extent.rdifference(alt_set)))
+ >>> sorted(catalog.extent.rdifference(alt_set))
[0, 66, 132, 165]
>>> matches.remove(33)
>>> matches.remove(99)
- >>> list(sorted(catalog.extent - alt_set)) == matches
+ >>> sorted(catalog.extent - alt_set) == matches
True
- >>> list(sorted(catalog.extent.difference(alt_set))) == matches
+ >>> sorted(catalog.extent.difference(alt_set)) == matches
True
+
+
+Self-populating extents
+-----------------------
+
+An extent used to initialize an extent catalog may know how to
+populate itself; this is especially useful if the catalog can be
+initialized with fewer items than those available in the IIntIds
+utility that are also within the nearest Zope 3 site (the policy coded
+in the basic Zope 3 catalog).
+
+Such an extent must implement the `ISelfPopulatingExtent`
+interface, which requires two attributes. Let's use the
+`FilterExtent` class as a base for implementing such an extent, with a
+method that selects object 42 (created and registered above)::
+
+ >>> class PopulatingExtent(extentcatalog.FilterExtent):
+ ...
+ ... interface.implements(interfaces.ISelfPopulatingExtent)
+ ...
+ ... populated = False
+ ...
+ ... def populate(self):
+ ... if self.populated:
+ ... return
+ ... self.add(42, content[42])
+ ... self.populated = True
+
+Creating a catalog based on this extent ignores objects in the
+database already::
+
+ >>> def accept_any(extent, uid, ob):
+ ... return True
+
+ >>> extent = PopulatingExtent(accept_any)
+ >>> catalog = extentcatalog.Catalog(extent)
+ >>> index = DummyIndex()
+ >>> catalog['index'] = index
+
+At this point, our extent remains unpopulated::
+
+ >>> extent.populated
+ False
+
+Iterating over the extent does not cause it to be automatically
+populated::
+
+ >>> list(extent)
+ []
+
+Causing our new index to be filled will cause the `populate()` method
+to be called, setting the `populated` flag as a side-effect::
+
+ >>> catalog.updateIndex(index)
+ >>> extent.populated
+ True
+
+ >>> list(extent)
+ [42]
+
+The index has been updated with the documents identified by the
+extent::
+
+ >>> index.uids
+ Set([42])
+
+Updating the same index repeatedly will continue to use the extent as
+the source of documents to include::
+
+ >>> catalog.updateIndex(index)
+
+ >>> list(extent)
+ [42]
+ >>> index.uids
+ Set([42])
+
+The `updateIndexes()` method has a similar behavior. If we add an
+additional index to the catalog, we see that it indexes only those
+objects from the extent::
+
+ >>> index2 = DummyIndex()
+ >>> catalog['index2'] = index2
+
+ >>> catalog.updateIndexes()
+
+ >>> list(extent)
+ [42]
+ >>> index.uids
+ Set([42])
+ >>> index2.uids
+ Set([42])
+
+When we have a fresh catalog and extent (not yet populated), we see that
+`updateIndexes()` will cause the extent to be populated::
+
+ >>> extent = PopulatingExtent(accept_any)
+ >>> catalog = extentcatalog.Catalog(extent)
+ >>> index1 = DummyIndex()
+ >>> index2 = DummyIndex()
+ >>> catalog['index1'] = index1
+ >>> catalog['index2'] = index2
+
+ >>> extent.populated
+ False
+
+ >>> catalog.updateIndexes()
+
+ >>> extent.populated
+ True
+
+ >>> list(extent)
+ [42]
+ >>> index.uids
+ Set([42])
+ >>> index2.uids
+ Set([42])
+
+
+Let's clean up behind ourselves::
+
>>> from zope.app.testing import ztapi
>>> ztapi.unprovideUtility(zope.app.intid.interfaces.IIntIds)
Modified: zc.catalog/trunk/src/zc/catalog/interfaces.py
===================================================================
--- zc.catalog/trunk/src/zc/catalog/interfaces.py 2006-12-18 19:26:07 UTC (rev 71595)
+++ zc.catalog/trunk/src/zc/catalog/interfaces.py 2006-12-18 19:30:16 UTC (rev 71596)
@@ -94,6 +94,29 @@
associated obj and should return a boolean True (is member of extent)
or False (is not member of extent).""")
+
+class ISelfPopulatingExtent(IExtent):
+ """An extent that knows how to create its own initial population."""
+
+ populated = schema.Bool(
+ title=_("Populated"),
+ description=_(
+ "Flag indicating whether self-population has been performed."),
+ readonly=True,
+ )
+
+ def populate():
+ """Populate the extent based on the current content of the database.
+
+ After a successful call, `populated` will be True. Unsuccessful calls
+ must raise exceptions.
+
+ If `populated` is true when called, this is a no-op. After the
+ initial population, updates should be maintained via other mechanisms.
+
+ """
+
+
class IExtentCatalog(interface.Interface):
"""A catalog of only items within an extent.
More information about the Checkins
mailing list