[Checkins] SVN: zc.catalog/trunk/src/zc/catalog/ update btree
family support for the index classes, including tests for
support/update of legacy structures
Fred L. Drake, Jr.
fdrake at gmail.com
Fri Apr 27 14:10:18 EDT 2007
Log message for revision 74865:
update btree family support for the index classes, including tests for
support/update of legacy structures
Changed:
U zc.catalog/trunk/src/zc/catalog/index.py
A zc.catalog/trunk/src/zc/catalog/legacy.txt
U zc.catalog/trunk/src/zc/catalog/tests.py
-=-
Modified: zc.catalog/trunk/src/zc/catalog/index.py
===================================================================
--- zc.catalog/trunk/src/zc/catalog/index.py 2007-04-27 17:44:43 UTC (rev 74864)
+++ zc.catalog/trunk/src/zc/catalog/index.py 2007-04-27 18:10:18 UTC (rev 74865)
@@ -18,8 +18,9 @@
import sys
import datetime
import pytz.reference
+import BTrees
import persistent
-from BTrees import IFBTree, OOBTree, IOBTree, Length
+from BTrees import Length
from BTrees.Interfaces import IMerge
from zope import component, interface
@@ -32,6 +33,31 @@
from zc.catalog.i18n import _
+class FamilyProperty(object):
+
+ __name__ = "family"
+
+ def __get__(self, instance, type=None):
+ if instance is None:
+ return self
+ d = instance.__dict__
+ if "btreemodule" in d:
+ iftype = d["btreemodule"].split(".")[-1][:2]
+ if iftype == "IF":
+ d["family"] = BTrees.family32
+ elif iftype == "LF":
+ d["family"] = BTrees.family64
+ else:
+ raise ValueError("can't determine btree family based on"
+ " btreemodule of %r" % (iftype,))
+ del d["btreemodule"]
+ if "IOBTree" in d:
+ del d["IOBTree"]
+ else:
+ d["family"] = BTrees.family32
+ return d["family"]
+
+
class AbstractIndex(persistent.Persistent):
interface.implements(zope.index.interfaces.IInjection,
@@ -40,26 +66,31 @@
zc.catalog.interfaces.IIndexValues,
)
- btreemodule = 'BTrees.IFBTree'
- IOBTree = IOBTree.IOBTree
+ family = FamilyProperty()
- def __init__(self):
- self.btreemodule = component.queryUtility(
- component.interfaces.IFactory,
- name="IFTreeSet",
- default=IFBTree.IFTreeSet)().__class__.__module__
- self.IOBTree = component.queryUtility(
- zope.component.interfaces.IFactory,
- name='IOBTree', default=IOBTree.IOBTree)().__class__
+ def __init__(self, family=None):
+ if family is not None:
+ self.family = family
self.clear()
+ # These three are deprecated (they were never part of the interface),
+ # but can all be computed from the family attribute:
+
@property
+ def btreemodule(self):
+ return self.family.IF.__name__
+
+ @property
def BTreeAPI(self):
- return sys.modules[self.btreemodule]
+ return self.family.IF
+ @property
+ def IOBTree(self):
+ return self.family.IO.BTree
+
def clear(self):
- self.values_to_documents = OOBTree.OOBTree()
- self.documents_to_values = self.IOBTree()
+ self.values_to_documents = self.family.OO.BTree()
+ self.documents_to_values = self.family.IO.BTree()
self.documentCount = Length.Length(0)
self.wordCount = Length.Length(0)
@@ -114,7 +145,7 @@
values_to_documents = self.values_to_documents
docs = values_to_documents.get(added)
if docs is None:
- values_to_documents[added] = self.BTreeAPI.TreeSet((doc_id,))
+ values_to_documents[added] = self.family.IF.TreeSet((doc_id,))
self.wordCount.change(1)
else:
docs.insert(doc_id)
@@ -156,23 +187,23 @@
if query_type is None:
res = None
elif query_type == 'any_of':
- res = self.BTreeAPI.multiunion(
+ res = self.family.IF.multiunion(
[s for s in (values_to_documents.get(v) for v in query)
if s is not None])
elif query_type == 'any':
if query is None:
- res = self.BTreeAPI.Set(self.ids())
+ res = self.family.IF.Set(self.ids())
else:
assert zc.catalog.interfaces.IExtent.providedBy(query)
- res = query & self.BTreeAPI.Set(self.ids())
+ res = query & self.family.IF.Set(self.ids())
elif query_type == 'between':
- res = self.BTreeAPI.multiunion(
+ res = self.family.IF.multiunion(
[s for s in (values_to_documents.get(v) for v in
values_to_documents.keys(*query))
if s is not None])
elif query_type == 'none':
assert zc.catalog.interfaces.IExtent.providedBy(query)
- res = query - self.BTreeAPI.Set(self.ids())
+ res = query - self.family.IF.Set(self.ids())
else:
raise ValueError(
"unknown query type", query_type)
@@ -203,13 +234,13 @@
for v in added:
docs = values_to_documents.get(v)
if docs is None:
- values_to_documents[v] = self.BTreeAPI.TreeSet((doc_id,))
+ values_to_documents[v] = self.family.IF.TreeSet((doc_id,))
self.wordCount.change(1)
else:
docs.insert(doc_id)
def index_doc(self, doc_id, value):
- new = OOBTree.OOTreeSet(v for v in value if v is not None)
+ new = self.family.OO.TreeSet(v for v in value if v is not None)
if not new:
self.unindex_doc(doc_id)
else:
@@ -221,8 +252,8 @@
self.documentCount.change(1)
self._add_values(doc_id, new)
else:
- removed = OOBTree.difference(old, new)
- added = OOBTree.difference(new, old)
+ removed = self.family.OO.difference(old, new)
+ added = self.family.OO.difference(new, old)
for v in removed:
old.remove(v)
docs = values_to_documents.get(v)
@@ -253,20 +284,20 @@
if query_type is None:
res = None
elif query_type == 'any_of':
- res = self.BTreeAPI.Bucket()
+ res = self.family.IF.Bucket()
for v in query:
- _, res = self.BTreeAPI.weightedUnion(
+ _, res = self.family.IF.weightedUnion(
res, values_to_documents.get(v))
elif query_type == 'any':
if query is None:
- res = self.BTreeAPI.Set(self.ids())
+ res = self.family.IF.Set(self.ids())
else:
assert zc.catalog.interfaces.IExtent.providedBy(query)
- res = query & self.BTreeAPI.Set(self.ids())
+ res = query & self.family.IF.Set(self.ids())
elif query_type == 'all_of':
res = None
values = iter(query)
- empty = self.BTreeAPI.TreeSet()
+ empty = self.family.IF.TreeSet()
try:
res = values_to_documents.get(values.next(), empty)
except StopIteration:
@@ -276,16 +307,16 @@
v = values.next()
except StopIteration:
break
- res = self.BTreeAPI.intersection(
+ res = self.family.IF.intersection(
res, values_to_documents.get(v, empty))
elif query_type == 'between':
- res = self.BTreeAPI.Bucket()
+ res = self.family.IF.Bucket()
for v in values_to_documents.keys(*query):
- _, res = self.BTreeAPI.weightedUnion(res,
- values_to_documents.get(v))
+ _, res = self.family.IF.weightedUnion(
+ res, values_to_documents.get(v))
elif query_type == 'none':
assert zc.catalog.interfaces.IExtent.providedBy(query)
- res = query - self.BTreeAPI.Set(self.ids())
+ res = query - self.family.IF.Set(self.ids())
else:
raise ValueError(
"unknown query type", query_type)
Added: zc.catalog/trunk/src/zc/catalog/legacy.txt
===================================================================
--- zc.catalog/trunk/src/zc/catalog/legacy.txt 2007-04-27 17:44:43 UTC (rev 74864)
+++ zc.catalog/trunk/src/zc/catalog/legacy.txt 2007-04-27 18:10:18 UTC (rev 74865)
@@ -0,0 +1,148 @@
+Support for legacy data
+-----------------------
+
+Prior to the introduction of btree "families" and the
+``BTrees.Interfaces.IBTreeFamily`` interface, the indexes defined by
+the ``zc.catalog.index`` module used the instance attributes
+``btreemodule`` and ``IOBTree``, initialized in the constructor, and
+the ``BTreeAPI`` property. These are replaced by the ``family``
+attribute in the current implementation.
+
+This is a white-box test that verifies that the supported values in
+existing data structures (loaded from pickles) can be used effectively
+with the current implementation.
+
+There are two supported sets of values; one for 32-bit btrees::
+
+ >>> import BTrees.IOBTree
+
+ >>> legacy32 = {
+ ... "btreemodule": "BTrees.IFBTree",
+ ... "IOBTree": BTrees.IOBTree.IOBTree,
+ ... }
+
+and another for 64-bit btrees::
+
+ >>> import BTrees.LOBTree
+
+ >>> legacy64 = {
+ ... "btreemodule": "BTrees.LFBTree",
+ ... "IOBTree": BTrees.LOBTree.LOBTree,
+ ... }
+
+In each case, actual legacy structures will also include index
+structures that match the right integer size::
+
+ >>> import BTrees.OOBTree
+ >>> import BTrees.Length
+
+ >>> legacy32["values_to_documents"] = BTrees.OOBTree.OOBTree()
+ >>> legacy32["documents_to_values"] = BTrees.IOBTree.IOBTree()
+ >>> legacy32["documentCount"] = BTrees.Length.Length(0)
+ >>> legacy32["wordCount"] = BTrees.Length.Length(0)
+
+ >>> legacy64["values_to_documents"] = BTrees.OOBTree.OOBTree()
+ >>> legacy64["documents_to_values"] = BTrees.LOBTree.LOBTree()
+ >>> legacy64["documentCount"] = BTrees.Length.Length(0)
+ >>> legacy64["wordCount"] = BTrees.Length.Length(0)
+
+What we want to do is verify that the ``family`` attribute is properly
+computed for instances loaded from legacy data, and ensure that the
+structure is updated cleanly without providing cause for a read-only
+transaction to become a write-transaction. We'll need to create
+instances that conform to the old data structures, pickle them, and
+show that unpickling them produces instances that use the correct
+families.
+
+Let's create new instances, and force the internal data to match the
+old structures::
+
+ >>> import pickle
+ >>> import zc.catalog.index
+
+ >>> vi32 = zc.catalog.index.ValueIndex()
+ >>> vi32.__dict__ = legacy32.copy()
+ >>> legacy32_pickle = pickle.dumps(vi32)
+
+ >>> vi64 = zc.catalog.index.ValueIndex()
+ >>> vi64.__dict__ = legacy64.copy()
+ >>> legacy64_pickle = pickle.dumps(vi64)
+
+Now, let's unpickle these structures and verify their contents. We'll
+start with the 32-bit variety::
+
+ >>> vi32 = pickle.loads(legacy32_pickle)
+
+ >>> vi32.__dict__["btreemodule"]
+ 'BTrees.IFBTree'
+ >>> vi32.__dict__["IOBTree"]
+ <type 'BTrees.IOBTree.IOBTree'>
+
+ >>> "family" in vi32.__dict__
+ False
+
+ >>> vi32._p_changed
+ False
+
+The ``family`` property returns the ``BTrees.family32`` singleton::
+
+ >>> vi32.family is BTrees.family32
+ True
+
+Once accessed, the legacy values have been cleaned out from the
+instance dictionary::
+
+ >>> "btreemodule" in vi32.__dict__
+ False
+ >>> "IOBTree" in vi32.__dict__
+ False
+
+Accessing these attributes as attributes provides the proper values
+anyway::
+
+ >>> vi32.btreemodule
+ 'BTrees.IFBTree'
+ >>> vi32.IOBTree
+ <type 'BTrees.IOBTree.IOBTree'>
+ >>> vi32.BTreeAPI
+ <module 'BTrees.IFBTree' from ...>
+
+Even though the instance dictionary has been cleaned up, the change
+flag hasn't been set. The cleanup is deliberately done this way to
+avoid turning a read-only transaction into a write-transaction::
+
+ >>> vi32._p_changed
+ False
+
+The 64-bit variation provides equivalent behavior::
+
+ >>> vi64 = pickle.loads(legacy64_pickle)
+
+ >>> vi64.__dict__["btreemodule"]
+ 'BTrees.LFBTree'
+ >>> vi64.__dict__["IOBTree"]
+ <type 'BTrees.LOBTree.LOBTree'>
+
+ >>> "family" in vi64.__dict__
+ False
+
+ >>> vi64._p_changed
+ False
+
+ >>> vi64.family is BTrees.family64
+ True
+
+ >>> "btreemodule" in vi64.__dict__
+ False
+ >>> "IOBTree" in vi64.__dict__
+ False
+
+ >>> vi64.btreemodule
+ 'BTrees.LFBTree'
+ >>> vi64.IOBTree
+ <type 'BTrees.LOBTree.LOBTree'>
+ >>> vi64.BTreeAPI
+ <module 'BTrees.LFBTree' from ...>
+
+ >>> vi64._p_changed
+ False
Property changes on: zc.catalog/trunk/src/zc/catalog/legacy.txt
___________________________________________________________________
Name: svn:mime-type
+ text/plain
Name: svn:eol-style
+ native
Modified: zc.catalog/trunk/src/zc/catalog/tests.py
===================================================================
--- zc.catalog/trunk/src/zc/catalog/tests.py 2007-04-27 17:44:43 UTC (rev 74864)
+++ zc.catalog/trunk/src/zc/catalog/tests.py 2007-04-27 18:10:18 UTC (rev 74865)
@@ -87,6 +87,9 @@
tearDown=tearDown),
doctest.DocFileSuite('callablewrapper.txt', setUp=setUp64bit,
tearDown=tearDown),
+
+ # legacy data support
+ doctest.DocFileSuite('legacy.txt', optionflags=doctest.ELLIPSIS),
))
import zc.catalog.stemmer
if not zc.catalog.stemmer.broken:
More information about the Checkins
mailing list