[Checkins] SVN: zc.catalog/trunk/src/zc/catalog/ update btree family support for the index classes, including tests for

Fred L. Drake, Jr. fdrake at gmail.com
Fri Apr 27 14:10:18 EDT 2007


Log message for revision 74865:
  update btree family support for the index classes, including tests for
  support/update of legacy structures
  

Changed:
  U   zc.catalog/trunk/src/zc/catalog/index.py
  A   zc.catalog/trunk/src/zc/catalog/legacy.txt
  U   zc.catalog/trunk/src/zc/catalog/tests.py

-=-
Modified: zc.catalog/trunk/src/zc/catalog/index.py
===================================================================
--- zc.catalog/trunk/src/zc/catalog/index.py	2007-04-27 17:44:43 UTC (rev 74864)
+++ zc.catalog/trunk/src/zc/catalog/index.py	2007-04-27 18:10:18 UTC (rev 74865)
@@ -18,8 +18,9 @@
 import sys
 import datetime
 import pytz.reference
+import BTrees
 import persistent
-from BTrees import IFBTree, OOBTree, IOBTree, Length
+from BTrees import Length
 from BTrees.Interfaces import IMerge
 
 from zope import component, interface
@@ -32,6 +33,31 @@
 from zc.catalog.i18n import _
 
 
+class FamilyProperty(object):
+
+    __name__ = "family"
+
+    def __get__(self, instance, type=None):
+        if instance is None:
+            return self
+        d = instance.__dict__
+        if "btreemodule" in d:
+            iftype = d["btreemodule"].split(".")[-1][:2]
+            if iftype == "IF":
+                d["family"] = BTrees.family32
+            elif iftype == "LF":
+                d["family"] = BTrees.family64
+            else:
+                raise ValueError("can't determine btree family based on"
+                                 " btreemodule of %r" % (iftype,))
+            del d["btreemodule"]
+            if "IOBTree" in d:
+                del d["IOBTree"]
+        else:
+            d["family"] = BTrees.family32
+        return d["family"]
+
+
 class AbstractIndex(persistent.Persistent):
 
     interface.implements(zope.index.interfaces.IInjection,
@@ -40,26 +66,31 @@
                          zc.catalog.interfaces.IIndexValues,
                          )
 
-    btreemodule = 'BTrees.IFBTree'
-    IOBTree = IOBTree.IOBTree
+    family = FamilyProperty()
 
-    def __init__(self):
-        self.btreemodule = component.queryUtility(
-            component.interfaces.IFactory,
-            name="IFTreeSet",
-            default=IFBTree.IFTreeSet)().__class__.__module__
-        self.IOBTree = component.queryUtility(
-            zope.component.interfaces.IFactory,
-            name='IOBTree', default=IOBTree.IOBTree)().__class__
+    def __init__(self, family=None):
+        if family is not None:
+            self.family = family
         self.clear()
 
+    # These three are deprecated (they were never part of the interface),
+    # but can all be computed from the family attribute:
+
     @property
+    def btreemodule(self):
+        return self.family.IF.__name__
+
+    @property
     def BTreeAPI(self):
-        return sys.modules[self.btreemodule]
+        return self.family.IF
 
+    @property
+    def IOBTree(self):
+        return self.family.IO.BTree
+
     def clear(self):
-        self.values_to_documents = OOBTree.OOBTree()
-        self.documents_to_values = self.IOBTree()
+        self.values_to_documents = self.family.OO.BTree()
+        self.documents_to_values = self.family.IO.BTree()
         self.documentCount = Length.Length(0)
         self.wordCount = Length.Length(0)
 
@@ -114,7 +145,7 @@
         values_to_documents = self.values_to_documents
         docs = values_to_documents.get(added)
         if docs is None:
-            values_to_documents[added] = self.BTreeAPI.TreeSet((doc_id,))
+            values_to_documents[added] = self.family.IF.TreeSet((doc_id,))
             self.wordCount.change(1)
         else:
             docs.insert(doc_id)
@@ -156,23 +187,23 @@
         if query_type is None:
             res = None
         elif query_type == 'any_of':
-            res = self.BTreeAPI.multiunion(
+            res = self.family.IF.multiunion(
                 [s for s in (values_to_documents.get(v) for v in query)
                  if s is not None])
         elif query_type == 'any':
             if query is None:
-                res = self.BTreeAPI.Set(self.ids())
+                res = self.family.IF.Set(self.ids())
             else:
                 assert zc.catalog.interfaces.IExtent.providedBy(query)
-                res = query & self.BTreeAPI.Set(self.ids())
+                res = query & self.family.IF.Set(self.ids())
         elif query_type == 'between':
-            res = self.BTreeAPI.multiunion(
+            res = self.family.IF.multiunion(
                 [s for s in (values_to_documents.get(v) for v in
                              values_to_documents.keys(*query))
                  if s is not None])
         elif query_type == 'none':
             assert zc.catalog.interfaces.IExtent.providedBy(query)
-            res = query - self.BTreeAPI.Set(self.ids())
+            res = query - self.family.IF.Set(self.ids())
         else:
             raise ValueError(
                 "unknown query type", query_type)
@@ -203,13 +234,13 @@
         for v in added:
             docs = values_to_documents.get(v)
             if docs is None:
-                values_to_documents[v] = self.BTreeAPI.TreeSet((doc_id,))
+                values_to_documents[v] = self.family.IF.TreeSet((doc_id,))
                 self.wordCount.change(1)
             else:
                 docs.insert(doc_id)
 
     def index_doc(self, doc_id, value):
-        new = OOBTree.OOTreeSet(v for v in value if v is not None)
+        new = self.family.OO.TreeSet(v for v in value if v is not None)
         if not new:
             self.unindex_doc(doc_id)
         else:
@@ -221,8 +252,8 @@
                 self.documentCount.change(1)
                 self._add_values(doc_id, new)
             else:
-                removed = OOBTree.difference(old, new)
-                added = OOBTree.difference(new, old)
+                removed = self.family.OO.difference(old, new)
+                added = self.family.OO.difference(new, old)
                 for v in removed:
                     old.remove(v)
                     docs = values_to_documents.get(v)
@@ -253,20 +284,20 @@
         if query_type is None:
             res = None
         elif query_type == 'any_of':
-            res = self.BTreeAPI.Bucket()
+            res = self.family.IF.Bucket()
             for v in query:
-                _, res = self.BTreeAPI.weightedUnion(
+                _, res = self.family.IF.weightedUnion(
                     res, values_to_documents.get(v))
         elif query_type == 'any':
             if query is None:
-                res = self.BTreeAPI.Set(self.ids())
+                res = self.family.IF.Set(self.ids())
             else:
                 assert zc.catalog.interfaces.IExtent.providedBy(query)
-                res = query & self.BTreeAPI.Set(self.ids())
+                res = query & self.family.IF.Set(self.ids())
         elif query_type == 'all_of':
             res = None
             values = iter(query)
-            empty = self.BTreeAPI.TreeSet()
+            empty = self.family.IF.TreeSet()
             try:
                 res = values_to_documents.get(values.next(), empty)
             except StopIteration:
@@ -276,16 +307,16 @@
                     v = values.next()
                 except StopIteration:
                     break
-                res = self.BTreeAPI.intersection(
+                res = self.family.IF.intersection(
                     res, values_to_documents.get(v, empty))
         elif query_type == 'between':
-            res = self.BTreeAPI.Bucket()
+            res = self.family.IF.Bucket()
             for v in values_to_documents.keys(*query):
-                _, res = self.BTreeAPI.weightedUnion(res,
-                                                     values_to_documents.get(v))
+                _, res = self.family.IF.weightedUnion(
+                    res, values_to_documents.get(v))
         elif query_type == 'none':
             assert zc.catalog.interfaces.IExtent.providedBy(query)
-            res = query - self.BTreeAPI.Set(self.ids())
+            res = query - self.family.IF.Set(self.ids())
         else:
             raise ValueError(
                 "unknown query type", query_type)

Added: zc.catalog/trunk/src/zc/catalog/legacy.txt
===================================================================
--- zc.catalog/trunk/src/zc/catalog/legacy.txt	2007-04-27 17:44:43 UTC (rev 74864)
+++ zc.catalog/trunk/src/zc/catalog/legacy.txt	2007-04-27 18:10:18 UTC (rev 74865)
@@ -0,0 +1,148 @@
+Support for legacy data
+-----------------------
+
+Prior to the introduction of btree "families" and the
+``BTrees.Interfaces.IBTreeFamily`` interface, the indexes defined by
+the ``zc.catalog.index`` module used the instance attributes
+``btreemodule`` and ``IOBTree``, initialized in the constructor, and
+the ``BTreeAPI`` property.  These are replaced by the ``family``
+attribute in the current implementation.
+
+This is a white-box test that verifies that the supported values in
+existing data structures (loaded from pickles) can be used effectively
+with the current implementation.
+
+There are two supported sets of values; one for 32-bit btrees::
+
+  >>> import BTrees.IOBTree
+
+  >>> legacy32 = {
+  ...     "btreemodule": "BTrees.IFBTree",
+  ...     "IOBTree": BTrees.IOBTree.IOBTree,
+  ...     }
+
+and another for 64-bit btrees::
+
+  >>> import BTrees.LOBTree
+
+  >>> legacy64 = {
+  ...     "btreemodule": "BTrees.LFBTree",
+  ...     "IOBTree": BTrees.LOBTree.LOBTree,
+  ...     }
+
+In each case, actual legacy structures will also include index
+structures that match the right integer size::
+
+  >>> import BTrees.OOBTree
+  >>> import BTrees.Length
+
+  >>> legacy32["values_to_documents"] = BTrees.OOBTree.OOBTree()
+  >>> legacy32["documents_to_values"] = BTrees.IOBTree.IOBTree()
+  >>> legacy32["documentCount"] = BTrees.Length.Length(0)
+  >>> legacy32["wordCount"] = BTrees.Length.Length(0)
+
+  >>> legacy64["values_to_documents"] = BTrees.OOBTree.OOBTree()
+  >>> legacy64["documents_to_values"] = BTrees.LOBTree.LOBTree()
+  >>> legacy64["documentCount"] = BTrees.Length.Length(0)
+  >>> legacy64["wordCount"] = BTrees.Length.Length(0)
+
+What we want to do is verify that the ``family`` attribute is properly
+computed for instances loaded from legacy data, and ensure that the
+structure is updated cleanly without providing cause for a read-only
+transaction to become a write-transaction.  We'll need to create
+instances that conform to the old data structures, pickle them, and
+show that unpickling them produces instances that use the correct
+families.
+
+Let's create new instances, and force the internal data to match the
+old structures::
+
+  >>> import pickle
+  >>> import zc.catalog.index
+
+  >>> vi32 = zc.catalog.index.ValueIndex()
+  >>> vi32.__dict__ = legacy32.copy()
+  >>> legacy32_pickle = pickle.dumps(vi32)
+
+  >>> vi64 = zc.catalog.index.ValueIndex()
+  >>> vi64.__dict__ = legacy64.copy()
+  >>> legacy64_pickle = pickle.dumps(vi64)
+
+Now, let's unpickle these instances and verify their structures.  We'll
+start with the 32-bit variety::
+
+  >>> vi32 = pickle.loads(legacy32_pickle)
+
+  >>> vi32.__dict__["btreemodule"]
+  'BTrees.IFBTree'
+  >>> vi32.__dict__["IOBTree"]
+  <type 'BTrees.IOBTree.IOBTree'>
+
+  >>> "family" in vi32.__dict__
+  False
+
+  >>> vi32._p_changed
+  False
+
+The ``family`` property returns the ``BTrees.family32`` singleton::
+
+  >>> vi32.family is BTrees.family32
+  True
+
+Once accessed, the legacy values have been cleaned out from the
+instance dictionary::
+
+  >>> "btreemodule" in vi32.__dict__
+  False
+  >>> "IOBTree" in vi32.__dict__
+  False
+
+Accessing these attributes as attributes provides the proper values
+anyway::
+
+  >>> vi32.btreemodule
+  'BTrees.IFBTree'
+  >>> vi32.IOBTree
+  <type 'BTrees.IOBTree.IOBTree'>
+  >>> vi32.BTreeAPI
+  <module 'BTrees.IFBTree' from ...>
+
+Even though the instance dictionary has been cleaned up, the change
+flag hasn't been set.  The cleanup is done this way to avoid turning a
+read-only transaction into a write-transaction::
+
+  >>> vi32._p_changed
+  False
+
+The 64-bit variation provides equivalent behavior::
+
+  >>> vi64 = pickle.loads(legacy64_pickle)
+
+  >>> vi64.__dict__["btreemodule"]
+  'BTrees.LFBTree'
+  >>> vi64.__dict__["IOBTree"]
+  <type 'BTrees.LOBTree.LOBTree'>
+
+  >>> "family" in vi64.__dict__
+  False
+
+  >>> vi64._p_changed
+  False
+
+  >>> vi64.family is BTrees.family64
+  True
+
+  >>> "btreemodule" in vi64.__dict__
+  False
+  >>> "IOBTree" in vi64.__dict__
+  False
+
+  >>> vi64.btreemodule
+  'BTrees.LFBTree'
+  >>> vi64.IOBTree
+  <type 'BTrees.LOBTree.LOBTree'>
+  >>> vi64.BTreeAPI
+  <module 'BTrees.LFBTree' from ...>
+
+  >>> vi64._p_changed
+  False


Property changes on: zc.catalog/trunk/src/zc/catalog/legacy.txt
___________________________________________________________________
Name: svn:mime-type
   + text/plain
Name: svn:eol-style
   + native

Modified: zc.catalog/trunk/src/zc/catalog/tests.py
===================================================================
--- zc.catalog/trunk/src/zc/catalog/tests.py	2007-04-27 17:44:43 UTC (rev 74864)
+++ zc.catalog/trunk/src/zc/catalog/tests.py	2007-04-27 18:10:18 UTC (rev 74865)
@@ -87,6 +87,9 @@
                              tearDown=tearDown),
         doctest.DocFileSuite('callablewrapper.txt', setUp=setUp64bit,
                              tearDown=tearDown),
+
+        # legacy data support
+        doctest.DocFileSuite('legacy.txt', optionflags=doctest.ELLIPSIS),
         ))
     import zc.catalog.stemmer
     if not zc.catalog.stemmer.broken:



More information about the Checkins mailing list