[Zope-CVS] CVS: Products/Ape/lib/apelib/zodb3 - serializers.py:1.6

Shane Hathaway shane at zope.com
Thu Feb 19 01:44:36 EST 2004


Update of /cvs-repository/Products/Ape/lib/apelib/zodb3
In directory cvs.zope.org:/tmp/cvs-serv14876/lib/apelib/zodb3

Modified Files:
	serializers.py 
Log Message:
Two changes that make storing the Zope root object reasonable:

  - Pickled remainders are now encoded in base64 and stored in the
    properties file.  Ape no longer creates .remainder files, although
    it will still read existing ones.  This reduces the number of files
    Ape creates and sets a precedent that any annotation can contain a
    pickle as long as it's encoded as text.

  - There is now one PersistentMappingSerializer.  It stores both
    references and simple objects.  To accomplish this, it has to
    store reference values differently from non-reference values.
    The non-references get pickled and stored in an annotation called 
    "other". 

In testing, Ape successfully acted as the main Zope database.
Woohoo!  (A little Zope hack was required, but it should be
possible to remove the need for it.)



=== Products/Ape/lib/apelib/zodb3/serializers.py 1.5 => 1.6 ===
--- Products/Ape/lib/apelib/zodb3/serializers.py:1.5	Tue Feb 17 00:25:13 2004
+++ Products/Ape/lib/apelib/zodb3/serializers.py	Thu Feb 19 01:44:05 2004
@@ -20,6 +20,7 @@
 from cStringIO import StringIO
 from cPickle import Pickler, Unpickler, UnpickleableError, loads, dumps
 import time
+import base64
 from types import DictType
 
 from Persistence import Persistent, PersistentMapping
@@ -32,75 +33,100 @@
 from apelib.core.schemas import RowSequenceSchema, FieldSchema
 
 
-class StringToPersistentPM:
-    """String-to-Persistent PersistentMapping (de)serializer
-
-    Requires that the PM maps string keys to first-class persistent
-    objects.
+def isPersistent(obj):
+    try:
+        return isinstance(obj, Persistent)
+    except TypeError:
+        # XXX Python 2.1 thinks Persistent is not a class
+        return 0
+
+def encodeToText(s, keys, unmanaged_count=0):
+    """Encodes a binary pickle using base 64.
+
+    Note that Python's text pickle format encodes unicode using full
+    8-bit bytes (Python versions 2.1 through 2.3 all do this), meaning
+    that so-called text pickles are not 7-bit clean.  On the other
+    hand, the text pickle format is fairly easy to read, making
+    debugging easier.  This encoding is a compromise that generates
+    pure 7-bit text but also provides an overview of what's in the
+    pickle.
     """
-    __implements__ = ISerializer
+    comments = ['# pickle-base-64']
+    if keys:
+        comments[0] = comments[0] + ' contents:'
+        for key in keys:
+            r = repr(key).replace('\n', ' ')
+            comments.append('# %s' % r)
+    if unmanaged_count:
+        comments.append('# unmanaged persistent objects: %d' % unmanaged_count)
+    text = base64.encodestring(s)
+    return '%s\n\n%s' % ('\n'.join(comments), text)
 
-    schema = RowSequenceSchema()
-    schema.addField('key', 'string', 1)
-    schema.addField('oid', 'string')
-    schema.addField('classification', 'classification')
-
-    def canSerialize(self, obj):
-        return isinstance(obj, PersistentMapping)
-
-    def serialize(self, event):
-        assert self.canSerialize(event.obj)
-        res = []
-        for key, value in event.obj.items():
-            oid = event.obj_db.identify(value)
-            if oid is None:
-                oid = event.conf.oid_gen.new_oid(event, key, True)
-            event.referenced(key, value, False, oid)
-            # No need to pass classification.
-            res.append((key, oid, None))
-        event.ignore(('data', '_container'))
-        return res
+def decodeFromText(s):
+    """Decodes using base 64, ignoring leading comments.
+    """
+    # Ignore everything before the blank line.
+    s = s.split('\n\n', 1)[-1]
+    return base64.decodestring(s)
 
-    def deserialize(self, event, state):
-        assert self.canSerialize(event.obj)
-        data = {}
-        for (key, oid, classification) in state:
-            value = event.resolve(key, oid, classification)
-            data[key] = value
-        event.obj.__init__(data)
 
 
-class StringToPicklePM:
-    """String-to-Pickle PersistentMapping (de)serializer
+class PersistentMappingSerializer:
+    """(de)serializer of a persistent mapping that uses string keys.
 
-    Requires that the PM maps string keys to second-class persistent
-    objects.
+    Serializes both references and second-class persistent objects.
+    Because of this flexibility, the schema is a little complex.
     """
     __implements__ = ISerializer
 
-    schema = RowSequenceSchema()
-    schema.addField('key', 'string', 1)
-    schema.addField('value', 'string')
+    # This schema includes both a list of items that are references to
+    # persistent objects and a pickle containing items that are not
+    # references.
+    schema1 = RowSequenceSchema()
+    schema1.addField('key', 'string', 1)
+    schema1.addField('oid', 'string')
+    schema1.addField('classification', 'classification')
+    schema2 = FieldSchema('data', 'string')
+    schema = {'references': schema1, 'others': schema2}
 
     def canSerialize(self, obj):
         return isinstance(obj, PersistentMapping)
 
     def serialize(self, event):
         assert self.canSerialize(event.obj)
-        res = []
+        refs = []
+        others = {}
         for key, value in event.obj.items():
-            res.append((key, dumps(value)))
-            event.serialized(key, value, False)
+            if isPersistent(value):
+                oid = event.obj_db.identify(value)
+                if oid is None:
+                    oid = event.conf.oid_gen.new_oid(event, key, True)
+                event.referenced(key, value, False, oid)
+                # No need to pass classification.
+                refs.append((key, oid, None))
+            else:
+                event.serialized(key, value, False)
+                others[key] = value
         event.ignore(('data', '_container'))
-        return res
+        if others:
+            s = encodeToText(dumps(others, 1), others.keys())
+        else:
+            s = ''
+        return {'references': refs, 'others': s}
 
     def deserialize(self, event, state):
         assert self.canSerialize(event.obj)
         data = {}
-        for (key, p) in state:
-            value = loads(p)
+        s = state['others']
+        if s:
+            s = decodeFromText(s)
+            if s:
+                data = loads(s)
+                for key, value in data.items():
+                    event.deserialized(key, value)
+        for (key, oid, classification) in state['references']:
+            value = event.resolve(key, oid, classification)
             data[key] = value
-            event.deserialized(key, value)
         event.obj.__init__(data)
 
 
@@ -143,12 +169,7 @@
     schema = FieldSchema('data', 'string')
 
     def canSerialize(self, obj):
-        try:
-            return isinstance(obj, Persistent)
-        except TypeError:
-            # XXX Python 2.1 thinks Persistent is not a class
-            return 0
-
+        return isPersistent(obj)
 
     def serialize(self, event):
         assert IFullSerializationEvent.isImplementedBy(event)
@@ -170,7 +191,7 @@
             return ''
 
         outfile = StringIO()
-        p = Pickler(outfile)
+        p = Pickler(outfile, 1)  # Binary pickle
         unmanaged = []
 
         def persistent_id(ob, identifyInternal=event.identifyInternal,
@@ -221,9 +242,10 @@
 
         p.persistent_id = lambda ob: None  # Stop recording references
         p.dump(unmanaged)
-        s = outfile.getvalue()
         event.upos.extend(unmanaged)
-        return s
+
+        s = outfile.getvalue()
+        return encodeToText(s, state.keys(), len(unmanaged))
 
 
     def deserialize(self, event, state):
@@ -233,8 +255,15 @@
         # Set up to resolve cyclic references to the object.
         event.deserialized('self', event.obj)
 
+        state = state.strip()
         if state:
-            infile = StringIO(state)
+            if state.startswith('#'):
+                # Text-encoded pickles start with a pound sign.
+                # (A pound sign is not a valid pickle opcode.)
+                data = decodeFromText(state)
+            else:
+                data = state
+            infile = StringIO(data)
             u = Unpickler(infile)
             u.persistent_load = event.resolveInternal
             s = u.load()
@@ -260,11 +289,7 @@
     schema = FieldSchema('mtime', 'int')
 
     def canSerialize(self, obj):
-        try:
-            return isinstance(obj, Persistent)
-        except TypeError:
-            # XXX Python 2.1 thinks Persistent is not a class
-            return 0
+        return isPersistent(obj)
 
     def setTime(self, obj, t):
         """Sets the last modification time of a Persistent obj to float t.




More information about the Zope-CVS mailing list