[Checkins] SVN: zodbupgrade/trunk/src/zodbupgrade/ Snapshot: add tests for pickle rewriting from genops stream, use symbolic

Christian Theune ct at gocept.com
Sun Jun 14 08:43:13 EDT 2009


Log message for revision 100947:
  Snapshot: add tests for pickle rewriting from genops stream, use symbolic
  names instead of string constants, clean up some refactoring mess.
  

Changed:
  U   zodbupgrade/trunk/src/zodbupgrade/analyze.py
  U   zodbupgrade/trunk/src/zodbupgrade/picklefilter.py
  U   zodbupgrade/trunk/src/zodbupgrade/tests.py

-=-
Modified: zodbupgrade/trunk/src/zodbupgrade/analyze.py
===================================================================
--- zodbupgrade/trunk/src/zodbupgrade/analyze.py	2009-06-14 12:33:47 UTC (rev 100946)
+++ zodbupgrade/trunk/src/zodbupgrade/analyze.py	2009-06-14 12:43:13 UTC (rev 100947)
@@ -25,97 +25,73 @@
 logger = logging.getLogger('zodbupgrade')
 
 
-SAFE_OPS = 'IJKML\x8a\x8bSTUN\x88\x89VXFG]ael)t\x85\x86\x87}dsu02(1ghjpqrRbo\x81\x80.PQ'
-KNOWN_HARD = 'ci'
-
-
 class MissingClasses(ValueError):
     pass
 
 
-def find_factory_references(pickle):
-    """Analyze a pickle for moved or missing factory references.
+def update_factory_references(op, arg):
+    """Check a pickle operation for moved or missing factory references.
 
-    Returns: 
+    Returns an updated (op, arg) tuple using the canonical reference for the
+    factory as would be created if the pickle was unpickled and re-pickled.
 
-        - factories whose dotted name could be imported but stem from an
-          indirect import (this is a dictionary)
-
-        - factories whose dotted name could not be imported (an iterable)
-
     """
-    missing_factories = set()
-    rewrites_found = dict()
-    for op, arg, pos in pickletools.genops(pickle):
-        if op.code in SAFE_OPS:
-            continue
-        elif op.code in KNOWN_HARD:
-            module_name, symbol = arg.split(' ')
-            try:
-                module = __import__(module_name, globals(), {}, [symbol])
-                factory = getattr(module, symbol)
-            except (ImportError, AttributeError):
-                missing_factories.add('%s.%s' % (module_name, symbol))
-            else:
-                if not hasattr(factory, '__name__'):
-                    logger.warn(
-                        "factory %r does not have __name__, can't check canonical location" % factory)
-                    continue
-                if not hasattr(factory, '__module__'):
-                    # TODO: This case isn't covered with a test. I just
-                    # couldn't provoke a factory to not have a __module__ but
-                    # users reported this issue to me.
-                    logger.warn(
-                        "factory %r does not have __module__, can't check canonical location" % factory)
-                    continue
-                if ((factory.__module__, factory.__name__) !=
-                    (module_name, symbol)):
-                    # The factory is reachable but it's not the
-                    # canonical location. Mark object for updating.
-                    rewrites_found[(module_name, symbol)] = (
-                        factory.__module__, factory.__name__)
-        else:
-            raise ValueError('Unknown pickle opcode %r' % op.code)
-    return rewrites_found, missing_factories
+    if op.code not in 'ci':
+        return
 
+    factory_module, factory_name = arg.split(' ')
+    module = __import__(factory_module, globals(), {}, [factory_name])
+    factory = getattr(module, factory_name)
+    # XXX Handle missing factories
 
-def analyze_storage(storage):
-    """Analyzes class references of current records of a storage.
+    if not hasattr(factory, '__name__'):
+        logger.warn(
+            "factory %r does not have __name__: "
+            "can't check canonical location" % factory)
+        return
+    if not hasattr(factory, '__module__'):
+        # TODO: This case isn't covered with a test. I just
+        # couldn't provoke a factory to not have a __module__ but
+        # users reported this issue to me.
+        logger.warn(
+            "factory %r does not have __module__: "
+            "can't check canonical location" % factory)
+        return
 
-    Look for missing or moved classes and return a list of OIDs that need
-    updating, a list of classes that are missing, and a list of rewrites.
+    # XXX Log for later reuse
+    new_arg = '%s %s' % (factory.__module__, factory.__name__)
+    return op, new_arg
 
-    """
-    logger.info('Analyzing database ...')
-    missing_classes = set()
-    rewrites_found = dict()
-    oids_rewrite = set()
 
-    count = 0
+def each_record(storage):
     next = None
     while True:
         oid, tid, data, next = storage.record_iternext(next)
-        count += 1
-        pickle_data = StringIO.StringIO(data)
+        yield StringIO.StringIO(data)
+        if next is None:
+            break
 
+
+def update_storage(storage):
+    """Analyzes and updates class references of current records of a
+    storage.
+
+    Look for missing or moved classes and return a list of OIDs that need
+    updating, a list of classes that are missing, and a list of rewrites.
+
+    """
+    logger.info('Analyzing database ...')
+    for count, data in enumerate(each_record(storage)):
         if not count % 5000:
-            logger.info(
-                '    %i objects - %i moved classes - %i classes missing'
-                % (count, len(rewrites_found), len(missing_classes)))
+            logger.info('    %s objects' % count)
 
         # ZODB records consist of two concatenated pickles, so the following
         # needs to be done twice:
         for i in range(2):
-            r, m = find_factory_references(pickle_data)
-            if r:
-                oids_rewrite.add(oid)
-            rewrites_found.update(r)
-            missing_classes.update(m)
+            zodbupgrade.picklefilter.filter(
+                update_factory_references, pickle_data)
 
-        if next is None:
-            break
     logger.info('    Analyzation completed.')
-    return missing_classes, rewrites_found, oids_rewrite
 
 
 def update_storage(storage, ignore_missing=False, dry=False):

Modified: zodbupgrade/trunk/src/zodbupgrade/picklefilter.py
===================================================================
--- zodbupgrade/trunk/src/zodbupgrade/picklefilter.py	2009-06-14 12:33:47 UTC (rev 100946)
+++ zodbupgrade/trunk/src/zodbupgrade/picklefilter.py	2009-06-14 12:43:13 UTC (rev 100947)
@@ -34,44 +34,71 @@
 arg_len = lambda arg:packi(len(arg))+arg
 unicode_escape = lambda arg:arg.replace('\\', '\\u005c').replace('\n', '\\u000a').encode('raw-unicode-escape')+'\n'
 
-noargs = '().NQR]abdeostu}l\x81\x85\x86\x87\x88\x89210'
+noargs = [pickle.EMPTY_TUPLE,
+          pickle.MARK,
+          pickle.STOP,
+          pickle.NONE,
+          pickle.BINPERSID,
+          pickle.REDUCE,
+          pickle.EMPTY_LIST,
+          pickle.APPEND,
+          pickle.BUILD,
+          pickle.DICT,
+          pickle.APPENDS,
+          pickle.OBJ,
+          pickle.SETITEM,
+          pickle.TUPLE,
+          pickle.SETITEMS,
+          pickle.EMPTY_DICT,
+          pickle.LIST,
+          pickle.POP,
+          pickle.POP_MARK,
+          pickle.DUP,
+          pickle.NEWOBJ,
+          pickle.TUPLE1,
+          pickle.TUPLE2,
+          pickle.TUPLE3,
+          pickle.NEWTRUE,
+          pickle.NEWFALSE]
+
 generators = {
-    'G': lambda arg:struct.pack('>d', arg),
-    'I': lambda arg:reprn(arg) if type(arg) is int else '0%s\n' % int(arg),
-    'J': packi,
-    'K': chr,
-    'L': reprn,
-    'M': lambda arg:"%c%c" % (arg&0xff, arg>>8),
-    'S': reprn,
-    'T': arg_len,
-    'U': lambda arg:chr(len(arg)) + arg,
-    'X': lambda arg:arg_len(arg.encode('utf-8')),
-    'c': fact_ref,
-    'i': fact_ref,
-    'h': chr,
-    'j': packi,
-    'p': reprn,
-    'g': reprn,
-    'q': chr,
-    'r': packi,
-    'P': strn,
-    'V': unicode_escape,
-    '\x80': chr,
-    '\x82': chr,
-    '\x83': lambda arg:"%c%c" % (arg&0xff, arg>>8),
-    '\x84': packi,
-    '\x8a': lambda arg:chr(len(pickle.encode_long(arg)))+pickle.encode_long(arg),
-    '\x8b': lambda arg:arg_len(pickle.encode_long(arg)),
+    pickle.BINFLOAT: lambda arg:struct.pack('>d', arg),
+    pickle.FLOAT: reprn,
+    pickle.INT: lambda arg:reprn(arg) if type(arg) is int else '0%s\n' % int(arg),
+    pickle.BININT: packi,
+    pickle.BININT1: chr,
+    pickle.LONG: reprn,
+    pickle.BININT2: lambda arg:"%c%c" % (arg&0xff, arg>>8),
+    pickle.STRING: reprn,
+    pickle.BINSTRING: arg_len,
+    pickle.SHORT_BINSTRING: lambda arg:chr(len(arg)) + arg,
+    pickle.BINUNICODE: lambda arg:arg_len(arg.encode('utf-8')),
+    pickle.GLOBAL: fact_ref,
+    pickle.INST: fact_ref,
+    pickle.BINGET: chr,
+    pickle.LONG_BINGET: packi,
+    pickle.PUT: reprn,
+    pickle.GET: reprn,
+    pickle.BINPUT: chr,
+    pickle.LONG_BINPUT: packi,
+    pickle.PERSID: strn,
+    pickle.UNICODE: unicode_escape,
+    pickle.PROTO: chr,
+    pickle.EXT1: chr,
+    pickle.EXT2: lambda arg:"%c%c" % (arg&0xff, arg>>8),
+    pickle.EXT4: packi,
+    pickle.LONG1: lambda arg:chr(len(pickle.encode_long(arg)))+pickle.encode_long(arg),
+    pickle.LONG4: lambda arg:arg_len(pickle.encode_long(arg)),
 }
 
 
-def to_pickle_chunk(op, arg):
+def to_pickle_chunk(opcode, arg):
     """Transform an operation and its argument into pickle format."""
-    chunk = op.code
-    if op.code in noargs:
+    chunk = opcode
+    if opcode in noargs:
         pass
-    elif op.code in generators:
-        generated = generators[op.code](arg)
+    elif opcode in generators:
+        generated = generators[opcode](arg)
         chunk += generated
     else:
         raise ValueError('Unknown opcode: %s')

Modified: zodbupgrade/trunk/src/zodbupgrade/tests.py
===================================================================
--- zodbupgrade/trunk/src/zodbupgrade/tests.py	2009-06-14 12:33:47 UTC (rev 100946)
+++ zodbupgrade/trunk/src/zodbupgrade/tests.py	2009-06-14 12:43:13 UTC (rev 100947)
@@ -14,14 +14,16 @@
 
 import ZODB
 import ZODB.FileStorage
+import logging 
 import os
+import pickle
 import sys
 import tempfile
+import transaction
 import types
 import unittest
-import transaction
 import zodbupgrade.analyze
-import logging 
+import zodbupgrade.picklefilter
 
 
 class IgnoringFilter(object):
@@ -130,5 +132,190 @@
         self.assertEquals('AnonymousFactory', self.root['test'].__class__.__name__)
 
 
+class PickleFilterTests(unittest.TestCase):
+    # Tests the pickle filter for re-pickling op-codes
+
+    def assertNonArgCode(self, code):
+        self.assertArgCode(code, code, None)
+
+    def assertArgCode(self, result, code, arg):
+        self.assertEquals(
+            result,
+            zodbupgrade.picklefilter.to_pickle_chunk(code, arg))
+
+    def test_sanity_check(self):
+        # Check binary compatibility on simple "real" pickle
+        pass
+
+    def test_MARK(self):
+        self.assertNonArgCode(pickle.MARK)
+
+    def test_STOP(self):
+        self.assertNonArgCode(pickle.STOP)
+
+    def test_POP(self):
+        self.assertNonArgCode(pickle.POP)
+
+    def test_POP_MARK(self):
+        self.assertNonArgCode(pickle.POP_MARK)
+
+    def test_DUP(self):
+        self.assertNonArgCode(pickle.DUP)
+
+    def test_FLOAT(self):
+        self.assertArgCode('F12.300000000000001\n', pickle.FLOAT, 12.3)
+
+    def test_INT(self):
+        self.assertArgCode('I01237940039285380274899124224\n', pickle.INT, 2**90)
+
+    def test_BININT(self):
+        self.assertArgCode('J\x00\x00\x01\x00', pickle.BININT, 0xffff+1)
+
+    def test_BININT1(self):
+        self.assertArgCode('K\xf0', pickle.BININT1, 0xf0)
+
+    def test_LONG(self):
+        self.assertArgCode("L1546\n", pickle.LONG, 1546)
+
+    def test_BININT2(self):
+        self.assertArgCode('M\xf0\xff', pickle.BININT2, 0xfff0)
+
+    def test_NONE(self):
+        self.assertNonArgCode(pickle.NONE)
+
+    def test_PERSID(self):
+        self.assertArgCode('P12345\n', pickle.PERSID, '12345')
+
+    def test_BINPERSID(self):
+        self.assertNonArgCode(pickle.BINPERSID)
+
+    def test_REDUCE(self):
+        self.assertNonArgCode(pickle.REDUCE)
+
+    def test_STRING(self):
+        self.assertArgCode("S'asdf'\n", pickle.STRING, 'asdf')
+
+    def test_BINSTRING(self):
+        self.assertArgCode('T\x06\x00\x00\x00foobar', pickle.BINSTRING, 'foobar')
+
+    def test_SHORT_BINSTRING(self):
+        self.assertArgCode('U\x04asdf', pickle.SHORT_BINSTRING, 'asdf')
+
+    def test_UNICODE(self):
+        self.assertArgCode('V\xfcnders\n', pickle.UNICODE, u'\xfcnders')
+
+    def test_BINUNICODE(self):
+        self.assertArgCode('X\x06\x00\x00\x00\xc3\xbc1234', pickle.BINUNICODE, u'\xfc1234')
+
+    def test_APPEND(self):
+        self.assertNonArgCode(pickle.APPEND)
+
+    def test_BUILD(self):
+        self.assertNonArgCode(pickle.BUILD)
+
+    def test_GLOBAL(self):
+        self.assertArgCode('cbar\nfoo\n', pickle.GLOBAL, 'bar foo')
+
+    def test_DICT(self):
+        self.assertNonArgCode(pickle.DICT)
+
+    def test_EMPTY_DICT(self):
+        self.assertNonArgCode(pickle.EMPTY_DICT)
+
+    def test_APPENDS(self):
+        self.assertNonArgCode(pickle.APPENDS)
+
+    def test_GET(self):
+        self.assertArgCode('g12\n', pickle.GET, 12)
+
+    def test_BINGET(self):
+        self.assertArgCode('h\x80', pickle.BINGET, 128)
+
+    def test_INST(self):
+        self.assertArgCode('ifoo\nbar\n', pickle.INST, 'foo bar')
+
+    def test_LONG_BINGET(self):
+        self.assertArgCode('j\x00\x04\x00\x00', pickle.LONG_BINGET, 1024)
+
+    def test_LIST(self):
+        self.assertNonArgCode(pickle.LIST)
+
+    def test_EMPTY_LIST(self):
+        self.assertNonArgCode(pickle.EMPTY_LIST)
+
+    def test_OBJ(self):
+        self.assertNonArgCode(pickle.OBJ)
+
+    def test_PUT(self):
+        self.assertArgCode("p12\n", pickle.PUT, 12)
+
+    def test_BINPUT(self):
+        self.assertArgCode('q\x80', pickle.BINPUT, 128)
+
+    def test_LONG_BINPUT(self):
+        self.assertArgCode('r\x00\x04\x00\x00', pickle.LONG_BINPUT, 1024)
+
+    def test_SETITEM(self):
+        self.assertNonArgCode(pickle.SETITEM)
+
+    def test_TUPLE(self):
+        self.assertNonArgCode(pickle.TUPLE)
+
+    def test_EMPTY_TUPLE(self):
+        self.assertNonArgCode(pickle.EMPTY_TUPLE)
+
+    def test_SETITEMS(self):
+        self.assertNonArgCode(pickle.SETITEMS)
+
+    def test_BINFLOAT(self):
+        self.assertArgCode('G@(\x00\x00\x00\x00\x00\x00',
+                           pickle.BINFLOAT, 12.0)
+
+    def test_TRUE(self):
+        self.assertArgCode(pickle.TRUE, pickle.INT, True)
+
+    def test_FALSE(self):
+        self.assertArgCode(pickle.FALSE, pickle.INT, False)
+
+    def test_PROTO(self):
+        self.assertArgCode('\x80\x01', pickle.PROTO, 1)
+
+    def test_NEWOBJ(self):
+        self.assertNonArgCode(pickle.NEWOBJ)
+
+    def test_EXT1(self):
+        self.assertArgCode('\x82\xf0', pickle.EXT1, 0xf0)
+
+    def test_EXT2(self):
+        self.assertArgCode('\x83\x00\x01', pickle.EXT2, 0xff+1)
+
+    def test_EXT4(self):
+        self.assertArgCode('\x84\x00\x00\x01\x00', pickle.EXT4, 0xffff+1)
+
+    def test_TUPLE1(self):
+        self.assertNonArgCode(pickle.TUPLE1)
+
+    def test_TUPLE2(self):
+        self.assertNonArgCode(pickle.TUPLE2)
+
+    def test_TUPLE3(self):
+        self.assertNonArgCode(pickle.TUPLE3)
+
+    def test_NEWTRUE(self):
+        self.assertNonArgCode(pickle.NEWTRUE)
+
+    def test_NEWFALSE(self):
+        self.assertNonArgCode(pickle.NEWFALSE)
+
+    def test_LONG1(self):
+        self.assertArgCode('\x8a\x02\x80\x00', pickle.LONG1, 128)
+
+    def test_LONG4(self):
+        self.assertArgCode('\x8b\x02\x00\x00\x00\x00\x04', pickle.LONG4, 2**10)
+
+
 def test_suite():
-    return unittest.makeSuite(ZODBUpgradeTests)
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(ZODBUpgradeTests))
+    suite.addTest(unittest.makeSuite(PickleFilterTests))
+    return suite



More information about the Checkins mailing list