[Checkins] SVN: zodbupgrade/trunk/src/zodbupgrade/ Snapshot: add tests for pickle rewriting from genops stream, use symbolic names instead of string constants, clean up some refactoring mess.
Christian Theune
ct at gocept.com
Sun Jun 14 08:43:13 EDT 2009
Log message for revision 100947:
Snapshot: add tests for pickle rewriting from genops stream, use symbolic
names instead of string constants, clean up some refactoring mess.
Changed:
U zodbupgrade/trunk/src/zodbupgrade/analyze.py
U zodbupgrade/trunk/src/zodbupgrade/picklefilter.py
U zodbupgrade/trunk/src/zodbupgrade/tests.py
-=-
Modified: zodbupgrade/trunk/src/zodbupgrade/analyze.py
===================================================================
--- zodbupgrade/trunk/src/zodbupgrade/analyze.py 2009-06-14 12:33:47 UTC (rev 100946)
+++ zodbupgrade/trunk/src/zodbupgrade/analyze.py 2009-06-14 12:43:13 UTC (rev 100947)
@@ -25,97 +25,73 @@
logger = logging.getLogger('zodbupgrade')
-SAFE_OPS = 'IJKML\x8a\x8bSTUN\x88\x89VXFG]ael)t\x85\x86\x87}dsu02(1ghjpqrRbo\x81\x80.PQ'
-KNOWN_HARD = 'ci'
-
-
class MissingClasses(ValueError):
pass
-def find_factory_references(pickle):
- """Analyze a pickle for moved or missing factory references.
+def update_factory_references(op, arg):
+ """Check a pickle operation for moved or missing factory references.
- Returns:
+ Returns an updated (op, arg) tuple using the canonical reference for the
+ factory as would be created if the pickle was unpickled and re-pickled.
- - factories whose dotted name could be imported but stem from an
- indirect import (this is a dictionary)
-
- - factories whose dotted name could not be imported (an iterable)
-
"""
- missing_factories = set()
- rewrites_found = dict()
- for op, arg, pos in pickletools.genops(pickle):
- if op.code in SAFE_OPS:
- continue
- elif op.code in KNOWN_HARD:
- module_name, symbol = arg.split(' ')
- try:
- module = __import__(module_name, globals(), {}, [symbol])
- factory = getattr(module, symbol)
- except (ImportError, AttributeError):
- missing_factories.add('%s.%s' % (module_name, symbol))
- else:
- if not hasattr(factory, '__name__'):
- logger.warn(
- "factory %r does not have __name__, can't check canonical location" % factory)
- continue
- if not hasattr(factory, '__module__'):
- # TODO: This case isn't covered with a test. I just
- # couldn't provoke a factory to not have a __module__ but
- # users reported this issue to me.
- logger.warn(
- "factory %r does not have __module__, can't check canonical location" % factory)
- continue
- if ((factory.__module__, factory.__name__) !=
- (module_name, symbol)):
- # The factory is reachable but it's not the
- # canonical location. Mark object for updating.
- rewrites_found[(module_name, symbol)] = (
- factory.__module__, factory.__name__)
- else:
- raise ValueError('Unknown pickle opcode %r' % op.code)
- return rewrites_found, missing_factories
+ if op.code not in 'ci':
+ return
+ factory_module, factory_name = arg.split(' ')
+ module = __import__(factory_module, globals(), {}, [factory_name])
+ factory = getattr(module, factory_name)
+ # XXX Handle missing factories
-def analyze_storage(storage):
- """Analyzes class references of current records of a storage.
+ if not hasattr(factory, '__name__'):
+ logger.warn(
+ "factory %r does not have __name__: "
+ "can't check canonical location" % factory)
+ return
+ if not hasattr(factory, '__module__'):
+ # TODO: This case isn't covered with a test. I just
+ # couldn't provoke a factory to not have a __module__ but
+ # users reported this issue to me.
+ logger.warn(
+ "factory %r does not have __module__: "
+ "can't check canonical location" % factory)
+ return
- Look for missing or moved classes and return a list of OIDs that need
- updating, a list of classes that are missing, and a list of rewrites.
+ # XXX Log for later reuse
+ new_arg = '%s %s' % (factory.__module__, factory.__name__)
+ return op, new_arg
- """
- logger.info('Analyzing database ...')
- missing_classes = set()
- rewrites_found = dict()
- oids_rewrite = set()
- count = 0
+def each_record(storage):
next = None
while True:
oid, tid, data, next = storage.record_iternext(next)
- count += 1
- pickle_data = StringIO.StringIO(data)
+ yield StringIO.StringIO(data)
+ if next is None:
+ break
+
+def update_storage(storage):
+ """Update
+ and updaAnalyzes class references of current records of a storage.
+
+ Look for missing or moved classes and return a list of OIDs that need
+ updating, a list of classes that are missing, and a list of rewrites.
+
+ """
+ logger.info('Analyzing database ...')
+ for count, data in enumerate(each_record(storage)):
if not count % 5000:
- logger.info(
- ' %i objects - %i moved classes - %i classes missing'
- % (count, len(rewrites_found), len(missing_classes)))
+ logger.info(' %s objects' % count)
# ZODB records consist of two concatenated pickles, so the following
# needs to be done twice:
for i in range(2):
- r, m = find_factory_references(pickle_data)
- if r:
- oids_rewrite.add(oid)
- rewrites_found.update(r)
- missing_classes.update(m)
+ zodbupgrade.picklefilter.filter(
+ update_factory_references, pickle_data)
- if next is None:
- break
logger.info(' Analyzation completed.')
- return missing_classes, rewrites_found, oids_rewrite
def update_storage(storage, ignore_missing=False, dry=False):
Modified: zodbupgrade/trunk/src/zodbupgrade/picklefilter.py
===================================================================
--- zodbupgrade/trunk/src/zodbupgrade/picklefilter.py 2009-06-14 12:33:47 UTC (rev 100946)
+++ zodbupgrade/trunk/src/zodbupgrade/picklefilter.py 2009-06-14 12:43:13 UTC (rev 100947)
@@ -34,44 +34,71 @@
arg_len = lambda arg:packi(len(arg))+arg
unicode_escape = lambda arg:arg.replace('\\', '\\u005c').replace('\n', '\\u000a').encode('raw-unicode-escape')+'\n'
-noargs = '().NQR]abdeostu}l\x81\x85\x86\x87\x88\x89210'
+noargs = [pickle.EMPTY_TUPLE,
+ pickle.MARK,
+ pickle.STOP,
+ pickle.NONE,
+ pickle.BINPERSID,
+ pickle.REDUCE,
+ pickle.EMPTY_LIST,
+ pickle.APPEND,
+ pickle.BUILD,
+ pickle.DICT,
+ pickle.APPENDS,
+ pickle.OBJ,
+ pickle.SETITEM,
+ pickle.TUPLE,
+ pickle.SETITEMS,
+ pickle.EMPTY_DICT,
+ pickle.LIST,
+ pickle.POP,
+ pickle.POP_MARK,
+ pickle.DUP,
+ pickle.NEWOBJ,
+ pickle.TUPLE1,
+ pickle.TUPLE2,
+ pickle.TUPLE3,
+ pickle.NEWTRUE,
+ pickle.NEWFALSE]
+
generators = {
- 'G': lambda arg:struct.pack('>d', arg),
- 'I': lambda arg:reprn(arg) if type(arg) is int else '0%s\n' % int(arg),
- 'J': packi,
- 'K': chr,
- 'L': reprn,
- 'M': lambda arg:"%c%c" % (arg&0xff, arg>>8),
- 'S': reprn,
- 'T': arg_len,
- 'U': lambda arg:chr(len(arg)) + arg,
- 'X': lambda arg:arg_len(arg.encode('utf-8')),
- 'c': fact_ref,
- 'i': fact_ref,
- 'h': chr,
- 'j': packi,
- 'p': reprn,
- 'g': reprn,
- 'q': chr,
- 'r': packi,
- 'P': strn,
- 'V': unicode_escape,
- '\x80': chr,
- '\x82': chr,
- '\x83': lambda arg:"%c%c" % (arg&0xff, arg>>8),
- '\x84': packi,
- '\x8a': lambda arg:chr(len(pickle.encode_long(arg)))+pickle.encode_long(arg),
- '\x8b': lambda arg:arg_len(pickle.encode_long(arg)),
+ pickle.BINFLOAT: lambda arg:struct.pack('>d', arg),
+ pickle.FLOAT: reprn,
+ pickle.INT: lambda arg:reprn(arg) if type(arg) is int else '0%s\n' % int(arg),
+ pickle.BININT: packi,
+ pickle.BININT1: chr,
+ pickle.LONG: reprn,
+ pickle.BININT2: lambda arg:"%c%c" % (arg&0xff, arg>>8),
+ pickle.STRING: reprn,
+ pickle.BINSTRING: arg_len,
+ pickle.SHORT_BINSTRING: lambda arg:chr(len(arg)) + arg,
+ pickle.BINUNICODE: lambda arg:arg_len(arg.encode('utf-8')),
+ pickle.GLOBAL: fact_ref,
+ pickle.INST: fact_ref,
+ pickle.BINGET: chr,
+ pickle.LONG_BINGET: packi,
+ pickle.PUT: reprn,
+ pickle.GET: reprn,
+ pickle.BINPUT: chr,
+ pickle.LONG_BINPUT: packi,
+ pickle.PERSID: strn,
+ pickle.UNICODE: unicode_escape,
+ pickle.PROTO: chr,
+ pickle.EXT1: chr,
+ pickle.EXT2: lambda arg:"%c%c" % (arg&0xff, arg>>8),
+ pickle.EXT4: packi,
+ pickle.LONG1: lambda arg:chr(len(pickle.encode_long(arg)))+pickle.encode_long(arg),
+ pickle.LONG4: lambda arg:arg_len(pickle.encode_long(arg)),
}
-def to_pickle_chunk(op, arg):
+def to_pickle_chunk(opcode, arg):
"""Transform an operation and its argument into pickle format."""
- chunk = op.code
- if op.code in noargs:
+ chunk = opcode
+ if opcode in noargs:
pass
- elif op.code in generators:
- generated = generators[op.code](arg)
+ elif opcode in generators:
+ generated = generators[opcode](arg)
chunk += generated
else:
raise ValueError('Unknown opcode: %s')
Modified: zodbupgrade/trunk/src/zodbupgrade/tests.py
===================================================================
--- zodbupgrade/trunk/src/zodbupgrade/tests.py 2009-06-14 12:33:47 UTC (rev 100946)
+++ zodbupgrade/trunk/src/zodbupgrade/tests.py 2009-06-14 12:43:13 UTC (rev 100947)
@@ -14,14 +14,16 @@
import ZODB
import ZODB.FileStorage
+import logging
import os
+import pickle
import sys
import tempfile
+import transaction
import types
import unittest
-import transaction
import zodbupgrade.analyze
-import logging
+import zodbupgrade.picklefilter
class IgnoringFilter(object):
@@ -130,5 +132,190 @@
self.assertEquals('AnonymousFactory', self.root['test'].__class__.__name__)
+class PickleFilterTests(unittest.TestCase):
+ # Tests the pickle filter for re-pickling op-codes
+
+ def assertNonArgCode(self, code):
+ self.assertArgCode(code, code, None)
+
+ def assertArgCode(self, result, code, arg):
+ self.assertEquals(
+ result,
+ zodbupgrade.picklefilter.to_pickle_chunk(code, arg))
+
+ def test_sanity_check(self):
+ # Check binary compatibility on simple "real" pickle
+ pass
+
+ def test_MARK(self):
+ self.assertNonArgCode(pickle.MARK)
+
+ def test_STOP(self):
+ self.assertNonArgCode(pickle.STOP)
+
+ def test_POP(self):
+ self.assertNonArgCode(pickle.POP)
+
+ def test_POP_MARK(self):
+ self.assertNonArgCode(pickle.POP_MARK)
+
+ def test_DUP(self):
+ self.assertNonArgCode(pickle.DUP)
+
+ def test_FLOAT(self):
+ self.assertArgCode('F12.300000000000001\n', pickle.FLOAT, 12.3)
+
+ def test_INT(self):
+ self.assertArgCode('I01237940039285380274899124224\n', pickle.INT, 2**90)
+
+ def test_BININT(self):
+ self.assertArgCode('J\x00\x00\x01\x00', pickle.BININT, 0xffff+1)
+
+ def test_BININT1(self):
+ self.assertArgCode('K\xf0', pickle.BININT1, 0xf0)
+
+ def test_LONG(self):
+ self.assertArgCode("L1546\n", pickle.LONG, 1546)
+
+ def test_BININT2(self):
+ self.assertArgCode('M\xf0\xff', pickle.BININT2, 0xfff0)
+
+ def test_NONE(self):
+ self.assertNonArgCode(pickle.NONE)
+
+ def test_PERSID(self):
+ self.assertArgCode('P12345\n', pickle.PERSID, '12345')
+
+ def test_BINPERSID(self):
+ self.assertNonArgCode(pickle.BINPERSID)
+
+ def test_REDUCE(self):
+ self.assertNonArgCode(pickle.REDUCE)
+
+ def test_STRING(self):
+ self.assertArgCode("S'asdf'\n", pickle.STRING, 'asdf')
+
+ def test_BINSTRING(self):
+ self.assertArgCode('T\x06\x00\x00\x00foobar', pickle.BINSTRING, 'foobar')
+
+ def test_SHORT_BINSTRING(self):
+ self.assertArgCode('U\x04asdf', pickle.SHORT_BINSTRING, 'asdf')
+
+ def test_UNICODE(self):
+ self.assertArgCode('V\xfcnders\n', pickle.UNICODE, u'\xfcnders')
+
+ def test_BINUNICODE(self):
+ self.assertArgCode('X\x06\x00\x00\x00\xc3\xbc1234', pickle.BINUNICODE, u'\xfc1234')
+
+ def test_APPEND(self):
+ self.assertNonArgCode(pickle.APPEND)
+
+ def test_BUILD(self):
+ self.assertNonArgCode(pickle.BUILD)
+
+ def test_GLOBAL(self):
+ self.assertArgCode('cbar\nfoo\n', pickle.GLOBAL, 'bar foo')
+
+ def test_DICT(self):
+ self.assertNonArgCode(pickle.DICT)
+
+ def test_EMPTY_DICT(self):
+ self.assertNonArgCode(pickle.EMPTY_DICT)
+
+ def test_APPENDS(self):
+ self.assertNonArgCode(pickle.APPENDS)
+
+ def test_GET(self):
+ self.assertArgCode('g12\n', pickle.GET, 12)
+
+ def test_BINGET(self):
+ self.assertArgCode('h\x80', pickle.BINGET, 128)
+
+ def test_INST(self):
+ self.assertArgCode('ifoo\nbar\n', pickle.INST, 'foo bar')
+
+ def test_LONG_BINGET(self):
+ self.assertArgCode('j\x00\x04\x00\x00', pickle.LONG_BINGET, 1024)
+
+ def test_LIST(self):
+ self.assertNonArgCode(pickle.LIST)
+
+ def test_EMPTY_LIST(self):
+ self.assertNonArgCode(pickle.EMPTY_LIST)
+
+ def test_OBJ(self):
+ self.assertNonArgCode(pickle.OBJ)
+
+ def test_PUT(self):
+ self.assertArgCode("p12\n", pickle.PUT, 12)
+
+ def test_BINPUT(self):
+ self.assertArgCode('q\x80', pickle.BINPUT, 128)
+
+ def test_LONG_BINPUT(self):
+ self.assertArgCode('r\x00\x04\x00\x00', pickle.LONG_BINPUT, 1024)
+
+ def test_SETITEM(self):
+ self.assertNonArgCode(pickle.SETITEM)
+
+ def test_TUPLE(self):
+ self.assertNonArgCode(pickle.TUPLE)
+
+ def test_EMPTY_TUPLE(self):
+ self.assertNonArgCode(pickle.EMPTY_TUPLE)
+
+ def test_SETITEMS(self):
+ self.assertNonArgCode(pickle.SETITEMS)
+
+ def test_BINFLOAT(self):
+ self.assertArgCode('G@(\x00\x00\x00\x00\x00\x00',
+ pickle.BINFLOAT, 12.0)
+
+ def test_TRUE(self):
+ self.assertArgCode(pickle.TRUE, pickle.INT, True)
+
+ def test_FALSE(self):
+ self.assertArgCode(pickle.FALSE, pickle.INT, False)
+
+ def test_PROTO(self):
+ self.assertArgCode('\x80\x01', pickle.PROTO, 1)
+
+ def test_NEWOBJ(self):
+ self.assertNonArgCode(pickle.NEWOBJ)
+
+ def test_EXT1(self):
+ self.assertArgCode('\x82\xf0', pickle.EXT1, 0xf0)
+
+ def test_EXT2(self):
+ self.assertArgCode('\x83\x00\x01', pickle.EXT2, 0xff+1)
+
+ def test_EXT4(self):
+ self.assertArgCode('\x84\x00\x00\x01\x00', pickle.EXT4, 0xffff+1)
+
+ def test_TUPLE1(self):
+ self.assertNonArgCode(pickle.TUPLE1)
+
+ def test_TUPLE2(self):
+ self.assertNonArgCode(pickle.TUPLE2)
+
+ def test_TUPLE3(self):
+ self.assertNonArgCode(pickle.TUPLE3)
+
+ def test_NEWTRUE(self):
+ self.assertNonArgCode(pickle.NEWTRUE)
+
+ def test_NEWFALSE(self):
+ self.assertNonArgCode(pickle.NEWFALSE)
+
+ def test_LONG1(self):
+ self.assertArgCode('\x8a\x02\x80\x00', pickle.LONG1, 128)
+
+ def test_LONG4(self):
+ self.assertArgCode('\x8b\x02\x00\x00\x00\x00\x04', pickle.LONG4, 2**10)
+
+
def test_suite():
- return unittest.makeSuite(ZODBUpgradeTests)
+ suite = unittest.TestSuite()
+ suite.addTest(unittest.makeSuite(ZODBUpgradeTests))
+ suite.addTest(unittest.makeSuite(PickleFilterTests))
+ return suite
More information about the Checkins mailing list