[Checkins] SVN: zodbupdate/trunk/ Merge sylvain-persistent-load

Sylvain Viollon sylvain at infrae.com
Tue Feb 2 09:37:14 EST 2010


Log message for revision 108710:
  Merge sylvain-persistent-load

Changed:
  U   zodbupdate/trunk/CHANGES.txt
  U   zodbupdate/trunk/README.txt
  U   zodbupdate/trunk/src/zodbupdate/main.py
  D   zodbupdate/trunk/src/zodbupdate/picklefilter.py
  A   zodbupdate/trunk/src/zodbupdate/serialize.py
  U   zodbupdate/trunk/src/zodbupdate/tests.py
  U   zodbupdate/trunk/src/zodbupdate/update.py

-=-
Modified: zodbupdate/trunk/CHANGES.txt
===================================================================
--- zodbupdate/trunk/CHANGES.txt	2010-02-02 13:32:48 UTC (rev 108709)
+++ zodbupdate/trunk/CHANGES.txt	2010-02-02 14:37:14 UTC (rev 108710)
@@ -4,6 +4,13 @@
 0.3 (unreleased)
 ----------------
 
+- Unpickle and re-pickle the code to rename references to moved classes.
+  This makes the script work on databases created with older versions of
+  ZODB.
+
+- If you are working directly with a FileStorage, POSKeyErrors are reported
+  but are non-fatal.
+
 - Remove superfluous code that tried to prevent commits when no changes
   happened: ZODB does this all by itself already.
 

Modified: zodbupdate/trunk/README.txt
===================================================================
--- zodbupdate/trunk/README.txt	2010-02-02 13:32:48 UTC (rev 108709)
+++ zodbupdate/trunk/README.txt	2010-02-02 14:37:14 UTC (rev 108710)
@@ -9,9 +9,10 @@
 If a class is being moved or renamed, you need to update all references from
 your database to the new name before finally deleting the old code.
 
-This tool looks through all current objects of your database, identifies
-moved/renamed classes and `touches` objects accordingly. It creates a single
-transaction that contains the update of your database.
+This tool looks through all current objects of your database,
+identifies moved/renamed classes and `touches` objects accordingly. It
+creates transactions that contain the updates to your database (one
+transaction every 100000 records).
 
 Having run this tool, you are then free to delete the old code.
 

Modified: zodbupdate/trunk/src/zodbupdate/main.py
===================================================================
--- zodbupdate/trunk/src/zodbupdate/main.py	2010-02-02 13:32:48 UTC (rev 108709)
+++ zodbupdate/trunk/src/zodbupdate/main.py	2010-02-02 14:37:14 UTC (rev 108710)
@@ -30,8 +30,6 @@
                   help="load storage from config file")
 parser.add_option("-n", "--dry-run", action="store_true",
                   help="perform a trial run with no changes made")
-parser.add_option("-i", "--ignore-missing", action="store_true",
-                  help="update database even if classes are missing")
 parser.add_option("-s", "--save-renames",
                   help="save automatically determined rename rules to file")
 parser.add_option("-q", "--quiet", action="store_true",
@@ -69,7 +67,7 @@
 
     if options.file and options.config:
         raise SystemExit(
-            'Exactly one of --file or --config must be given.')
+            u'Exactly one of --file or --config must be given.')
 
     if options.file:
         storage = ZODB.FileStorage.FileStorage(options.file)
@@ -77,27 +75,35 @@
         storage = ZODB.config.storageFromURL(options.config)
     else:
         raise SystemExit(
-            'Exactly one of --file or --config must be given.')
+            u'Exactly one of --file or --config must be given.')
 
     rename_rules = {}
     for entry_point in pkg_resources.iter_entry_points('zodbupdate'):
         rules = entry_point.load()
         rename_rules.update(rules)
-        logging.debug('Loaded %s rules from %s:%s' %
+        logging.info(u'Loaded %s rules from %s:%s' %
                       (len(rules), entry_point.module_name, entry_point.name))
 
     updater = zodbupdate.update.Updater(
         storage, dry=options.dry_run,
-        ignore_missing=options.ignore_missing,
         renames=rename_rules)
+
     try:
         updater()
     except Exception, e:
-        logging.debug('An error occured', exc_info=True)
-        logging.error('Stopped processing, due to: %s' % e)
+        logging.debug(u'An error occured', exc_info=True)
+        logging.error(u'Stopped processing, due to: %s' % e)
         raise SystemExit()
 
+    implicit_renames = updater.processor.get_found_implicit_rules()
+    if implicit_renames:
+        print 'Found new rules:'
+        print pprint.pformat(implicit_renames)
     if options.save_renames:
+        print 'Saving rules into %s' % options.save_renames
+        rename_rules.update(implicit_renames)
         f = open(options.save_renames, 'w')
-        f.write('renames = %s' % pprint.pformat(updater.renames))
+        f.write('renames = %s' % pprint.pformat(rename_rules))
         f.close()
+    storage.close()
+

Deleted: zodbupdate/trunk/src/zodbupdate/picklefilter.py
===================================================================
--- zodbupdate/trunk/src/zodbupdate/picklefilter.py	2010-02-02 13:32:48 UTC (rev 108709)
+++ zodbupdate/trunk/src/zodbupdate/picklefilter.py	2010-02-02 14:37:14 UTC (rev 108710)
@@ -1,131 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2009 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Tools for filtering a pickle opcode stream (as generated by
-pickletools.genops) and reassemblying the pickle.
-"""
-
-import ZODB
-import sys
-import struct
-import pickle
-import pickletools
-import StringIO
-
-# The following functions were created on the basis of the code found in
-# pickle.py. They reflect all opcodes that pickle knows about and how they get
-# written to the output stream under the knowledge how pickletools.genops
-# parses the opcode arguments.
-
-packi = lambda arg:struct.pack('<i', arg)
-reprn = lambda arg:repr(arg)+'\n'
-strn = lambda arg:str(arg)+'\n'
-fact_ref = lambda arg:arg.replace(' ','\n')+'\n'
-arg_len = lambda arg:packi(len(arg))+arg
-unicode_escape = lambda arg:arg.replace('\\', '\\u005c').replace('\n', '\\u000a').encode('raw-unicode-escape')+'\n'
-
-noargs = [pickle.EMPTY_TUPLE,
-          pickle.MARK,
-          pickle.STOP,
-          pickle.NONE,
-          pickle.BINPERSID,
-          pickle.REDUCE,
-          pickle.EMPTY_LIST,
-          pickle.APPEND,
-          pickle.BUILD,
-          pickle.DICT,
-          pickle.APPENDS,
-          pickle.OBJ,
-          pickle.SETITEM,
-          pickle.TUPLE,
-          pickle.SETITEMS,
-          pickle.EMPTY_DICT,
-          pickle.LIST,
-          pickle.POP,
-          pickle.POP_MARK,
-          pickle.DUP,
-          pickle.NEWOBJ,
-          pickle.TUPLE1,
-          pickle.TUPLE2,
-          pickle.TUPLE3,
-          pickle.NEWTRUE,
-          pickle.NEWFALSE]
-
-def _pickle_int(arg):
-    if type(arg) is int:
-        return reprn(arg)
-    else:
-        return '0%s\n' % int(arg)
-        
-generators = {
-    pickle.BINFLOAT: lambda arg:struct.pack('>d', arg),
-    pickle.FLOAT: reprn,
-    pickle.INT: _pickle_int,
-    pickle.BININT: packi,
-    pickle.BININT1: chr,
-    pickle.LONG: reprn,
-    pickle.BININT2: lambda arg:"%c%c" % (arg&0xff, arg>>8),
-    pickle.STRING: reprn,
-    pickle.BINSTRING: arg_len,
-    pickle.SHORT_BINSTRING: lambda arg:chr(len(arg)) + arg,
-    pickle.BINUNICODE: lambda arg:arg_len(arg.encode('utf-8')),
-    pickle.GLOBAL: fact_ref,
-    pickle.INST: fact_ref,
-    pickle.BINGET: chr,
-    pickle.LONG_BINGET: packi,
-    pickle.PUT: reprn,
-    pickle.GET: reprn,
-    pickle.BINPUT: chr,
-    pickle.LONG_BINPUT: packi,
-    pickle.PERSID: strn,
-    pickle.UNICODE: unicode_escape,
-    pickle.PROTO: chr,
-    pickle.EXT1: chr,
-    pickle.EXT2: lambda arg:"%c%c" % (arg&0xff, arg>>8),
-    pickle.EXT4: packi,
-    pickle.LONG1: lambda arg:chr(len(pickle.encode_long(arg)))+pickle.encode_long(arg),
-    pickle.LONG4: lambda arg:arg_len(pickle.encode_long(arg)),
-}
-
-
-def to_pickle_chunk(opcode, arg):
-    """Transform an operation and its argument into pickle format."""
-    chunk = opcode
-    if opcode in noargs:
-        pass
-    elif opcode in generators:
-        generated = generators[opcode](arg)
-        chunk += generated
-    else:
-        raise ValueError('Unknown opcode: %s' % (opcode,))
-    return chunk
-
-
-def filter(f, pickle_data):
-    """Apply filter function to each opcode of a pickle, return new pickle.
-
-    Calls function for each opcode with the arguments (code, arg) as created
-    by the pickletools.genops function. 
-
-    The filter function is expected to return a new (code, arg) tuple or None
-    which causes the old (code, arg) tuple to be placed into the stream again.
-
-    """
-    new = StringIO.StringIO()
-    for op, arg, pos in pickletools.genops(pickle_data):
-        op = op.code
-        result = f(op, arg)
-        if result is not None:
-            op, arg = result
-        new.write(to_pickle_chunk(op, arg))
-    return new.getvalue()

Copied: zodbupdate/trunk/src/zodbupdate/serialize.py (from rev 108709, zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/serialize.py)
===================================================================
--- zodbupdate/trunk/src/zodbupdate/serialize.py	                        (rev 0)
+++ zodbupdate/trunk/src/zodbupdate/serialize.py	2010-02-02 14:37:14 UTC (rev 108710)
@@ -0,0 +1,182 @@
+##############################################################################
+#
+# Copyright (c) 2009-2010 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
+from ZODB.broken import find_global, Broken
+import cPickle
+import cStringIO
+import logging
+import types
+
+logger = logging.getLogger('zodbupdate')
+
+
+def isbroken(symb):
+    return isinstance(symb, types.TypeType) and Broken in symb.__mro__
+
+
+class ZODBReference:
+    """Class to remember references we don't want to touch.
+    """
+
+    def __init__(self, ref):
+        self.ref = ref
+
+
+class ObjectRenamer:
+    """This loads and saves a ZODB record, modifying all references to
+    renamed classes according to the given renaming rules:
+
+    - in global symbols contained in the record,
+
+    - in persistent reference information,
+
+    - in class information (first pickle of the record).
+    """
+
+    def __init__(self, changes):
+        self.__added = dict()
+        self.__changes = dict()
+        for old, new in changes.iteritems():
+            self.__changes[tuple(old.split(' '))] = tuple(new.split(' '))
+        self.__changed = False
+
+    def __update_symb(self, symb_info):
+        """This method checks whether a class or symbol has been renamed.
+        If the symbol has not been renamed explicitly, it is
+        loaded and its location is checked to see if it has moved as
+        well.
+        """
+        if symb_info in self.__changes:
+            self.__changed = True
+            return self.__changes[symb_info]
+        else:
+            symb = find_global(*symb_info)
+            if isbroken(symb):
+                logger.warning(u'Warning: Missing factory for %s' %
+                               u' '.join(symb_info))
+            elif hasattr(symb, '__name__') and hasattr(symb, '__module__'):
+                new_symb_info = (symb.__module__, symb.__name__)
+                if new_symb_info != symb_info:
+                    logger.info(
+                        u'New implicit rule detected %s to %s' %
+                        (u' '.join(symb_info), u' '.join(new_symb_info)))
+                    self.__changes[symb_info] = new_symb_info
+                    self.__added[symb_info] = new_symb_info
+                    self.__changed = True
+                    return new_symb_info
+        return symb_info
+
+    def __find_global(self, *klass_info):
+        """Find a class with the given name, looking for a renaming
+        rule first.
+
+        Using ZODB find_global let us manage missing classes.
+        """
+        return find_global(*self.__update_symb(klass_info))
+
+    def __persistent_load(self, reference):
+        """Load a persistent reference. The reference might be changed
+        according to the renaming rules. We give back a special object to
+        represent that reference, and not the real object designated
+        by the reference.
+        """
+        if isinstance(reference, tuple):
+            oid, klass_info = reference
+            if isinstance(klass_info, tuple):
+                klass_info = self.__update_symb(klass_info)
+            return ZODBReference((oid, klass_info))
+        if isinstance(reference, list):
+            mode, information = reference
+            if mode == 'm':
+                database_name, oid, klass_info = information
+                if isinstance(klass_info, tuple):
+                    klass_info = self.__update_symb(klass_info)
+                return ZODBReference(['m', (database_name, oid, klass_info)])
+        return ZODBReference(reference)
+
+    def __unpickler(self, pickle):
+        """Create an unpickler with our custom global symbol loader
+        and reference resolver.
+        """
+        unpickler = cPickle.Unpickler(pickle)
+        unpickler.persistent_load = self.__persistent_load
+        unpickler.find_global = self.__find_global
+        return unpickler
+
+    def __persistent_id(self, obj):
+        """Save the given object as a reference only if it was a
+        reference before. We re-use the same information.
+        """
+        if not isinstance(obj, ZODBReference):
+            return None
+        return obj.ref
+
+    def __pickler(self, output):
+        """Create a pickler able to save to the given file, objects we
+        loaded while paying attention to any reference we loaded.
+        """
+        pickler = cPickle.Pickler(output, 1)
+        pickler.persistent_id = self.__persistent_id
+        return pickler
+
+    def __update_class_meta(self, class_meta):
+        """Update class information, which can contain information
+        about a renamed class.
+        """
+        if isinstance(class_meta, tuple):
+            symb, args = class_meta
+            if isbroken(symb):
+                symb_info = (symb.__module__, symb.__name__)
+                logger.warning(u'Warning: Missing factory for %s' %
+                               u' '.join(symb_info))
+                return (symb_info, args)
+            elif isinstance(symb, tuple):
+                return self.__update_symb(symb), args
+        return class_meta
+
+    def rename(self, input_file):
+        """Take a ZODB record (as a file object) as input. We load it and
+        replace any reference to a renamed class we know of. If any
+        modifications are made, we save the record again and return it;
+        otherwise we return None.
+        """
+        self.__changed = False
+
+        unpickler = self.__unpickler(input_file)
+        class_meta = unpickler.load()
+        data = unpickler.load()
+
+        class_meta = self.__update_class_meta(class_meta)
+
+        if not self.__changed:
+            return None
+
+        output_file = cStringIO.StringIO()
+        pickler = self.__pickler(output_file)
+        try:
+            pickler.dump(class_meta)
+            pickler.dump(data)
+        except cPickle.PicklingError:
+            # Could not pickle that record, likely due to a broken
+            # class; ignore it.
+            return None
+
+        output_file.truncate()
+        return output_file
+
+    def get_found_implicit_rules(self):
+        result = {}
+        for old, new in self.__added.items():
+            result[' '.join(old)] = ' '.join(new)
+        return result

Modified: zodbupdate/trunk/src/zodbupdate/tests.py
===================================================================
--- zodbupdate/trunk/src/zodbupdate/tests.py	2010-02-02 13:32:48 UTC (rev 108709)
+++ zodbupdate/trunk/src/zodbupdate/tests.py	2010-02-02 14:37:14 UTC (rev 108710)
@@ -15,7 +15,7 @@
 import ZODB
 import ZODB.broken
 import ZODB.FileStorage
-import logging 
+import logging
 import os
 import persistent
 import pickle
@@ -25,27 +25,36 @@
 import types
 import unittest
 import zodbupdate.update
-import zodbupdate.picklefilter
 
 
-class IgnoringFilter(object):
-    # Do not spit out any logging during testing.
+class LogFilter(object):
+
+    def __init__(self, msg_lst):
+        self.msg_lst = msg_lst
+
+    # Do not spit out any logging, but record them
     def filter(self, record):
+        self.msg_lst.append(record.msg)
         return False
 
-ignore = IgnoringFilter()
 
-
 class ZODBUpdateTests(unittest.TestCase):
 
     def setUp(self):
-        zodbupdate.update.logger.addFilter(ignore)
+        self.log_messages = []
+        self.log_filter = LogFilter(self.log_messages)
+        zodbupdate.update.logger.addFilter(self.log_filter)
 
         sys.modules['module1'] =  types.ModuleType('module1')
+        sys.modules['module2'] =  types.ModuleType('module2')
         class Factory(persistent.Persistent):
             pass
+        class OtherFactory(persistent.Persistent):
+            pass
         sys.modules['module1'].Factory = Factory
         Factory.__module__ = 'module1'
+        sys.modules['module2'].OtherFactory = OtherFactory
+        OtherFactory.__module__ = 'module2'
 
         _, self.dbfile = tempfile.mkstemp()
 
@@ -69,8 +78,9 @@
         return updater
 
     def tearDown(self):
-        zodbupdate.update.logger.removeFilter(ignore)
+        zodbupdate.update.logger.removeFilter(self.log_filter)
         del sys.modules['module1']
+        del sys.modules['module2']
 
         self.db.close()
         os.unlink(self.dbfile)
@@ -78,32 +88,27 @@
         os.unlink(self.dbfile + '.tmp')
         os.unlink(self.dbfile + '.lock')
 
-    def test_factory_missing(self):
-        # Create a ZODB with an object referencing a factory, then 
-        # remove the factory and update the ZODB.
-        self.root['test'] = sys.modules['module1'].Factory()
-        transaction.commit()
-        del sys.modules['module1'].Factory
-
-        self.assertRaises(ValueError, self.update)
-
     def test_factory_ignore_missing(self):
-        # Create a ZODB with an object referencing a factory, then 
+        # Create a ZODB with an object referencing a factory, then
         # remove the factory and update the ZODB.
         self.root['test'] = sys.modules['module1'].Factory()
         transaction.commit()
         del sys.modules['module1'].Factory
 
-        updater = self.update(ignore_missing=True)
+        updater = self.update()
 
         self.assertEquals('cmodule1\nFactory\nq\x01.}q\x02.',
                           self.storage.load(self.root['test']._p_oid, '')[0])
         self.assert_(isinstance(self.root['test'],
                                 ZODB.broken.PersistentBroken))
-        self.assertEquals({}, updater.renames)
+        self.failUnless(len(self.log_messages))
+        self.assertEquals('Warning: Missing factory for module1 Factory',
+                          self.log_messages[0])
+        renames = updater.processor.get_found_implicit_rules()
+        self.assertEquals({}, renames)
 
     def test_factory_renamed(self):
-        # Create a ZODB with an object referencing a factory, then 
+        # Create a ZODB with an object referencing a factory, then
         # rename the the factory but keep a reference from the old name in
         # place. Update the ZODB. Then remove the old reference. We should
         # then still be able to access the object.
@@ -119,7 +124,8 @@
                           self.storage.load(self.root['test']._p_oid, '')[0])
         self.assertEquals('module1', self.root['test'].__class__.__module__)
         self.assertEquals('NewFactory', self.root['test'].__class__.__name__)
-        self.assertEquals({'module1 Factory': 'module1 NewFactory'}, updater.renames)
+        renames = updater.processor.get_found_implicit_rules()
+        self.assertEquals({'module1 Factory': 'module1 NewFactory'}, renames)
 
     def test_factory_renamed_dryrun(self):
         # Run an update with "dy run" option and see that the pickle is
@@ -133,12 +139,14 @@
         updater = self.update(dry=True)
         self.assertEquals('cmodule1\nFactory\nq\x01.}q\x02.',
                           self.storage.load(self.root['test']._p_oid, '')[0])
-        self.assertEquals({'module1 Factory': 'module1 NewFactory'}, updater.renames)
+        renames = updater.processor.get_found_implicit_rules()
+        self.assertEquals({'module1 Factory': 'module1 NewFactory'}, renames)
 
         updater = self.update(dry=False)
         self.assertEquals('cmodule1\nNewFactory\nq\x01.}q\x02.',
                           self.storage.load(self.root['test']._p_oid, '')[0])
-        self.assertEquals({'module1 Factory': 'module1 NewFactory'}, updater.renames)
+        renames = updater.processor.get_found_implicit_rules()
+        self.assertEquals({'module1 Factory': 'module1 NewFactory'}, renames)
 
     def test_factory_registered_with_copy_reg(self):
         # Factories registered with copy_reg.pickle loose their __name__.
@@ -165,7 +173,8 @@
 
         self.assertEquals('module1', self.root['test'].__class__.__module__)
         self.assertEquals('AnonymousFactory', self.root['test'].__class__.__name__)
-        self.assertEquals({}, updater.renames)
+        renames = updater.processor.get_found_implicit_rules()
+        self.assertEquals({}, renames)
 
     def test_no_transaction_if_no_changes(self):
         # If an update run doesn't produce any changes it won't commit the
@@ -173,7 +182,8 @@
         last = self.storage.lastTransaction()
         updater = self.update()
         self.assertEquals(last, self.storage.lastTransaction())
-        self.assertEquals({}, updater.renames)
+        renames = updater.processor.get_found_implicit_rules()
+        self.assertEquals({}, renames)
 
     def test_loaded_renames_override_automatic(self):
         # Same as test_factory_renamed, but provide a pre-defined rename
@@ -189,7 +199,8 @@
 
         self.assertEquals('cmodule2\nOtherFactory\nq\x01.}q\x02.',
                           self.storage.load(self.root['test']._p_oid, '')[0])
-        self.assertEquals({'module1 Factory': 'module2 OtherFactory'}, updater.renames)
+        renames = updater.processor.get_found_implicit_rules()
+        self.assertEquals({}, renames)
 
 
     def test_loaded_renames_override_missing(self):
@@ -204,193 +215,11 @@
 
         self.assertEquals('cmodule2\nOtherFactory\nq\x01.}q\x02.',
                           self.storage.load(self.root['test']._p_oid, '')[0])
-        self.assertEquals({'module1 Factory': 'module2 OtherFactory'}, updater.renames)
+        renames = updater.processor.get_found_implicit_rules()
+        self.assertEquals({}, renames)
 
 
-class PickleFilterTests(unittest.TestCase):
-    # Tests the pickle filter for re-pickling op-codes
-
-    def assertNonArgCode(self, code):
-        self.assertArgCode(code, code, None)
-
-    def assertArgCode(self, result, code, arg):
-        self.assertEquals(
-            result,
-            zodbupdate.picklefilter.to_pickle_chunk(code, arg))
-
-    def test_sanity_check(self):
-        # Check binary compatibility on simple "real" pickle
-        pass
-
-    def test_MARK(self):
-        self.assertNonArgCode(pickle.MARK)
-
-    def test_STOP(self):
-        self.assertNonArgCode(pickle.STOP)
-
-    def test_POP(self):
-        self.assertNonArgCode(pickle.POP)
-
-    def test_POP_MARK(self):
-        self.assertNonArgCode(pickle.POP_MARK)
-
-    def test_DUP(self):
-        self.assertNonArgCode(pickle.DUP)
-
-    def test_FLOAT(self):
-        self.assertArgCode('F12.300000000000001\n', pickle.FLOAT, 12.3)
-
-    def test_INT(self):
-        self.assertArgCode('I01237940039285380274899124224\n', pickle.INT, 2**90)
-
-    def test_BININT(self):
-        self.assertArgCode('J\x00\x00\x01\x00', pickle.BININT, 0xffff+1)
-
-    def test_BININT1(self):
-        self.assertArgCode('K\xf0', pickle.BININT1, 0xf0)
-
-    def test_LONG(self):
-        self.assertArgCode("L1546\n", pickle.LONG, 1546)
-
-    def test_BININT2(self):
-        self.assertArgCode('M\xf0\xff', pickle.BININT2, 0xfff0)
-
-    def test_NONE(self):
-        self.assertNonArgCode(pickle.NONE)
-
-    def test_PERSID(self):
-        self.assertArgCode('P12345\n', pickle.PERSID, '12345')
-
-    def test_BINPERSID(self):
-        self.assertNonArgCode(pickle.BINPERSID)
-
-    def test_REDUCE(self):
-        self.assertNonArgCode(pickle.REDUCE)
-
-    def test_STRING(self):
-        self.assertArgCode("S'asdf'\n", pickle.STRING, 'asdf')
-
-    def test_BINSTRING(self):
-        self.assertArgCode('T\x06\x00\x00\x00foobar', pickle.BINSTRING, 'foobar')
-
-    def test_SHORT_BINSTRING(self):
-        self.assertArgCode('U\x04asdf', pickle.SHORT_BINSTRING, 'asdf')
-
-    def test_UNICODE(self):
-        self.assertArgCode('V\xfcnders\n', pickle.UNICODE, u'\xfcnders')
-
-    def test_BINUNICODE(self):
-        self.assertArgCode('X\x06\x00\x00\x00\xc3\xbc1234', pickle.BINUNICODE, u'\xfc1234')
-
-    def test_APPEND(self):
-        self.assertNonArgCode(pickle.APPEND)
-
-    def test_BUILD(self):
-        self.assertNonArgCode(pickle.BUILD)
-
-    def test_GLOBAL(self):
-        self.assertArgCode('cbar\nfoo\n', pickle.GLOBAL, 'bar foo')
-
-    def test_DICT(self):
-        self.assertNonArgCode(pickle.DICT)
-
-    def test_EMPTY_DICT(self):
-        self.assertNonArgCode(pickle.EMPTY_DICT)
-
-    def test_APPENDS(self):
-        self.assertNonArgCode(pickle.APPENDS)
-
-    def test_GET(self):
-        self.assertArgCode('g12\n', pickle.GET, 12)
-
-    def test_BINGET(self):
-        self.assertArgCode('h\x80', pickle.BINGET, 128)
-
-    def test_INST(self):
-        self.assertArgCode('ifoo\nbar\n', pickle.INST, 'foo bar')
-
-    def test_LONG_BINGET(self):
-        self.assertArgCode('j\x00\x04\x00\x00', pickle.LONG_BINGET, 1024)
-
-    def test_LIST(self):
-        self.assertNonArgCode(pickle.LIST)
-
-    def test_EMPTY_LIST(self):
-        self.assertNonArgCode(pickle.EMPTY_LIST)
-
-    def test_OBJ(self):
-        self.assertNonArgCode(pickle.OBJ)
-
-    def test_PUT(self):
-        self.assertArgCode("p12\n", pickle.PUT, 12)
-
-    def test_BINPUT(self):
-        self.assertArgCode('q\x80', pickle.BINPUT, 128)
-
-    def test_LONG_BINPUT(self):
-        self.assertArgCode('r\x00\x04\x00\x00', pickle.LONG_BINPUT, 1024)
-
-    def test_SETITEM(self):
-        self.assertNonArgCode(pickle.SETITEM)
-
-    def test_TUPLE(self):
-        self.assertNonArgCode(pickle.TUPLE)
-
-    def test_EMPTY_TUPLE(self):
-        self.assertNonArgCode(pickle.EMPTY_TUPLE)
-
-    def test_SETITEMS(self):
-        self.assertNonArgCode(pickle.SETITEMS)
-
-    def test_BINFLOAT(self):
-        self.assertArgCode('G@(\x00\x00\x00\x00\x00\x00',
-                           pickle.BINFLOAT, 12.0)
-
-    def test_TRUE(self):
-        self.assertArgCode(pickle.TRUE, pickle.INT, True)
-
-    def test_FALSE(self):
-        self.assertArgCode(pickle.FALSE, pickle.INT, False)
-
-    def test_PROTO(self):
-        self.assertArgCode('\x80\x01', pickle.PROTO, 1)
-
-    def test_NEWOBJ(self):
-        self.assertNonArgCode(pickle.NEWOBJ)
-
-    def test_EXT1(self):
-        self.assertArgCode('\x82\xf0', pickle.EXT1, 0xf0)
-
-    def test_EXT2(self):
-        self.assertArgCode('\x83\x00\x01', pickle.EXT2, 0xff+1)
-
-    def test_EXT4(self):
-        self.assertArgCode('\x84\x00\x00\x01\x00', pickle.EXT4, 0xffff+1)
-
-    def test_TUPLE1(self):
-        self.assertNonArgCode(pickle.TUPLE1)
-
-    def test_TUPLE2(self):
-        self.assertNonArgCode(pickle.TUPLE2)
-
-    def test_TUPLE3(self):
-        self.assertNonArgCode(pickle.TUPLE3)
-
-    def test_NEWTRUE(self):
-        self.assertNonArgCode(pickle.NEWTRUE)
-
-    def test_NEWFALSE(self):
-        self.assertNonArgCode(pickle.NEWFALSE)
-
-    def test_LONG1(self):
-        self.assertArgCode('\x8a\x02\x80\x00', pickle.LONG1, 128)
-
-    def test_LONG4(self):
-        self.assertArgCode('\x8b\x02\x00\x00\x00\x00\x04', pickle.LONG4, 2**10)
-
-
 def test_suite():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(ZODBUpdateTests))
-    suite.addTest(unittest.makeSuite(PickleFilterTests))
     return suite

Modified: zodbupdate/trunk/src/zodbupdate/update.py
===================================================================
--- zodbupdate/trunk/src/zodbupdate/update.py	2010-02-02 13:32:48 UTC (rev 108709)
+++ zodbupdate/trunk/src/zodbupdate/update.py	2010-02-02 14:37:14 UTC (rev 108710)
@@ -13,108 +13,99 @@
 ##############################################################################
 
 from ZODB.DB import DB
-import StringIO
+from ZODB.FileStorage import FileStorage
+from struct import pack, unpack
+import ZODB.POSException
 import ZODB.broken
 import ZODB.utils
+import cStringIO
 import logging
 import pickle
 import pickletools
 import sys
 import transaction
-import zodbupdate.picklefilter
+import zodbupdate.serialize
 
 logger = logging.getLogger('zodbupdate')
 
+TRANSACTION_COUNT = 100000
 
+
 class Updater(object):
     """Update class references for all current objects in a storage."""
 
-    def __init__(self, storage, dry=False, ignore_missing=False, renames=None):
-        self.ignore_missing = ignore_missing
+    def __init__(self, storage, dry=False, renames=None):
         self.dry = dry
         self.storage = storage
-        self.missing = set()
-        self.renames = renames or {}
+        self.processor = zodbupdate.serialize.ObjectRenamer(renames or {})
 
-    def __call__(self):
+    def __new_transaction(self):
         t = transaction.Transaction()
         self.storage.tpc_begin(t)
         t.note('Updated factory references using `zodbupdate`.')
+        return t
 
-        for oid, serial, current in self.records:
-            new = self.update_record(current)
-            if new == current.getvalue():
-                continue
-            logger.debug('Updated %s' % ZODB.utils.oid_repr(oid))
-            self.storage.store(oid, serial, new, '', t)
-
-        if self.dry:
-            logger.info('Dry run selected, aborting transaction.')
+    def __commit_transaction(self, t, changed):
+        if self.dry or not changed:
+            logger.info('Dry run selected or no changes, aborting transaction.')
             self.storage.tpc_abort(t)
         else:
             logger.info('Committing changes.')
             self.storage.tpc_vote(t)
             self.storage.tpc_finish(t)
 
-    @property
-    def records(self):
-        next = None
-        while True:
-            oid, tid, data, next = self.storage.record_iternext(next)
-            yield oid, tid, StringIO.StringIO(data)
-            if next is None:
-                break
+    def __call__(self):
+        count = 0
+        t = self.__new_transaction()
 
-    def update_record(self, old):
-        new = ''
-        for i in range(2):
-            # ZODB data records consist of two concatenated pickles, so the
-            # following needs to be done twice:
-            new += zodbupdate.picklefilter.filter(
-                self.update_operation, old)
-        return new
+        for oid, serial, current in self.records:
+            new = self.processor.rename(current)
+            if new is None:
+                continue
 
-    def update_operation(self, code, arg):
-        """Check a pickle operation for moved or missing factory references.
+            logger.debug('Updated %s' % ZODB.utils.oid_repr(oid))
+            self.storage.store(oid, serial, new.getvalue(), '', t)
+            count += 1
 
-        Returns an updated (code, arg) tuple using the canonical reference for the
-        factory as would be created if the pickle was unpickled and re-pickled.
+            if count > TRANSACTION_COUNT:
+                count = 0
+                self.__commit_transaction(t, True)
+                t = self.__new_transaction()
 
-        """
-        if code not in 'ci':
-            return
+        self.__commit_transaction(t, count != 0)
 
-        if arg in self.renames:
-            return code, self.renames[arg]
 
-        factory_module, factory_name = arg.split(' ')
-        try:
-            module = __import__(factory_module, globals(), {}, [factory_name])
-            factory = getattr(module, factory_name)
-        except (AttributeError, ImportError):
-            name = '%s.%s' % (factory_module, factory_name)
-            message = 'Missing factory: %s' % name
-            logger.info(message)
-            self.missing.add(name)
-            if self.ignore_missing:
-                return
-            raise ValueError(message)
+    @property
+    def records(self):
+        if not isinstance(self.storage, FileStorage):
+            # Only FileStorage has _index (this is not an API-defined attribute)
+            next = None
+            while True:
+                oid, tid, data, next = self.storage.record_iternext(next)
+                yield oid, tid, cStringIO.StringIO(data)
+                if next is None:
+                    break
+        else:
+            index = self.storage._index
+            next_oid = None
 
-        if not hasattr(factory, '__name__'):
-            logger.warn(
-                "factory %r does not have __name__: "
-                "can't check canonical location" % factory)
-            return
-        if not hasattr(factory, '__module__'):
-            # TODO: This case isn't covered with a test. I just
-            # couldn't provoke a factory to not have a __module__ but
-            # users reported this issue to me.
-            logger.warn(
-                "factory %r does not have __module__: "
-                "can't check canonical location" % factory)
-            return
+            while True:
+                oid = index.minKey(next_oid)
+                try:
+                    data, tid = self.storage.load(oid, "")
+                except ZODB.POSException.POSKeyError, e:
+                    logger.error(
+                        u'Warning: Jumping record %s, '
+                        u'referencing missing key in database: %s' %
+                        (ZODB.utils.oid_repr(oid), str(e)))
+                else:
+                    yield  oid, tid, cStringIO.StringIO(data)
 
-        new_arg = '%s %s' % (factory.__module__, factory.__name__)
-        if new_arg != arg:
-            self.renames[arg] = new_arg
-        return code, new_arg
+                oid_as_long, = unpack(">Q", oid)
+                next_oid = pack(">Q", oid_as_long + 1)
+                try:
+                    next_oid = index.minKey(next_oid)
+                except ValueError:
+                    # No more records
+                    break
+



More information about the checkins mailing list