[Checkins] SVN: zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/ After some struggle with the pickle filter, implement the persistent reference changes by loading and dumping the record again. Didn't find anything better.

Sylvain Viollon sylvain at infrae.com
Thu Nov 5 07:10:08 EST 2009


Log message for revision 105491:
  After some struggle with the pickle filter, implement the persistent reference changes by loading and dumping the record again. Didn't find anything better.
  
  

Changed:
  U   zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/main.py
  D   zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/picklefilter.py
  A   zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/serialize.py
  U   zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/update.py

-=-
Modified: zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/main.py
===================================================================
--- zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/main.py	2009-11-05 12:07:38 UTC (rev 105490)
+++ zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/main.py	2009-11-05 12:10:08 UTC (rev 105491)
@@ -30,10 +30,6 @@
                   help="load storage from config file")
 parser.add_option("-n", "--dry-run", action="store_true",
                   help="perform a trial run with no changes made")
-parser.add_option("-i", "--ignore-missing", action="store_true",
-                  help="update database even if classes are missing")
-parser.add_option("-s", "--save-renames",
-                  help="save automatically determined rename rules to file")
 parser.add_option("-q", "--quiet", action="store_true",
                   help="suppress non-error messages")
 parser.add_option("-v", "--verbose", action="store_true",
@@ -83,12 +79,11 @@
     for entry_point in pkg_resources.iter_entry_points('zodbupdate'):
         rules = entry_point.load()
         rename_rules.update(rules)
-        logging.debug('Loaded %s rules from %s:%s' %
+        logging.info('Loaded %s rules from %s:%s' %
                       (len(rules), entry_point.module_name, entry_point.name))
 
     updater = zodbupdate.update.Updater(
         storage, dry=options.dry_run,
-        ignore_missing=options.ignore_missing,
         renames=rename_rules)
     try:
         updater()
@@ -97,7 +92,3 @@
         logging.error('Stopped processing, due to: %s' % e)
         raise SystemExit()
 
-    if options.save_renames:
-        f = open(options.save_renames, 'w')
-        f.write('renames = %s' % pprint.pformat(updater.renames))
-        f.close()

Deleted: zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/picklefilter.py
===================================================================
--- zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/picklefilter.py	2009-11-05 12:07:38 UTC (rev 105490)
+++ zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/picklefilter.py	2009-11-05 12:10:08 UTC (rev 105491)
@@ -1,131 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2009 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Tools for filtering a pickle opcode stream (as generated by
-pickletools.genops) and reassemblying the pickle.
-"""
-
-import ZODB
-import sys
-import struct
-import pickle
-import pickletools
-import StringIO
-
-# The following functions were created on the basis of the code found in
-# pickle.py. They reflect all opcodes that pickle knows about and how they get
-# written to the output stream under the knowledge how pickletools.genops
-# parses the opcode arguments.
-
-packi = lambda arg:struct.pack('<i', arg)
-reprn = lambda arg:repr(arg)+'\n'
-strn = lambda arg:str(arg)+'\n'
-fact_ref = lambda arg:arg.replace(' ','\n')+'\n'
-arg_len = lambda arg:packi(len(arg))+arg
-unicode_escape = lambda arg:arg.replace('\\', '\\u005c').replace('\n', '\\u000a').encode('raw-unicode-escape')+'\n'
-
-noargs = [pickle.EMPTY_TUPLE,
-          pickle.MARK,
-          pickle.STOP,
-          pickle.NONE,
-          pickle.BINPERSID,
-          pickle.REDUCE,
-          pickle.EMPTY_LIST,
-          pickle.APPEND,
-          pickle.BUILD,
-          pickle.DICT,
-          pickle.APPENDS,
-          pickle.OBJ,
-          pickle.SETITEM,
-          pickle.TUPLE,
-          pickle.SETITEMS,
-          pickle.EMPTY_DICT,
-          pickle.LIST,
-          pickle.POP,
-          pickle.POP_MARK,
-          pickle.DUP,
-          pickle.NEWOBJ,
-          pickle.TUPLE1,
-          pickle.TUPLE2,
-          pickle.TUPLE3,
-          pickle.NEWTRUE,
-          pickle.NEWFALSE]
-
-def _pickle_int(arg):
-    if type(arg) is int:
-        return reprn(arg)
-    else:
-        return '0%s\n' % int(arg)
-        
-generators = {
-    pickle.BINFLOAT: lambda arg:struct.pack('>d', arg),
-    pickle.FLOAT: reprn,
-    pickle.INT: _pickle_int,
-    pickle.BININT: packi,
-    pickle.BININT1: chr,
-    pickle.LONG: reprn,
-    pickle.BININT2: lambda arg:"%c%c" % (arg&0xff, arg>>8),
-    pickle.STRING: reprn,
-    pickle.BINSTRING: arg_len,
-    pickle.SHORT_BINSTRING: lambda arg:chr(len(arg)) + arg,
-    pickle.BINUNICODE: lambda arg:arg_len(arg.encode('utf-8')),
-    pickle.GLOBAL: fact_ref,
-    pickle.INST: fact_ref,
-    pickle.BINGET: chr,
-    pickle.LONG_BINGET: packi,
-    pickle.PUT: reprn,
-    pickle.GET: reprn,
-    pickle.BINPUT: chr,
-    pickle.LONG_BINPUT: packi,
-    pickle.PERSID: strn,
-    pickle.UNICODE: unicode_escape,
-    pickle.PROTO: chr,
-    pickle.EXT1: chr,
-    pickle.EXT2: lambda arg:"%c%c" % (arg&0xff, arg>>8),
-    pickle.EXT4: packi,
-    pickle.LONG1: lambda arg:chr(len(pickle.encode_long(arg)))+pickle.encode_long(arg),
-    pickle.LONG4: lambda arg:arg_len(pickle.encode_long(arg)),
-}
-
-
-def to_pickle_chunk(opcode, arg):
-    """Transform an operation and its argument into pickle format."""
-    chunk = opcode
-    if opcode in noargs:
-        pass
-    elif opcode in generators:
-        generated = generators[opcode](arg)
-        chunk += generated
-    else:
-        raise ValueError('Unknown opcode: %s' % (opcode,))
-    return chunk
-
-
-def filter(f, pickle_data):
-    """Apply filter function to each opcode of a pickle, return new pickle.
-
-    Calls function for each opcode with the arguments (code, arg) as created
-    by the pickletools.genops function. 
-
-    The filter function is expected to return a new (code, arg) tuple or None
-    which causes the old (code, arg) tuple to be placed into the stream again.
-
-    """
-    new = StringIO.StringIO()
-    for op, arg, pos in pickletools.genops(pickle_data):
-        op = op.code
-        result = f(op, arg)
-        if result is not None:
-            op, arg = result
-        new.write(to_pickle_chunk(op, arg))
-    return new.getvalue()

Added: zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/serialize.py
===================================================================
--- zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/serialize.py	                        (rev 0)
+++ zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/serialize.py	2009-11-05 12:10:08 UTC (rev 105491)
@@ -0,0 +1,82 @@
+
+from ZODB.broken import find_global
+import cPickle
+import cStringIO
+
+
+class ZODBReference:
+    """Class to remenber reference we don't want to touch.
+    """
+
+    def __init__(self, ref):
+        self.ref = ref
+
+
+class ObjectRenamer:
+    """This load and save a record using persistent_id and
+    persistent_load methods defined in the ZODB code to change
+    information at that point as well.
+    """
+
+    def __init__(self, storage, changes):
+        self.__cache = dict()
+        self.__changes = dict()
+        for old, new in changes.iteritems():
+            self.__changes[tuple(old.split(' '))] = tuple(new.split(' '))
+        self.__changed = False
+
+    def __find_global(self, *names):
+        if names in self.__changes:
+            names = self.__changes[names]
+            self.__changed = True
+        return find_global(*names)
+
+    def __factory(self, nothing, modulename, globalname):
+        return self.__find_global(modulename, globalname)
+
+    def __persistent_load(self, reference):
+        if isinstance(reference, tuple):
+            oid, klass = reference
+            if klass in self.__changes:
+                klass = self.__changes[klass]
+                self.__changed = True
+            return ZODBReference((oid, klass))
+        return ZODBReference(reference)
+
+    def __unpickler(self, pickle):
+        unpickler = cPickle.Unpickler(pickle)
+        unpickler.persistent_load = self.__persistent_load
+        unpickler.find_global = self.__find_global
+        return unpickler
+
+    def __persistent_id(self, obj):
+        if not isinstance(obj, ZODBReference):
+            return None
+        return obj.ref
+
+    def __pickler(self, output):
+        pickler = cPickle.Pickler(output, 1)
+        pickler.persistent_id = self.__persistent_id
+        pickler.clear_memo()
+        return pickler
+
+    def rename(self, input_file):
+        self.__changed = False
+        self.__cache.clear()
+
+        unpickler = self.__unpickler(input_file)
+        class_meta = unpickler.load()
+        data = unpickler.load()
+
+        if not self.__changed:
+            input_file.seek(0)
+            return None
+
+        output_file = cStringIO.StringIO()
+        pickler = self.__pickler(output_file)
+        pickler.dump(class_meta)
+        pickler.dump(data)
+
+        output_file.truncate()
+        output_file.seek(0)
+        return output_file


Property changes on: zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/serialize.py
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision

Modified: zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/update.py
===================================================================
--- zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/update.py	2009-11-05 12:07:38 UTC (rev 105490)
+++ zodbupdate/branches/sylvain-persistent-load/src/zodbupdate/update.py	2009-11-05 12:10:08 UTC (rev 105491)
@@ -13,7 +13,7 @@
 ##############################################################################
 
 from ZODB.DB import DB
-import StringIO
+import cStringIO
 import ZODB.broken
 import ZODB.utils
 import logging
@@ -21,7 +21,7 @@
 import pickletools
 import sys
 import transaction
-import zodbupdate.picklefilter
+import zodbupdate.serialize
 
 logger = logging.getLogger('zodbupdate')
 
@@ -29,12 +29,10 @@
 class Updater(object):
     """Update class references for all current objects in a storage."""
 
-    def __init__(self, storage, dry=False, ignore_missing=False, renames=None):
-        self.ignore_missing = ignore_missing
+    def __init__(self, storage, dry=False, renames=None):
         self.dry = dry
         self.storage = storage
-        self.missing = set()
-        self.renames = renames or {}
+        self.update = zodbupdate.serialize.ObjectRenamer(storage, renames or {})
 
     def __call__(self):
         t = transaction.Transaction()
@@ -42,11 +40,11 @@
         t.note('Updated factory references using `zodbupdate`.')
 
         for oid, serial, current in self.records:
-            new = self.update_record(current)
-            if new == current.getvalue():
+            new = self.update.rename(current)
+            if new is None:
                 continue
             logger.debug('Updated %s' % ZODB.utils.oid_repr(oid))
-            self.storage.store(oid, serial, new, '', t)
+            self.storage.store(oid, serial, new.getvalue(), '', t)
 
         if self.dry:
             logger.info('Dry run selected, aborting transaction.')
@@ -61,60 +59,7 @@
         next = None
         while True:
             oid, tid, data, next = self.storage.record_iternext(next)
-            yield oid, tid, StringIO.StringIO(data)
+            yield oid, tid, cStringIO.StringIO(data)
             if next is None:
                 break
 
-    def update_record(self, old):
-        new = ''
-        for i in range(2):
-            # ZODB data records consist of two concatenated pickles, so the
-            # following needs to be done twice:
-            new += zodbupdate.picklefilter.filter(
-                self.update_operation, old)
-        return new
-
-    def update_operation(self, code, arg):
-        """Check a pickle operation for moved or missing factory references.
-
-        Returns an updated (code, arg) tuple using the canonical reference for the
-        factory as would be created if the pickle was unpickled and re-pickled.
-
-        """
-        if code not in 'ci':
-            return
-
-        if arg in self.renames:
-            return code, self.renames[arg]
-
-        factory_module, factory_name = arg.split(' ')
-        try:
-            module = __import__(factory_module, globals(), {}, [factory_name])
-            factory = getattr(module, factory_name)
-        except (AttributeError, ImportError):
-            name = '%s.%s' % (factory_module, factory_name)
-            message = 'Missing factory: %s' % name
-            logger.info(message)
-            self.missing.add(name)
-            if self.ignore_missing:
-                return
-            raise ValueError(message)
-
-        if not hasattr(factory, '__name__'):
-            logger.warn(
-                "factory %r does not have __name__: "
-                "can't check canonical location" % factory)
-            return
-        if not hasattr(factory, '__module__'):
-            # TODO: This case isn't covered with a test. I just
-            # couldn't provoke a factory to not have a __module__ but
-            # users reported this issue to me.
-            logger.warn(
-                "factory %r does not have __module__: "
-                "can't check canonical location" % factory)
-            return
-
-        new_arg = '%s %s' % (factory.__module__, factory.__name__)
-        if new_arg != arg:
-            self.renames[arg] = new_arg
-        return code, new_arg



More information about the checkins mailing list