[Zodb-checkins] SVN: ZODB/trunk/src/ - Added migration script

Christian Theune ct at gocept.com
Sat Jun 21 08:17:53 EDT 2008


Log message for revision 87622:
  - Added migration script
  - Fixed bug in bushy layout: oid recognition pattern would not handle hex
    representations correctly
  - Fixed bug in lawn layout: empty strings (the base directory) would be
    recognized as the oid 0.
  

Changed:
  U   ZODB/trunk/src/CHANGES.txt
  U   ZODB/trunk/src/ZODB/blob.py
  A   ZODB/trunk/src/ZODB/scripts/migrateblobs.py
  U   ZODB/trunk/src/ZODB/tests/blob_layout.txt
  U   ZODB/trunk/src/ZODB/tests/testblob.py

-=-
Modified: ZODB/trunk/src/CHANGES.txt
===================================================================
--- ZODB/trunk/src/CHANGES.txt	2008-06-21 09:27:56 UTC (rev 87621)
+++ ZODB/trunk/src/CHANGES.txt	2008-06-21 12:17:51 UTC (rev 87622)
@@ -11,7 +11,8 @@
 - Changed layout strategy for the blob directory to a bushy approach (8 levels
   deep, at most ~256 entries per directory level, one directory for each
   blob). Old directories are automatically detected and will be handled with
-  the old strategy.
+  the old strategy. A migration script (`migrateblobs.py`) is provided to
+  convert between the different layouts.
 
 - Versions are no-longer supported.
 

Modified: ZODB/trunk/src/ZODB/blob.py
===================================================================
--- ZODB/trunk/src/ZODB/blob.py	2008-06-21 09:27:56 UTC (rev 87621)
+++ ZODB/trunk/src/ZODB/blob.py	2008-06-21 12:17:51 UTC (rev 87622)
@@ -451,26 +451,24 @@
 
 def auto_layout_select(path):
     # A heuristic to look at a path and determine which directory layout to
-    # use. Basically we try to figure out if the directory is either already
-    # used and contains an explicit marker, is unused or used without a
-    # marker.
+    # use.
     layout_marker = os.path.join(path, LAYOUT_MARKER)
     if not os.path.exists(path):
         log('Blob directory %s does not exist. '
             'Selected `bushy` layout. ' % path)
         layout = 'bushy'
     elif len(os.listdir(path)) == 0:
-        log('Blob directory %s is unused and has no layout marker set.'
+        log('Blob directory `%s` is unused and has no layout marker set. '
             'Selected `bushy` layout. ' % path)
         layout = 'bushy'
     elif LAYOUT_MARKER not in os.listdir(path):
-        log('Blob directory %s is used but has no layout marker set.'
+        log('Blob directory `%s` is used but has no layout marker set. '
             'Selected `lawn` layout. ' % path)
         layout = 'lawn'
     else:
         layout = open(layout_marker, 'rb').read()
         layout = layout.strip()
-        log('Blob directory %s has layout marker set.'
+        log('Blob directory `%s` has layout marker set. '
             'Selected `%s` layout. ' % (path, layout))
     return layout
 
@@ -483,7 +481,7 @@
 
     """
 
-    blob_path_pattern = r'^' + (r'0x[0-9]{1,2}/*'*8) + r'$'
+    blob_path_pattern = r'^' + (r'0x[0-9a-f]{1,2}/*'*8) + r'$'
     blob_path_pattern = re.compile(blob_path_pattern)
 
     def oid_to_path(self, oid):
@@ -496,8 +494,7 @@
 
     def path_to_oid(self, path):
         if self.blob_path_pattern.match(path) is None:
-            raise ValueError("Not a valid OID path: %s" % path)
-        # The path always has a leading slash that we need to ignore.
+            raise ValueError("Not a valid OID path: `%s`" % path)
         path = path.split('/')
         # The path contains the OID in little endian form but the OID itself
         # is big endian.
@@ -522,9 +519,13 @@
 
     def path_to_oid(self, path):
         try:
+            if path == '':
+                # This is a special case where repr_to_oid converts '' to the
+                # OID z64.
+                raise TypeError()
             return utils.repr_to_oid(path)
         except TypeError:
-            raise ValueError('Not a valid OID path: %s' % path)
+            raise ValueError('Not a valid OID path: `%s`' % path)
 
 LAYOUTS['lawn'] = LawnLayout()
 

Added: ZODB/trunk/src/ZODB/scripts/migrateblobs.py
===================================================================
--- ZODB/trunk/src/ZODB/scripts/migrateblobs.py	                        (rev 0)
+++ ZODB/trunk/src/ZODB/scripts/migrateblobs.py	2008-06-21 12:17:51 UTC (rev 87622)
@@ -0,0 +1,74 @@
+##############################################################################
+#
+# Copyright (c) 2008 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""A script to migrate a blob directory into a different layout.
+"""
+
+import logging
+import optparse
+import os
+
+from ZODB.blob import FilesystemHelper, rename_or_copy_blob
+from ZODB.utils import cp, oid_repr
+
+
+def link_or_copy(f1, f2):
+    try:
+        os.link(f1, f2)
+    except OSError:
+        shutil.copy(f1, f2)
+
+
+def migrate(source, dest, layout):
+    source_fsh = FilesystemHelper(source)
+    source_fsh.create()
+    dest_fsh = FilesystemHelper(dest, layout)
+    dest_fsh.create()
+    print "Migrating blob data from `%s` (%s) to `%s` (%s)" % (
+        source, source_fsh.layout_name, dest, dest_fsh.layout_name)
+    for oid, path in source_fsh.listOIDs():
+        dest_path = dest_fsh.getPathForOID(oid, create=True)
+        files = os.listdir(path)
+        for file in files:
+            source_file = os.path.join(path, file)
+            dest_file = os.path.join(dest_path, file)
+            link_or_copy(source_file, dest_file)
+        print "\tOID: %s - %s files " % (oid_repr(oid), len(files))
+
+
+def main(source=None, dest=None, layout="bushy"):
+    usage = "usage: %prog [options] <source> <dest> <layout>"
+    description = ("Create the new directory <dest> and migrate all blob "
+                   "data <source> to <dest> while using the new <layout> for "
+                   "<dest>")
+
+    parser = optparse.OptionParser(usage=usage, description=description)
+    parser.add_option("-l", "--layout",
+                      default=layout, type='choice',
+                      choices=['bushy', 'lawn'],
+                      help="Define the layout to use for the new directory "
+                      "(bushy or lawn). Default: %default")
+    options, args = parser.parse_args()
+
+    if not len(args) == 2:
+        parser.error("source and destination must be given")
+
+    logging.getLogger().addHandler(logging.StreamHandler())
+    logging.getLogger().setLevel(0)
+
+    source, dest = args
+    migrate(source, dest, options.layout)
+
+
+if __name__ == '__main__':
+    main()


Property changes on: ZODB/trunk/src/ZODB/scripts/migrateblobs.py
___________________________________________________________________
Name: svn:eol-style
   + native

Modified: ZODB/trunk/src/ZODB/tests/blob_layout.txt
===================================================================
--- ZODB/trunk/src/ZODB/tests/blob_layout.txt	2008-06-21 09:27:56 UTC (rev 87621)
+++ ZODB/trunk/src/ZODB/tests/blob_layout.txt	2008-06-21 12:17:51 UTC (rev 87622)
@@ -31,12 +31,14 @@
 
 >>> bushy.path_to_oid('0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
 '\x00\x00\x00\x00\x00\x00\x00\x01'
+>>> bushy.path_to_oid('0xff/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
+'\x00\x00\x00\x00\x00\x00\x00\xff'
 
 Paths that do not represent an OID will cause a ValueError:
 
 >>> bushy.path_to_oid('tmp')
 Traceback (most recent call last):
-ValueError: Not a valid OID path: tmp
+ValueError: Not a valid OID path: `tmp`
 
 
 The `lawn` layout
@@ -61,7 +63,10 @@
 
 >>> lawn.path_to_oid('tmp')
 Traceback (most recent call last):
-ValueError: Not a valid OID path: tmp
+ValueError: Not a valid OID path: `tmp`
+>>> lawn.path_to_oid('')
+Traceback (most recent call last):
+ValueError: Not a valid OID path: ``
 
 
 Auto-detecting the layout of a directory
@@ -162,3 +167,117 @@
 
 
 >>> shutil.rmtree(d)
+
+
+Migrating between directory layouts
+===================================
+
+A script called `migrateblobs.py` is distributed with the ZODB for offline
+migration between different directory layouts. It can migrate any
+blob directory layout to any other layout. It leaves the original blob
+directory untouched (except for possibly creating a temporary directory and
+the storage layout marker).
+
+The migration is accessible as a library function:
+
+>>> from ZODB.scripts.migrateblobs import migrate
+
+Create a `lawn` directory structure and migrate it to the new `bushy` one:
+
+>>> from ZODB.blob import FilesystemHelper
+>>> d = tempfile.mkdtemp()
+>>> old = os.path.join(d, 'old')
+>>> old_fsh = FilesystemHelper(old, 'lawn')
+>>> old_fsh.create()
+>>> blob1 = old_fsh.getPathForOID(7039, create=True)
+>>> blob2 = old_fsh.getPathForOID(10, create=True)
+>>> blob3 = old_fsh.getPathForOID(7034, create=True)
+>>> open(os.path.join(blob1, 'foo'), 'wb').write('foo')
+>>> open(os.path.join(blob1, 'foo2'), 'wb').write('bar')
+>>> open(os.path.join(blob2, 'foo3'), 'wb').write('baz')
+>>> open(os.path.join(blob2, 'foo4'), 'wb').write('qux')
+>>> open(os.path.join(blob3, 'foo5'), 'wb').write('quux')
+>>> open(os.path.join(blob3, 'foo6'), 'wb').write('corge')
+
+Committed blobs have their permissions set to 000
+
+The migration function is called with the old and the new path and the layout
+that shall be used for the new directory:
+
+>>> bushy = os.path.join(d, 'bushy')
+>>> migrate(old, bushy, 'bushy')  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+Migrating blob data from `/.../old` (lawn) to `/.../bushy` (bushy)
+    OID: 0x1b7f - 2 files 
+    OID: 0x0a - 2 files 
+    OID: 0x1b7a - 2 files 
+
+The new directory now contains the same files in different directories, but
+with the same sizes and permissions:
+
+>>> import string
+>>> def stat(path):
+...     s = os.stat(path)
+...     print "%s\t%s\t%s" % (string.rjust(oct(s.st_mode), 10), s.st_size, path)
+>>> def ls(path):
+...     for p, dirs, files in os.walk(path):
+...         stat(p)
+...         for file in files:
+...             stat(os.path.join(p, file))
+>>> ls(bushy)
+     040700  4096  /.../bushy
+    0100644  5     /.../bushy/.layout
+     040700  4096  /.../bushy/0x7a
+     040700  4096  /.../bushy/0x7a/0x1b
+     040700  4096  /.../bushy/0x7a/0x1b/0x00
+     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00
+     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
+    0100644  5     /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo6
+    0100644  4     /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo5
+     040700  4096  /.../bushy/tmp
+     040700  4096  /.../bushy/0x0a
+     040700  4096  /.../bushy/0x0a/0x00
+     040700  4096  /.../bushy/0x0a/0x00/0x00
+     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00
+    0100644  3     /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo4
+    0100644  3     /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo3
+     040700  4096  /.../bushy/0x7f
+     040700  4096  /.../bushy/0x7f/0x1b
+     040700  4096  /.../bushy/0x7f/0x1b/0x00
+     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00
+     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00
+     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
+    0100644  3     /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo
+    0100644  3     /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo2
+
+We can also migrate the bushy layout back to the lawn layout:
+
+>>> lawn = os.path.join(d, 'lawn')
+>>> migrate(bushy, lawn, 'lawn')
+Migrating blob data from `/.../bushy` (bushy) to `/.../lawn` (lawn)
+    OID: 0x1b7a - 2 files 
+    OID: 0x0a - 2 files 
+    OID: 0x1b7f - 2 files 
+>>> ls(lawn)
+    040700  4096    /.../lawn
+   0100644  4       /.../lawn/.layout
+    040700  4096    /.../lawn/0x1b7f
+   0100644  3       /.../lawn/0x1b7f/foo
+   0100644  3       /.../lawn/0x1b7f/foo2
+    040700  4096    /.../lawn/tmp
+    040700  4096    /.../lawn/0x0a
+   0100644  3       /.../lawn/0x0a/foo4
+   0100644  3       /.../lawn/0x0a/foo3
+    040700  4096    /.../lawn/0x1b7a
+   0100644  5       /.../lawn/0x1b7a/foo6
+   0100644  4       /.../lawn/0x1b7a/foo5
+
+>>> shutil.rmtree(d)

Modified: ZODB/trunk/src/ZODB/tests/testblob.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testblob.py	2008-06-21 09:27:56 UTC (rev 87621)
+++ ZODB/trunk/src/ZODB/tests/testblob.py	2008-06-21 12:17:51 UTC (rev 87622)
@@ -502,10 +502,16 @@
     suite.addTest(doctest.DocFileSuite(
         "blob_basic.txt",  "blob_connection.txt", "blob_transaction.txt",
         "blob_packing.txt", "blob_importexport.txt", "blob_consume.txt",
-        "blob_tempdir.txt", "blob_layout.txt",
+        "blob_tempdir.txt",
         setUp=ZODB.tests.util.setUp,
         tearDown=ZODB.tests.util.tearDown,
         ))
+    suite.addTest(doctest.DocFileSuite(
+        "blob_layout.txt",
+        optionflags=doctest.ELLIPSIS|doctest.NORMALIZE_WHITESPACE|doctest.REPORT_NDIFF,
+        setUp=ZODB.tests.util.setUp,
+        tearDown=ZODB.tests.util.tearDown,
+        ))
     suite.addTest(doctest.DocTestSuite(
         setUp=ZODB.tests.util.setUp,
         tearDown=ZODB.tests.util.tearDown,



More information about the Zodb-checkins mailing list