[Checkins] SVN: zc.zodbdgc/trunk/src/zc/zodbdgc/ Added support for storages with transformed (e.g. compressed) data

Jim Fulton jim at zope.com
Thu May 27 15:06:30 EDT 2010


Log message for revision 112785:
  Added support for storages with transformed (e.g. compressed) data
  records.
  

Changed:
  U   zc.zodbdgc/trunk/src/zc/zodbdgc/README.test
  U   zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt
  U   zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py
  U   zc.zodbdgc/trunk/src/zc/zodbdgc/tests.py

-=-
Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/README.test
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/README.test	2010-05-27 19:06:27 UTC (rev 112784)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/README.test	2010-05-27 19:06:29 UTC (rev 112785)
@@ -183,7 +183,6 @@
 
 Save databases for later:
 
-    >>> import shutil
     >>> for n in range(1, 4):
     ...     shutil.copyfile('%s.fs' % n, '%s.fs-2' %n)
     >>> shutil.copytree('1.blobs', '1.blobs-2')
@@ -248,8 +247,10 @@
 We can use separate databases for the analysis and update.
 First restore the databases.
 
+    >>> import os
     >>> for n in range(1, 4):
-    ...     shutil.copyfile('%s.fs-2' % n, '%s.fs' %n)
+    ...     shutil.copyfile('%s.fs-2' % n, '%s.fs' % n)
+    ...     os.remove('%s.fs.index' % n)
 
 Make a secondary config:
 
@@ -281,28 +282,33 @@
     >>> logging.getLogger().setLevel(logging.INFO)
     >>> logging.getLogger().addHandler(handler)
 
+    >>> import os
+    >>> old_columns = os.environ.get('COLUMNS')
+    >>> os.environ['COLUMNS'] = '70'
     >>> old_prog = sys.argv[0]
-    >>> sys.argv[0] = 'test'
+    >>> sys.argv[0] = 'multi-zodb-gc'
     >>> try: zc.zodbdgc.gc_command([])
     ... except SystemExit: pass
-    Usage: test [options] config1 [config2]
+    Usage: multi-zodb-gc [Options] config1 [config2]
     <BLANKLINE>
     Options:
       -h, --help            show this help message and exit
-      -d DAYS, --days=DAYS  Number of trailing days (defaults to 1) to treat as
-                            non-garbage
+      -d DAYS, --days=DAYS  Number of trailing days (defaults to 1) to
+                            treat as non-garbage
       -f FS, --file-storage=FS
-                            name=path, use the given file storage path for
-                            analysis of the.named database
+                            name=path, use the given file storage path
+                            for analysis of the.named database
       -i IGNORE, --ignore-database=IGNORE
-                            Ignore references to the given database name.
+                            Ignore references to the given database
+                            name.
       -l LEVEL, --log-level=LEVEL
                             The logging level. The default is WARNING.
+      -u UNTRANSFORM, --untransform=UNTRANSFORM
+                            Funciion (module:expr) used to untransform
+                            data records in files identified using the
+                            -file-storage/-f option
 
     >>> bad2 = zc.zodbdgc.gc_command(['-d2', 'config', 'config2'])
-    Ignoring index for 1.fs
-    Ignoring index for 2.fs
-    Ignoring index for 3.fs
     Using secondary configuration, config2, for analysis
     db1: roots
     db1: recent
@@ -416,14 +422,15 @@
     >>> logging.basicConfig = old_basicConfig
     >>> sys.argv[:] = old_argv
 
->>> try: zc.zodbdgc.check_command([])
-... except SystemExit: pass
-Usage: test [options] config
-<BLANKLINE>
-Options:
-  -h, --help            show this help message and exit
-  -r REFDB, --references-filestorage=REFDB
-                        The name of a file-storage to save reference info in.
+    >>> try: zc.zodbdgc.check_command([])
+    ... except SystemExit: pass
+    Usage: multi-zodb-gc [Options] config
+    <BLANKLINE>
+    Options:
+      -h, --help            show this help message and exit
+      -r REFDB, --references-filestorage=REFDB
+                            The name of a file-storage to save reference
+                            info in.
 
     >>> zc.zodbdgc.check_command(['config'])
 
@@ -431,12 +438,10 @@
 
     >>> for n in range(1, 4):
     ...     shutil.copyfile('%s.fs' % n, '%s.fs-2' %n)
+    ...     os.remove('%s.fs-2.index' % n)
 
     >>> sorted(zc.zodbdgc.gc_command(['config', 'config2']).iterator())
     Using secondary configuration, config2, for analysis
-    Ignoring index for 1.fs-2
-    Ignoring index for 2.fs-2
-    Ignoring index for 3.fs-2
     db1: roots
     db1: recent
     db2: roots
@@ -479,10 +484,8 @@
 
     >>> for n in range(1, 4):
     ...     shutil.copyfile('%s.fs-save' % n, '%s.fs-2' %n)
+    ...     os.remove('%s.fs-2.index' % n)
     >>> db = ZODB.config.databaseFromFile(open('config2'))
-    Ignoring index for 1.fs-2
-    Ignoring index for 2.fs-2
-    Ignoring index for 3.fs-2
 
     >>> for d in db.databases.values():
     ...     d.pack()
@@ -644,6 +647,8 @@
 
     >>> os.remove('one.fs')
     >>> os.remove('two.fs')
+    >>> os.remove('one.fs.index')
+    >>> os.remove('two.fs.index')
 
 Using file-storage iterators directly
 -------------------------------------
@@ -667,9 +672,6 @@
     ... </zodb>
     ... """)
     >>> db = ZODB.config.databaseFromFile(open('config'))
-    Ignoring index for one.fs
-    Ignoring index for two.fs
-
     >>> conn = db.open()
     >>> conn.get_connection('db2').root.x = C()
     >>> transaction.commit()
@@ -716,3 +718,8 @@
     >>> logging.getLogger().removeHandler(handler)
     >>> time.time = time_time
     >>> sys.argv[0] = old_prog
+
+    >>> if old_columns is None:
+    ...     del os.environ['COLUMNS']
+    ... else:
+    ...     os.environ['COLUMNS'] = old_columns

Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt	2010-05-27 19:06:27 UTC (rev 112784)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt	2010-05-27 19:06:29 UTC (rev 112785)
@@ -72,10 +72,16 @@
 Change History
 ==============
 
+0.6.0 2010-05-27
+----------------
+
+- Added support for storages with transformed (e.g. compressed) data
+  records.
+
 0.5.0 2009-11-10
 ----------------
 
-- Fixed a bug in the delet throttle that made it delete objects way
+- Fixed a bug in the delay throttle that made it delete objects way
   too slowly.
 
 0.4.0 2009-09-08

Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py	2010-05-27 19:06:27 UTC (rev 112784)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py	2010-05-27 19:06:29 UTC (rev 112785)
@@ -44,7 +44,7 @@
 logger = logging.getLogger(__name__)
 log_format = "%(asctime)s %(name)s %(levelname)s: %(message)s"
 
-def gc_command(args=None):
+def gc_command(args=None, ptid=None):
     if args is None:
         args = sys.argv[1:]
         level = logging.WARNING
@@ -65,6 +65,10 @@
     parser.add_option(
         '-l', '--log-level', dest='level',
         help='The logging level. The default is WARNING.')
+    parser.add_option(
+        '-u', '--untransform', dest='untransform',
+        help='Funciion (module:expr) used to untransform data records in'
+        ' files identified using the -file-storage/-f option')
 
     options, args = parser.parse_args(args)
 
@@ -85,20 +89,38 @@
             level = getattr(logging, level)
         logging.basicConfig(level=level, format=log_format)
 
+    untransform = options.untransform
+    if untransform is not None:
+        mod, expr = untransform.split(':', 1)
+        untransform = eval(expr, __import__(mod, {}, {}, ['*']).__dict__)
+
     return gc(args[0], options.days, options.ignore or (), conf2=conf2,
-              fs=dict(o.split('=') for o in options.fs or ()))
+              fs=dict(o.split('=') for o in options.fs or ()),
+              untransform=untransform, ptid=ptid)
 
 
-def gc(conf, days=1, ignore=(), conf2=None, fs=()):
+def gc(conf, days=1, ignore=(), conf2=None, fs=(), untransform=None, ptid=None):
     close = []
     try:
-        return gc_(close, conf, days, ignore, conf2, fs)
+        return gc_(close, conf, days, ignore, conf2, fs, untransform, ptid)
     finally:
         for db in close:
             for db in db.databases.itervalues():
                 db.close()
 
-def gc_(close, conf, days, ignore, conf2, fs):
+def gc_(close, conf, days, ignore, conf2, fs, untransform, ptid):
+
+    FileIterator = ZODB.FileStorage.FileIterator
+    if untransform is not None:
+        def FileIterator(*args):
+            def transit(trans):
+                for record in trans:
+                    if record.data:
+                        record.data = untransform(record.data)
+                    yield record
+            for t in ZODB.FileStorage.FileIterator(*args):
+                yield transit(t)
+
     db1 = ZODB.config.databaseFromFile(open(conf))
     close.append(db1)
     if conf2 is None:
@@ -113,16 +135,19 @@
     databases = db2.databases
     storages = sorted((name, d.storage) for (name, d) in databases.items())
 
-    ptid = repr(
-        ZODB.TimeStamp.TimeStamp(*time.gmtime(time.time() - 86400*days)[:6])
-        )
+    if ptid is None:
+        ptid = repr(
+            ZODB.TimeStamp.TimeStamp(
+                *time.gmtime(time.time() - 86400*days)[:6]
+                ))
 
     good = oidset(databases)
     bad = Bad(databases)
     deleted = oidset(databases)
 
     for name, storage in storages:
-        logger.info("%s: roots", name)
+        fsname = name or ''
+        logger.info("%s: roots", fsname)
         # Make sure we can get the roots
         data, s = storage.load(z64, '')
         good.insert(name, z64)
@@ -134,8 +159,8 @@
             # All non-deleted new records are good
             logger.info("%s: recent", name)
 
-            if name in fs:
-                it = ZODB.FileStorage.FileIterator(fs[name], ptid)
+            if fsname in fs:
+                it = FileIterator(fs[fsname], ptid)
             else:
                 it = storage.iterator(ptid)
 
@@ -164,8 +189,8 @@
                             good.remove(name, oid)
 
         # Now iterate over older records
-        if name in fs:
-            it = ZODB.FileStorage.FileIterator(fs[name], None, ptid)
+        if fsname in fs:
+            it = FileIterator(fs[fsname], None, ptid)
         else:
             it = storage.iterator(None, ptid)
 

Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/tests.py
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/tests.py	2010-05-27 19:06:27 UTC (rev 112784)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/tests.py	2010-05-27 19:06:29 UTC (rev 112785)
@@ -15,12 +15,106 @@
 
 $Id$
 """
-from zope.testing import doctest, setupstack, renormalizing
+from zope.testing import setupstack, renormalizing
+import binascii
+import doctest
 import re
+import time
 import unittest
 
+def untransform(data):
+    if data[:2] == '.h':
+        data = binascii.a2b_hex(data[2:])
+    return data
+
+def hex_pack(self, pack_time, referencesf, *args):
+    def refs(p, oids=None):
+        if p and p[:2] == '.h':
+            p = binascii.a2b_hex(p[2:])
+        return referencesf(p, oids)
+    return self.base.pack(pack_time, refs, *args)
+
+def test_untransform():
+    r"""
+If a file storage is transformed, you can use the --untransform/-u
+option with the --file-storage/-f option to specify a function to
+untransform data records when accessing the file-storage file directly.
+
+.. XXX whimper hexstorage's pack is broken.
+
+    >>> import ZODB.tests.hexstorage
+    >>> ZODB.tests.hexstorage.HexStorage.pack = hex_pack
+
+First, open a database and create some data:
+
+    >>> open('config', 'w').write('''
+    ... %import ZODB.tests
+    ... <zodb>
+    ...   <hexstorage>
+    ...     <filestorage>
+    ...       path data.fs
+    ...       pack-gc false
+    ...     </filestorage>
+    ...   </hexstorage>
+    ... </zodb>
+    ... ''')
+    >>> import ZODB.config
+    >>> db = ZODB.config.databaseFromFile(open('config'))
+    >>> conn = db.open()
+    >>> for i in range(9):
+    ...     conn.root()[i] = conn.root().__class__()
+    ...     conn.transaction_manager.commit()
+
+Now, we'll try to make some garbage:
+
+    >>> for i in range(3):
+    ...     del conn.root()[i]
+    ...     conn.transaction_manager.commit()
+
+The records we just deleted aren't garbage yet, because there are
+revisions pointing to them.
+
+    >>> db.pack()
+
+Now there aren't. :)
+
+    >>> for i in range(3, 6):
+    ...     del conn.root()[i]
+    ...     conn.transaction_manager.commit()
+
+Save the current tid so we can give it to gc:
+
+    >>> ptid = conn.root()._p_serial
+
+Delete some more:
+
+    >>> for i in range(6, 9):
+    ...     del conn.root()[i]
+    ...     conn.transaction_manager.commit()
+
+    >>> len(db.storage)
+    10
+    >>> db.close()
+
+Now GC. We should lose 3 objects:
+
+    >>> import zc.zodbdgc, pprint
+    >>> pprint.pprint(list(zc.zodbdgc.gc_command(
+    ...   '-f=data.fs -uzc.zodbdgc.tests:untransform config'
+    ...   .split(), ptid).iterator()))
+    [('', '\x00\x00\x00\x00\x00\x00\x00\x01'),
+     ('', '\x00\x00\x00\x00\x00\x00\x00\x02'),
+     ('', '\x00\x00\x00\x00\x00\x00\x00\x03')]
+
+    >>> db = ZODB.config.databaseFromFile(open('config'))
+    >>> db.pack()
+    >>> len(db.storage)
+    7
+    >>> db.close()
+    """
+
 def test_suite():
-    return unittest.TestSuite((
+    suite = unittest.TestSuite((
         doctest.DocFileSuite(
             'README.test', 'oidset.test',
             setUp=setupstack.setUpDirectory, tearDown = setupstack.tearDown,
@@ -30,4 +124,13 @@
                 ]),
             ),
         ))
+    try:
+        import ZODB.tests.hexstorage
+    except ImportError:
+        pass
+    else:
+        suite.addTest(doctest.DocTestSuite(
+            setUp=setupstack.setUpDirectory, tearDown = setupstack.tearDown,
+            ))
+    return suite
 



More information about the checkins mailing list