[Checkins] SVN: zc.zodbdgc/trunk/ - Added an option to ignore references to some databases.

Jim Fulton jim at zope.com
Mon Jun 15 14:15:51 EDT 2009


Log message for revision 101023:
  - Added an option to ignore references to some databases.
  

Changed:
  U   zc.zodbdgc/trunk/README.txt
  U   zc.zodbdgc/trunk/src/zc/zodbdgc/README.test
  U   zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt
  U   zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py

-=-
Modified: zc.zodbdgc/trunk/README.txt
===================================================================
--- zc.zodbdgc/trunk/README.txt	2009-06-15 18:10:52 UTC (rev 101022)
+++ zc.zodbdgc/trunk/README.txt	2009-06-15 18:15:51 UTC (rev 101023)
@@ -1,11 +1 @@
-***********************
-Title Here
-***********************
-
-Changes
-*******
-
-0.1 (yyyy-mm-dd)
-================
-
-Initial release
+See src/zc/zodbdgc/README.txt

Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/README.test
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/README.test	2009-06-15 18:10:52 UTC (rev 101022)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/README.test	2009-06-15 18:15:51 UTC (rev 101023)
@@ -291,6 +291,8 @@
       -h, --help            show this help message and exit
       -d DAYS, --days=DAYS  Number of trailing days (defaults to 1) to treat as
                             non-garbage
+      -i IGNORE, --ignore-database=IGNORE
+                            Ignore references to the given database name.
       -l LEVEL, --log-level=LEVEL
                             The logging level. The default is WARNING.
 
@@ -511,6 +513,70 @@
     !!! db2 2 ?
     POSKeyError: 0x02
 
+
+Ignoring databases
+------------------
+
+Sometimes, when doing garbage collection, you want to ignore references
+to some databases, for example databases missing from the configuration.
+
+    >>> db = ZODB.config.databaseFromString("""
+    ... <zodb db1>
+    ...     <filestorage>
+    ...         path one.fs
+    ...         pack-gc false
+    ...     </filestorage>
+    ... </zodb>
+    ... <zodb db2>
+    ...     <filestorage>
+    ...         path two.fs
+    ...         pack-gc false
+    ...     </filestorage>
+    ... </zodb>
+    ... """)
+
+    >>> conn = db.open()
+    >>> conn.get_connection('db2').root.x = C()
+    >>> transaction.commit()
+    >>> conn.root.x = C()
+    >>> conn.root.x.x = conn.get_connection('db2').root.x
+    >>> transaction.commit()
+    >>> conn.root.a = C()
+    >>> transaction.commit()
+    >>> conn.root.b = C()
+    >>> conn.root.a.b = conn.root.b
+    >>> conn.root.b.a = conn.root.a
+    >>> transaction.commit()
+    >>> del conn.root.a
+    >>> del conn.root.b
+    >>> transaction.commit()
+
+    >>> now += 2*86400
+
+    >>> db.pack()
+
+    >>> _ = [db.close() for db in db.databases.itervalues()]
+
+    >>> open('config', 'w').write("""
+    ... <zodb db1>
+    ...     <filestorage>
+    ...         path one.fs
+    ...     </filestorage>
+    ... </zodb>
+    ... """)
+
+    >>> sorted(zc.zodbdgc.gc_command(['config']).iterator())
+    Traceback (most recent call last):
+    ...
+    KeyError: 'db2'
+
+    >>> sorted(zc.zodbdgc.gc_command(['-idb2', 'config']).iterator())
+    ... # doctest: +NORMALIZE_WHITESPACE
+    Removed 2 objects from db1
+    [('db1', '\x00\x00\x00\x00\x00\x00\x00\x02'),
+     ('db1', '\x00\x00\x00\x00\x00\x00\x00\x03')]
+
+
 .. cleanup
 
     >>> logging.getLogger().setLevel(old_level)

Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt	2009-06-15 18:10:52 UTC (rev 101022)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt	2009-06-15 18:15:51 UTC (rev 101023)
@@ -61,3 +61,18 @@
 
 You can run the script with the ``--help`` option to get usage
 information.
+
+Change History
+==============
+
+0.2.0 2009-06-15
+----------------
+
+- Added an option to ignore references to some databases.
+
+- Fixed a bug in handling of the logging level option.
+
+0.1.0 2009-06-11
+----------------
+
+Initial release

Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py	2009-06-15 18:10:52 UTC (rev 101022)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py	2009-06-15 18:15:51 UTC (rev 101023)
@@ -44,13 +44,24 @@
 
 logger = logging.getLogger(__name__)
 
-def gc(conf, days=1, conf2=None, batch_size=10000):
+def gc(conf, days=1, ignore=(), conf2=None, batch_size=10000):
+    close = []
+    try:
+        return gc_(close, conf, days, ignore, conf2, batch_size)
+    finally:
+        for db in close:
+            for db in db.databases.itervalues():
+                db.close()
+
+def gc_(close, conf, days, ignore, conf2, batch_size):
     db1 = ZODB.config.databaseFromFile(open(conf))
+    close.append(db1)
     if conf2 is None:
         db2 = db1
     else:
         logger.info("Using secondary configuration, %s, for analysis", conf2)
         db2 = ZODB.config.databaseFromFile(open(conf2))
+        close.append(db1)
         if set(db1.databases) != set(db2.databases):
             raise ValueError("primary and secondary databases don't match.")
 
@@ -71,7 +82,7 @@
         # Make sure we can get the roots
         data, s = storage.load(z64, '')
         good.insert(name, z64)
-        for ref in getrefs(data, name):
+        for ref in getrefs(data, name, ignore):
             good.insert(*ref)
 
         if days:
@@ -87,7 +98,7 @@
                         good.insert(name, oid)
 
                         # and anything they reference
-                        for ref in getrefs(data, name):
+                        for ref in getrefs(data, name, ignore):
                             if not deleted.has(*ref):
                                 good.insert(*ref)
                     else:
@@ -105,11 +116,11 @@
                     if deleted.has(name, oid):
                         continue
                     if good.has(name, oid):
-                        for ref in getrefs(data, name):
+                        for ref in getrefs(data, name, ignore):
                             if deleted.has(*ref):
                                 continue
                             if good.insert(*ref) and bad.has(*ref):
-                                bad_to_good(storages, bad, good, *ref)
+                                bad_to_good(storages, ignore, bad, good, *ref)
                     else:
                         bad.insert(name, oid)
                 else:
@@ -123,6 +134,7 @@
     if conf2 is not None:
         for db in db2.databases.itervalues():
             db.close()
+        close.pop()
 
     # Now, we have the garbage in bad.  Remove it.
     for name, db in db1.databases.iteritems():
@@ -149,14 +161,13 @@
         else:
             storage.tpc_abort(t)
             t.abort()
-        db.close()
 
     return bad
 
 def bad_path(baddir, name, oid):
     return os.path.join(baddir, name, base64.urlsafe_b64encode(oid))
 
-def bad_to_good(storages, bad, good, name, oid):
+def bad_to_good(storages, ignore, bad, good, name, oid):
 
     to_do = [(name, oid)]
     while to_do:
@@ -166,25 +177,26 @@
 
         for h in storage.history(oid, size=1<<99):
             data = storage.loadSerial(oid, h['tid'])
-            for ref in getrefs(data, name):
+            for ref in getrefs(data, name, ignore):
                 if good.insert(*ref) and bad.has(*ref):
                     to_do.append(ref)
 
-def getrefs(p, rname):
+def getrefs(p, rname, ignore):
     refs = []
     u = cPickle.Unpickler(cStringIO.StringIO(p))
     u.persistent_load = refs
     u.noload()
     u.noload()
     for ref in refs:
-        name = rname
         if isinstance(ref, tuple):
             yield rname, ref[0]
         elif isinstance(ref, str):
             yield rname, ref
         else:
             assert isinstance(ref, list)
-            yield ref[1][:2]
+            ref = ref[1]
+            if ref[0] not in ignore:
+                yield ref[:2]
 
 class oidset(dict):
 
@@ -258,6 +270,9 @@
         '-d', '--days', dest='days', type='int', default=1,
         help='Number of trailing days (defaults to 1) to treat as non-garbage')
     parser.add_option(
+        '-i', '--ignore-database', dest='ignore', action='append',
+        help='Ignore references to the given database name.')
+    parser.add_option(
         '-l', '--log-level', dest='level',
         help='The logging level. The default is WARNING.')
 
@@ -276,7 +291,7 @@
             level = getattr(logging, level)
         logging.basicConfig(level=level)
 
-    return gc(args[0], options.days, *args[1:])
+    return gc(args[0], options.days, options.ignore or (), *args[1:])
 
 
 
@@ -377,7 +392,7 @@
             print "%s: %s" % (t.__name__, v)
             continue
 
-        for ref in getrefs(p, name):
+        for ref in getrefs(p, name, ()):
             if (ref[0] != name) and not databases[name].xrefs:
                 print 'bad xref', ref[0], u64(ref[1]), name, u64(oid)
 



More information about the Checkins mailing list