[Checkins] SVN: zc.FileStorage/trunk/ - Added a utility script to make snapshot-in-time copies of file storages

Jim Fulton jim at zope.com
Thu Aug 25 11:32:23 EDT 2011


Log message for revision 122692:
  - Added a utility script to make snapshot-in-time copies of file storages
    containing only current records as of a given time.
  

Changed:
  U   zc.FileStorage/trunk/CHANGES.txt
  U   zc.FileStorage/trunk/setup.py
  U   zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
  A   zc.FileStorage/trunk/src/zc/FileStorage/snapshotintime.py
  U   zc.FileStorage/trunk/src/zc/FileStorage/tests.py

-=-
Modified: zc.FileStorage/trunk/CHANGES.txt
===================================================================
--- zc.FileStorage/trunk/CHANGES.txt	2011-08-25 09:33:37 UTC (rev 122691)
+++ zc.FileStorage/trunk/CHANGES.txt	2011-08-25 15:32:22 UTC (rev 122692)
@@ -1,6 +1,9 @@
 after 1.2.0
 ===========
 
+- Added a utility script to make snapshote in time copy of file storages
+  containing only current records as of a given time.
+
 - Added test extra to declare test dependency on ``zope.testing``.
 
 - Using Python's ``doctest`` module instead of depreacted

Modified: zc.FileStorage/trunk/setup.py
===================================================================
--- zc.FileStorage/trunk/setup.py	2011-08-25 09:33:37 UTC (rev 122691)
+++ zc.FileStorage/trunk/setup.py	2011-08-25 15:32:22 UTC (rev 122692)
@@ -2,6 +2,10 @@
 
 from setuptools import setup, find_packages
 from distutils.core import Extension
+entry_points = """
+[console_scripts]
+snapshot-in-time = zc.FileStorage.snapshotintime:main
+"""
 
 setup(
     name = name,
@@ -27,4 +31,5 @@
             ]),
     include_package_data = True,
     zip_safe = False,
+    entry_points = entry_points,
     )

Modified: zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/__init__.py	2011-08-25 09:33:37 UTC (rev 122691)
+++ zc.FileStorage/trunk/src/zc/FileStorage/__init__.py	2011-08-25 15:32:22 UTC (rev 122692)
@@ -284,8 +284,9 @@
 
 class PackProcess(FileStoragePacker):
 
-    def __init__(self, path, stop, current_size, blob_dir,
-                 sleep, transform, untransform):
+    def __init__(self, path, stop, current_size,
+                 blob_dir=None, sleep=0, transform=None, untransform=None,
+                 ):
         self._name = path
         # We open our own handle on the storage so that much of pack can
         # proceed in parallel.  It's important to close this file at every
@@ -324,7 +325,7 @@
         self._freecache(pos)
         return FileStoragePacker._read_txn_header(self, pos, tid)
 
-    def pack(self):
+    def pack(self, snapshot_in_time_path=None):
         packed, index, packpos = self.buildPackIndex(self._stop, self.file_end)
         logging.info('initial scan %s objects at %s', len(index), packpos)
         if packed:
@@ -334,9 +335,15 @@
             return
 
         logging.info('copy to pack time')
-        output = open(self._name + ".pack", "w+b")
+        output = open(snapshot_in_time_path or (self._name + ".pack"), "w+b")
         self._freeoutputcache = _freefunc(output)
         index, new_pos = self.copyToPacktime(packpos, index, output)
+        if snapshot_in_time_path:
+            # We just want a snapshot in time, containing current records as
+            # of that time.
+            index.save(packpos, snapshot_in_time_path+'.index')
+            return
+
         if new_pos == packpos:
             # pack didn't free any data.  there's no point in continuing.
             self._file.close()

Added: zc.FileStorage/trunk/src/zc/FileStorage/snapshotintime.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/snapshotintime.py	                        (rev 0)
+++ zc.FileStorage/trunk/src/zc/FileStorage/snapshotintime.py	2011-08-25 15:32:22 UTC (rev 122692)
@@ -0,0 +1,82 @@
+##############################################################################
+#
+# Copyright (c) 2005-2011 Zope Foundation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
+import os
+import re
+import sys
+import zc.FileStorage
+import ZODB.TimeStamp
+
+usage = """Usage: %s [input-path utc-snapshot-time output-path]
+
+Make a point-in time snapshot of a file-storage data file containing
+just the current records as of the given time.  The resulting file can
+be used as a basis of a demo storage.
+
+If the output file isn't given, then a file name will be generated
+based on the input file name and the utc-snapshot-time.
+
+If the utc-snapshot-time is ommitted, then the current time will be used.
+
+Note: blobs (if any) aren't copied.
+
+The UTC time is a string of the form: YYYY-MM-DDTHH:MM:SS.  The time
+conponents are optional.  The time defaults to midnight, UTC.
+"""
+
+
+
+def main(args=None):
+    if args is None:
+        args = sys.argv[1:]
+
+    if len(args) < 2 or len(args) > 3:
+        print >>sys.stderr, usage % sys.argv[0]
+        sys.exit(1)
+
+
+    try:
+        if len(args) > 2:
+            inpath, stop, outpath = args
+        else:
+            inpath, stop = args
+            if inpath.endswith('.fs'):
+                outpath = inpath[:-3]+stop+'.fs'
+            else:
+                outpath = inpath+stop
+    except ValueError:
+        print >>sys.stderr, usage % sys.argv[0]
+        sys.exit(1)
+
+    if not os.path.exists(inpath):
+        print >>sys.stderr, inpath, 'Does not exist.'
+        sys.exit(1)
+
+    try:
+        date, time = (stop.split('T')+[''])[:2]
+        year, month, day = map(int, date.split('-'))
+        if time:
+            hour, minute, second = (map(int, time.split(':'))+[0,0])[:3]
+        else:
+            hour = minute = second = 0
+        stop = repr(
+            ZODB.TimeStamp.TimeStamp(year, month, day, hour, minute, second)
+            )
+    except Exception:
+        print >>sys.stderr, 'Bad date-time:', stop
+        sys.exit(1)
+
+    zc.FileStorage.PackProcess(inpath, stop, os.stat(inpath).st_size
+                               ).pack(snapshot_in_time_path=outpath)
+


Property changes on: zc.FileStorage/trunk/src/zc/FileStorage/snapshotintime.py
___________________________________________________________________
Added: svn:keywords
   + Id
Added: svn:eol-style
   + native

Modified: zc.FileStorage/trunk/src/zc/FileStorage/tests.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/tests.py	2011-08-25 09:33:37 UTC (rev 122691)
+++ zc.FileStorage/trunk/src/zc/FileStorage/tests.py	2011-08-25 15:32:22 UTC (rev 122692)
@@ -374,6 +374,155 @@
 
     """
 
+def snapshot_in_time():
+    r"""We can take a snapshot in time
+
+    This is a copy of a database as of a given time and containing
+    only current records as of that time.
+
+    First, we'll hack time:
+
+    >>> import logging
+    >>> exec(time_hack_template)
+
+    Next, we'll create a file storage with some data:
+
+    >>> import ZODB.FileStorage
+
+    >>> conn = ZODB.connection('data.fs')
+    >>> for i in range(5):
+    ...     conn.root()[i] = conn.root().__class__()
+    ...     transaction.commit()
+    >>> for i in range(5):
+    ...     conn.root()[i].x = 0
+    ...     transaction.commit()
+    >>> for j in range(10):
+    ...     for i in range(5):
+    ...         conn.root()[i].x += 1
+    ...         transaction.commit()
+
+    >>> import ZODB.TimeStamp
+    >>> copy_time = ZODB.TimeStamp.TimeStamp(
+    ...    conn.db().storage.lastTransaction())
+
+    >>> for j in range(10):
+    ...     for i in range(5):
+    ...         conn.root()[i].x += 1
+    ...         transaction.commit()
+
+    We'll comput a hash of the old file contents:
+
+    >>> import hashlib
+    >>> hash = hashlib.sha1(open('data.fs').read()).digest()
+
+    OK, we have a database with a bunch of revisions.
+    Now, let's make a snapshot:
+
+    >>> import zc.FileStorage.snapshotintime
+
+    >>> copy_time = '%s-%s-%sT%s:%s:%s' % (
+    ...   copy_time.year(), copy_time.month(), copy_time.day(),
+    ...   copy_time.hour(), copy_time.minute(), int(copy_time.second()))
+    >>> zc.FileStorage.snapshotintime.main(
+    ...    ['data.fs', copy_time, 'snapshot.fs'])
+
+    >>> sorted(os.listdir('.')) # doctest: +NORMALIZE_WHITESPACE
+    ['data.fs', 'data.fs.index', 'data.fs.lock', 'data.fs.tmp',
+    'snapshot.fs', 'snapshot.fs.index']
+
+    The orginal file is unchanged:
+
+    >>> hashlib.sha1(open('data.fs').read()).digest() == hash
+    True
+
+    The new file has just the final records:
+
+    >>> for t in ZODB.FileStorage.FileIterator('snapshot.fs'):
+    ...     print ZODB.TimeStamp.TimeStamp(t.tid)
+    ...     for record in t:
+    ...         print `record.oid`
+    2010-03-09 20:28:05.000000
+    '\x00\x00\x00\x00\x00\x00\x00\x00'
+    2010-03-09 20:28:56.000000
+    '\x00\x00\x00\x00\x00\x00\x00\x01'
+    2010-03-09 20:28:57.000000
+    '\x00\x00\x00\x00\x00\x00\x00\x02'
+    2010-03-09 20:28:58.000000
+    '\x00\x00\x00\x00\x00\x00\x00\x03'
+    2010-03-09 20:28:59.000000
+    '\x00\x00\x00\x00\x00\x00\x00\x04'
+    2010-03-09 20:29:00.000000
+    '\x00\x00\x00\x00\x00\x00\x00\x05'
+
+    Of course, we can open the copy:
+
+    >>> conn.close()
+    >>> conn = ZODB.connection('snapshot.fs')
+    >>> sorted(conn.root().iterkeys()) == range(5)
+    True
+
+    >>> for i in range(5):
+    ...     if conn.root()[i].x != 10:
+    ...         print 'oops', conn.root()[i].x
+
+    >>> time.time, time.sleep = time_time, time_sleep
+
+    We get usage if the wrong number or form of arguments are given:
+
+    >>> import sys
+    >>> stderr = sys.stderr
+    >>> sys.stderr = sys.stdout
+    >>> argv0 = sys.argv[0]
+    >>> sys.argv[0] = 'snapshot-in-time'
+    >>> try: zc.FileStorage.snapshotintime.main([])
+    ... except SystemExit, v: pass
+    ... else: print 'oops'
+    Usage: snapshot-in-time [input-path utc-snapshot-time output-path]
+    <BLANKLINE>
+    Make a point-in time snapshot of a file-storage data file containing
+    just the current records as of the given time.  The resulting file can
+    be used as a basis of a demo storage.
+    <BLANKLINE>
+    If the output file isn't given, then a file name will be generated
+    based on the input file name and the utc-snapshot-time.
+    <BLANKLINE>
+    If the utc-snapshot-time is ommitted, then the current time will be used.
+    <BLANKLINE>
+    Note: blobs (if any) aren't copied.
+    <BLANKLINE>
+    The UTC time is a string of the form: YYYY-MM-DDTHH:MM:SS.  The time
+    conponents are optional.  The time defaults to midnight, UTC.
+    <BLANKLINE>
+
+    >>> sys.argv[0] = argv0
+
+    >>> try: zc.FileStorage.snapshotintime.main(['xxx', 'xxx', 'xxx'])
+    ... except SystemExit, v: pass
+    ... else: print 'oops'
+    xxx Does not exist.
+
+    >>> try: zc.FileStorage.snapshotintime.main(['data.fs', 'xxx', 'xxx'])
+    ... except SystemExit, v: pass
+    ... else: print 'oops'
+    Bad date-time: xxx
+
+    >>> sys.stderr = stderr
+
+    If you omit the output file, a file name will be generated based on the
+    time:
+
+    >>> zc.FileStorage.snapshotintime.main(['data.fs', copy_time])
+
+    >>> sorted(os.listdir('.')) # doctest: +NORMALIZE_WHITESPACE
+    ['data.fs', 'data.fs.index', 'data.fs.lock', 'data.fs.tmp',
+     'data2010-3-9T20:29:0.fs', 'data2010-3-9T20:29:0.fs.index',
+     'snapshot.fs', 'snapshot.fs.index', 'snapshot.fs.lock', 'snapshot.fs.tmp']
+
+    >>> open('data2010-3-9T20:29:0.fs').read() == open('snapshot.fs').read()
+    True
+
+    """
+
 def hexer(data):
     return (data[:2] == '.h') and data or ('.h'+data.encode('hex'))
 def unhexer(data):



More information about the checkins mailing list