[Checkins] SVN: zc.FileStorage/trunk/ - Now (ZODB 3.10) that the filestorage packer option can take

Jim Fulton jim at zope.com
Fri May 21 15:23:32 EDT 2010


Log message for revision 112638:
  - Now (ZODB 3.10) that the filestorage packer option can take
    expressions, expose a packer factory, ``Packer``.
  
  - In addition to a sleep argument, to control resting between
    transactions, provide transform and untransform options to provide
    record-transformation functions.  The primary use case for this is
    to use packing to compress storages not previously compressed.
  

Changed:
  U   zc.FileStorage/trunk/CHANGES.txt
  U   zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
  U   zc.FileStorage/trunk/src/zc/FileStorage/tests.py

-=-
Modified: zc.FileStorage/trunk/CHANGES.txt
===================================================================
--- zc.FileStorage/trunk/CHANGES.txt	2010-05-21 18:48:48 UTC (rev 112637)
+++ zc.FileStorage/trunk/CHANGES.txt	2010-05-21 19:23:32 UTC (rev 112638)
@@ -1,4 +1,15 @@
 
+1.2.0 (2010-05-21)
+==================
+
+- Now (ZODB 3.10) that the filestorage packer option can take
+  expressions, expose a packer factory, ``Packer``.
+
+- In addition to a sleep argument, to control resting between
+  transactions, provide transform and untransform options to provide
+  record-transformation functions.  The primary use case for this is
+  to use packing to compress storages not previously compressed.
+
 1.1.0 (2010-03-10)
 ==================
 

Modified: zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/__init__.py	2010-05-21 18:48:48 UTC (rev 112637)
+++ zc.FileStorage/trunk/src/zc/FileStorage/__init__.py	2010-05-21 19:23:32 UTC (rev 112638)
@@ -30,23 +30,27 @@
 
 GIG = 1<<30
 
-def meta_packer(sleep):
+def Packer(sleep=0, transform=None, untransform=None):
     def packer(storage, referencesf, stop, gc):
-        return FileStoragePacker(storage, stop, sleep).pack()
+        return FileStoragePacker(storage, stop, sleep, transform, untransform
+                                 ).pack()
     return packer
 
-packer = meta_packer(0)
-packer1 = meta_packer(1)
-packer2 = meta_packer(2)
-packer4 = meta_packer(3)
-packer8 = meta_packer(4)
+packer  = Packer(0)
+packer1 = Packer(1)
+packer2 = Packer(2)
+packer4 = Packer(3)
+packer8 = Packer(4)
 
 class FileStoragePacker(FileStorageFormatter):
 
-    def __init__(self, storage, stop, sleep=0):
+    def __init__(self, storage, stop,
+                 sleep=0, transform=None, untransform=None):
         self.storage = storage
         self._name = path = storage._file.name
         self.sleep = sleep
+        self.transform_option = transform
+        self.untransform_option = untransform
 
         # We open our own handle on the storage so that much of pack can
         # proceed in parallel.  It's important to close this file at every
@@ -84,6 +88,8 @@
             syspath = sys.path,
             blob_dir = self.storage.blob_dir,
             sleep = self.sleep,
+            transform = self.transform_option,
+            untransform = self.untransform_option,
             ))
         for name in 'error', 'log':
             name = self._name+'.pack'+name
@@ -163,6 +169,7 @@
         finally:
             self._file.close()
 
+    transform = None
     def _copyNewTrans(self, input_pos, output, index,
                       acquire=None, release=None):
         tindex = {}
@@ -171,6 +178,7 @@
         if release is not None:
             release()
 
+        transform = self.transform
         start_time = time.time()
         output_tpos = output.tell()
         copier.setTxnPos(output_tpos)
@@ -190,6 +198,8 @@
                 if h.back:
                     prev_txn = self.getTxnFromData(h.oid, h.back)
 
+            if data and (transform is not None):
+                data = transform(data)
             copier.copy(h.oid, h.tid, data, prev_txn,
                         output_tpos, output.tell())
 
@@ -258,7 +268,8 @@
 
 try:
     packer = zc.FileStorage.PackProcess(%(path)r, %(stop)r, %(size)r,
-                                        %(blob_dir)r, %(sleep)s)
+                                        %(blob_dir)r, %(sleep)s,
+                                        %(transform)r, %(untransform)r)
     packer.pack()
 except Exception, v:
     logging.exception('packing')
@@ -273,7 +284,8 @@
 
 class PackProcess(FileStoragePacker):
 
-    def __init__(self, path, stop, current_size, blob_dir, sleep):
+    def __init__(self, path, stop, current_size, blob_dir,
+                 sleep, transform, untransform):
         self._name = path
         # We open our own handle on the storage so that much of pack can
         # proceed in parallel.  It's important to close this file at every
@@ -297,6 +309,12 @@
 
         self._freecache = _freefunc(self._file)
         self.sleep = sleep
+        if isinstance(transform, str):
+            transform = getglobal(transform)
+        self.transform = transform
+        if isinstance(untransform, str):
+            untransform = getglobal(untransform)
+        self.untransform = untransform
         logging.info('packing to %s, sleep %s',
                      ZODB.TimeStamp.TimeStamp(self._stop),
                      self.sleep)
@@ -393,7 +411,14 @@
         output.write(self._file.read(self._metadata_size))
         new_index = ZODB.fsIndex.fsIndex()
         pack_blobs = self.pack_blobs
-        is_blob_record = ZODB.blob.is_blob_record
+        transform = self.transform
+        untransform = self.untransform
+        if untransform is None:
+            is_blob_record = ZODB.blob.is_blob_record
+        else:
+            _is_blob_record = ZODB.blob.is_blob_record
+            def is_blob_record(data):
+                return _is_blob_record(untransform(data))
 
         log_pos = pos
 
@@ -456,6 +481,9 @@
                     # to write the data in the new record.
                     data = self.fetchBackpointer(h.oid, h.back) or ''
 
+                if transform is not None:
+                    data = self.transform(data)
+
                 h.prev = 0
                 h.back = 0
                 h.plen = len(data)
@@ -521,7 +549,11 @@
             time.sleep((time.time()-start_time)*self.sleep)
         return pos
 
+def getglobal(s):
+    module, expr = s.split(':', 1)
+    return eval(expr, __import__(module, {}, {}, ['*']).__dict__)
 
+
 def _freefunc(f):
     # Return an posix_fadvise-based cache freeer.
 

Modified: zc.FileStorage/trunk/src/zc/FileStorage/tests.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/tests.py	2010-05-21 18:48:48 UTC (rev 112637)
+++ zc.FileStorage/trunk/src/zc/FileStorage/tests.py	2010-05-21 19:23:32 UTC (rev 112638)
@@ -294,6 +294,91 @@
 
     """
 
+def data_transform_and_untransform_hooks():
+r"""The Packer factory takes options to transform and untransform data
+
+This is helpful when data records aren't raw pickles or when you want
+to transform them so that they aren't raw pickles.  To test this,
+we'll take a file storage database and convert it to use the
+ZODB.tests.hexstorage transformation.
+
+    >>> import ZODB.FileStorage
+    >>> db = ZODB.DB(ZODB.FileStorage.FileStorage(
+    ...     'data.fs', blob_dir='blobs',
+    ...     packer=zc.FileStorage.Packer(
+    ...            transform='zc.FileStorage.tests:hexer',
+    ...            untransform='zc.FileStorage.tests:unhexer',
+    ...            )))
+    >>> conn = db.open()
+    >>> conn.root.b = ZODB.blob.Blob('test')
+    >>> conn.transaction_manager.commit()
+
+    >>> _ = conn.root.b.open().read()
+
+So, here we have some untransformed data. Now, we'll pack it:
+
+    >>> db.pack()
+
+Now, the database records are hex:
+
+    >>> db.storage.load('\0'*8)[0][:50]
+    '.h6370657273697374656e742e6d617070696e670a50657273'
+
+    >>> db.storage.load('\0'*7+'\1')[0][:50]
+    '.h635a4f44422e626c6f620a426c6f620a71012e4e2e'
+
+Let's add an object. (We get away with this because the objects we
+use are in the cache. :)
+
+    >>> conn.root.a = conn.root().__class__()
+    >>> conn.transaction_manager.commit()
+
+Now the root and the new object are not hex:
+
+    >>> db.storage.load('\0'*8)[0][:50]
+    'cpersistent.mapping\nPersistentMapping\nq\x01.}q\x02U\x04data'
+
+    >>> db.storage.load('\0'*7+'\2')[0][:50]
+    'cpersistent.mapping\nPersistentMapping\nq\x01.}q\x02U\x04data'
+
+We capture the current time as the pack time:
+
+    >>> import time
+    >>> pack_time = time.time()
+    >>> time.sleep(.1)
+
+We'll throw in a blob modification:
+
+    >>> conn.root.b.open('w').write('test 2')
+    >>> conn.transaction_manager.commit()
+
+Now pack and make sure all the records have been transformed:
+
+
+    >>> db.pack()
+    >>> from ZODB.utils import p64
+    >>> for i in range(len(db.storage)):
+    ...     if db.storage.load(p64(i))[0][:2] != '.h':
+    ...         print i
+
+We should have only one blob file:
+
+    >>> nblobs = 0
+    >>> for _, _, files in os.walk('blobs'):
+    ...     for file in files:
+    ...         if file.endswith('.blob'):
+    ...             nblobs += 1
+    >>> nblobs
+    1
+
+    """
+
+def hexer(data):
+    return (data[:2] == '.h') and data or ('.h'+data.encode('hex'))
+def unhexer(data):
+    return data and (data[:2] == '.h' and data[2:].decode('hex') or data)
+
+
 def test_suite():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(ZCFileStorageTests, "check"))



More information about the checkins mailing list