[Checkins] SVN: zc.FileStorage/trunk/ Updated to work with ZODB 3.9. Also, no longer support GC as part of

Jim Fulton jim at zope.com
Fri Nov 6 13:44:54 EST 2009


Log message for revision 105506:
  Updated to work with ZODB 3.9. Also, GC is no longer supported as part
  of packing; an external GC (zc.zodbdgc) is assumed.
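
  The diff below wires this in through ZODB 3.9's new packer hook rather
  than monkey-patching FileStoragePacker as before.  A minimal usage
  sketch, based on the doctest setup added in this commit ('data.fs' and
  'data.blobs' are example names, not fixed paths):

      import time
      import ZODB.FileStorage
      import zc.FileStorage
      from ZODB.serialize import referencesf

      # Open a file storage that delegates packing to zc.FileStorage.
      storage = ZODB.FileStorage.FileStorage(
          'data.fs', blob_dir='data.blobs',
          packer=zc.FileStorage.packer)

      # Pack without GC; unreachable objects are left for an external
      # garbage collector such as zc.zodbdgc.
      storage.pack(time.time(), referencesf)
      storage.close()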
  

Changed:
  _U  zc.FileStorage/trunk/
  U   zc.FileStorage/trunk/buildout.cfg
  U   zc.FileStorage/trunk/setup.py
  D   zc.FileStorage/trunk/src/zc/FileStorage/_ILBTree.c
  U   zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
  A   zc.FileStorage/trunk/src/zc/FileStorage/blob_packing.txt
  D   zc.FileStorage/trunk/src/zc/FileStorage/mru.py
  D   zc.FileStorage/trunk/src/zc/FileStorage/mru.txt
  U   zc.FileStorage/trunk/src/zc/FileStorage/tests.py

-=-

Property changes on: zc.FileStorage/trunk
___________________________________________________________________
Modified: svn:externals
   - 3.8 svn+ssh://svn.zope.org/repos/main/ZODB/branches/3.8

   + 


Modified: zc.FileStorage/trunk/buildout.cfg
===================================================================
--- zc.FileStorage/trunk/buildout.cfg	2009-11-06 17:34:20 UTC (rev 105505)
+++ zc.FileStorage/trunk/buildout.cfg	2009-11-06 18:44:53 UTC (rev 105506)
@@ -1,5 +1,5 @@
 [buildout]
-develop = . 3.8
+develop = .
 parts = py test
 
 [py]
@@ -9,29 +9,13 @@
 
 [test]
 recipe = zc.recipe.testrunner
-eggs = ZODB3 <3.9dev
-       zc.FileStorage
+eggs = zc.FileStorage
 initialization =
   import os, tempfile, shutil
   if os.path.exists('tmp'): shutil.rmtree('tmp')
   os.mkdir('tmp')
   tempfile.tempdir = os.path.abspath('tmp')
 
-  import zc.FileStorage
-  zc.FileStorage.FileReferences.cache_size = 2
-  zc.FileStorage.FileReferences.entry_size = 2
-
-  import ZODB.tests.VersionStorage, ZODB.tests.TransactionalUndoVersionStorage
-  class C: pass
-  ZODB.tests.VersionStorage.VersionStorage = C
-  class C: pass
-  ZODB.tests.TransactionalUndoVersionStorage.TransactionalUndoVersionStorage = C
-  import ZODB.tests.testDB
-  del ZODB.tests.testDB.DBTests.test_removeVersionPool
-  del ZODB.tests.testDB.DBTests.test_removeVersionPool_while_connection_open
-  import ZODB.tests.testZODB
-  del ZODB.tests.testZODB.ZODBTests.checkVersionOnly
-
 # There's no point in running the zeo tests, since zeo will run the
 # server in a separate process that won't see the zc.FileStorage
 # import.

Modified: zc.FileStorage/trunk/setup.py
===================================================================
--- zc.FileStorage/trunk/setup.py	2009-11-06 17:34:20 UTC (rev 105505)
+++ zc.FileStorage/trunk/setup.py	2009-11-06 18:44:53 UTC (rev 105506)
@@ -15,15 +15,11 @@
     ext_modules=[
         Extension('zc.FileStorage._zc_FileStorage_posix_fadvise',
                   ['src/zc/FileStorage/_zc_FileStorage_posix_fadvise.c']),
-        Extension('zc.FileStorage._ILBTree',
-                  ['src/zc/FileStorage/_ILBTree.c'],
-                  include_dirs=['3.8/src'],
-                  ),
         ],
     namespace_packages = ['zc'],
     package_dir = {'': 'src'},
     install_requires = ['setuptools',
-                        'ZODB3 >=3.8dev, <3.9dev'
+                        'ZODB3 >=3.9dev'
                         ],
     include_package_data = True,
     zip_safe = False,

Deleted: zc.FileStorage/trunk/src/zc/FileStorage/_ILBTree.c
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/_ILBTree.c	2009-11-06 17:34:20 UTC (rev 105505)
+++ zc.FileStorage/trunk/src/zc/FileStorage/_ILBTree.c	2009-11-06 18:44:53 UTC (rev 105506)
@@ -1,67 +0,0 @@
-/*############################################################################
-#
-# Copyright (c) 2004 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-############################################################################*/
-
-#define MASTER_ID "$Id: _IIBTree.c 25186 2004-06-02 15:07:33Z jim $\n"
-
-/* IIBTree - int key, int value BTree
-
-   Implements a collection using int type keys
-   and int type values
-*/
-
-/* Setup template macros */
-
-#define PERSISTENT
-
-#define MOD_NAME_PREFIX "IL"
-#define INITMODULE init_ILBTree
-#define DEFAULT_MAX_BUCKET_SIZE 120
-#define DEFAULT_MAX_BTREE_SIZE 500
-
-#include "BTrees/intkeymacros.h"
-
-
-#define VALUEMACROS_H "$Id:$\n"
-
-#define NEED_LONG_LONG_SUPPORT
-#define VALUE_TYPE PY_LONG_LONG
-#define VALUE_PARSE "L"
-#define COPY_VALUE_TO_OBJECT(O, K) O=longlong_as_object(K)
-#define COPY_VALUE_FROM_ARG(TARGET, ARG, STATUS) \
-    if (PyInt_Check(ARG)) TARGET=PyInt_AS_LONG(ARG); else \
-        if (longlong_check(ARG)) TARGET=PyLong_AsLongLong(ARG); else \
-            if (PyLong_Check(ARG)) { \
-                PyErr_SetString(PyExc_ValueError, "long integer out of range"); \
-                (STATUS)=0; (TARGET)=0; } \
-            else { \
-            PyErr_SetString(PyExc_TypeError, "expected integer value");   \
-            (STATUS)=0; (TARGET)=0; }
-
-
-#undef VALUE_TYPE_IS_PYOBJECT
-#define TEST_VALUE(K, T) (((K) < (T)) ? -1 : (((K) > (T)) ? 1: 0)) 
-#define VALUE_SAME(VALUE, TARGET) ( (VALUE) == (TARGET) )
-#define DECLARE_VALUE(NAME) VALUE_TYPE NAME
-#define DECREF_VALUE(k)
-#define INCREF_VALUE(k)
-#define COPY_VALUE(V, E) (V=(E))
-
-#define NORMALIZE_VALUE(V, MIN) ((MIN) > 0) ? ((V)/=(MIN)) : 0
-
-#define MERGE_DEFAULT 1
-#define MERGE(O1, w1, O2, w2) ((O1)*(w1)+(O2)*(w2))
-#define MERGE_WEIGHT(O, w) ((O)*(w))
-
-
-#include "BTrees/BTreeModuleTemplate.c"

Modified: zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/__init__.py	2009-11-06 17:34:20 UTC (rev 105505)
+++ zc.FileStorage/trunk/src/zc/FileStorage/__init__.py	2009-11-06 18:44:53 UTC (rev 105506)
@@ -14,74 +14,63 @@
 
 import cPickle
 import logging
-import marshal
 import os
-import shutil
 import subprocess
 import sys
 
-import zc.FileStorage.mru
-
 from ZODB.FileStorage.format import FileStorageFormatter, CorruptedDataError
-from ZODB.serialize import referencesf
 from ZODB.utils import p64, u64, z64
 from ZODB.FileStorage.format import TRANS_HDR_LEN
 
-import BTrees.IOBTree, BTrees.LOBTree, _ILBTree
+import ZODB.blob
 import ZODB.FileStorage
 import ZODB.FileStorage.fspack
 import ZODB.fsIndex
 import ZODB.TimeStamp
 
-class OptionalSeekFile(file):
-    """File that doesn't seek to current position.
+def packer(storage, referencesf, stop, gc):
+    return FileStoragePacker(storage, stop).pack()
 
-    This is to try to avoid gobs of system calls.
-    """
-
-    def seek(self, pos, whence=0):
-        if whence or (pos != self.tell()):
-            file.seek(self, pos, whence)
-    
-
 class FileStoragePacker(FileStorageFormatter):
 
-    def __init__(self, path, stop, la, lr, cla, clr, current_size):
-        self._name = path
+    def __init__(self, storage, stop):
+        self.storage = storage
+        self._name = path = storage._file.name
+
         # We open our own handle on the storage so that much of pack can
         # proceed in parallel.  It's important to close this file at every
         # return point, else on Windows the caller won't be able to rename
         # or remove the storage file.
+        self._file = open(path, "rb")
 
-        # We set the buffer quite high (32MB) to try to reduce seeks
-        # when the storage is disk is doing other io
-
-        self._file = OptionalSeekFile(path, "rb")
-
         self._stop = stop
         self.locked = 0
-        self.file_end = current_size
 
         # The packer needs to acquire the parent's commit lock
         # during the copying stage, so the two sets of lock acquire
         # and release methods are passed to the constructor.
-        self._lock_acquire = la
-        self._lock_release = lr
-        self._commit_lock_acquire = cla
-        self._commit_lock_release = clr
+        self._lock_acquire = storage._lock_acquire
+        self._lock_release = storage._lock_release
+        self._commit_lock_acquire = storage._commit_lock_acquire
+        self._commit_lock_release = storage._commit_lock_release
 
+        self._lock_acquire()
+        try:
+            storage._file.seek(0, 2)
+            self.file_end = storage._file.tell()
+        finally:
+            self._lock_release()
+
         self.ltid = z64
 
     def pack(self):
-        
+
         script = self._name+'.packscript'
         open(script, 'w').write(pack_script_template % dict(
             path = self._name,
             stop = self._stop,
             size = self.file_end,
             syspath = sys.path,
-            fr_cache_size = FileReferences.cache_size,
-            fr_entry_size = FileReferences.entry_size,
+            blob_dir = self.storage.blob_dir,
             ))
         for name in 'error', 'log':
             name = self._name+'.pack'+name
@@ -93,7 +82,6 @@
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
             close_fds=True,
             )
-        
 
         proc.stdin.close()
         out = proc.stdout.read()
@@ -114,7 +102,7 @@
         os.remove(packindex_path)
         os.remove(self._name+".packscript")
 
-        output = OptionalSeekFile(self._name + ".pack", "r+b")
+        output = open(self._name + ".pack", "r+b")
         output.seek(0, 2)
         assert output.tell() == opos
         self.copyRest(self.file_end, output, index)
@@ -123,17 +111,8 @@
         pos = output.tell()
         output.close()
 
-        # Grrrrr. The caller wants these attrs
-        self.index = index
-        self.vindex = {}
-        self.tindex = {}
-        self.tvindex = {}
-        self.oid2tid = {}
-        self.toid2tid = {}
-        self.toid2tid_delete = {}
+        return pos, index
 
-        return pos
-
     def copyRest(self, input_pos, output, index):
         # Copy data records written since packing started.
 
@@ -151,7 +130,7 @@
         # trailing 0 argument, and then on every platform except
         # native Windows it was observed that we could read stale
         # data from the tail end of the file.
-        self._file = OptionalSeekFile(self._name, "rb", 0)
+        self._file = open(self._name, "rb", 0)
         try:
             try:
                 while 1:
@@ -174,11 +153,11 @@
     def _copyNewTrans(self, input_pos, output, index,
                       acquire=None, release=None):
         tindex = {}
-        copier = PackCopier(output, index, {}, tindex, {})
+        copier = PackCopier(output, index, tindex)
         th = self._read_txn_header(input_pos)
         if release is not None:
             release()
-            
+
         output_tpos = output.tell()
         copier.setTxnPos(output_tpos)
         output.write(th.asString())
@@ -197,10 +176,7 @@
                 if h.back:
                     prev_txn = self.getTxnFromData(h.oid, h.back)
 
-            if h.version:
-                self.fail(pos, "Versions are not supported.")
-
-            copier.copy(h.oid, h.tid, data, '', prev_txn,
+            copier.copy(h.oid, h.tid, data, prev_txn,
                         output_tpos, output.tell())
 
             input_pos += h.recordlen()
@@ -230,10 +206,6 @@
         data, tid = self._loadBackTxn(oid, back, 0)
         return data
 
-sys.modules['ZODB.FileStorage.FileStorage'
-            ].FileStoragePacker = FileStoragePacker
-ZODB.FileStorage.FileStorage.supportsVersions = lambda self: False
-
 class PackCopier(ZODB.FileStorage.fspack.PackCopier):
 
     def _txn_find(self, tid, stop_at_pack):
@@ -269,12 +241,9 @@
    '%%(asctime)s %%(name)s %%(levelname)s %%(message)s'))
 logging.getLogger().addHandler(handler)
 
-# The next 2 lines support testing:
-zc.FileStorage.FileReferences.cache_size = %(fr_cache_size)s
-zc.FileStorage.FileReferences.entry_size = %(fr_entry_size)s
-
 try:
-    packer = zc.FileStorage.PackProcess(%(path)r, %(stop)r, %(size)r)
+    packer = zc.FileStorage.PackProcess(%(path)r, %(stop)r, %(size)r,
+                                        %(blob_dir)r)
     packer.pack()
 except Exception, v:
     logging.exception('packing')
@@ -289,18 +258,20 @@
 
 class PackProcess(FileStoragePacker):
 
-    def __init__(self, path, stop, current_size):
+    def __init__(self, path, stop, current_size, blob_dir):
         self._name = path
         # We open our own handle on the storage so that much of pack can
         # proceed in parallel.  It's important to close this file at every
         # return point, else on Windows the caller won't be able to rename
         # or remove the storage file.
 
-        # We set the buffer quite high (32MB) to try to reduce seeks
-        # when the storage is disk is doing other io
+        if blob_dir:
+            self.pack_blobs = True
+            self.blob_removed = open(os.path.join(blob_dir, '.removed'), 'w')
+        else:
+            self.pack_blobs = False
 
-        
-        self._file = OptionalSeekFile(path, "rb")
+        self._file = open(path, "rb")
 
         self._name = path
         self._stop = stop
@@ -317,46 +288,19 @@
         self._freecache(pos)
         return FileStoragePacker._read_txn_header(self, pos, tid)
 
-    def _log_memory(self): # only on linux, oh well
-        status_path = "/proc/%s/status" % os.getpid()
-        if not os.path.exists(status_path):
-            return
-        try:
-            f = open(status_path)
-        except IOError:
-            return
-
-        for line in f:
-            for name in ('Peak', 'Size', 'RSS'):
-                if line.startswith('Vm'+name):
-                    logging.info(line.strip())
-                
-
     def pack(self):
-        do_gc = not os.path.exists(self._name+'.packnogc')
-        packed, index, references, packpos = self.buildPackIndex(
-            self._stop, self.file_end, do_gc)
+        packed, index, packpos = self.buildPackIndex(self._stop, self.file_end)
         logging.info('initial scan %s objects at %s', len(index), packpos)
-        self._log_memory()
         if packed:
             # nothing to do
             logging.info('done, nothing to do')
             self._file.close()
             return
 
-        if do_gc:
-            logging.info('read to end for gc')
-            self.updateReferences(references, packpos, self.file_end)
-            logging.info('gc')
-            index = self.gc(index, references)
-
-        
-        self._log_memory()
         logging.info('copy to pack time')
-        output = OptionalSeekFile(self._name + ".pack", "w+b")
-        output._freecache = _freefunc(output)
+        output = open(self._name + ".pack", "w+b")
+        self._freecache = _freefunc(output)
         index, new_pos = self.copyToPacktime(packpos, index, output)
-        self._log_memory()
         if new_pos == packpos:
             # pack didn't free any data.  there's no point in continuing.
             self._file.close()
@@ -367,7 +311,6 @@
 
         logging.info('copy from pack time')
         self.copyFromPacktime(packpos, self.file_end, output, index)
-        self._log_memory()
 
         # Save the index so the parent process can use it as a starting point.
         f = open(self._name + ".packindex", 'wb')
@@ -379,16 +322,11 @@
         self._file.close()
 
 
-    def buildPackIndex(self, stop, file_end, do_gc):
+    def buildPackIndex(self, stop, file_end):
         index = ZODB.fsIndex.fsIndex()
-        references = self.ReferencesClass(self._name)
         pos = 4L
         packed = True
-        if do_gc:
-            update_refs = self._update_refs
-        else:
-            update_refs = lambda dh, references: None
-            
+
         while pos < file_end:
             th = self._read_txn_header(pos)
             if th.tid > stop:
@@ -404,10 +342,12 @@
             while pos < end:
                 dh = self._read_data_header(pos)
                 self.checkData(th, tpos, dh, pos)
-                if dh.version:
-                    self.fail(pos, "Versions are not supported")
-                index[dh.oid] = pos
-                update_refs(dh, references)
+                if dh.plen or dh.back:
+                    index[dh.oid] = pos
+                else:
+                    # deleted
+                    if dh.oid in index:
+                        del index[dh.oid]
                 pos += dh.recordlen()
 
             tlen = self._read_num(pos)
@@ -417,89 +357,15 @@
                           tlen, th.tlen)
             pos += 8
 
-        return packed, index, references, pos
+        return packed, index, pos
 
-    def updateReferences(self, references, pos, file_end):
-
-        # Note that we don't update an index in this step.  This is
-        # because we don't care about objects created after the pack
-        # time.  We'll add those in a later phase. We only care about
-        # references to existing objects.
-        
-        while pos < file_end:
-            th = self._read_txn_header(pos)
-            self.checkTxn(th, pos)
-
-            tpos = pos
-            end = pos + th.tlen
-            pos += th.headerlen()
-
-            while pos < end:
-                dh = self._read_data_header(pos)
-                self.checkData(th, tpos, dh, pos)
-                if dh.version:
-                    self.fail(pos, "Versions are not supported")
-                self._update_refs(dh, references, 1)
-                pos += dh.recordlen()
-
-            tlen = self._read_num(pos)
-            if tlen != th.tlen:
-                self.fail(pos, "redundant transaction length does not "
-                          "match initial transaction length: %d != %d",
-                          tlen, th.tlen)
-            pos += 8
-
-    def _update_refs(self, dh, references, merge=False):
-        oid = u64(dh.oid)
-
-        # Chase backpointers until we get to the record with the refs
-        while dh.back:
-            dh = self._read_data_header(dh.back)
-
-        if dh.plen:
-            refs = referencesf(self._file.read(dh.plen))
-            if refs:
-                if merge:
-                    initial = references.get(oid)
-                    if initial:
-                        refs = set(refs)
-                        refs.update(initial)
-                        refs = list(refs)
-                references[oid] = refs
-                return
-
-        if not merge:
-            references.rmf(oid)
-                
-    def gc(self, index, references):
-        to_do = BTrees.LOBTree.TreeSet([0])
-        reachable = ZODB.fsIndex.fsIndex()
-        while to_do:
-            ioid = to_do.maxKey()
-            to_do.remove(ioid)
-            oid = p64(ioid)
-            if oid in reachable:
-                continue
-
-            # Note that the references include references made
-            # after the pack time.  These include references to
-            # objects created after the pack time, which won't be
-            # in the index.
-            reachable[oid] = index.get(oid, 0)
-
-            for ref in references.get(ioid):
-                iref = u64(ref)
-                if (iref not in to_do) and (ref not in reachable):
-                    to_do.insert(iref)
-                
-        references.clear()
-        return reachable
-
     def copyToPacktime(self, packpos, index, output):
         pos = new_pos = self._metadata_size
         self._file.seek(0)
         output.write(self._file.read(self._metadata_size))
         new_index = ZODB.fsIndex.fsIndex()
+        pack_blobs = self.pack_blobs
+        is_blob_record = ZODB.blob.is_blob_record
 
         while pos < packpos:
             th = self._read_txn_header(pos)
@@ -510,6 +376,35 @@
                 h = self._read_data_header(pos)
                 if index.get(h.oid) != pos:
                     pos += h.recordlen()
+                    if pack_blobs:
+                        if h.plen:
+                            data = self._file.read(h.plen)
+                        else:
+                            data = self.fetchDataViaBackpointer(h.oid, h.back)
+                        if data and is_blob_record(data):
+                            # We need to remove the blob record. Maybe we
+                            # also need to remove the oid.
+
+                            # But first, we need to make sure the
+                            # record we're looking at isn't a dup of
+                            # the current record. There's a bug in ZEO
+                            # blob support that causes duplicate data
+                            # records.
+                            rpos = index.get(h.oid)
+                            is_dup = (rpos and
+                                      self._read_data_header(rpos).tid == h.tid)
+                            if not is_dup:
+                                # Note that we delete the revision.
+                                # If rpos was None, then we could
+                                # remove the oid.  What if, somehow,
+                                # another blob update happened after
+                                # the deletion? This shouldn't happen,
+                                # but we can leave it to the cleanup
+                                # code to take care of removing the
+                                # directory for us.
+                                self.blob_removed.write(
+                                    (h.oid+h.tid).encode('hex')+'\n')
+
                     continue
 
                 pos += h.recordlen()
@@ -555,17 +450,29 @@
                     output.write(tlen)
                     output.seek(new_pos)
 
-                output._freecache(new_pos)
-                
+                self._freecache(new_pos)
 
+
             pos += 8
 
         return new_index, new_pos
 
+    def fetchDataViaBackpointer(self, oid, back):
+        """Return the data for oid via backpointer back
+
+        If `back` is 0 or ultimately resolves to 0, return None.
+        In this case, the transaction undoes the object
+        creation.
+        """
+        if back == 0:
+            return None
+        data, tid = self._loadBackTxn(oid, back, 0)
+        return data
+
     def copyFromPacktime(self, input_pos, file_end, output, index):
         while input_pos < file_end:
             input_pos = self._copyNewTrans(input_pos, output, index)
-            output._freecache(output.tell())
+            self._freecache(output.tell())
         return input_pos
 
 
@@ -591,125 +498,3 @@
             _zc_FileStorage_posix_fadvise.POSIX_FADV_DONTNEED)
 
     return _free
-
-
-class MemoryReferences:
-
-    def __init__(self, path):
-        self.references = BTrees.LOBTree.LOBTree()
-        self.clear = self.references.clear
-
-    def get(self, oid):
-        references = self.references
-        ioid1, ioid2 = divmod(oid, 2147483648L)
-
-        references_ioid1 = references.get(ioid1)
-        if not references_ioid1:
-            return ()
-
-        ioid2 = int(ioid2)
-        result = references_ioid1[0].get(ioid2)
-        if result:
-            return [p64(result)]
-        return references_ioid1[1].get(ioid2, ())
-
-    def __setitem__(self, oid, refs):
-        references = self.references
-        ioid1, ioid2 = divmod(oid, 2147483648L)
-        ioid2 = int(ioid2)
-        references_ioid1 = references.get(ioid1)
-        if references_ioid1 is None:
-            references_ioid1 = references[ioid1] = (
-                _ILBTree.ILBTree(),      # {ioid2 -> single_referenced_oid}
-                BTrees.IOBTree.IOBTree() # {ioid2 -> referenced_oids}
-                )
-
-        if len(refs) == 1:
-            references_ioid1[0][ioid2] = u64(refs.pop())
-            references_ioid1[1].pop(ioid2, None)
-        else:
-            references_ioid1[1][ioid2] = refs
-            references_ioid1[0].pop(ioid2, None)
-            
-    def rmf(self, oid):
-        # Remove the oid, if present
-        ioid1, ioid2 = divmod(oid, 2147483648L)
-        references_ioid1 = self.references.get(ioid1)
-        if not references_ioid1:
-            return
-
-        ioid2 = int(ioid2)
-        if references_ioid1[0].pop(ioid2, None) is None:
-            references_ioid1[1].pop(ioid2, None)
-
-def _rmtree_onerror(func, path, exc_info):
-    if os.path.exists(path):
-        raise exc_info[0], exc_info[1], exc_info[2]
-    logging.info('burp removing %s', path)
-
-class FileReferences:
-
-    cache_size = 999
-    entry_size = 256
-
-    def __init__(self, path):
-        self._cache = zc.FileStorage.mru.MRU(self.cache_size,
-                                             lambda k, v: v.save())
-        path += '.refs'
-        if os.path.isdir(path):
-            shutil.rmtree(path, onerror=_rmtree_onerror)
-        os.mkdir(path)
-        self._tmp = path
-
-    def clear(self):
-        cache = self._cache
-        for k in cache:
-            cache[k].dirty = False
-        self._cache.clear()
-        shutil.rmtree(self._tmp, onerror=_rmtree_onerror)
-
-    def _load(self, oid):
-        base, index = divmod(long(oid), self.entry_size)
-        key = hex(base)[2:-1]
-        data = self._cache.get(key)
-        if data is None:
-            data = _refdata(os.path.join(self._tmp, key))
-            self._cache[key] = data
-        return data, index
-
-    def get(self, oid):
-        data, index = self._load(oid)
-        return data.get(index, ())
-
-    def __setitem__(self, oid, refs):
-        data, index = self._load(oid)
-        if set(refs) != set(data.get(index, ())):
-            data[index] = refs
-
-    def rmf(self, oid):
-        data, index = self._load(oid)
-        if index in data:
-            del data[index]
-
-class _refdata(dict):
-    
-    def __init__(self, path):
-        self.path = path
-        if os.path.exists(path):
-            self.update(marshal.load(open(path, 'rb')))
-        self.dirty = False
-
-    def save(self):
-        if self.dirty:
-            marshal.dump(dict(self), open(self.path, 'wb'))
-            self.dirty = False
-
-    def __setitem__(self, key, value):
-        self.dirty = True
-        dict.__setitem__(self, key, value)
-
-    def __delitem__(self, key):
-        self.dirty = True
-        dict.__delitem__(self, key)
-
-PackProcess.ReferencesClass = FileReferences
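
The module-level packer function added at the top of this file is the
entry point the ZODB 3.9 packer option calls: ZODB passes the storage,
referencesf, the pack cutoff time, and a gc flag, and the function
delegates to FileStoragePacker while ignoring referencesf and gc.  A
hypothetical wrapper (not part of this commit) that makes the hook
contract explicit:

    import logging

    import zc.FileStorage

    def verbose_packer(storage, referencesf, stop, gc):
        # Same signature as zc.FileStorage.packer.  Warn when a caller
        # asks for GC, which this packer never performs while packing.
        if gc:
            logging.getLogger('zc.FileStorage').warning(
                'GC requested, but packing never collects garbage; '
                'use an external GC such as zc.zodbdgc')
        return zc.FileStorage.packer(storage, referencesf, stop, gc)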

Copied: zc.FileStorage/trunk/src/zc/FileStorage/blob_packing.txt (from rev 105505, zc.FileStorage/branches/jim-dev/src/zc/FileStorage/blob_packing.txt)
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/blob_packing.txt	                        (rev 0)
+++ zc.FileStorage/trunk/src/zc/FileStorage/blob_packing.txt	2009-11-06 18:44:53 UTC (rev 105506)
@@ -0,0 +1,144 @@
+Packing support for blob data
+=============================
+
+XXX Gaaa. This is a copy because the original assumed the storage
+packed with GC.  zc.FileStorage only works with external gc. :/
+
+
+Set up:
+
+    >>> from ZODB.serialize import referencesf
+    >>> from ZODB.blob import Blob
+    >>> from ZODB import utils
+    >>> from ZODB.DB import DB
+    >>> import transaction
+
+A helper method to assure a unique timestamp across multiple platforms:
+
+    >>> from ZODB.tests.testblob import new_time
+
+UNDOING
+=======
+
+We need a database with an undoing blob supporting storage:
+
+    >>> import ZODB.FileStorage, zc.FileStorage
+    >>> blob_storage = ZODB.FileStorage.FileStorage(
+    ...     'data.fs', blob_dir='data.blobs',
+    ...     packer=zc.FileStorage.packer)
+    >>> database = DB(blob_storage)
+
+Create our root object:
+
+    >>> connection1 = database.open()
+    >>> root = connection1.root()
+
+Put some revisions of a blob object in our database and on the filesystem:
+
+    >>> import os
+    >>> tids = []
+    >>> times = []
+    >>> nothing = transaction.begin()
+    >>> times.append(new_time())
+    >>> blob = Blob()
+    >>> blob.open('w').write('this is blob data 0')
+    >>> root['blob'] = blob
+    >>> transaction.commit()
+    >>> tids.append(blob._p_serial)
+
+    >>> nothing = transaction.begin()
+    >>> times.append(new_time())
+    >>> root['blob'].open('w').write('this is blob data 1')
+    >>> transaction.commit()
+    >>> tids.append(blob._p_serial)
+
+    >>> nothing = transaction.begin()
+    >>> times.append(new_time())
+    >>> root['blob'].open('w').write('this is blob data 2')
+    >>> transaction.commit()
+    >>> tids.append(blob._p_serial)
+
+    >>> nothing = transaction.begin()
+    >>> times.append(new_time())
+    >>> root['blob'].open('w').write('this is blob data 3')
+    >>> transaction.commit()
+    >>> tids.append(blob._p_serial)
+
+    >>> nothing = transaction.begin()
+    >>> times.append(new_time())
+    >>> root['blob'].open('w').write('this is blob data 4')
+    >>> transaction.commit()
+    >>> tids.append(blob._p_serial)
+
+    >>> oid = root['blob']._p_oid
+    >>> fns = [ blob_storage.fshelper.getBlobFilename(oid, x) for x in tids ]
+    >>> [ os.path.exists(x) for x in fns ]
+    [True, True, True, True, True]
+
+Do a pack to slightly before the first revision was written:
+
+    >>> packtime = times[0]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [True, True, True, True, True]
+
+Do a pack to slightly before the second revision was written:
+
+    >>> packtime = times[1]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [True, True, True, True, True]
+
+Do a pack to slightly before the third revision was written:
+
+    >>> packtime = times[2]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, True, True, True, True]
+
+Do a pack to slightly before the fourth revision was written:
+
+    >>> packtime = times[3]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, True, True, True]
+
+Do a pack to slightly before the fifth revision was written:
+
+    >>> packtime = times[4]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, False, True, True]
+
+Do a pack to now:
+
+    >>> packtime = new_time()
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, False, False, True]
+
+Delete the object and do a pack; it should get rid of the most current
+revision as well as the entire directory:
+
+
+    >>> t = transaction.begin()
+    >>> oid, serial = root['blob']._p_oid, root['blob']._p_serial
+    >>> del root['blob']
+    >>> transaction.commit()
+
+    >>> t = transaction.begin()
+    >>> blob_storage.tpc_begin(t)
+    >>> blob_storage.deleteObject(oid, serial, t)
+    >>> blob_storage.tpc_vote(t)
+    >>> blob_storage.tpc_finish(t)
+
+    >>> packtime = new_time()
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, False, False, False]
+    >>> os.path.exists(os.path.split(fns[0])[0])
+    False
+
+Clean up our blob directory and database:
+
+    >>> blob_storage.close()
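
The copyToPacktime changes above tag doomed blob revisions by appending
the hex-encoded oid+tid pair to a '.removed' file in the blob directory;
as the comments in the diff note, the actual file and directory removal
is left to cleanup code.  A hypothetical reader for that format (not
part of this commit), just to illustrate the encoding:

    import os

    def read_removed(blob_dir):
        # Parse blob_dir/.removed as written by PackProcess above: one
        # hex-encoded record per line, the 8-byte oid followed by the
        # 8-byte tid.
        path = os.path.join(blob_dir, '.removed')
        if not os.path.exists(path):
            return []
        records = []
        for line in open(path):
            raw = line.strip().decode('hex')  # Python 2 hex codec
            records.append((raw[:8], raw[8:]))
        return records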

Deleted: zc.FileStorage/trunk/src/zc/FileStorage/mru.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/mru.py	2009-11-06 17:34:20 UTC (rev 105505)
+++ zc.FileStorage/trunk/src/zc/FileStorage/mru.py	2009-11-06 18:44:53 UTC (rev 105506)
@@ -1,95 +0,0 @@
-##############################################################################
-#
-# Copyright (c) Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-
-class MRU:
-
-    def __init__(self, size, evicted=lambda k, v: None):
-        assert size > 0
-        self.size = size
-        self.evicted = evicted
-        self.data = {}
-        self.top = _node()
-
-    def clear(self):
-        while self.data:
-            self.pop()
-    
-    def __len__(self):
-        return len(self.data)
-
-    def __iter__(self):
-        # We van't do a generator. We have to take a snapshot, otherwise
-        # the client might do operations that would change the order!
-        result = []
-        top = node = self.top
-        while 1:
-            node = node.previous
-            if node is top:
-                break
-            result.append(node.key)
-        return iter(result)
-
-    def get(self, key, default=None):
-        node = self.data.get(key)
-        if node is None:
-            return default
-        if node.next != self.top:
-            node.unlink()
-            node.link(self.top)
-        return node.value
-
-    def __getitem__(self, key):
-        result = self.get(key, self)
-        if result is not self:
-            return result
-        raise KeyError(key)
-
-    def __setitem__(self, key, value):
-        assert value is not self
-        data = self.data
-        node = data.get(key)
-        if node is None:
-            node = _node(self.top)
-            data[key] = node
-            node.key = key
-            if len(data) > self.size:
-                self.pop()
-        node.value = value
-
-    def pop(self):
-        doomed = self.top.next
-        self.evicted(doomed.key, doomed.value)
-        del self.data[doomed.key]
-        doomed.unlink()
-        
-
-class _node:
-
-    next = previous = key = value = None
-
-    def __init__(self, next=None):
-        if next is None:
-            next = self
-        self.link(next)
-
-    def link(self, next):
-        self.next = next
-        self.previous = next.previous
-        next.previous = self
-        self.previous.next = self
-
-    def unlink(self):
-        self.next.previous = self.previous
-        self.previous.next = self.next
-        

Deleted: zc.FileStorage/trunk/src/zc/FileStorage/mru.txt
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/mru.txt	2009-11-06 17:34:20 UTC (rev 105505)
+++ zc.FileStorage/trunk/src/zc/FileStorage/mru.txt	2009-11-06 18:44:53 UTC (rev 105506)
@@ -1,46 +0,0 @@
-Simple most-recently-used cache
-===============================
-
-An mru cache is a simple mapping object that has a limited size. To
-create an mru cache, we call the MRU constructor passing a size an an
-optional eviction callback.  The eviscion callback is called just
-before an item is evicted.
-
-    >>> def evicted(key, value):
-    ...     print 'evicted', key, value
-    >>> from zc.FileStorage.mru import MRU
-    >>> cache = MRU(5, evicted)
-    >>> len(cache), list(cache)
-    (0, [])
-
-We add items to the cache as we would any mapping object:
-
-    >>> cache[1] = 'one'
-    >>> cache[2] = 'two'
-    >>> cache[3] = 'three'
-    >>> cache[4] = 'four'
-    >>> cache[1]
-    'one'
-    >>> cache.get(3)
-    'three'
-
-    >>> len(cache), list(cache)
-    (4, [3, 1, 4, 2])
-
-Note the order of the keys. 3 and 1 are first because we accessed them most
-recently.  4 is next because it was added last and an add counts as an
-access.
-
-Let's add some more values:
-
-    >>> cache[5] = 'five'
-    >>> cache[6] = 'six'
-    evicted 2 two
-    >>> cache[7] = 'seven'
-    evicted 4 four
-
-    >>> len(cache), list(cache)
-    (5, [7, 6, 5, 3, 1])
-
-    >>> cache.get(4)
-

Modified: zc.FileStorage/trunk/src/zc/FileStorage/tests.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/tests.py	2009-11-06 17:34:20 UTC (rev 105505)
+++ zc.FileStorage/trunk/src/zc/FileStorage/tests.py	2009-11-06 18:44:53 UTC (rev 105506)
@@ -19,28 +19,32 @@
 # tests affected by the lack of gc in pack.
 ##############################################################################
 
-
-import os
+import pickle
 import unittest
-from zope.testing import doctest
+import zc.FileStorage
+import ZODB.blob
+import ZODB.tests.testblob
 
 from ZODB.tests.testFileStorage import * # :-P
 from ZODB.tests.PackableStorage import * # :-P
 from ZODB.tests.TransactionalUndoStorage import * # :-P
 
-class NoGCFileStorageTests(FileStorageTests):
+from zope.testing import doctest, setupstack
 
+class ZCFileStorageTests(FileStorageTests):
+
+    blob_dir = None
+
     def setUp(self):
-        self.open(create=1)
-        self.__gcpath = os.path.abspath('FileStorageTests.fs.packnogc')
-        open(self.__gcpath, 'w')
+        self.open(create=1, packer=zc.FileStorage.packer,
+                  blob_dir=self.blob_dir)
 
     def tearDown(self):
         self._storage.close()
         self._storage.cleanup()
-        os.remove(self.__gcpath)
+        if self.blob_dir:
+            ZODB.blob.remove_committed_dir(self.blob_dir)
 
-
     def checkPackAllRevisions(self):
         self._initroot()
         eq = self.assertEqual
@@ -99,50 +103,22 @@
         # The undo log contains only the most recent transaction
         self.assertEqual(3, len(self._storage.undoLog()))
 
-
-    def checkTransactionalUndoAfterPack(self):
-        eq = self.assertEqual
-        # Add a few object revisions
-        oid = self._storage.new_oid()
-        revid1 = self._dostore(oid, data=MinPO(51))
-        snooze()
-        packtime = time.time()
-        snooze()                # time.time() now distinct from packtime
-        revid2 = self._dostore(oid, revid=revid1, data=MinPO(52))
-        self._dostore(oid, revid=revid2, data=MinPO(53))
-        # Now get the undo log
-        info = self._storage.undoInfo()
-        eq(len(info), 3)
-        tid = info[0]['id']
-        # Now pack just the initial revision of the object.  We need the
-        # second revision otherwise we won't be able to undo the third
-        # revision!
-        self._storage.pack(packtime, referencesf)
-        # Make some basic assertions about the undo information now
-        info2 = self._storage.undoInfo()
-        eq(len(info2), 3)
-        # And now attempt to undo the last transaction
-        t = Transaction()
-        self._storage.tpc_begin(t)
-        tid, oids = self._storage.undo(tid, t)
-        self._storage.tpc_vote(t)
-        self._storage.tpc_finish(t)
-        eq(len(oids), 1)
-        eq(oids[0], oid)
-        data, revid = self._storage.load(oid, '')
-        # The object must now be at the second state
-        eq(zodb_unpickle(data), MinPO(52))
-        self._iterate()
-
-    
     def checkPackWithGCOnDestinationAfterRestore(self):
         pass
 
     def checkPackWithMultiDatabaseReferences(self):
         pass
 
+class ZCFileStorageTestsWithBlobs(ZCFileStorageTests):
+
+    blob_dir = 'blobs'
+
 def test_suite():
     suite = unittest.TestSuite()
-    suite.addTest(unittest.makeSuite(NoGCFileStorageTests, "check"))
-    suite.addTest(doctest.DocFileSuite('mru.txt'))
+    suite.addTest(unittest.makeSuite(ZCFileStorageTests, "check"))
+    suite.addTest(unittest.makeSuite(ZCFileStorageTestsWithBlobs, "check"))
+    suite.addTest(doctest.DocFileSuite(
+        'blob_packing.txt',
+        setUp=setupstack.setUpDirectory, tearDown=setupstack.tearDown,
+        ))
     return suite


