[Checkins] SVN: ZODB/branches/jim-dev/src/ZODB/FileStorage/ switched to keeping track of blobs deleted rather than blobs kept.

Jim Fulton jim at zope.com
Tue Dec 16 15:59:01 EST 2008


Log message for revision 94132:
  switched to keeping track of blobs deleted rather than blobs kept.

Changed:
  U   ZODB/branches/jim-dev/src/ZODB/FileStorage/FileStorage.py
  U   ZODB/branches/jim-dev/src/ZODB/FileStorage/fspack.py
  U   ZODB/branches/jim-dev/src/ZODB/FileStorage/interfaces.py

-=-
Modified: ZODB/branches/jim-dev/src/ZODB/FileStorage/FileStorage.py
===================================================================
--- ZODB/branches/jim-dev/src/ZODB/FileStorage/FileStorage.py	2008-12-16 20:31:19 UTC (rev 94131)
+++ ZODB/branches/jim-dev/src/ZODB/FileStorage/FileStorage.py	2008-12-16 20:59:00 UTC (rev 94132)
@@ -1105,24 +1105,63 @@
         # Move any blobs linked or copied while packing to the
         # pack dir, which will become the old dir
         lblob_dir = len(self.blob_dir)
-        for path, dir_names, file_names in os.walk(self.blob_dir, False):
-            n = 0
+        fshelper = self.fshelper
+        old = self.blob_dir+'.old'
+        os.mkdir(old, 0777)
+
+        # Helper to clean up dirs left empty after moving things to old
+        def maybe_remove_empty_dir_containing(path):
+            path = os.path.dirname(path)
+            if len(path) <= lblob_dir:
+                return
+            if not os.listdir(path):
+                os.rmdir(path)
+                maybe_remove_empty_dir_containing(path)
+
+        # Helper that moves a oid dir or revision file to the old dir.
+        def move(path):
+            dest = os.path.dirname(old+path[lblob_dir:])
+            if not os.path.exists(dest):
+                os.makedirs(dest, 0700)
+            os.rename(path, old+path[lblob_dir:])
+            maybe_remove_empty_dir_containing(path)
+            
+        # First step: "remove" oids or revisions by moving them to .old
+        # (Later, when we add an option to not keep old files, we'll
+        # be able to simply remove.)
+        for line in open(os.path.join(self.blob_dir, '.removed')):
+            line = line.strip().decode('hex')
+
+            if len(line) == 8:
+                # oid is garbage, re/move dir
+                path = fshelper.getPathForOID(line)
+                if not os.path.exists(path):
+                    # Hm, already gone. Odd.
+                    continue
+                move(path)
+                continue
+            
+            if len(line) != 16:
+                raise ValueError("Bad record in ", self.blob_dir, '.removed')
+            
+            oid, tid = line[:8], line[8:]
+            path = fshelper.getBlobFilename(oid, tid)
+            if not os.path.exists(path):
+                # Hm, already gone. Odd.
+                continue
+            move(path)
+            
+        # Second step, copy remaining files.
+        link_or_copy = ZODB.blob.link_or_copy
+        for path, dir_names, file_names in os.walk(self.blob_dir):
             for file_name in file_names:
                 if not file_name.endswith('.blob'):
                     continue
-                file_packed = os.path.join(
-                    path[:lblob_dir]+'.pack'+path[lblob_dir:],
-                    file_name)
-                if not os.path.exists(file_packed):
-                    if not os.path.exists(os.path.dirname(file_packed)):
-                        os.makedirs(os.path.dirname(file_packed), 0700)
-                    ZODB.blob.rename_or_copy_blob(
-                        os.path.join(path, file_name),
-                        file_packed)
-                    n += 1
-            if (n == len(file_names)) and not os.listdir(path):
-                os.rmdir(path)
-        os.rename(self.blob_dir+'.pack', self.blob_dir+'.old')
+                file_path = os.path.join(path, file_name)
+                dest = os.path.dirname(old+file_path[lblob_dir:])
+                if not os.path.exists(dest):
+                    os.makedirs(dest, 0700)
+                link_or_copy(file_path, old+file_path[lblob_dir:])
         
     def iterator(self, start=None, stop=None):
         return FileIterator(self._file_name, start, stop)

Modified: ZODB/branches/jim-dev/src/ZODB/FileStorage/fspack.py
===================================================================
--- ZODB/branches/jim-dev/src/ZODB/FileStorage/fspack.py	2008-12-16 20:31:19 UTC (rev 94131)
+++ ZODB/branches/jim-dev/src/ZODB/FileStorage/fspack.py	2008-12-16 20:59:00 UTC (rev 94132)
@@ -337,12 +337,11 @@
     def __init__(self, storage, referencesf, stop, gc=True):
         self._storage = storage
         if storage.blob_dir:
-            self.blob_dir = storage.blob_dir+'.pack'
-            self.fshelper = ZODB.blob.FilesystemHelper(
-                self.blob_dir, storage.fshelper.layout_name)
-            self.fshelper.create()
+            self.pack_blobs = True
+            self.blob_removed = open(
+                os.path.join(storage.blob_dir, '.removed'), 'w')
         else:
-            self.blob_dir = None
+            self.pack_blobs = False
             
         path = storage._file.name
         self._name = path
@@ -492,8 +491,24 @@
         while pos < tend:
             h = self._read_data_header(pos)
             if not self.gc.isReachable(h.oid, pos):
+                if self.pack_blobs:
+                    # We need to find out if this is a blob, so get the data:
+                    if h.plen:
+                        data = self._file.read(h.plen)
+                    else:
+                        data = self.fetchDataViaBackpointer(h.oid, h.back)
+                    if data and ZODB.blob.is_blob_record(data):
+                        # We need to remove the blob record. Maybe we
+                        # need to remove oid:
+                        if h.oid not in self.gc.reachable:
+                            self.blob_removed.write(h.oid.encode('hex')+'\n')
+                        else:
+                            self.blob_removed.write(
+                                (h.oid+h.tid).encode('hex')+'\n')
+                
                 pos += h.recordlen()
                 continue
+
             pos += h.recordlen()
 
             # If we are going to copy any data, we need to copy
@@ -510,24 +525,18 @@
             if h.plen:
                 data = self._file.read(h.plen)
             else:
-                # If a current record has a backpointer, fetch
-                # refs and data from the backpointer.  We need
-                # to write the data in the new record.
-                data = self.fetchBackpointer(h.oid, h.back)
+                data = self.fetchDataViaBackpointer(h.oid, h.back)
 
             self.writePackedDataRecord(h, data, new_tpos)
             new_pos = self._tfile.tell()
 
-            if ZODB.blob.is_blob_record(data):
-                self.copyBlob(h.oid, h.tid)
-
         return new_tpos, pos
 
-    def fetchBackpointer(self, oid, back):
-        """Return data and refs backpointer `back` to object `oid.
+    def fetchDataViaBackpointer(self, oid, back):
+        """Return the data for oid via backpointer back
 
-        If `back` is 0 or ultimately resolves to 0, return None
-        and None.  In this case, the transaction undoes the object
+        If `back` is 0 or ultimately resolves to 0, return None.
+        In this case, the transaction undoes the object
         creation.
         """
         if back == 0:
@@ -535,15 +544,6 @@
         data, tid = self._loadBackTxn(oid, back, 0)
         return data
 
-    def copyBlob(self, oid, tid):
-        if not self.blob_dir:
-            return
-        self.fshelper.createPathForOID(oid)
-        ZODB.blob.link_or_copy(
-            self._storage.fshelper.getBlobFilename(oid, tid),
-            self.fshelper.getBlobFilename(oid, tid),
-            )
-
     def writePackedDataRecord(self, h, data, new_tpos):
         # Update the header to reflect current information, then write
         # it to the output file.
@@ -599,16 +599,13 @@
             if h.plen:
                 data = self._file.read(h.plen)
             else:
-                data = self.fetchBackpointer(h.oid, h.back)
+                data = self.fetchDataViaBackpointer(h.oid, h.back)
                 if h.back:
                     prev_txn = self.getTxnFromData(h.oid, h.back)
 
             self._copier.copy(h.oid, h.tid, data, prev_txn,
                               pos, self._tfile.tell())
 
-            if ZODB.blob.is_blob_record(data):
-                self.copyBlob(h.oid, h.tid)
-
         tlen = self._tfile.tell() - pos
         assert tlen == th.tlen
         self._tfile.write(p64(tlen))

Modified: ZODB/branches/jim-dev/src/ZODB/FileStorage/interfaces.py
===================================================================
--- ZODB/branches/jim-dev/src/ZODB/FileStorage/interfaces.py	2008-12-16 20:31:19 UTC (rev 94131)
+++ ZODB/branches/jim-dev/src/ZODB/FileStorage/interfaces.py	2008-12-16 20:59:00 UTC (rev 94132)
@@ -20,17 +20,34 @@
 
         The new file will have the same name as the old file with
         '.pack' appended. (The packer can get the old file name via
-        storage._file.name.)
+        storage._file.name.) If blobs are supported, that is, if the
+        storage's blob_dir attribute is not None or empty, then a .removed
+        file must be created in the blob directory. This file contains
+        records of the form:
 
+           (oid+serial).encode('hex')+'\n'
+
+        or, of the form:
+
+           oid.encode('hex')+'\n'
+        
+
         If packing is unnecessary, or would not change the file, then
-        None is returned, otherwise a tule is returned with:
+        no pack or removed files are created and None is returned;
+        otherwise a tuple is returned with:
 
         - the size of the packed file, and
 
         - the packed index
 
         If and only if packing was necessary (non-None) and there was
-        no error, then the commit lock must be acquired.
+        no error, then the commit lock must be acquired.  In addition,
+        it is up to FileStorage to:
+
+        - Rename the .pack file, and
+
+        - process the blob_dir/.removed file by removing the blobs
+          corresponding to the file records.        
         """
 
 class IFileStorage(zope.interface.Interface):



More information about the Checkins mailing list