[Zope3-checkins] CVS: Zope3/src/zodb/storage - memory.py:1.2 interfaces.py:1.17 fsdump.py:1.6 bdbminimal.py:1.18 bdbfull.py:1.24 base.py:1.27 fsindex.py:NONE file.py:NONE

Jeremy Hylton <jeremy@zope.com>
Tue, 22 Apr 2003 11:23:14 -0400


Update of /cvs-repository/Zope3/src/zodb/storage
In directory cvs.zope.org:/tmp/cvs-serv13741

Modified Files:
	memory.py interfaces.py fsdump.py bdbminimal.py bdbfull.py 
	base.py 
Removed Files:
	fsindex.py file.py 
Log Message:
Merge the jeremy-new-pack-branch to the trunk.

The primary change is a completely new implementation of file storage pack.


=== Zope3/src/zodb/storage/memory.py 1.1 => 1.2 ===
--- Zope3/src/zodb/storage/memory.py:1.1	Thu Mar 20 17:58:16 2003
+++ Zope3/src/zodb/storage/memory.py	Tue Apr 22 11:23:13 2003
@@ -224,8 +224,8 @@
             raise db.DBNotFoundError
         while self._keys[i] == key and self._vals[i] <> val:
             i += 1
-        if i > len(self):
-            raise db.DBNotFoundError
+            if i >= len(self):
+                raise db.DBNotFoundError
         return self._getrec(i)
 
     def set(self, key):
@@ -293,3 +293,6 @@
 
     def append(self, val, txn=None):
         super(FakeQueue, self).append(val)
+
+    def values(self):
+        return self
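
For illustration, a minimal, self-contained sketch of the corrected duplicate scan in memory.py's in-memory stand-ins for the BerkeleyDB tables: the out-of-range check now sits inside the loop (and uses >=), so the scan raises "not found" instead of indexing past the end of the key array; the same change set also gives FakeQueue a values() method that returns the queue itself. Plain lists and a stand-in exception are used here instead of the real db module.

class NotFound(Exception):
    """Stand-in for db.DBNotFoundError in this sketch."""

def scan_duplicates(keys, vals, i, key, val):
    # Starting at index i, skip entries whose key matches but whose value
    # does not, mirroring the loop in memory.py after this change.  Assumes
    # the caller has already verified that keys[i] == key.
    while keys[i] == key and vals[i] != val:
        i += 1
        # The fix: bounds-check before keys[i] is read on the next pass.
        if i >= len(keys):
            raise NotFound
    return i

# ('a', 3) is the third duplicate stored under key 'a'.
assert scan_duplicates(['a', 'a', 'a', 'b'], [1, 2, 3, 9], 0, 'a', 3) == 2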


=== Zope3/src/zodb/storage/interfaces.py 1.16 => 1.17 ===
--- Zope3/src/zodb/storage/interfaces.py:1.16	Tue Apr  8 10:45:55 2003
+++ Zope3/src/zodb/storage/interfaces.py	Tue Apr 22 11:23:13 2003
@@ -186,8 +186,8 @@
         incremental pack, only old object revisions are removed.  In a full gc
         pack, cyclic garbage detection and removal is also performed.
 
-        t is the pack time.  All non-current object revisions older than t
-        will be removed in an incremental pack.
+        t is the pack time.  All non-current object revisions older than
+        or the same age as t will be removed in an incremental pack.
 
         pack() always performs an incremental pack.  If the gc flag is True,
         then pack() will also perform a garbage collection.  Some storages
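
The revised docstring tightens the contract: in an incremental pack, non-current revisions that are older than or the same age as the pack time t are removed, and the gc flag controls whether cyclic garbage detection runs as well. A hedged usage sketch follows; the keyword form of the gc argument and the time.time()-style value of t are assumptions, only the semantics above come from the interface.

import time

def pack_old_revisions(storage):
    # Remove non-current revisions at least a week old.  Revisions whose
    # timestamp equals the pack time are now eligible too ("older than or
    # the same age as t").
    t = time.time() - 7 * 24 * 3600
    storage.pack(t, gc=False)   # incremental pack only
    storage.pack(t, gc=True)    # incremental pack plus garbage collection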


=== Zope3/src/zodb/storage/fsdump.py 1.5 => 1.6 ===
--- Zope3/src/zodb/storage/fsdump.py:1.5	Mon Mar 17 15:18:27 2003
+++ Zope3/src/zodb/storage/fsdump.py	Tue Apr 22 11:23:13 2003
@@ -11,98 +11,4 @@
 # FOR A PARTICULAR PURPOSE.
 #
 ##############################################################################
-"""A low-level utility to dump the internal FileStorage representation."""
-
-import struct
-from zodb.storage.file \
-     import TRANS_HDR, TRANS_HDR_LEN, DATA_HDR, DATA_HDR_LEN
-from zodb.utils import u64
-from zodb.storage.base import splitrefs
-from zodb.storage.tests.base import zodb_unpickle
-
-def fmt(p64):
-    # Return a nicely formatted string for a packaged 64-bit value
-    return "%016x" % u64(p64)
-
-def dump(path, dest=None):
-    Dumper(path, dest).dump()
-
-class Dumper:
-    """A very verbose dumper for debugging FileStorage problems."""
-
-    def __init__(self, path, dest=None):
-        self.file = open(path, "rb")
-        self.dest = dest
-
-    def dump(self):
-        fid = self.file.read(1024)
-        print >> self.dest, "*" * 60
-        print >> self.dest, "file identifier: %r" % fid[:4]
-        print >> self.dest, "database version: %r" % fid[4:8]
-        # XXX perhaps verify that the rest of the metadata is nulls?
-        while self.dump_txn():
-            pass
-
-    def dump_txn(self):
-        pos = self.file.tell()
-        h = self.file.read(TRANS_HDR_LEN)
-        if not h:
-            return False
-        tid, tlen, status, ul, dl, el = struct.unpack(TRANS_HDR, h)
-        end = pos + tlen
-        print >> self.dest, "=" * 60
-        print >> self.dest, "offset: %d" % pos
-        print >> self.dest, "end pos: %d" % end
-        print >> self.dest, "transaction id: %s" % fmt(tid)
-        print >> self.dest, "trec len: %d" % tlen
-        print >> self.dest, "status: %r" % status
-        user = descr = extra = ""
-        if ul:
-            user = self.file.read(ul)
-        if dl:
-            descr = self.file.read(dl)
-        if el:
-            extra = self.file.read(el)
-        print >> self.dest, "user: %r" % user
-        print >> self.dest, "description: %r" % descr
-        print >> self.dest, "len(extra): %d" % el
-        while self.file.tell() < end:
-            self.dump_data(pos)
-        tlen2 = u64(self.file.read(8))
-        print >> self.dest, "redundant trec len: %d" % tlen2
-        return True
-
-    def dump_data(self, tloc):
-        pos = self.file.tell()
-        h = self.file.read(DATA_HDR_LEN)
-        assert len(h) == DATA_HDR_LEN
-        oid, revid, prev, tloc, vlen, nrefs, dlen = struct.unpack(DATA_HDR, h)
-        print >> self.dest, "-" * 60
-        print >> self.dest, "offset: %d" % pos
-        print >> self.dest, "oid: %s" % fmt(oid)
-        print >> self.dest, "revid: %s" % fmt(revid)
-        print >> self.dest, "previous record offset: %d" % prev
-        print >> self.dest, "transaction offset: %d" % tloc
-        if vlen:
-            pnv = self.file.read(8)
-            sprevdata = self.file.read(8)
-            version = self.file.read(vlen)
-            print >> self.dest, "version: %r" % version
-            print >> self.dest, "non-version data offset: %d" % u64(pnv)
-            print >> self.dest, \
-                  "previous version data offset: %d" % u64(sprevdata)
-        print >> self.dest, 'numrefs:', nrefs
-        for ref in splitrefs(self.file.read(nrefs * 8)):
-            print >> self.dest, '\t%s' % fmt(ref)
-        print >> self.dest, "len(data): %d" % dlen
-        data = self.file.read(dlen)
-        # A debugging feature for use with the test suite.
-        if data.startswith("(czodb.storage.tests.minpo\nMinPO\n"):
-            print >> self.dest, "value: %r" % zodb_unpickle(data).value
-        if not dlen:
-            sbp = self.file.read(8)
-            print >> self.dest, "backpointer: %d" % u64(sbp)
-
-if __name__ == "__main__":
-    import sys
-    Dumper(sys.argv[1]).dump()
+from zodb.storage.file.dump import *
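
fsdump.py is now a thin shim over the dumper that moved into the new zodb.storage.file package. Assuming dump() is among the names pulled in by the star import (it was the public entry point of the old module), existing callers keep working unchanged:

# Illustrative only; assumes zodb.storage.file.dump still exports dump().
from zodb.storage.fsdump import dump

dump("Data.fs")   # verbose record-by-record dump of a file storage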


=== Zope3/src/zodb/storage/bdbminimal.py 1.17 => 1.18 ===
--- Zope3/src/zodb/storage/bdbminimal.py:1.17	Wed Apr  9 13:58:20 2003
+++ Zope3/src/zodb/storage/bdbminimal.py	Tue Apr 22 11:23:13 2003
@@ -23,6 +23,8 @@
 from zodb.conflict import ResolvedSerial
 from zodb.storage.base import db, BerkeleyBase, PackStop, _WorkThread
 from zodb.storage.base import splitrefs
+# For debugging
+from zodb.interfaces import _fmt_oid as fo
 
 ABORT = 'A'
 COMMIT = 'C'
@@ -101,7 +103,7 @@
         #     It is also used during pack to list objects for which no more
         #     references exist, such that the objects can be completely packed
         #     away.
-        pass
+        self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 8)
 
     def _version_check(self, txn):
         version = self._info.get('version')
@@ -472,6 +474,7 @@
         finally:
             c.close()
         # We're done with the mark table
+        self._packmark.truncate(txn=txn)
 
     def _collect_objs(self, txn):
         orec = self._oidqueue.consume(txn)
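
bdbminimal.py now creates its own oidqueue (a BerkeleyDB queue of fixed-length 8-byte records, one oid per record) instead of inheriting it from base.py, and truncates packmark once the mark phase finishes. The sketch below shows the queue idiom the pack code relies on, append on one end and consume on the other, written against the bsddb3-style bindings with no environment or transactions; the file name and flags are illustrative.

from bsddb3 import db   # the same module reaches this code via zodb.storage.base

oidqueue = db.DB()
oidqueue.set_re_len(8)                # queue databases need fixed-length records
oidqueue.open("oidqueue-example.db", dbtype=db.DB_QUEUE, flags=db.DB_CREATE)

oidqueue.append(b"\0" * 8)            # enqueue the root oid (ZERO)
rec = oidqueue.consume()              # (record number, 8-byte oid) or None
while rec:
    recno, oid = rec
    # ... examine oid, appending any oids it references ...
    rec = oidqueue.consume()
oidqueue.close()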


=== Zope3/src/zodb/storage/bdbfull.py 1.23 => 1.24 ===
--- Zope3/src/zodb/storage/bdbfull.py:1.23	Thu Apr 10 15:06:53 2003
+++ Zope3/src/zodb/storage/bdbfull.py	Tue Apr 22 11:23:13 2003
@@ -31,6 +31,8 @@
 from zodb.storage.base import db, BerkeleyBase, PackStop, _WorkThread, \
      splitrefs
 from zodb.storage._helper import incr
+# For debugging
+from zodb.interfaces import _fmt_oid as fo
 
 ABORT = 'A'
 COMMIT = 'C'
@@ -120,7 +122,7 @@
         #     pending table is empty, the oids, pvids, and prevrevids tables
         #     must also be empty.
         #
-        # packmark -- [oid]
+        # packmark -- oid -> [tid]
         #     Every object reachable from the root during a classic pack
         #     operation will have its oid present in this table.
         #
@@ -232,6 +234,7 @@
         # Tables to support packing.
         self._objrevs = self._setupDB('objrevs', db.DB_DUP)
         self._delqueue = self._setupDB('delqueue', 0, db.DB_QUEUE, 8)
+        self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 16)
 
     def _version_check(self, txn):
         version = self._info.get('version')
@@ -452,7 +455,7 @@
         # created in the interim.
         if self._packing:
             for oid in self._oids.keys():
-                self._packmark.put(oid, PRESENT, txn=txn)
+                self._packmark.put(oid, tid, txn=txn)
         self._oids.truncate(txn)
 
     def _dobegin(self, txn, tid):
@@ -1422,8 +1425,6 @@
                     if self._metadata.has_key(orevid):
                         metadata = self._metadata[orevid]
                         self._metadata.delete(orevid, txn=txn)
-                        if self._references.has_key(orevid):
-                            self._references.delete(orevid, txn=txn)
                         # Decref the pickle
                         self._decrefPickle(oid, metadata[16:24], txn)
                     try:
@@ -1452,7 +1453,7 @@
         refcount = u64(self._pickleRefcounts.get(revid, ZERO)) - 1
         assert refcount >= 0
         if refcount == 0:
-            # We can collect this pickle
+            # We can collect this pickle and the references
             self._pickleRefcounts.delete(revid, txn=txn)
             self._pickles.delete(revid, txn=txn)
             # And decref all objects pointed to by this pickle
@@ -1461,6 +1462,7 @@
                 deltas = {}
                 self._update(deltas, references, -1)
                 self._decref(deltas, txn)
+                self._references.delete(revid, txn=txn)
         else:
             self._pickleRefcounts.put(revid, p64(refcount), txn=txn)
 
@@ -1550,7 +1552,7 @@
         # BAW: Maybe this could probably be more efficient by not doing so
         # much searching, but it would also be more complicated, so the
         # tradeoff should be measured.
-        serial = None
+        serial, tid = self._getSerialAndTid(oid)
         c = self._metadata.cursor(txn=txn)
         try:
             rec = c.set_range(oid)
@@ -1568,9 +1570,60 @@
             c.close()
         return serial
 
+    def _rootset(self, packtid, txn):
+        c = self._txnoids.cursor(txn)
+        try:
+            rec = c.first()
+            while rec:
+                tid, oid = rec
+                rec = c.next()
+        finally:
+            c.close()
+        # Find the root set for reachability purposes.  A root set is a tuple
+        # of oid and tid.  First, the current root object as of the pack time
+        # is always in the root set.  Second, any object revision after the
+        # pack time that has a back pointer (lrevid) to before the pack time
+        # serves as another root because some future undo could then revive
+        # any referenced objects.
+        try:
+            zerorev = self._findrev(ZERO, packtid, txn)
+        except KeyError:
+            # There's no root object
+            return
+        self._oidqueue.append(ZERO+zerorev, txn)
+        c = self._txnoids.cursor(txn)
+        try:
+            try:
+                rec = c.set_range(packtid)
+            except db.DBNotFoundError:
+                rec = None
+            while rec:
+                tid, oid = rec
+                revid = oid + tid
+                rec = c.next()
+                lrevid = self._metadata[revid][16:24]
+                if lrevid < packtid:
+                    self._oidqueue.append(revid, txn)
+        finally:
+            c.close()
+
+    # tid is None if all we care about is that any object revision is present.
+    def _packmark_has(self, oid, tid, txn):
+        if tid is None:
+            return self._packmark.has_key(oid)
+        c = self._packmark.cursor(txn)
+        try:
+            try:
+                c.set_both(oid, tid)
+                return True
+            except db.DBNotFoundError:
+                return False
+        finally:
+            c.close()
+
     def _mark(self, txn, packtid):
         # Find the oids for all the objects reachable from the root, as of the
-        # pack time.  To reduce the amount of in-core memory we need do do a
+        # pack time.  To reduce the amount of in-core memory we need to do a
         # pack operation, we'll save the mark data in the packmark table.  The
         # oidqueue is a BerkeleyDB Queue that holds the list of object ids to
         # look at next, and by using this we don't need to keep an in-memory
@@ -1579,20 +1632,23 @@
         # Quick exit for empty storages
         if not self._serials:
             return
-        # The oid of the object we're looking at, starting at the root
-        oid = ZERO
-        # Start at the root, find all the objects the current revision of the
-        # root references, and then for each of those, find all the objects it
-        # references, and so on until we've traversed the entire object graph.
-        while oid:
+        self._rootset(packtid, txn)
+        rec = self._oidqueue.consume(txn)
+        while rec:
             if self._stop:
                 raise PackStop, 'stopped in _mark()'
-            if not self._packmark.has_key(oid):
-                # We haven't seen this object yet
-                self._packmark.put(oid, PRESENT, txn=txn)
-                # Get the list of references for the most current revision of
-                # this object as of the pack time.
-                tid = self._findrev(oid, packtid, txn)
+            revid = rec[1]
+            oid = revid[:8]
+            tid = revid[8:]
+            # See if this revision is already in the packmark
+            if not self._packmark_has(oid, tid, txn):
+                # BAW: We are more conservative than FileStorage here, since
+                # any reference to an object keeps all the object references
+                # alive.  FileStorage will collect individual object
+                # revisions.  I think our way is fine since we'll eventually
+                # collect everything incrementally anyway, and for Berkeley,
+                # all object revisions add to the refcount total.
+                self._packmark.put(oid, tid, txn=txn)
                 # Say there's no root object (as is the case in some of the
                 # unit tests), and we're looking up oid ZERO.  Then serial
                 # will be None.
@@ -1602,11 +1658,13 @@
                     # object revision
                     references = self._references.get(oid+lrevid)
                     if references:
-                        for oid in splitrefs(references):
-                            self._oidqueue.append(oid, txn)
+                        for roid in splitrefs(references):
+                            # Find the most recent object revision as of the
+                            # timestamp of the under-focus revision.
+                            rtid = self._findrev(roid, tid, txn)
+                            self._oidqueue.append(roid+rtid, txn)
             # Pop the next oid off the queue and do it all again
             rec = self._oidqueue.consume(txn)
-            oid = rec and rec[1]
         assert len(self._oidqueue) == 0
 
     def _sweep(self, txn, packtid):
@@ -1627,7 +1685,7 @@
                 # Otherwise, if packmark (which knows about all the root
                 # reachable objects) doesn't have a record for this guy, then
                 # we can zap it.  Do so by appending to oidqueue.
-                if not self._packmark.has_key(oid):
+                if not self._packmark_has(oid, None, txn):
                     self._delqueue.append(oid, txn)
         finally:
             c.close()
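
Taken together, _rootset, _packmark_has, _mark, and _sweep implement the new pack for the full Berkeley storage: seed a queue with (oid, tid) revision ids for the root set (the root object as of the pack time plus any later revision whose back pointer reaches before it), mark every revision reachable from that set in the duplicate-keyed packmark table, then sweep any oid with no packmark entry into delqueue for collection. The following is a simplified, in-memory sketch of that algorithm; find_rev, get_references, and the plain Python containers are illustrative stand-ins for the BerkeleyDB tables and helper methods used above.

from collections import deque

ZERO = b"\0" * 8   # oid of the root object

def mark_and_sweep(all_oids, find_rev, get_references, extra_roots, packtid):
    """Return the set of oids an incremental pack could collect.

    all_oids       -- iterable of every oid in the storage
    find_rev(oid, tid) -- tid of the most recent revision of oid as of tid;
                          raises KeyError if there is none
    get_references(oid, tid) -- oids referenced by that revision
    extra_roots    -- (oid, tid) pairs for post-pack revisions whose back
                      pointer reaches before packtid (see _rootset)
    """
    # Phase 1: the root set.
    queue = deque()
    try:
        queue.append((ZERO, find_rev(ZERO, packtid)))
    except KeyError:
        return set()                    # no root object, nothing is reachable
    queue.extend(extra_roots)

    # Phase 2: mark.  packmark maps oid -> set of marked tids, mirroring the
    # duplicate-keyed packmark table (oid -> [tid]).
    packmark = {}
    while queue:
        oid, tid = queue.popleft()
        if tid in packmark.get(oid, ()):
            continue                    # this revision is already marked
        packmark.setdefault(oid, set()).add(tid)
        for roid in get_references(oid, tid):
            # Follow the most recent revision of the referenced object as of
            # the referencing revision's timestamp, as _mark now does.
            try:
                queue.append((roid, find_rev(roid, tid)))
            except KeyError:
                pass                    # referenced object has no revision yet

    # Phase 3: sweep.  Oids with no packmark entry at all are unreachable.
    return set(oid for oid in all_oids if oid not in packmark)

The real code keeps both the queue (a DB_QUEUE database of 16-byte oid+tid records) and the marks in BerkeleyDB tables precisely so the working set of a pack stays out of core memory, as the comment at the top of _mark explains.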


=== Zope3/src/zodb/storage/base.py 1.26 => 1.27 ===
--- Zope3/src/zodb/storage/base.py:1.26	Wed Apr  9 13:54:51 2003
+++ Zope3/src/zodb/storage/base.py	Tue Apr 22 11:23:13 2003
@@ -80,6 +80,7 @@
     _vote()
     _abort()
     _finish()
+    _clear_temp()
 
     If the subclass wants to implement IUndoStorage, it must implement
     all the methods in that interface.
@@ -535,8 +536,7 @@
         self._references = self._setupDB('references')
         self._oids = self._setupDB('oids')
         self._pending = self._setupDB('pending')
-        self._packmark = self._setupDB('packmark')
-        self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 8)
+        self._packmark = self._setupDB('packmark', db.DB_DUP)
         # Do storage specific initialization
         self._init()
         self._withtxn(self._version_check)
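
In base.py the packmark table is now opened with db.DB_DUP so a single oid key can hold one duplicate record per marked tid, and the oidqueue setup moves down into the concrete storages (8-byte oid records for bdbminimal, 16-byte oid+tid records for bdbfull). A small sketch of the duplicate-key membership test that _packmark_has builds on (cursor.set_both), again against the bsddb3-style bindings with no environment and an illustrative file name.

from bsddb3 import db   # stand-in for the db imported from zodb.storage.base

packmark = db.DB()
packmark.set_flags(db.DB_DUP)          # allow several tids under one oid
packmark.open("packmark-example.db", dbtype=db.DB_BTREE, flags=db.DB_CREATE)

oid = b"\0" * 7 + b"\x01"
packmark.put(oid, b"tid-0001")
packmark.put(oid, b"tid-0002")         # a second duplicate under the same key

def packmark_has(oid, tid=None):
    # tid is None: is any revision of oid marked at all?
    if tid is None:
        return packmark.has_key(oid)
    # Otherwise look for the exact (oid, tid) pair, as set_both does.
    c = packmark.cursor()
    try:
        try:
            c.set_both(oid, tid)
            return True
        except db.DBNotFoundError:
            return False
    finally:
        c.close()

assert packmark_has(oid)
assert packmark_has(oid, b"tid-0002")
assert not packmark_has(oid, b"tid-0003")
packmark.close()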

=== Removed File Zope3/src/zodb/storage/fsindex.py ===

=== Removed File Zope3/src/zodb/storage/file.py ===