[Zodb-checkins] CVS: StandaloneZODB/bsddb3Storage/bsddb3Storage - Full.py:1.40.2.2

Barry Warsaw barry@wooz.org
Wed, 17 Apr 2002 16:51:33 -0400


Update of /cvs-repository/StandaloneZODB/bsddb3Storage/bsddb3Storage
In directory cvs.zope.org:/tmp/cvs-serv28216/bsddb3Storage

Modified Files:
      Tag: bsddb3Storage-picklelog-branch
	Full.py 
Log Message:
Completion of Jim's idea to reduce the size of transactions by storing
the pickle data at the time of the store() call instead of at the time
of the finish() call.

I also added some commented-out support for hotshot profiling.

Specifically,

_setupDBs(): Add the picklelog table which maps oid+revid to the
empty string (the value is ignored).

close(): Close the picklelog table too.

_finish(): Support for the picklelog and CommitLog changes.  Also
introduced a bunch of micro-optimizations based on hotshot profiling.
Most of these are of the form: move direct Berkeley table
setting/getting out of the main CommitLog loop.  Instead do everything
for a single table at the same time, hopefully improving the locality
of reference.

_abort(): We need to truncate the picklelog after deleting all the
referenced pickles.

_log_object(): Private method to write the new pickle data and
picklelog entries.

store(): Use _log_object() to store the pickle data.


=== StandaloneZODB/bsddb3Storage/bsddb3Storage/Full.py 1.40.2.1 => 1.40.2.2 ===
 #DNE = 'nonexist'                                  # does not exist
 
+import _helper
 
 
 class Full(BerkeleyBase, ConflictResolvingStorage):
@@ -103,15 +104,13 @@
         # pickles -- {oid+revid -> pickle}
         #     Maps the concrete object referenced by oid+revid to that
         #     object's data pickle.
-
-        # Jim
         #
-        # picklelog -- {oid+revid}
+        # picklelog -- {oid+revid -> ''}
         #     Keeps a log of pickles that haven't been committed yet.
         #     This allows us to write pickles as we get them in the
-        #     in separate BDB transactions.
-        
-        
+        #     in separate BDB transactions.  The value of the mapping is
+        #     ignored.
+        #
         # These are used only by the Full implementation.
         #
         # vids -- {version_string -> vid}
@@ -182,7 +181,6 @@
         self._serials = self._setupDB('serials')
         self._pickles = self._setupDB('pickles')
         self._picklelog = self._setupDB('picklelog')
-        
         # These are specific to the full implementation
         self._vids            = self._setupDB('vids')
         self._versions        = self._setupDB('versions')
@@ -204,8 +202,10 @@
             self.__nextvid = 0L
         # DEBUGGING
         #self._nextserial = 0L
+        #self.profiler = hotshot.Profile('profile.dat', lineevents=1)
         
     def close(self):
+        #self.profiler.close()
         self._serials.close()
         self._pickles.close()
         self._picklelog.close()
@@ -232,7 +232,18 @@
             self._commitlog = FullLog(dir=self._env.db_home)
         self._commitlog.start()
 
+# To turn on hotshot profiling, uncomment the following function, and rename
+# _finish() to _real_finish().  Also, uncomment the creation of the
+# profiler in _setupDBs() and the closing of the profiler in close(), both
+# above.  Then check out the profout.py file for dumping out the profiling
+# information.
+#
+##    def _finish(self, tid, u, d, e):
+##        self.profiler.runcall(self._real_finish, tid, u, d, e)
+
     def _finish(self, tid, u, d, e):
+##        pack = struct.pack
+##        unpack = struct.unpack
         # This is called from the storage interface's tpc_finish() method.
         # Its responsibilities are to finish the transaction with the
         # underlying database.
@@ -270,6 +281,11 @@
             self._txnMetadata.put(tid,
                                   UNDOABLE_TRANSACTION + lengths + u + d + e,
                                   txn=txn)
+            picklekeys = []
+            metadata = []
+            picklerefcounts = {}
+            serials = []
+            refcounts = {}
             while 1:
                 rec = self._commitlog.next()
                 if rec is None:
@@ -280,55 +296,43 @@
                     # this object must be stored in the pickle table, the
                     # object metadata table, the currentVersions tables , and
                     # the transactions->oid table.
-                    oid, vid, nvrevid, lrevid, pickle, prevrevid = data
+                    oid, vid, nvrevid, lrevid, refdoids, prevrevid = data
                     key = oid + tid
-                    if pickle:
-                        # This was the result of a store() call which gives us
-                        # a brand new pickle, so we need to update the pickles
-                        # table.  The lrevid will be empty, and we make it the
-                        # tid of this transaction
+                    if refdoids is not None:
+                        # This was the result of a store() call which gave us
+                        # new pickle data.  Since the pickle is already
+                        # stored, we just need to twiddle with reference
+                        # counts.  We also need to clear the picklelog for
+                        # this object revision.
                         #
                         # Otherwise, this was the result of a commitVersion()
                         # or abortVersion() call, essentially moving the
                         # object to a new version.  We don't need to update
-                        # the pickle table because we aren't creating a new
+                        # any of the tables because we aren't creating a new
                         # pickle.
-                        self._pickles.put(key, pickle, txn=txn)
                         lrevid = tid
                         # Boost the refcount of all the objects referred to by
-                        # this pickle.  referencesf() scans a pickle and
-                        # returns the list of objects referenced by the
-                        # pickle.  BAW: the signature of referencesf() has
-                        # changed for Zope 2.4, to make it more convenient to
-                        # use.  Gotta stick with the backwards compatible
-                        # version for now.
+                        # this pickle.
                         #
                         # FIXME: need to watch for two object revisions in the
                         # same transaction and only bump the refcount once,
                         # since we only keep the last of any such revisions.
-                        refdoids = []
-                        referencesf(pickle, refdoids)
                         for roid in refdoids:
-                            refcount = self._refcounts.get(roid, ZERO, txn=txn)
-                            refcount = p64(U64(refcount) + 1)
-                            self._refcounts.put(roid, refcount, txn=txn)
+                            refcounts[roid] = refcounts.get(roid, 0) + 1
                     # Update the metadata table
-                    self._metadata.put(key, vid+nvrevid+lrevid+prevrevid,
-                                       txn=txn)
+                    metadata.append((key,
+                                     ''.join((vid,nvrevid,lrevid,prevrevid))))
                     # If we're in a real version, update this table too.  This
                     # ends up putting multiple copies of the vid/oid records
                     # in the table, but it's easier to weed those out later
                     # than to weed them out now.
                     if vid <> ZERO:
                         self._currentVersions.put(vid, oid, txn=txn)
-                    self._serials.put(oid, tid, txn=txn)
-                    self._txnoids.put(tid, oid, txn=txn)
+                    serials.append((oid, tid))
                     # Update the pickle's reference count.  Remember, the
                     # refcount is stored as a string, so we have to do the
                     # string->long->string dance.
-                    refcount = self._pickleRefcounts.get(key, ZERO, txn=txn)
-                    refcount = p64(U64(refcount) + 1)
-                    self._pickleRefcounts.put(key, refcount, txn=txn)
+                    picklerefcounts[key] = picklerefcounts.get(key, 0) + 1
                 elif op == 'v':
                     # This is a "create-a-version" record
                     version, vid = data
@@ -345,6 +349,24 @@
                             rec = c.next_dup()
                     finally:
                         c.close()
+            # serials holds (oid, tid) pairs; looping over it twice is faster
+            for oid, tid in serials:
+                self._txnoids.put(tid, oid, txn=txn)
+            for oid, tid in serials:
+                self._serials.put(oid, tid, txn=txn)
+            for key, data in metadata:
+                self._metadata.put(key, data, txn=txn)
+            add = _helper.incr
+            for roid, delta in refcounts.items():
+                refcount = self._refcounts.get(roid, ZERO, txn=txn)
+#                refcount = pack('>Q', unpack('>Q', refcount)[0] + delta)
+                self._refcounts.put(roid, add(refcount, delta), txn=txn)
+            for key, delta in picklerefcounts.items():
+                refcount = self._pickleRefcounts.get(key, ZERO, txn=txn)
+#                refcount = pack('>Q', unpack('>Q', refcount)[0] + delta)
+                self._pickleRefcounts.put(key, add(refcount, delta), txn=txn)
+            # We're done with the picklelog
+            self._picklelog.truncate(txn)
         except:
             # If any errors whatsoever occurred, abort the transaction with
             # Berkeley, leave the commit log file in the PROMISED state (since
@@ -357,6 +379,15 @@
             txn.commit()
             self._closelog()
 
+    def _abort(self):
+        # Delete every pickle whose key is listed in the picklelog (those
+        # pickles are not committed yet), since we're aborting this transaction.
+        for key in self._picklelog.keys():
+            del self._pickles[key]
+        # Done with the picklelog; empty it, then run the base class abort.
+        self._picklelog.truncate()
+        BerkeleyBase._abort(self)
+
     #
     # Do some things in a version
     #
@@ -597,6 +628,28 @@
             self._commitlog.write_new_version(version, vid)
         return vid
 
+    def _log_object(self, oid, vid, nvrevid, data, oserial):
+        # Save data for later commit.  The complete pickle is written straight
+        # to the pickles table in its own BDB transaction, and its key is
+        # recorded in the picklelog so that an abort can find and delete it.
+        # Only the oids the pickle references are saved in the commit log.
+        refdoids = []
+        referencesf(data, refdoids)
+        # Record the update to this object in the commit log.
+        self._commitlog.write_object(oid, vid, nvrevid, refdoids, oserial)
+        # Save the pickle in the database:
+        txn = self._env.txn_begin()
+        try:
+            key = oid + self._serial
+            # Store the whole pickle; a truncated one could not be loaded.
+            self._pickles.put(key, data, txn=txn)
+            self._picklelog.put(key, '', txn=txn)
+        except:
+            txn.abort()
+            raise
+        else:
+            txn.commit()
+
     def store(self, oid, serial, data, version, transaction):
         # Transaction equivalence guard
         if transaction is not self._transaction:
@@ -661,10 +714,8 @@
                         tuple(map(U64, (oid, ovid, vid))))
                 else:
                     nvrevid = onvrevid
-
+            # Store the object
             self._log_object(oid, vid, nvrevid, data, oserial)
-
-            
         finally:
             self._lock_release()
         # Return our cached serial number for the object.  If conflict