[Checkins] SVN: ZODB/branches/3.8/src/ZODB/FileStorage/FileStorage.py Removed the transaction id cache.

Jim Fulton jim at zope.com
Wed Sep 17 14:13:33 EDT 2008


Log message for revision 91211:
  Removed the transaction id cache.
  

Changed:
  U   ZODB/branches/3.8/src/ZODB/FileStorage/FileStorage.py

-=-
Modified: ZODB/branches/3.8/src/ZODB/FileStorage/FileStorage.py
===================================================================
--- ZODB/branches/3.8/src/ZODB/FileStorage/FileStorage.py	2008-09-17 16:21:55 UTC (rev 91210)
+++ ZODB/branches/3.8/src/ZODB/FileStorage/FileStorage.py	2008-09-17 18:13:33 UTC (rev 91211)
@@ -43,8 +43,6 @@
 from ZODB.loglevels import BLATHER
 from ZODB.fsIndex import fsIndex
 
-import BTrees.OOBTree
-
 packed_version = "FS21"
 
 logger = logging.getLogger('ZODB.FileStorage')
@@ -122,10 +120,8 @@
 
         BaseStorage.BaseStorage.__init__(self, file_name)
 
-        (index, vindex, tindex, tvindex,
-         oid2tid, toid2tid, toid2tid_delete) = self._newIndexes()
-        self._initIndex(index, vindex, tindex, tvindex,
-                        oid2tid, toid2tid, toid2tid_delete)
+        index, vindex, tindex, tvindex = self._newIndexes()
+        self._initIndex(index, vindex, tindex, tvindex)
 
         # Now open the file
 
@@ -159,8 +155,7 @@
             self._used_index = 1 # Marker for testing
             index, vindex, start, ltid = r
 
-            self._initIndex(index, vindex, tindex, tvindex,
-                            oid2tid, toid2tid, toid2tid_delete)
+            self._initIndex(index, vindex, tindex, tvindex)
             self._pos, self._oid, tid = read_index(
                 self._file, file_name, index, vindex, tindex, stop,
                 ltid=ltid, start=start, read_only=read_only,
@@ -194,11 +189,7 @@
 
         self._quota = quota
 
-        # tid cache statistics.
-        self._oid2tid_nlookups = self._oid2tid_nhits = 0
-
-    def _initIndex(self, index, vindex, tindex, tvindex,
-                   oid2tid, toid2tid, toid2tid_delete):
+    def _initIndex(self, index, vindex, tindex, tvindex):
         self._index=index
         self._vindex=vindex
         self._tindex=tindex
@@ -206,32 +197,12 @@
         self._index_get=index.get
         self._vindex_get=vindex.get
 
-        # .store() needs to compare the passed-in serial to the
-        # current tid in the database.  _oid2tid caches the oid ->
-        # current tid mapping for non-version data (if the current
-        # record for oid is version data, the oid is not a key in
-        # _oid2tid).  The point is that otherwise seeking into the
-        # storage is needed to extract the current tid, and that's
-        # an expensive operation.  For example, if a transaction
-        # stores 4000 objects, and each random seek + read takes 7ms
-        # (that was approximately true on Linux and Windows tests in
-        # mid-2003), that's 28 seconds just to find the old tids.
-        # TODO:  Probably better to junk this and redefine _index as mapping
-        # oid to (offset, tid) pair, via a new memory-efficient BTree type.
-        self._oid2tid = oid2tid
-        # oid->tid map to transactionally add to _oid2tid.
-        self._toid2tid = toid2tid
-        # Set of oids to transactionally delete from _oid2tid (e.g.,
-        # oids reverted by undo, or for which the most recent record
-        # becomes version data).
-        self._toid2tid_delete = toid2tid_delete
-
     def __len__(self):
         return len(self._index)
 
     def _newIndexes(self):
         # hook to use something other than builtin dict
-        return fsIndex(), {}, {}, {}, BTrees.OOBTree.OOBTree(), {}, {}
+        return fsIndex(), {}, {}, {}
 
     _saved = 0
     def _save_index(self):
@@ -409,27 +380,6 @@
             # Log the error and continue
             logger.error("Error saving index on close()", exc_info=True)
 
-    # Return tid of most recent record for oid if that's in the
-    # _oid2tid cache.  Else return None.  It's important to use this
-    # instead of indexing _oid2tid directly so that cache statistics
-    # can be logged.
-    def _get_cached_tid(self, oid):
-        self._oid2tid_nlookups += 1
-        result = self._oid2tid.get(oid)
-        if result is not None:
-            self._oid2tid_nhits += 1
-
-        # Log a msg every ~8000 tries.
-        if self._oid2tid_nlookups & 0x1fff == 0:
-            logger.log(BLATHER,
-                    "_oid2tid size %s lookups %s hits %s rate %.1f%%",
-                    len(self._oid2tid),
-                    self._oid2tid_nlookups,
-                    self._oid2tid_nhits,
-                    100.0 * self._oid2tid_nhits / self._oid2tid_nlookups)
-
-        return result
-
     def abortVersion(self, src, transaction):
         return self.commitVersion(src, '', transaction, abort=True)
 
@@ -504,7 +454,6 @@
 
             srcpos = h.vprev
             spos = p64(srcpos)
-        self._toid2tid_delete.update(current_oids)
         return self._tid, oids
 
     def getSize(self):
@@ -616,24 +565,23 @@
             if oid > self._oid:
                 self.set_max_oid(oid)
             old = self._index_get(oid, 0)
-            cached_tid = None
+            committed_tid = None
             pnv = None
             if old:
-                cached_tid = self._get_cached_tid(oid)
-                if cached_tid is None:
-                    h = self._read_data_header(old, oid)
-                    if h.version:
-                        if h.version != version:
-                            raise VersionLockError(oid, h.version)
-                        pnv = h.pnv
-                    cached_tid = h.tid
+                h = self._read_data_header(old, oid)
+                if h.version:
+                    if h.version != version:
+                        raise VersionLockError(oid, h.version)
+                    pnv = h.pnv
+                committed_tid = h.tid
 
-                if oldserial != cached_tid:
-                    rdata = self.tryToResolveConflict(oid, cached_tid,
+                if oldserial != committed_tid:
+                    rdata = self.tryToResolveConflict(oid, committed_tid,
                                                      oldserial, data)
                     if rdata is None:
                         raise POSException.ConflictError(
-                            oid=oid, serials=(cached_tid, oldserial), data=data)
+                            oid=oid, serials=(committed_tid, oldserial),
+                            data=data)
                     else:
                         data = rdata
 
@@ -651,9 +599,6 @@
                     pnv = old
                 new.setVersion(version, pnv, pv)
                 self._tvindex[version] = here
-                self._toid2tid_delete[oid] = 1
-            else:
-                self._toid2tid[oid] = self._tid
 
             self._tfile.write(new.asString())
             self._tfile.write(data)
@@ -663,7 +608,7 @@
                 raise FileStorageQuotaError(
                     "The storage quota has been exceeded.")
 
-            if old and oldserial != cached_tid:
+            if old and oldserial != committed_tid:
                 return ConflictResolution.ResolvedSerial
             else:
                 return self._tid
@@ -771,9 +716,6 @@
                     vprev = self._vindex.get(version, 0)
                 new.setVersion(version, pnv, vprev)
                 self._tvindex[version] = here
-                self._toid2tid_delete[oid] = 1
-            else:
-                self._toid2tid[oid] = serial
 
             self._tfile.write(new.asString())
 
@@ -822,8 +764,6 @@
     def _clear_temp(self):
         self._tindex.clear()
         self._tvindex.clear()
-        self._toid2tid.clear()
-        self._toid2tid_delete.clear()
         if self._tfile is not None:
             self._tfile.seek(0)
 
@@ -894,13 +834,7 @@
 
             self._index.update(self._tindex)
             self._vindex.update(self._tvindex)
-            self._oid2tid.update(self._toid2tid)
-            for oid in self._toid2tid_delete.keys():
-                try:
-                    del self._oid2tid[oid]
-                except KeyError:
-                    pass
-
+            
             # Update the number of records that we've written
             # +1 for the transaction record
             self._records_written += len(self._tindex) + 1
@@ -945,17 +879,12 @@
     def getTid(self, oid):
         self._lock_acquire()
         try:
-            result = self._get_cached_tid(oid)
-            if result is None:
-                pos = self._lookup_pos(oid)
-                h = self._read_data_header(pos, oid)
-                if h.plen == 0 and h.back == 0:
-                    # Undone creation
-                    raise POSKeyError(oid)
-                else:
-                    result = h.tid
-                    self._oid2tid[oid] = result
-            return result
+            pos = self._lookup_pos(oid)
+            h = self._read_data_header(pos, oid)
+            if h.plen == 0 and h.back == 0:
+                # Undone creation
+                raise POSKeyError(oid)
+            return h.tid
         finally:
             self._lock_release()
 
@@ -1103,10 +1032,6 @@
         tpos = self._txn_find(tid, 1)
         tindex = self._txn_undo_write(tpos)
         self._tindex.update(tindex)
-        # Arrange to clear the affected oids from the oid2tid cache.
-        # It's too painful to try to update them to correct current
-        # values instead.
-        self._toid2tid_delete.update(tindex)
         return self._tid, tindex.keys()
 
     def _txn_find(self, tid, stop_at_pack):
@@ -1343,9 +1268,7 @@
                 # OK, we're beyond the point of no return
                 os.rename(self._file_name + '.pack', self._file_name)
                 self._file = open(self._file_name, 'r+b')
-                self._initIndex(p.index, p.vindex, p.tindex, p.tvindex,
-                                p.oid2tid, p.toid2tid,
-                                p.toid2tid_delete)
+                self._initIndex(p.index, p.vindex, p.tindex, p.tvindex)
                 self._pos = opos
                 self._save_index()
             finally:

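For reference, the conflict check in store() after this change comes down to reading the committed tid straight from the current data record and comparing it with the serial supplied by the client; the oid -> tid cache no longer sits in between. Below is a minimal, self-contained sketch of that pattern, not ZODB code: the names (ToyStorage, records) are made up for illustration, the file-backed index and data-header read are replaced by a dict, and conflict resolution is reduced to raising an exception instead of calling tryToResolveConflict().

    class ConflictError(Exception):
        """Stand-in for ZODB.POSException.ConflictError."""

    class ToyStorage:
        def __init__(self):
            # oid -> (tid, data) for the most recently committed record.
            # In FileStorage this is a file position in self._index plus
            # a data-header read; here a dict stands in for both.
            self.records = {}

        def store(self, oid, oldserial, data, tid):
            old = self.records.get(oid)
            if old is not None:
                committed_tid = old[0]   # always read, never cached
                if oldserial != committed_tid:
                    # The real store() first tries conflict resolution;
                    # this sketch just reports the conflict.
                    raise ConflictError(oid, (committed_tid, oldserial))
            self.records[oid] = (tid, data)
            return tid

    # Example: storing against a stale serial raises a conflict.
    s = ToyStorage()
    s.store('obj-1', None, 'v1', tid='T1')
    s.store('obj-1', 'T1', 'v2', tid='T2')       # ok, serial matches
    try:
        s.store('obj-1', 'T1', 'v3', tid='T3')   # stale serial
    except ConflictError as exc:
        print('conflict:', exc)

With the cache gone, every store() of an existing object pays one seek plus header read to fetch the committed tid, which is the cost the removed _oid2tid machinery was originally added to avoid.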

