[Checkins] SVN: ZODB/trunk/src/ZODB/FileStorage/FileStorage.py Removed the transaction id cache.

Jim Fulton jim at zope.com
Wed Sep 17 16:35:47 EDT 2008


Log message for revision 91215:
  Removed the transaction id cache.
  

Changed:
  U   ZODB/trunk/src/ZODB/FileStorage/FileStorage.py

-=-
Modified: ZODB/trunk/src/ZODB/FileStorage/FileStorage.py
===================================================================
--- ZODB/trunk/src/ZODB/FileStorage/FileStorage.py	2008-09-17 20:35:43 UTC (rev 91214)
+++ ZODB/trunk/src/ZODB/FileStorage/FileStorage.py	2008-09-17 20:35:47 UTC (rev 91215)
@@ -44,8 +44,6 @@
 from ZODB.loglevels import BLATHER
 from ZODB.fsIndex import fsIndex
 
-import BTrees.OOBTree
-
 packed_version = "FS21"
 
 logger = logging.getLogger('ZODB.FileStorage')
@@ -125,8 +123,8 @@
 
         BaseStorage.BaseStorage.__init__(self, file_name)
 
-        (index, tindex, oid2tid, toid2tid, toid2tid_delete) = self._newIndexes()
-        self._initIndex(index, tindex, oid2tid, toid2tid, toid2tid_delete)
+        index, tindex = self._newIndexes()
+        self._initIndex(index, tindex)
 
         # Now open the file
 
@@ -160,7 +158,7 @@
             self._used_index = 1 # Marker for testing
             index, start, ltid = r
 
-            self._initIndex(index, tindex, oid2tid, toid2tid, toid2tid_delete)
+            self._initIndex(index, tindex)
             self._pos, self._oid, tid = read_index(
                 self._file, file_name, index, tindex, stop,
                 ltid=ltid, start=start, read_only=read_only,
@@ -194,37 +192,17 @@
 
         self._quota = quota
 
-        # tid cache statistics.
-        self._oid2tid_nlookups = self._oid2tid_nhits = 0
-
-    def _initIndex(self, index, tindex, oid2tid, toid2tid, toid2tid_delete):
+    def _initIndex(self, index, tindex):
         self._index=index
         self._tindex=tindex
         self._index_get=index.get
 
-        # .store() needs to compare the passed-in serial to the
-        # current tid in the database.  _oid2tid caches the oid ->
-        # current tid mapping. The point is that otherwise seeking into the
-        # storage is needed to extract the current tid, and that's
-        # an expensive operation.  For example, if a transaction
-        # stores 4000 objects, and each random seek + read takes 7ms
-        # (that was approximately true on Linux and Windows tests in
-        # mid-2003), that's 28 seconds just to find the old tids.
-        # TODO:  Probably better to junk this and redefine _index as mapping
-        # oid to (offset, tid) pair, via a new memory-efficient BTree type.
-        self._oid2tid = oid2tid
-        # oid->tid map to transactionally add to _oid2tid.
-        self._toid2tid = toid2tid
-        # Set of oids to transactionally delete from _oid2tid (e.g.,
-        # oids reverted by undo).
-        self._toid2tid_delete = toid2tid_delete
-
     def __len__(self):
         return len(self._index)
 
     def _newIndexes(self):
         # hook to use something other than builtin dict
-        return fsIndex(), {}, BTrees.OOBTree.OOBTree(), {}, {}
+        return fsIndex(), {}
 
     _saved = 0
     def _save_index(self):
@@ -394,27 +372,6 @@
             # Log the error and continue
             logger.error("Error saving index on close()", exc_info=True)
 
-    # Return tid of most recent record for oid if that's in the
-    # _oid2tid cache.  Else return None.  It's important to use this
-    # instead of indexing _oid2tid directly so that cache statistics
-    # can be logged.
-    def _get_cached_tid(self, oid):
-        self._oid2tid_nlookups += 1
-        result = self._oid2tid.get(oid)
-        if result is not None:
-            self._oid2tid_nhits += 1
-
-        # Log a msg every ~8000 tries.
-        if self._oid2tid_nlookups & 0x1fff == 0:
-            logger.log(BLATHER,
-                    "_oid2tid size %s lookups %s hits %s rate %.1f%%",
-                    len(self._oid2tid),
-                    self._oid2tid_nlookups,
-                    self._oid2tid_nhits,
-                    100.0 * self._oid2tid_nhits / self._oid2tid_nlookups)
-
-        return result
-
     def getSize(self):
         return self._pos
 
@@ -499,20 +456,19 @@
             if oid > self._oid:
                 self.set_max_oid(oid)
             old = self._index_get(oid, 0)
-            cached_tid = None
+            committed_tid = None
             pnv = None
             if old:
-                cached_tid = self._get_cached_tid(oid)
-                if cached_tid is None:
-                    h = self._read_data_header(old, oid)
-                    cached_tid = h.tid
+                h = self._read_data_header(old, oid)
+                committed_tid = h.tid
 
-                if oldserial != cached_tid:
-                    rdata = self.tryToResolveConflict(oid, cached_tid,
+                if oldserial != committed_tid:
+                    rdata = self.tryToResolveConflict(oid, committed_tid,
                                                      oldserial, data)
                     if rdata is None:
                         raise POSException.ConflictError(
-                            oid=oid, serials=(cached_tid, oldserial), data=data)
+                            oid=oid, serials=(committed_tid, oldserial),
+                            data=data)
                     else:
                         data = rdata
 
@@ -521,8 +477,6 @@
             self._tindex[oid] = here
             new = DataHeader(oid, self._tid, old, pos, 0, len(data))
 
-            self._toid2tid[oid] = self._tid
-
             self._tfile.write(new.asString())
             self._tfile.write(data)
 
@@ -531,7 +485,7 @@
                 raise FileStorageQuotaError(
                     "The storage quota has been exceeded.")
 
-            if old and oldserial != cached_tid:
+            if old and oldserial != committed_tid:
                 return ConflictResolution.ResolvedSerial
             else:
                 return self._tid
@@ -634,7 +588,6 @@
 
             # Write the recovery data record
             new = DataHeader(oid, serial, old, self._pos, 0, dlen)
-            self._toid2tid[oid] = serial
 
             self._tfile.write(new.asString())
 
@@ -656,8 +609,6 @@
 
     def _clear_temp(self):
         self._tindex.clear()
-        self._toid2tid.clear()
-        self._toid2tid_delete.clear()
         if self._tfile is not None:
             self._tfile.seek(0)
 
@@ -727,13 +678,7 @@
             self._pos = nextpos
 
             self._index.update(self._tindex)
-            self._oid2tid.update(self._toid2tid)
-            for oid in self._toid2tid_delete.keys():
-                try:
-                    del self._oid2tid[oid]
-                except KeyError:
-                    pass
-
+            
             # Update the number of records that we've written
             # +1 for the transaction record
             self._records_written += len(self._tindex) + 1
@@ -778,17 +723,12 @@
     def getTid(self, oid):
         self._lock_acquire()
         try:
-            result = self._get_cached_tid(oid)
-            if result is None:
-                pos = self._lookup_pos(oid)
-                h = self._read_data_header(pos, oid)
-                if h.plen == 0 and h.back == 0:
-                    # Undone creation
-                    raise POSKeyError(oid)
-                else:
-                    result = h.tid
-                    self._oid2tid[oid] = result
-            return result
+            pos = self._lookup_pos(oid)
+            h = self._read_data_header(pos, oid)
+            if h.plen == 0 and h.back == 0:
+                # Undone creation
+                raise POSKeyError(oid)
+            return h.tid
         finally:
             self._lock_release()
 
@@ -925,10 +865,6 @@
         tpos = self._txn_find(tid, 1)
         tindex = self._txn_undo_write(tpos)
         self._tindex.update(tindex)
-        # Arrange to clear the affected oids from the oid2tid cache.
-        # It's too painful to try to update them to correct current
-        # values instead.
-        self._toid2tid_delete.update(tindex)
         return self._tid, tindex.keys()
 
     def _txn_find(self, tid, stop_at_pack):
@@ -1098,9 +1034,7 @@
                 # OK, we're beyond the point of no return
                 os.rename(self._file_name + '.pack', self._file_name)
                 self._file = open(self._file_name, 'r+b')
-                self._initIndex(p.index, p.tindex,
-                                p.oid2tid, p.toid2tid,
-                                p.toid2tid_delete)
+                self._initIndex(p.index, p.tindex)
                 self._pos = opos
                 self._save_index()
             finally:

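For anyone skimming the diff: the deleted code implemented a small transactional cache mapping each oid to the tid of its most recent committed record, so that store() could compare serials without seeking into the file. Updates were staged per transaction (_toid2tid), evictions were staged for undo (_toid2tid_delete), and both were applied or dropped at commit/abort. Below is a minimal standalone sketch of that pattern; the class and method names are illustrative, not ZODB's actual API, and a set stands in for the dict ZODB used for staged deletions.

import logging

logger = logging.getLogger(__name__)

class TidCache(object):
    """Sketch of a transactional oid -> tid cache with hit-rate logging."""

    def __init__(self):
        self._oid2tid = {}            # committed oid -> tid
        self._toid2tid = {}           # updates staged by the current txn
        self._toid2tid_delete = set() # oids staged for eviction (e.g. undo)
        self._nlookups = self._nhits = 0

    def get(self, oid):
        # Mirrors the removed _get_cached_tid(): count every lookup so
        # the hit rate can be logged every ~8000 tries.
        self._nlookups += 1
        result = self._oid2tid.get(oid)
        if result is not None:
            self._nhits += 1
        if self._nlookups & 0x1fff == 0:
            logger.debug("size %s lookups %s hits %s rate %.1f%%",
                         len(self._oid2tid), self._nlookups, self._nhits,
                         100.0 * self._nhits / self._nlookups)
        return result

    def stage(self, oid, tid):
        # Called while a transaction stores an object; not visible to
        # get() until finish() applies it.
        self._toid2tid[oid] = tid

    def stage_delete(self, oid):
        # Called from undo: rather than compute the reverted-to tid,
        # just arrange to evict the oid from the cache.
        self._toid2tid_delete.add(oid)

    def finish(self):
        # Mirrors the removed _finish() logic: apply staged updates,
        # then staged evictions, then reset per-transaction state.
        self._oid2tid.update(self._toid2tid)
        for oid in self._toid2tid_delete:
            self._oid2tid.pop(oid, None)
        self.clear_temp()

    def clear_temp(self):
        # Mirrors _clear_temp(): drop per-transaction state on abort.
        self._toid2tid.clear()
        self._toid2tid_delete.clear()

# Example transaction lifecycle:
#   cache.stage(oid, tid) for each stored object, then
#   cache.finish() on tpc_finish, or cache.clear_temp() on abort.

After this change, store() and getTid() read the current tid straight from the data record header, paying the seek the cache was built to avoid; the removed TODO comment had already suggested junking the cache in favor of redefining _index to map each oid to an (offset, tid) pair.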

