[Checkins] SVN: zc.FileStorage/trunk/ Augmented the file-based reference storage with an in-memory lru cache

Jim Fulton jim at zope.com
Sun Dec 7 12:41:06 EST 2008


Log message for revision 93755:
  Augmented the file-based reference storage with an in-memory lru cache
  to improve performance. Switched to using marshal.
  
  Added memory logging on linux.
  
  Remove the pack log before packing.
  
  Only run ZODB and zc.FileStorage tests.
  

Changed:
  U   zc.FileStorage/trunk/buildout.cfg
  U   zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
  A   zc.FileStorage/trunk/src/zc/FileStorage/mru.py
  A   zc.FileStorage/trunk/src/zc/FileStorage/mru.txt
  U   zc.FileStorage/trunk/src/zc/FileStorage/tests.py

-=-
Modified: zc.FileStorage/trunk/buildout.cfg
===================================================================
--- zc.FileStorage/trunk/buildout.cfg	2008-12-07 17:11:09 UTC (rev 93754)
+++ zc.FileStorage/trunk/buildout.cfg	2008-12-07 17:41:06 UTC (rev 93755)
@@ -18,6 +18,9 @@
   tempfile.tempdir = os.path.abspath('tmp')
 
   import zc.FileStorage
+  zc.FileStorage.FileReferences.cache_size = 2
+  zc.FileStorage.FileReferences.entry_size = 2
+
   import ZODB.tests.VersionStorage, ZODB.tests.TransactionalUndoVersionStorage
   class C: pass
   ZODB.tests.VersionStorage.VersionStorage = C
@@ -28,3 +31,8 @@
   del ZODB.tests.testDB.DBTests.test_removeVersionPool_while_connection_open
   import ZODB.tests.testZODB
   del ZODB.tests.testZODB.ZODBTests.checkVersionOnly
+
+# There's no point in running the zeo tests, since zeo will run the
+# server in a separate process that won't see the zc.FileStorage
+# import.
+defaults = ['-s', 'ZODB', '-s', 'zc.FileStorage']

Modified: zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/__init__.py	2008-12-07 17:11:09 UTC (rev 93754)
+++ zc.FileStorage/trunk/src/zc/FileStorage/__init__.py	2008-12-07 17:41:06 UTC (rev 93755)
@@ -14,12 +14,14 @@
 
 import cPickle
 import logging
+import marshal
 import os
 import shutil
 import subprocess
 import sys
-import tempfile
 
+import zc.FileStorage.mru
+
 from ZODB.FileStorage.format import FileStorageFormatter, CorruptedDataError
 from ZODB.serialize import referencesf
 from ZODB.utils import p64, u64, z64
@@ -29,6 +31,7 @@
 import ZODB.FileStorage
 import ZODB.FileStorage.fspack
 import ZODB.fsIndex
+import ZODB.TimeStamp
 
 class OptionalSeekFile(file):
     """File that doesn't seek to current position.
@@ -77,7 +80,13 @@
             stop = self._stop,
             size = self.file_end,
             syspath = sys.path,
+            fr_cache_size = FileReferences.cache_size,
+            fr_entry_size = FileReferences.entry_size,
             ))
+        for name in 'error', 'log':
+            name = self._name+'.pack'+name
+            if os.path.exists(name):
+                os.remove(name)
         proc = subprocess.Popen(
             (sys.executable, script),
             stdin=subprocess.PIPE,
@@ -260,6 +269,10 @@
    '%%(asctime)s %%(name)s %%(levelname)s %%(message)s'))
 logging.getLogger().addHandler(handler)
 
+# The next 2 lines support testing:
+zc.FileStorage.FileReferences.cache_size = %(fr_cache_size)s
+zc.FileStorage.FileReferences.entry_size = %(fr_entry_size)s
+
 try:
     packer = zc.FileStorage.PackProcess(%(path)r, %(stop)r, %(size)r)
     packer.pack()
@@ -297,16 +310,34 @@
         self.ltid = z64
 
         self._freecache = _freefunc(self._file)
+        logging.info('packing to %s',
+                     ZODB.TimeStamp.TimeStamp(self._stop))
 
     def _read_txn_header(self, pos, tid=None):
         self._freecache(pos)
         return FileStoragePacker._read_txn_header(self, pos, tid)
 
+    def _log_memory(self): # only on linux, oh well
+        status_path = "/proc/%s/status" % os.getpid()
+        if not os.path.exists(status_path):
+            return
+        try:
+            f = open(status_path)
+        except IOError:
+            return
+
+        for line in f:
+            for name in ('Peak', 'Size', 'RSS'):
+                if line.startswith('Vm'+name):
+                    logging.info(line.strip())
+                
+
     def pack(self):
-        logging.info('started')
         do_gc = not os.path.exists(self._name+'.packnogc')
         packed, index, references, packpos = self.buildPackIndex(
             self._stop, self.file_end, do_gc)
+        logging.info('initial scan %s objects at %s', len(index), packpos)
+        self._log_memory()
         if packed:
             # nothing to do
             logging.info('done, nothing to do')
@@ -320,10 +351,12 @@
             index = self.gc(index, references)
 
         
+        self._log_memory()
         logging.info('copy to pack time')
         output = OptionalSeekFile(self._name + ".pack", "w+b")
         output._freecache = _freefunc(output)
         index, new_pos = self.copyToPacktime(packpos, index, output)
+        self._log_memory()
         if new_pos == packpos:
             # pack didn't free any data.  there's no point in continuing.
             self._file.close()
@@ -334,6 +367,7 @@
 
         logging.info('copy from pack time')
         self.copyFromPacktime(packpos, self.file_end, output, index)
+        self._log_memory()
 
         # Save the index so the parent process can use it as a starting point.
         f = open(self._name + ".packindex", 'wb')
@@ -347,8 +381,7 @@
 
     def buildPackIndex(self, stop, file_end, do_gc):
         index = ZODB.fsIndex.fsIndex()
-        references = MemoryReferences()
-        references = FileReferences(self._name)
+        references = self.ReferencesClass(self._name)
         pos = 4L
         packed = True
         if do_gc:
@@ -562,7 +595,7 @@
 
 class MemoryReferences:
 
-    def __init__(self):
+    def __init__(self, path):
         self.references = BTrees.LOBTree.LOBTree()
         self.clear = self.references.clear
 
@@ -612,28 +645,33 @@
 
 class FileReferences:
 
+    cache_size = 999
+    entry_size = 256
+
     def __init__(self, path):
-        self._tmp = tempfile.mkdtemp('.refs', dir=os.path.dirname(path))
-        self._path = self._data = None
+        self._cache = zc.FileStorage.mru.MRU(self.cache_size,
+                                             lambda k, v: v.save())
+        path += '.refs'
+        if os.path.isdir(path):
+            shutil.rmtree(path)
+        os.mkdir(path)
+        self._tmp = path
 
     def clear(self):
+        cache = self._cache
+        for k in cache:
+            cache[k].dirty = False
+        self._cache.clear()
         shutil.rmtree(self._tmp)
 
     def _load(self, oid):
-        base, index = divmod(long(oid), 256)
-        path = os.path.join(self._tmp, hex(base))
-        if path != self._path:
-            try:
-                f = open(path, 'rb')
-            except IOError:
-                assert not os.path.exists(path)
-                data = {}
-            else:
-                data = cPickle.Unpickler(f).load()
-                f.close()
-            self._data = data
-            self._path = path
-        return self._data, index
+        base, index = divmod(long(oid), self.entry_size)
+        key = hex(base)[2:-1]
+        data = self._cache.get(key)
+        if data is None:
+            data = _refdata(os.path.join(self._tmp, key))
+            self._cache[key] = data
+        return data, index
 
     def get(self, oid):
         data, index = self._load(oid)
@@ -643,12 +681,31 @@
         data, index = self._load(oid)
         if set(refs) != set(data.get(index, ())):
             data[index] = refs
-            cPickle.Pickler(open(self._path, 'wb'), 1).dump(data)
 
-
     def rmf(self, oid):
         data, index = self._load(oid)
         if index in data:
             del data[index]
-            cPickle.Pickler(open(self._path, 'wb'), 1).dump(data)
-            
+
+class _refdata(dict):
+    
+    def __init__(self, path):
+        self.path = path
+        if os.path.exists(path):
+            self.update(marshal.load(open(path, 'rb')))
+        self.dirty = False
+
+    def save(self):
+        if self.dirty:
+            marshal.dump(dict(self), open(self.path, 'wb'))
+            self.dirty = False
+
+    def __setitem__(self, key, value):
+        self.dirty = True
+        dict.__setitem__(self, key, value)
+
+    def __delitem__(self, key):
+        self.dirty = True
+        dict.__delitem__(self, key)
+
+PackProcess.ReferencesClass = FileReferences

Added: zc.FileStorage/trunk/src/zc/FileStorage/mru.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/mru.py	                        (rev 0)
+++ zc.FileStorage/trunk/src/zc/FileStorage/mru.py	2008-12-07 17:41:06 UTC (rev 93755)
@@ -0,0 +1,95 @@
+##############################################################################
+#
+# Copyright (c) Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
+class MRU:
+
+    def __init__(self, size, evicted=lambda k, v: None):
+        assert size > 0
+        self.size = size
+        self.evicted = evicted
+        self.data = {}
+        self.top = _node()
+
+    def clear(self):
+        while self.data:
+            self.pop()
+    
+    def __len__(self):
+        return len(self.data)
+
+    def __iter__(self):
+        # We can't do a generator. We have to take a snapshot, otherwise
+        # the client might do operations that would change the order!
+        result = []
+        top = node = self.top
+        while 1:
+            node = node.previous
+            if node is top:
+                break
+            result.append(node.key)
+        return iter(result)
+
+    def get(self, key, default=None):
+        node = self.data.get(key)
+        if node is None:
+            return default
+        if node.next != self.top:
+            node.unlink()
+            node.link(self.top)
+        return node.value
+
+    def __getitem__(self, key):
+        result = self.get(key, self)
+        if result is not self:
+            return result
+        raise KeyError(key)
+
+    def __setitem__(self, key, value):
+        assert value is not self
+        data = self.data
+        node = data.get(key)
+        if node is None:
+            node = _node(self.top)
+            data[key] = node
+            node.key = key
+            if len(data) > self.size:
+                self.pop()
+        node.value = value
+
+    def pop(self):
+        doomed = self.top.next
+        self.evicted(doomed.key, doomed.value)
+        del self.data[doomed.key]
+        doomed.unlink()
+        
+
+class _node:
+
+    next = previous = key = value = None
+
+    def __init__(self, next=None):
+        if next is None:
+            next = self
+        self.link(next)
+
+    def link(self, next):
+        self.next = next
+        self.previous = next.previous
+        next.previous = self
+        self.previous.next = self
+
+    def unlink(self):
+        self.next.previous = self.previous
+        self.previous.next = self.next
+        


Property changes on: zc.FileStorage/trunk/src/zc/FileStorage/mru.py
___________________________________________________________________
Added: svn:keywords
   + Id
Added: svn:eol-style
   + native

Added: zc.FileStorage/trunk/src/zc/FileStorage/mru.txt
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/mru.txt	                        (rev 0)
+++ zc.FileStorage/trunk/src/zc/FileStorage/mru.txt	2008-12-07 17:41:06 UTC (rev 93755)
@@ -0,0 +1,46 @@
+Simple most-recently-used cache
+===============================
+
+An mru cache is a simple mapping object that has a limited size. To
+create an mru cache, we call the MRU constructor passing a size and an
+optional eviction callback.  The eviction callback is called just
+before an item is evicted.
+
+    >>> def evicted(key, value):
+    ...     print 'evicted', key, value
+    >>> from zc.FileStorage.mru import MRU
+    >>> cache = MRU(5, evicted)
+    >>> len(cache), list(cache)
+    (0, [])
+
+We add items to the cache as we would any mapping object:
+
+    >>> cache[1] = 'one'
+    >>> cache[2] = 'two'
+    >>> cache[3] = 'three'
+    >>> cache[4] = 'four'
+    >>> cache[1]
+    'one'
+    >>> cache.get(3)
+    'three'
+
+    >>> len(cache), list(cache)
+    (4, [3, 1, 4, 2])
+
+Note the order of the keys. 3 and 1 are first because we accessed them most
+recently.  4 is next because it was added last and an add counts as an
+access.
+
+Let's add some more values:
+
+    >>> cache[5] = 'five'
+    >>> cache[6] = 'six'
+    evicted 2 two
+    >>> cache[7] = 'seven'
+    evicted 4 four
+
+    >>> len(cache), list(cache)
+    (5, [7, 6, 5, 3, 1])
+
+    >>> cache.get(4)
+


Property changes on: zc.FileStorage/trunk/src/zc/FileStorage/mru.txt
___________________________________________________________________
Added: svn:eol-style
   + native

Modified: zc.FileStorage/trunk/src/zc/FileStorage/tests.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/tests.py	2008-12-07 17:11:09 UTC (rev 93754)
+++ zc.FileStorage/trunk/src/zc/FileStorage/tests.py	2008-12-07 17:41:06 UTC (rev 93755)
@@ -22,6 +22,7 @@
 
 import os
 import unittest
+from zope.testing import doctest
 
 from ZODB.tests.testFileStorage import * # :-P
 from ZODB.tests.PackableStorage import * # :-P
@@ -137,7 +138,11 @@
     def checkPackWithGCOnDestinationAfterRestore(self):
         pass
 
+    def checkPackWithMultiDatabaseReferences(self):
+        pass
+
 def test_suite():
-    return unittest.TestSuite(unittest.makeSuite(NoGCFileStorageTests, "check"))
-
-    
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(NoGCFileStorageTests, "check"))
+    suite.addTest(doctest.DocFileSuite('mru.txt'))
+    return suite



More information about the Checkins mailing list