[Checkins] SVN: zc.FileStorage/trunk/ Augmented the file-based reference storage with an in-memory lru cache
Jim Fulton
jim at zope.com
Sun Dec 7 12:41:06 EST 2008
Log message for revision 93755:
Augmented the file-based reference storage with an in-memory lru cache
to improve performance. Switched to using marshal.
Added memory logging on linux.
Remove the pack log before packing.
Only run ZODB and zc.FileStorage tests.
Changed:
U zc.FileStorage/trunk/buildout.cfg
U zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
A zc.FileStorage/trunk/src/zc/FileStorage/mru.py
A zc.FileStorage/trunk/src/zc/FileStorage/mru.txt
U zc.FileStorage/trunk/src/zc/FileStorage/tests.py
-=-
Modified: zc.FileStorage/trunk/buildout.cfg
===================================================================
--- zc.FileStorage/trunk/buildout.cfg 2008-12-07 17:11:09 UTC (rev 93754)
+++ zc.FileStorage/trunk/buildout.cfg 2008-12-07 17:41:06 UTC (rev 93755)
@@ -18,6 +18,9 @@
tempfile.tempdir = os.path.abspath('tmp')
import zc.FileStorage
+ zc.FileStorage.FileReferences.cache_size = 2
+ zc.FileStorage.FileReferences.entry_size = 2
+
import ZODB.tests.VersionStorage, ZODB.tests.TransactionalUndoVersionStorage
class C: pass
ZODB.tests.VersionStorage.VersionStorage = C
@@ -28,3 +31,8 @@
del ZODB.tests.testDB.DBTests.test_removeVersionPool_while_connection_open
import ZODB.tests.testZODB
del ZODB.tests.testZODB.ZODBTests.checkVersionOnly
+
+# There's no point in running the zeo tests, since zeo will run the
+# server in a separate process that won't see the zc.FileStorage
+# import.
+defaults = ['-s', 'ZODB', '-s', 'zc.FileStorage']
Modified: zc.FileStorage/trunk/src/zc/FileStorage/__init__.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/__init__.py 2008-12-07 17:11:09 UTC (rev 93754)
+++ zc.FileStorage/trunk/src/zc/FileStorage/__init__.py 2008-12-07 17:41:06 UTC (rev 93755)
@@ -14,12 +14,14 @@
import cPickle
import logging
+import marshal
import os
import shutil
import subprocess
import sys
-import tempfile
+import zc.FileStorage.mru
+
from ZODB.FileStorage.format import FileStorageFormatter, CorruptedDataError
from ZODB.serialize import referencesf
from ZODB.utils import p64, u64, z64
@@ -29,6 +31,7 @@
import ZODB.FileStorage
import ZODB.FileStorage.fspack
import ZODB.fsIndex
+import ZODB.TimeStamp
class OptionalSeekFile(file):
"""File that doesn't seek to current position.
@@ -77,7 +80,13 @@
stop = self._stop,
size = self.file_end,
syspath = sys.path,
+ fr_cache_size = FileReferences.cache_size,
+ fr_entry_size = FileReferences.entry_size,
))
+ for name in 'error', 'log':
+ name = self._name+'.pack'+name
+ if os.path.exists(name):
+ os.remove(name)
proc = subprocess.Popen(
(sys.executable, script),
stdin=subprocess.PIPE,
@@ -260,6 +269,10 @@
'%%(asctime)s %%(name)s %%(levelname)s %%(message)s'))
logging.getLogger().addHandler(handler)
+# The next 2 lines support testing:
+zc.FileStorage.FileReferences.cache_size = %(fr_cache_size)s
+zc.FileStorage.FileReferences.entry_size = %(fr_entry_size)s
+
try:
packer = zc.FileStorage.PackProcess(%(path)r, %(stop)r, %(size)r)
packer.pack()
@@ -297,16 +310,34 @@
self.ltid = z64
self._freecache = _freefunc(self._file)
+ logging.info('packing to %s',
+ ZODB.TimeStamp.TimeStamp(self._stop))
def _read_txn_header(self, pos, tid=None):
self._freecache(pos)
return FileStoragePacker._read_txn_header(self, pos, tid)
+ def _log_memory(self): # only on linux, oh well
+ status_path = "/proc/%s/status" % os.getpid()
+ if not os.path.exists(status_path):
+ return
+ try:
+ f = open(status_path)
+ except IOError:
+ return
+
+ for line in f:
+ for name in ('Peak', 'Size', 'RSS'):
+ if line.startswith('Vm'+name):
+ logging.info(line.strip())
+
+
def pack(self):
- logging.info('started')
do_gc = not os.path.exists(self._name+'.packnogc')
packed, index, references, packpos = self.buildPackIndex(
self._stop, self.file_end, do_gc)
+ logging.info('initial scan %s objects at %s', len(index), packpos)
+ self._log_memory()
if packed:
# nothing to do
logging.info('done, nothing to do')
@@ -320,10 +351,12 @@
index = self.gc(index, references)
+ self._log_memory()
logging.info('copy to pack time')
output = OptionalSeekFile(self._name + ".pack", "w+b")
output._freecache = _freefunc(output)
index, new_pos = self.copyToPacktime(packpos, index, output)
+ self._log_memory()
if new_pos == packpos:
# pack didn't free any data. there's no point in continuing.
self._file.close()
@@ -334,6 +367,7 @@
logging.info('copy from pack time')
self.copyFromPacktime(packpos, self.file_end, output, index)
+ self._log_memory()
# Save the index so the parent process can use it as a starting point.
f = open(self._name + ".packindex", 'wb')
@@ -347,8 +381,7 @@
def buildPackIndex(self, stop, file_end, do_gc):
index = ZODB.fsIndex.fsIndex()
- references = MemoryReferences()
- references = FileReferences(self._name)
+ references = self.ReferencesClass(self._name)
pos = 4L
packed = True
if do_gc:
@@ -562,7 +595,7 @@
class MemoryReferences:
- def __init__(self):
+ def __init__(self, path):
self.references = BTrees.LOBTree.LOBTree()
self.clear = self.references.clear
@@ -612,28 +645,33 @@
class FileReferences:
+ cache_size = 999
+ entry_size = 256
+
def __init__(self, path):
- self._tmp = tempfile.mkdtemp('.refs', dir=os.path.dirname(path))
- self._path = self._data = None
+ self._cache = zc.FileStorage.mru.MRU(self.cache_size,
+ lambda k, v: v.save())
+ path += '.refs'
+ if os.path.isdir(path):
+ shutil.rmtree(path)
+ os.mkdir(path)
+ self._tmp = path
def clear(self):
+ cache = self._cache
+ for k in cache:
+ cache[k].dirty = False
+ self._cache.clear()
shutil.rmtree(self._tmp)
def _load(self, oid):
- base, index = divmod(long(oid), 256)
- path = os.path.join(self._tmp, hex(base))
- if path != self._path:
- try:
- f = open(path, 'rb')
- except IOError:
- assert not os.path.exists(path)
- data = {}
- else:
- data = cPickle.Unpickler(f).load()
- f.close()
- self._data = data
- self._path = path
- return self._data, index
+ base, index = divmod(long(oid), self.entry_size)
+ key = hex(base)[2:-1]
+ data = self._cache.get(key)
+ if data is None:
+ data = _refdata(os.path.join(self._tmp, key))
+ self._cache[key] = data
+ return data, index
def get(self, oid):
data, index = self._load(oid)
@@ -643,12 +681,31 @@
data, index = self._load(oid)
if set(refs) != set(data.get(index, ())):
data[index] = refs
- cPickle.Pickler(open(self._path, 'wb'), 1).dump(data)
-
def rmf(self, oid):
data, index = self._load(oid)
if index in data:
del data[index]
- cPickle.Pickler(open(self._path, 'wb'), 1).dump(data)
-
+
+class _refdata(dict):
+
+ def __init__(self, path):
+ self.path = path
+ if os.path.exists(path):
+ self.update(marshal.load(open(path, 'rb')))
+ self.dirty = False
+
+ def save(self):
+ if self.dirty:
+ marshal.dump(dict(self), open(self.path, 'wb'))
+ self.dirty = False
+
+ def __setitem__(self, key, value):
+ self.dirty = True
+ dict.__setitem__(self, key, value)
+
+ def __delitem__(self, key):
+ self.dirty = True
+ dict.__delitem__(self, key)
+
+PackProcess.ReferencesClass = FileReferences
Added: zc.FileStorage/trunk/src/zc/FileStorage/mru.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/mru.py (rev 0)
+++ zc.FileStorage/trunk/src/zc/FileStorage/mru.py 2008-12-07 17:41:06 UTC (rev 93755)
@@ -0,0 +1,95 @@
+##############################################################################
+#
+# Copyright (c) Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
+class MRU:
+
+ def __init__(self, size, evicted=lambda k, v: None):
+ assert size > 0
+ self.size = size
+ self.evicted = evicted
+ self.data = {}
+ self.top = _node()
+
+ def clear(self):
+ while self.data:
+ self.pop()
+
+ def __len__(self):
+ return len(self.data)
+
+ def __iter__(self):
+ # We can't use a generator. We have to take a snapshot, otherwise
+ # the client might do operations that would change the order!
+ result = []
+ top = node = self.top
+ while 1:
+ node = node.previous
+ if node is top:
+ break
+ result.append(node.key)
+ return iter(result)
+
+ def get(self, key, default=None):
+ node = self.data.get(key)
+ if node is None:
+ return default
+ if node.next != self.top:
+ node.unlink()
+ node.link(self.top)
+ return node.value
+
+ def __getitem__(self, key):
+ result = self.get(key, self)
+ if result is not self:
+ return result
+ raise KeyError(key)
+
+ def __setitem__(self, key, value):
+ assert value is not self
+ data = self.data
+ node = data.get(key)
+ if node is None:
+ node = _node(self.top)
+ data[key] = node
+ node.key = key
+ if len(data) > self.size:
+ self.pop()
+ node.value = value
+
+ def pop(self):
+ doomed = self.top.next
+ self.evicted(doomed.key, doomed.value)
+ del self.data[doomed.key]
+ doomed.unlink()
+
+
+class _node:
+
+ next = previous = key = value = None
+
+ def __init__(self, next=None):
+ if next is None:
+ next = self
+ self.link(next)
+
+ def link(self, next):
+ self.next = next
+ self.previous = next.previous
+ next.previous = self
+ self.previous.next = self
+
+ def unlink(self):
+ self.next.previous = self.previous
+ self.previous.next = self.next
+
Property changes on: zc.FileStorage/trunk/src/zc/FileStorage/mru.py
___________________________________________________________________
Added: svn:keywords
+ Id
Added: svn:eol-style
+ native
Added: zc.FileStorage/trunk/src/zc/FileStorage/mru.txt
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/mru.txt (rev 0)
+++ zc.FileStorage/trunk/src/zc/FileStorage/mru.txt 2008-12-07 17:41:06 UTC (rev 93755)
@@ -0,0 +1,46 @@
+Simple most-recently-used cache
+===============================
+
+An mru cache is a simple mapping object that has a limited size. To
+create an mru cache, we call the MRU constructor passing a size and an
+optional eviction callback. The eviction callback is called just
+before an item is evicted.
+
+ >>> def evicted(key, value):
+ ... print 'evicted', key, value
+ >>> from zc.FileStorage.mru import MRU
+ >>> cache = MRU(5, evicted)
+ >>> len(cache), list(cache)
+ (0, [])
+
+We add items to the cache as we would any mapping object:
+
+ >>> cache[1] = 'one'
+ >>> cache[2] = 'two'
+ >>> cache[3] = 'three'
+ >>> cache[4] = 'four'
+ >>> cache[1]
+ 'one'
+ >>> cache.get(3)
+ 'three'
+
+ >>> len(cache), list(cache)
+ (4, [3, 1, 4, 2])
+
+Note the order of the keys. 3 and 1 are first because we accessed them most
+recently. 4 is next because it was added last and an add counts as an
+access.
+
+Let's add some more values:
+
+ >>> cache[5] = 'five'
+ >>> cache[6] = 'six'
+ evicted 2 two
+ >>> cache[7] = 'seven'
+ evicted 4 four
+
+ >>> len(cache), list(cache)
+ (5, [7, 6, 5, 3, 1])
+
+ >>> cache.get(4)
+
Property changes on: zc.FileStorage/trunk/src/zc/FileStorage/mru.txt
___________________________________________________________________
Added: svn:eol-style
+ native
Modified: zc.FileStorage/trunk/src/zc/FileStorage/tests.py
===================================================================
--- zc.FileStorage/trunk/src/zc/FileStorage/tests.py 2008-12-07 17:11:09 UTC (rev 93754)
+++ zc.FileStorage/trunk/src/zc/FileStorage/tests.py 2008-12-07 17:41:06 UTC (rev 93755)
@@ -22,6 +22,7 @@
import os
import unittest
+from zope.testing import doctest
from ZODB.tests.testFileStorage import * # :-P
from ZODB.tests.PackableStorage import * # :-P
@@ -137,7 +138,11 @@
def checkPackWithGCOnDestinationAfterRestore(self):
pass
+ def checkPackWithMultiDatabaseReferences(self):
+ pass
+
def test_suite():
- return unittest.TestSuite(unittest.makeSuite(NoGCFileStorageTests, "check"))
-
-
+ suite = unittest.TestSuite()
+ suite.addTest(unittest.makeSuite(NoGCFileStorageTests, "check"))
+ suite.addTest(doctest.DocFileSuite('mru.txt'))
+ return suite
More information about the Checkins
mailing list