[Checkins] SVN: zc.FileStorage/branches/jim-dev/ Initial slash and burn to remove old gc support.
Jim Fulton
jim at zope.com
Mon Oct 5 06:46:18 EDT 2009
Log message for revision 104789:
Initial slash and burn to remove old gc support.
Changed:
U zc.FileStorage/branches/jim-dev/buildout.cfg
D zc.FileStorage/branches/jim-dev/src/zc/FileStorage/_ILBTree.c
U zc.FileStorage/branches/jim-dev/src/zc/FileStorage/__init__.py
D zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.py
D zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.txt
U zc.FileStorage/branches/jim-dev/src/zc/FileStorage/tests.py
-=-
Modified: zc.FileStorage/branches/jim-dev/buildout.cfg
===================================================================
--- zc.FileStorage/branches/jim-dev/buildout.cfg 2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/buildout.cfg 2009-10-05 10:46:18 UTC (rev 104789)
@@ -1,5 +1,5 @@
[buildout]
-develop = . 3.8
+develop = .
parts = py test
[py]
@@ -18,8 +18,6 @@
tempfile.tempdir = os.path.abspath('tmp')
import zc.FileStorage
- zc.FileStorage.FileReferences.cache_size = 2
- zc.FileStorage.FileReferences.entry_size = 2
import ZODB.tests.VersionStorage, ZODB.tests.TransactionalUndoVersionStorage
class C: pass
Deleted: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/_ILBTree.c
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/_ILBTree.c 2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/_ILBTree.c 2009-10-05 10:46:18 UTC (rev 104789)
@@ -1,67 +0,0 @@
-/*############################################################################
-#
-# Copyright (c) 2004 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-############################################################################*/
-
-#define MASTER_ID "$Id: _IIBTree.c 25186 2004-06-02 15:07:33Z jim $\n"
-
-/* IIBTree - int key, int value BTree
-
- Implements a collection using int type keys
- and int type values
-*/
-
-/* Setup template macros */
-
-#define PERSISTENT
-
-#define MOD_NAME_PREFIX "IL"
-#define INITMODULE init_ILBTree
-#define DEFAULT_MAX_BUCKET_SIZE 120
-#define DEFAULT_MAX_BTREE_SIZE 500
-
-#include "BTrees/intkeymacros.h"
-
-
-#define VALUEMACROS_H "$Id:$\n"
-
-#define NEED_LONG_LONG_SUPPORT
-#define VALUE_TYPE PY_LONG_LONG
-#define VALUE_PARSE "L"
-#define COPY_VALUE_TO_OBJECT(O, K) O=longlong_as_object(K)
-#define COPY_VALUE_FROM_ARG(TARGET, ARG, STATUS) \
- if (PyInt_Check(ARG)) TARGET=PyInt_AS_LONG(ARG); else \
- if (longlong_check(ARG)) TARGET=PyLong_AsLongLong(ARG); else \
- if (PyLong_Check(ARG)) { \
- PyErr_SetString(PyExc_ValueError, "long integer out of range"); \
- (STATUS)=0; (TARGET)=0; } \
- else { \
- PyErr_SetString(PyExc_TypeError, "expected integer value"); \
- (STATUS)=0; (TARGET)=0; }
-
-
-#undef VALUE_TYPE_IS_PYOBJECT
-#define TEST_VALUE(K, T) (((K) < (T)) ? -1 : (((K) > (T)) ? 1: 0))
-#define VALUE_SAME(VALUE, TARGET) ( (VALUE) == (TARGET) )
-#define DECLARE_VALUE(NAME) VALUE_TYPE NAME
-#define DECREF_VALUE(k)
-#define INCREF_VALUE(k)
-#define COPY_VALUE(V, E) (V=(E))
-
-#define NORMALIZE_VALUE(V, MIN) ((MIN) > 0) ? ((V)/=(MIN)) : 0
-
-#define MERGE_DEFAULT 1
-#define MERGE(O1, w1, O2, w2) ((O1)*(w1)+(O2)*(w2))
-#define MERGE_WEIGHT(O, w) ((O)*(w))
-
-
-#include "BTrees/BTreeModuleTemplate.c"
Modified: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/__init__.py
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/__init__.py 2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/__init__.py 2009-10-05 10:46:18 UTC (rev 104789)
@@ -14,36 +14,19 @@
import cPickle
import logging
-import marshal
import os
-import shutil
import subprocess
import sys
-import zc.FileStorage.mru
-
from ZODB.FileStorage.format import FileStorageFormatter, CorruptedDataError
-from ZODB.serialize import referencesf
from ZODB.utils import p64, u64, z64
from ZODB.FileStorage.format import TRANS_HDR_LEN
-import BTrees.IOBTree, BTrees.LOBTree, _ILBTree
import ZODB.FileStorage
import ZODB.FileStorage.fspack
import ZODB.fsIndex
import ZODB.TimeStamp
-class OptionalSeekFile(file):
- """File that doesn't seek to current position.
-
- This is to try to avoid gobs of system calls.
- """
-
- def seek(self, pos, whence=0):
- if whence or (pos != self.tell()):
- file.seek(self, pos, whence)
-
-
class FileStoragePacker(FileStorageFormatter):
def __init__(self, path, stop, la, lr, cla, clr, current_size):
@@ -52,12 +35,8 @@
# proceed in parallel. It's important to close this file at every
# return point, else on Windows the caller won't be able to rename
# or remove the storage file.
+ self._file = open(path, "rb")
- # We set the buffer quite high (32MB) to try to reduce seeks
- # when the storage is disk is doing other io
-
- self._file = OptionalSeekFile(path, "rb")
-
self._stop = stop
self.locked = 0
self.file_end = current_size
@@ -73,15 +52,13 @@
self.ltid = z64
def pack(self):
-
+
script = self._name+'.packscript'
open(script, 'w').write(pack_script_template % dict(
path = self._name,
stop = self._stop,
size = self.file_end,
syspath = sys.path,
- fr_cache_size = FileReferences.cache_size,
- fr_entry_size = FileReferences.entry_size,
))
for name in 'error', 'log':
name = self._name+'.pack'+name
@@ -93,8 +70,8 @@
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
close_fds=True,
)
-
+
proc.stdin.close()
out = proc.stdout.read()
if proc.wait():
@@ -178,7 +155,7 @@
th = self._read_txn_header(input_pos)
if release is not None:
release()
-
+
output_tpos = output.tell()
copier.setTxnPos(output_tpos)
output.write(th.asString())
@@ -230,9 +207,9 @@
data, tid = self._loadBackTxn(oid, back, 0)
return data
-sys.modules['ZODB.FileStorage.FileStorage'
- ].FileStoragePacker = FileStoragePacker
-ZODB.FileStorage.FileStorage.supportsVersions = lambda self: False
+# sys.modules['ZODB.FileStorage.FileStorage'
+# ].FileStoragePacker = FileStoragePacker
+# ZODB.FileStorage.FileStorage.supportsVersions = lambda self: False
class PackCopier(ZODB.FileStorage.fspack.PackCopier):
@@ -269,10 +246,6 @@
'%%(asctime)s %%(name)s %%(levelname)s %%(message)s'))
logging.getLogger().addHandler(handler)
-# The next 2 lines support testing:
-zc.FileStorage.FileReferences.cache_size = %(fr_cache_size)s
-zc.FileStorage.FileReferences.entry_size = %(fr_entry_size)s
-
try:
packer = zc.FileStorage.PackProcess(%(path)r, %(stop)r, %(size)r)
packer.pack()
@@ -299,7 +272,7 @@
# We set the buffer quite high (32MB) to try to reduce seeks
# when the storage is disk is doing other io
-
+
self._file = OptionalSeekFile(path, "rb")
self._name = path
@@ -330,12 +303,10 @@
for name in ('Peak', 'Size', 'RSS'):
if line.startswith('Vm'+name):
logging.info(line.strip())
-
+
def pack(self):
- do_gc = not os.path.exists(self._name+'.packnogc')
- packed, index, references, packpos = self.buildPackIndex(
- self._stop, self.file_end, do_gc)
+ packed, index, packpos = self.buildPackIndex(self._stop, self.file_end)
logging.info('initial scan %s objects at %s', len(index), packpos)
self._log_memory()
if packed:
@@ -344,13 +315,6 @@
self._file.close()
return
- if do_gc:
- logging.info('read to end for gc')
- self.updateReferences(references, packpos, self.file_end)
- logging.info('gc')
- index = self.gc(index, references)
-
-
self._log_memory()
logging.info('copy to pack time')
output = OptionalSeekFile(self._name + ".pack", "w+b")
@@ -379,16 +343,11 @@
self._file.close()
- def buildPackIndex(self, stop, file_end, do_gc):
+ def buildPackIndex(self, stop, file_end):
index = ZODB.fsIndex.fsIndex()
- references = self.ReferencesClass(self._name)
pos = 4L
packed = True
- if do_gc:
- update_refs = self._update_refs
- else:
- update_refs = lambda dh, references: None
-
+
while pos < file_end:
th = self._read_txn_header(pos)
if th.tid > stop:
@@ -407,7 +366,6 @@
if dh.version:
self.fail(pos, "Versions are not supported")
index[dh.oid] = pos
- update_refs(dh, references)
pos += dh.recordlen()
tlen = self._read_num(pos)
@@ -417,84 +375,8 @@
tlen, th.tlen)
pos += 8
- return packed, index, references, pos
+ return packed, index, pos
- def updateReferences(self, references, pos, file_end):
-
- # Note that we don't update an index in this step. This is
- # because we don't care about objects created after the pack
- # time. We'll add those in a later phase. We only care about
- # references to existing objects.
-
- while pos < file_end:
- th = self._read_txn_header(pos)
- self.checkTxn(th, pos)
-
- tpos = pos
- end = pos + th.tlen
- pos += th.headerlen()
-
- while pos < end:
- dh = self._read_data_header(pos)
- self.checkData(th, tpos, dh, pos)
- if dh.version:
- self.fail(pos, "Versions are not supported")
- self._update_refs(dh, references, 1)
- pos += dh.recordlen()
-
- tlen = self._read_num(pos)
- if tlen != th.tlen:
- self.fail(pos, "redundant transaction length does not "
- "match initial transaction length: %d != %d",
- tlen, th.tlen)
- pos += 8
-
- def _update_refs(self, dh, references, merge=False):
- oid = u64(dh.oid)
-
- # Chase backpointers until we get to the record with the refs
- while dh.back:
- dh = self._read_data_header(dh.back)
-
- if dh.plen:
- refs = referencesf(self._file.read(dh.plen))
- if refs:
- if merge:
- initial = references.get(oid)
- if initial:
- refs = set(refs)
- refs.update(initial)
- refs = list(refs)
- references[oid] = refs
- return
-
- if not merge:
- references.rmf(oid)
-
- def gc(self, index, references):
- to_do = BTrees.LOBTree.TreeSet([0])
- reachable = ZODB.fsIndex.fsIndex()
- while to_do:
- ioid = to_do.maxKey()
- to_do.remove(ioid)
- oid = p64(ioid)
- if oid in reachable:
- continue
-
- # Note that the references include references made
- # after the pack time. These include references to
- # objects created after the pack time, which won't be
- # in the index.
- reachable[oid] = index.get(oid, 0)
-
- for ref in references.get(ioid):
- iref = u64(ref)
- if (iref not in to_do) and (ref not in reachable):
- to_do.insert(iref)
-
- references.clear()
- return reachable
-
def copyToPacktime(self, packpos, index, output):
pos = new_pos = self._metadata_size
self._file.seek(0)
@@ -556,8 +438,8 @@
output.seek(new_pos)
output._freecache(new_pos)
-
+
pos += 8
return new_index, new_pos
@@ -591,125 +473,3 @@
_zc_FileStorage_posix_fadvise.POSIX_FADV_DONTNEED)
return _free
-
-
-class MemoryReferences:
-
- def __init__(self, path):
- self.references = BTrees.LOBTree.LOBTree()
- self.clear = self.references.clear
-
- def get(self, oid):
- references = self.references
- ioid1, ioid2 = divmod(oid, 2147483648L)
-
- references_ioid1 = references.get(ioid1)
- if not references_ioid1:
- return ()
-
- ioid2 = int(ioid2)
- result = references_ioid1[0].get(ioid2)
- if result:
- return [p64(result)]
- return references_ioid1[1].get(ioid2, ())
-
- def __setitem__(self, oid, refs):
- references = self.references
- ioid1, ioid2 = divmod(oid, 2147483648L)
- ioid2 = int(ioid2)
- references_ioid1 = references.get(ioid1)
- if references_ioid1 is None:
- references_ioid1 = references[ioid1] = (
- _ILBTree.ILBTree(), # {ioid2 -> single_referenced_oid}
- BTrees.IOBTree.IOBTree() # {ioid2 -> referenced_oids}
- )
-
- if len(refs) == 1:
- references_ioid1[0][ioid2] = u64(refs.pop())
- references_ioid1[1].pop(ioid2, None)
- else:
- references_ioid1[1][ioid2] = refs
- references_ioid1[0].pop(ioid2, None)
-
- def rmf(self, oid):
- # Remove the oid, if present
- ioid1, ioid2 = divmod(oid, 2147483648L)
- references_ioid1 = self.references.get(ioid1)
- if not references_ioid1:
- return
-
- ioid2 = int(ioid2)
- if references_ioid1[0].pop(ioid2, None) is None:
- references_ioid1[1].pop(ioid2, None)
-
-def _rmtree_onerror(func, path, exc_info):
- if os.path.exists(path):
- raise exc_info[0], exc_info[1], exc_info[2]
- logging.info('burp removing %s', path)
-
-class FileReferences:
-
- cache_size = 999
- entry_size = 256
-
- def __init__(self, path):
- self._cache = zc.FileStorage.mru.MRU(self.cache_size,
- lambda k, v: v.save())
- path += '.refs'
- if os.path.isdir(path):
- shutil.rmtree(path, onerror=_rmtree_onerror)
- os.mkdir(path)
- self._tmp = path
-
- def clear(self):
- cache = self._cache
- for k in cache:
- cache[k].dirty = False
- self._cache.clear()
- shutil.rmtree(self._tmp, onerror=_rmtree_onerror)
-
- def _load(self, oid):
- base, index = divmod(long(oid), self.entry_size)
- key = hex(base)[2:-1]
- data = self._cache.get(key)
- if data is None:
- data = _refdata(os.path.join(self._tmp, key))
- self._cache[key] = data
- return data, index
-
- def get(self, oid):
- data, index = self._load(oid)
- return data.get(index, ())
-
- def __setitem__(self, oid, refs):
- data, index = self._load(oid)
- if set(refs) != set(data.get(index, ())):
- data[index] = refs
-
- def rmf(self, oid):
- data, index = self._load(oid)
- if index in data:
- del data[index]
-
-class _refdata(dict):
-
- def __init__(self, path):
- self.path = path
- if os.path.exists(path):
- self.update(marshal.load(open(path, 'rb')))
- self.dirty = False
-
- def save(self):
- if self.dirty:
- marshal.dump(dict(self), open(self.path, 'wb'))
- self.dirty = False
-
- def __setitem__(self, key, value):
- self.dirty = True
- dict.__setitem__(self, key, value)
-
- def __delitem__(self, key):
- self.dirty = True
- dict.__delitem__(self, key)
-
-PackProcess.ReferencesClass = FileReferences
Deleted: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.py
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.py 2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.py 2009-10-05 10:46:18 UTC (rev 104789)
@@ -1,95 +0,0 @@
-##############################################################################
-#
-# Copyright (c) Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-
-class MRU:
-
- def __init__(self, size, evicted=lambda k, v: None):
- assert size > 0
- self.size = size
- self.evicted = evicted
- self.data = {}
- self.top = _node()
-
- def clear(self):
- while self.data:
- self.pop()
-
- def __len__(self):
- return len(self.data)
-
- def __iter__(self):
- # We van't do a generator. We have to take a snapshot, otherwise
- # the client might do operations that would change the order!
- result = []
- top = node = self.top
- while 1:
- node = node.previous
- if node is top:
- break
- result.append(node.key)
- return iter(result)
-
- def get(self, key, default=None):
- node = self.data.get(key)
- if node is None:
- return default
- if node.next != self.top:
- node.unlink()
- node.link(self.top)
- return node.value
-
- def __getitem__(self, key):
- result = self.get(key, self)
- if result is not self:
- return result
- raise KeyError(key)
-
- def __setitem__(self, key, value):
- assert value is not self
- data = self.data
- node = data.get(key)
- if node is None:
- node = _node(self.top)
- data[key] = node
- node.key = key
- if len(data) > self.size:
- self.pop()
- node.value = value
-
- def pop(self):
- doomed = self.top.next
- self.evicted(doomed.key, doomed.value)
- del self.data[doomed.key]
- doomed.unlink()
-
-
-class _node:
-
- next = previous = key = value = None
-
- def __init__(self, next=None):
- if next is None:
- next = self
- self.link(next)
-
- def link(self, next):
- self.next = next
- self.previous = next.previous
- next.previous = self
- self.previous.next = self
-
- def unlink(self):
- self.next.previous = self.previous
- self.previous.next = self.next
-
Deleted: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.txt
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.txt 2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.txt 2009-10-05 10:46:18 UTC (rev 104789)
@@ -1,46 +0,0 @@
-Simple most-recently-used cache
-===============================
-
-An mru cache is a simple mapping object that has a limited size. To
-create an mru cache, we call the MRU constructor passing a size an an
-optional eviction callback. The eviscion callback is called just
-before an item is evicted.
-
- >>> def evicted(key, value):
- ... print 'evicted', key, value
- >>> from zc.FileStorage.mru import MRU
- >>> cache = MRU(5, evicted)
- >>> len(cache), list(cache)
- (0, [])
-
-We add items to the cache as we would any mapping object:
-
- >>> cache[1] = 'one'
- >>> cache[2] = 'two'
- >>> cache[3] = 'three'
- >>> cache[4] = 'four'
- >>> cache[1]
- 'one'
- >>> cache.get(3)
- 'three'
-
- >>> len(cache), list(cache)
- (4, [3, 1, 4, 2])
-
-Note the order of the keys. 3 and 1 are first because we accessed them most
-recently. 4 is next because it was added last and an add counts as an
-access.
-
-Let's add some more values:
-
- >>> cache[5] = 'five'
- >>> cache[6] = 'six'
- evicted 2 two
- >>> cache[7] = 'seven'
- evicted 4 four
-
- >>> len(cache), list(cache)
- (5, [7, 6, 5, 3, 1])
-
- >>> cache.get(4)
-
Modified: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/tests.py
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/tests.py 2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/tests.py 2009-10-05 10:46:18 UTC (rev 104789)
@@ -144,5 +144,4 @@
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(NoGCFileStorageTests, "check"))
- suite.addTest(doctest.DocFileSuite('mru.txt'))
return suite
More information about the checkins mailing list