[Checkins] SVN: zc.FileStorage/branches/jim-dev/ Initial slash and burn to remove old gc support.

Jim Fulton jim at zope.com
Mon Oct 5 06:46:18 EDT 2009


Log message for revision 104789:
  Initial slash and burn to remove old gc support.

Changed:
  U   zc.FileStorage/branches/jim-dev/buildout.cfg
  D   zc.FileStorage/branches/jim-dev/src/zc/FileStorage/_ILBTree.c
  U   zc.FileStorage/branches/jim-dev/src/zc/FileStorage/__init__.py
  D   zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.py
  D   zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.txt
  U   zc.FileStorage/branches/jim-dev/src/zc/FileStorage/tests.py

-=-
Modified: zc.FileStorage/branches/jim-dev/buildout.cfg
===================================================================
--- zc.FileStorage/branches/jim-dev/buildout.cfg	2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/buildout.cfg	2009-10-05 10:46:18 UTC (rev 104789)
@@ -1,5 +1,5 @@
 [buildout]
-develop = . 3.8
+develop = .
 parts = py test
 
 [py]
@@ -18,8 +18,6 @@
   tempfile.tempdir = os.path.abspath('tmp')
 
   import zc.FileStorage
-  zc.FileStorage.FileReferences.cache_size = 2
-  zc.FileStorage.FileReferences.entry_size = 2
 
   import ZODB.tests.VersionStorage, ZODB.tests.TransactionalUndoVersionStorage
   class C: pass

Deleted: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/_ILBTree.c
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/_ILBTree.c	2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/_ILBTree.c	2009-10-05 10:46:18 UTC (rev 104789)
@@ -1,67 +0,0 @@
-/*############################################################################
-#
-# Copyright (c) 2004 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-############################################################################*/
-
-#define MASTER_ID "$Id: _IIBTree.c 25186 2004-06-02 15:07:33Z jim $\n"
-
-/* ILBTree - int key, 64-bit int value BTree
-
-   Implements a collection using int type keys
-   and long long type values
-*/
-
-/* Setup template macros */
-
-#define PERSISTENT
-
-#define MOD_NAME_PREFIX "IL"
-#define INITMODULE init_ILBTree
-#define DEFAULT_MAX_BUCKET_SIZE 120
-#define DEFAULT_MAX_BTREE_SIZE 500
-
-#include "BTrees/intkeymacros.h"
-
-
-#define VALUEMACROS_H "$Id:$\n"
-
-#define NEED_LONG_LONG_SUPPORT
-#define VALUE_TYPE PY_LONG_LONG
-#define VALUE_PARSE "L"
-#define COPY_VALUE_TO_OBJECT(O, K) O=longlong_as_object(K)
-#define COPY_VALUE_FROM_ARG(TARGET, ARG, STATUS) \
-    if (PyInt_Check(ARG)) TARGET=PyInt_AS_LONG(ARG); else \
-        if (longlong_check(ARG)) TARGET=PyLong_AsLongLong(ARG); else \
-            if (PyLong_Check(ARG)) { \
-                PyErr_SetString(PyExc_ValueError, "long integer out of range"); \
-                (STATUS)=0; (TARGET)=0; } \
-            else { \
-            PyErr_SetString(PyExc_TypeError, "expected integer value");   \
-            (STATUS)=0; (TARGET)=0; }
-
-
-#undef VALUE_TYPE_IS_PYOBJECT
-#define TEST_VALUE(K, T) (((K) < (T)) ? -1 : (((K) > (T)) ? 1: 0)) 
-#define VALUE_SAME(VALUE, TARGET) ( (VALUE) == (TARGET) )
-#define DECLARE_VALUE(NAME) VALUE_TYPE NAME
-#define DECREF_VALUE(k)
-#define INCREF_VALUE(k)
-#define COPY_VALUE(V, E) (V=(E))
-
-#define NORMALIZE_VALUE(V, MIN) ((MIN) > 0) ? ((V)/=(MIN)) : 0
-
-#define MERGE_DEFAULT 1
-#define MERGE(O1, w1, O2, w2) ((O1)*(w1)+(O2)*(w2))
-#define MERGE_WEIGHT(O, w) ((O)*(w))
-
-
-#include "BTrees/BTreeModuleTemplate.c"

Modified: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/__init__.py
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/__init__.py	2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/__init__.py	2009-10-05 10:46:18 UTC (rev 104789)
@@ -14,36 +14,19 @@
 
 import cPickle
 import logging
-import marshal
 import os
-import shutil
 import subprocess
 import sys
 
-import zc.FileStorage.mru
-
 from ZODB.FileStorage.format import FileStorageFormatter, CorruptedDataError
-from ZODB.serialize import referencesf
 from ZODB.utils import p64, u64, z64
 from ZODB.FileStorage.format import TRANS_HDR_LEN
 
-import BTrees.IOBTree, BTrees.LOBTree, _ILBTree
 import ZODB.FileStorage
 import ZODB.FileStorage.fspack
 import ZODB.fsIndex
 import ZODB.TimeStamp
 
-class OptionalSeekFile(file):
-    """File that doesn't seek to current position.
-
-    This is to try to avoid gobs of system calls.
-    """
-
-    def seek(self, pos, whence=0):
-        if whence or (pos != self.tell()):
-            file.seek(self, pos, whence)
-    
-
 class FileStoragePacker(FileStorageFormatter):
 
     def __init__(self, path, stop, la, lr, cla, clr, current_size):
@@ -52,12 +35,8 @@
         # proceed in parallel.  It's important to close this file at every
         # return point, else on Windows the caller won't be able to rename
         # or remove the storage file.
+        self._file = open(path, "rb")
 
-        # We set the buffer quite high (32MB) to try to reduce seeks
-        # when the storage's disk is doing other io
-
-        self._file = OptionalSeekFile(path, "rb")
-
         self._stop = stop
         self.locked = 0
         self.file_end = current_size
@@ -73,15 +52,13 @@
         self.ltid = z64
 
     def pack(self):
-        
+
         script = self._name+'.packscript'
         open(script, 'w').write(pack_script_template % dict(
             path = self._name,
             stop = self._stop,
             size = self.file_end,
             syspath = sys.path,
-            fr_cache_size = FileReferences.cache_size,
-            fr_entry_size = FileReferences.entry_size,
             ))
         for name in 'error', 'log':
             name = self._name+'.pack'+name
@@ -93,8 +70,8 @@
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
             close_fds=True,
             )
-        
 
+
         proc.stdin.close()
         out = proc.stdout.read()
         if proc.wait():
@@ -178,7 +155,7 @@
         th = self._read_txn_header(input_pos)
         if release is not None:
             release()
-            
+
         output_tpos = output.tell()
         copier.setTxnPos(output_tpos)
         output.write(th.asString())
@@ -230,9 +207,9 @@
         data, tid = self._loadBackTxn(oid, back, 0)
         return data
 
-sys.modules['ZODB.FileStorage.FileStorage'
-            ].FileStoragePacker = FileStoragePacker
-ZODB.FileStorage.FileStorage.supportsVersions = lambda self: False
+# sys.modules['ZODB.FileStorage.FileStorage'
+#             ].FileStoragePacker = FileStoragePacker
+# ZODB.FileStorage.FileStorage.supportsVersions = lambda self: False
 
 class PackCopier(ZODB.FileStorage.fspack.PackCopier):
 
@@ -269,10 +246,6 @@
    '%%(asctime)s %%(name)s %%(levelname)s %%(message)s'))
 logging.getLogger().addHandler(handler)
 
-# The next 2 lines support testing:
-zc.FileStorage.FileReferences.cache_size = %(fr_cache_size)s
-zc.FileStorage.FileReferences.entry_size = %(fr_entry_size)s
-
 try:
     packer = zc.FileStorage.PackProcess(%(path)r, %(stop)r, %(size)r)
     packer.pack()
@@ -299,7 +272,7 @@
         # We set the buffer quite high (32MB) to try to reduce seeks
         # when the storage's disk is doing other io
 
-        
+
         self._file = OptionalSeekFile(path, "rb")
 
         self._name = path
@@ -330,12 +303,10 @@
             for name in ('Peak', 'Size', 'RSS'):
                 if line.startswith('Vm'+name):
                     logging.info(line.strip())
-                
 
+
     def pack(self):
-        do_gc = not os.path.exists(self._name+'.packnogc')
-        packed, index, references, packpos = self.buildPackIndex(
-            self._stop, self.file_end, do_gc)
+        packed, index, packpos = self.buildPackIndex(self._stop, self.file_end)
         logging.info('initial scan %s objects at %s', len(index), packpos)
         self._log_memory()
         if packed:
@@ -344,13 +315,6 @@
             self._file.close()
             return
 
-        if do_gc:
-            logging.info('read to end for gc')
-            self.updateReferences(references, packpos, self.file_end)
-            logging.info('gc')
-            index = self.gc(index, references)
-
-        
         self._log_memory()
         logging.info('copy to pack time')
         output = OptionalSeekFile(self._name + ".pack", "w+b")
@@ -379,16 +343,11 @@
         self._file.close()
 
 
-    def buildPackIndex(self, stop, file_end, do_gc):
+    def buildPackIndex(self, stop, file_end):
         index = ZODB.fsIndex.fsIndex()
-        references = self.ReferencesClass(self._name)
         pos = 4L
         packed = True
-        if do_gc:
-            update_refs = self._update_refs
-        else:
-            update_refs = lambda dh, references: None
-            
+
         while pos < file_end:
             th = self._read_txn_header(pos)
             if th.tid > stop:
@@ -407,7 +366,6 @@
                 if dh.version:
                     self.fail(pos, "Versions are not supported")
                 index[dh.oid] = pos
-                update_refs(dh, references)
                 pos += dh.recordlen()
 
             tlen = self._read_num(pos)
@@ -417,84 +375,8 @@
                           tlen, th.tlen)
             pos += 8
 
-        return packed, index, references, pos
+        return packed, index, pos
 
-    def updateReferences(self, references, pos, file_end):
-
-        # Note that we don't update an index in this step.  This is
-        # because we don't care about objects created after the pack
-        # time.  We'll add those in a later phase. We only care about
-        # references to existing objects.
-        
-        while pos < file_end:
-            th = self._read_txn_header(pos)
-            self.checkTxn(th, pos)
-
-            tpos = pos
-            end = pos + th.tlen
-            pos += th.headerlen()
-
-            while pos < end:
-                dh = self._read_data_header(pos)
-                self.checkData(th, tpos, dh, pos)
-                if dh.version:
-                    self.fail(pos, "Versions are not supported")
-                self._update_refs(dh, references, 1)
-                pos += dh.recordlen()
-
-            tlen = self._read_num(pos)
-            if tlen != th.tlen:
-                self.fail(pos, "redundant transaction length does not "
-                          "match initial transaction length: %d != %d",
-                          tlen, th.tlen)
-            pos += 8
-
-    def _update_refs(self, dh, references, merge=False):
-        oid = u64(dh.oid)
-
-        # Chase backpointers until we get to the record with the refs
-        while dh.back:
-            dh = self._read_data_header(dh.back)
-
-        if dh.plen:
-            refs = referencesf(self._file.read(dh.plen))
-            if refs:
-                if merge:
-                    initial = references.get(oid)
-                    if initial:
-                        refs = set(refs)
-                        refs.update(initial)
-                        refs = list(refs)
-                references[oid] = refs
-                return
-
-        if not merge:
-            references.rmf(oid)
-                
-    def gc(self, index, references):
-        to_do = BTrees.LOBTree.TreeSet([0])
-        reachable = ZODB.fsIndex.fsIndex()
-        while to_do:
-            ioid = to_do.maxKey()
-            to_do.remove(ioid)
-            oid = p64(ioid)
-            if oid in reachable:
-                continue
-
-            # Note that the references include references made
-            # after the pack time.  These include references to
-            # objects created after the pack time, which won't be
-            # in the index.
-            reachable[oid] = index.get(oid, 0)
-
-            for ref in references.get(ioid):
-                iref = u64(ref)
-                if (iref not in to_do) and (ref not in reachable):
-                    to_do.insert(iref)
-                
-        references.clear()
-        return reachable
-
     def copyToPacktime(self, packpos, index, output):
         pos = new_pos = self._metadata_size
         self._file.seek(0)
@@ -556,8 +438,8 @@
                     output.seek(new_pos)
 
                 output._freecache(new_pos)
-                
 
+
             pos += 8
 
         return new_index, new_pos
@@ -591,125 +473,3 @@
             _zc_FileStorage_posix_fadvise.POSIX_FADV_DONTNEED)
 
     return _free
-
-
-class MemoryReferences:
-
-    def __init__(self, path):
-        self.references = BTrees.LOBTree.LOBTree()
-        self.clear = self.references.clear
-
-    def get(self, oid):
-        references = self.references
-        ioid1, ioid2 = divmod(oid, 2147483648L)
-
-        references_ioid1 = references.get(ioid1)
-        if not references_ioid1:
-            return ()
-
-        ioid2 = int(ioid2)
-        result = references_ioid1[0].get(ioid2)
-        if result:
-            return [p64(result)]
-        return references_ioid1[1].get(ioid2, ())
-
-    def __setitem__(self, oid, refs):
-        references = self.references
-        ioid1, ioid2 = divmod(oid, 2147483648L)
-        ioid2 = int(ioid2)
-        references_ioid1 = references.get(ioid1)
-        if references_ioid1 is None:
-            references_ioid1 = references[ioid1] = (
-                _ILBTree.ILBTree(),      # {ioid2 -> single_referenced_oid}
-                BTrees.IOBTree.IOBTree() # {ioid2 -> referenced_oids}
-                )
-
-        if len(refs) == 1:
-            references_ioid1[0][ioid2] = u64(refs.pop())
-            references_ioid1[1].pop(ioid2, None)
-        else:
-            references_ioid1[1][ioid2] = refs
-            references_ioid1[0].pop(ioid2, None)
-            
-    def rmf(self, oid):
-        # Remove the oid, if present
-        ioid1, ioid2 = divmod(oid, 2147483648L)
-        references_ioid1 = self.references.get(ioid1)
-        if not references_ioid1:
-            return
-
-        ioid2 = int(ioid2)
-        if references_ioid1[0].pop(ioid2, None) is None:
-            references_ioid1[1].pop(ioid2, None)
-
-def _rmtree_onerror(func, path, exc_info):
-    if os.path.exists(path):
-        raise exc_info[0], exc_info[1], exc_info[2]
-    logging.info('burp removing %s', path)
-
-class FileReferences:
-
-    cache_size = 999
-    entry_size = 256
-
-    def __init__(self, path):
-        self._cache = zc.FileStorage.mru.MRU(self.cache_size,
-                                             lambda k, v: v.save())
-        path += '.refs'
-        if os.path.isdir(path):
-            shutil.rmtree(path, onerror=_rmtree_onerror)
-        os.mkdir(path)
-        self._tmp = path
-
-    def clear(self):
-        cache = self._cache
-        for k in cache:
-            cache[k].dirty = False
-        self._cache.clear()
-        shutil.rmtree(self._tmp, onerror=_rmtree_onerror)
-
-    def _load(self, oid):
-        base, index = divmod(long(oid), self.entry_size)
-        key = hex(base)[2:-1]
-        data = self._cache.get(key)
-        if data is None:
-            data = _refdata(os.path.join(self._tmp, key))
-            self._cache[key] = data
-        return data, index
-
-    def get(self, oid):
-        data, index = self._load(oid)
-        return data.get(index, ())
-
-    def __setitem__(self, oid, refs):
-        data, index = self._load(oid)
-        if set(refs) != set(data.get(index, ())):
-            data[index] = refs
-
-    def rmf(self, oid):
-        data, index = self._load(oid)
-        if index in data:
-            del data[index]
-
-class _refdata(dict):
-    
-    def __init__(self, path):
-        self.path = path
-        if os.path.exists(path):
-            self.update(marshal.load(open(path, 'rb')))
-        self.dirty = False
-
-    def save(self):
-        if self.dirty:
-            marshal.dump(dict(self), open(self.path, 'wb'))
-            self.dirty = False
-
-    def __setitem__(self, key, value):
-        self.dirty = True
-        dict.__setitem__(self, key, value)
-
-    def __delitem__(self, key):
-        self.dirty = True
-        dict.__delitem__(self, key)
-
-PackProcess.ReferencesClass = FileReferences
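
For context, the gc() method deleted above computed the set of reachable
objects by walking the collected references outward from the root oid,
using fsIndex and BTrees structures.  The following is a minimal sketch of
that reachability sweep over plain Python dicts and sets; the function
name and the simplified data shapes are illustrative assumptions, not part
of the original code.

# Hypothetical, simplified sketch of the reachability sweep the removed
# gc() performed; plain dicts/sets stand in for fsIndex/LOBTree.
def sweep_reachable(index, references, root_oid=0):
    """Return the subset of `index` reachable from `root_oid`.

    index:      {oid -> file position} for objects current at pack time
    references: {oid -> iterable of referenced oids}
    """
    reachable = {}
    to_do = {root_oid}
    while to_do:
        oid = to_do.pop()
        if oid in reachable:
            continue
        # References may include objects created after the pack time,
        # which won't be in the index, so default their position to 0.
        reachable[oid] = index.get(oid, 0)
        for ref in references.get(oid, ()):
            if ref not in reachable:
                to_do.add(ref)
    return reachable

if __name__ == '__main__':
    index = {0: 4, 1: 100, 2: 200, 3: 300}
    references = {0: [1], 1: [2]}   # oid 3 is unreachable garbage
    print(sorted(sweep_reachable(index, references)))   # -> [0, 1, 2]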

Deleted: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.py
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.py	2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.py	2009-10-05 10:46:18 UTC (rev 104789)
@@ -1,95 +0,0 @@
-##############################################################################
-#
-# Copyright (c) Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-
-class MRU:
-
-    def __init__(self, size, evicted=lambda k, v: None):
-        assert size > 0
-        self.size = size
-        self.evicted = evicted
-        self.data = {}
-        self.top = _node()
-
-    def clear(self):
-        while self.data:
-            self.pop()
-    
-    def __len__(self):
-        return len(self.data)
-
-    def __iter__(self):
-        # We can't use a generator. We have to take a snapshot, otherwise
-        # the client might do operations that would change the order!
-        result = []
-        top = node = self.top
-        while 1:
-            node = node.previous
-            if node is top:
-                break
-            result.append(node.key)
-        return iter(result)
-
-    def get(self, key, default=None):
-        node = self.data.get(key)
-        if node is None:
-            return default
-        if node.next != self.top:
-            node.unlink()
-            node.link(self.top)
-        return node.value
-
-    def __getitem__(self, key):
-        result = self.get(key, self)
-        if result is not self:
-            return result
-        raise KeyError(key)
-
-    def __setitem__(self, key, value):
-        assert value is not self
-        data = self.data
-        node = data.get(key)
-        if node is None:
-            node = _node(self.top)
-            data[key] = node
-            node.key = key
-            if len(data) > self.size:
-                self.pop()
-        node.value = value
-
-    def pop(self):
-        doomed = self.top.next
-        self.evicted(doomed.key, doomed.value)
-        del self.data[doomed.key]
-        doomed.unlink()
-        
-
-class _node:
-
-    next = previous = key = value = None
-
-    def __init__(self, next=None):
-        if next is None:
-            next = self
-        self.link(next)
-
-    def link(self, next):
-        self.next = next
-        self.previous = next.previous
-        next.previous = self
-        self.previous.next = self
-
-    def unlink(self):
-        self.next.previous = self.previous
-        self.previous.next = self.next
-        
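
The MRU class removed above keeps its ordering in a hand-rolled circular
doubly-linked list.  A roughly equivalent eviction policy can be sketched
with collections.OrderedDict (Python 2.7 or later); the class below is an
illustrative assumption, not a drop-in replacement for mru.py.

# Hypothetical sketch: least-recently-used eviction via OrderedDict,
# mirroring the evicted(key, value) callback behavior of the removed MRU.
from collections import OrderedDict

class SimpleMRU(object):

    def __init__(self, size, evicted=lambda k, v: None):
        assert size > 0
        self.size = size
        self.evicted = evicted
        self.data = OrderedDict()

    def get(self, key, default=None):
        if key in self.data:
            value = self.data.pop(key)
            self.data[key] = value      # re-insert to mark as most recent
            return value
        return default

    def __setitem__(self, key, value):
        # A new key is appended as most recent; re-assigning an existing
        # key keeps its position, as in the original mru.py.
        self.data[key] = value
        if len(self.data) > self.size:
            doomed_key, doomed_value = self.data.popitem(last=False)
            self.evicted(doomed_key, doomed_value)

With a size of 5 and the same sequence of stores and lookups as in
mru.txt below, this sketch evicts keys 2 and then 4, matching the doctest
output.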

Deleted: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.txt
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.txt	2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/mru.txt	2009-10-05 10:46:18 UTC (rev 104789)
@@ -1,46 +0,0 @@
-Simple most-recently-used cache
-===============================
-
-An mru cache is a simple mapping object that has a limited size. To
-create an mru cache, we call the MRU constructor passing a size and an
-optional eviction callback.  The eviction callback is called just
-before an item is evicted.
-
-    >>> def evicted(key, value):
-    ...     print 'evicted', key, value
-    >>> from zc.FileStorage.mru import MRU
-    >>> cache = MRU(5, evicted)
-    >>> len(cache), list(cache)
-    (0, [])
-
-We add items to the cache as we would any mapping object:
-
-    >>> cache[1] = 'one'
-    >>> cache[2] = 'two'
-    >>> cache[3] = 'three'
-    >>> cache[4] = 'four'
-    >>> cache[1]
-    'one'
-    >>> cache.get(3)
-    'three'
-
-    >>> len(cache), list(cache)
-    (4, [3, 1, 4, 2])
-
-Note the order of the keys. 3 and 1 are first because we accessed them most
-recently.  4 is next because it was added last and an add counts as an
-access.
-
-Let's add some more values:
-
-    >>> cache[5] = 'five'
-    >>> cache[6] = 'six'
-    evicted 2 two
-    >>> cache[7] = 'seven'
-    evicted 4 four
-
-    >>> len(cache), list(cache)
-    (5, [7, 6, 5, 3, 1])
-
-    >>> cache.get(4)
-

Modified: zc.FileStorage/branches/jim-dev/src/zc/FileStorage/tests.py
===================================================================
--- zc.FileStorage/branches/jim-dev/src/zc/FileStorage/tests.py	2009-10-05 10:19:00 UTC (rev 104788)
+++ zc.FileStorage/branches/jim-dev/src/zc/FileStorage/tests.py	2009-10-05 10:46:18 UTC (rev 104789)
@@ -144,5 +144,4 @@
 def test_suite():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(NoGCFileStorageTests, "check"))
-    suite.addTest(doctest.DocFileSuite('mru.txt'))
     return suite


