[ZODB-Dev] BTree fsIndex patch for StandaloneZODB

Neil Schemenauer nas@mems-exchange.org
Fri, 8 Feb 2002 17:13:52 -0500


--ikeVEW9yuYc//A+q
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

I've extracted this patch from BTreeFSIndex-branch in the Zope CVS tree.
I think it should apply cleanly to current copy of StandaloneZODB.  We
are now using this on our development machines.  For our DB, it
dramatically reduces the amount of memory used by the ZEO server (RSS
down from 49MB to 10MB).  Hopefully this change can get integrated into
the main ZODB branch.

  Neil

--ikeVEW9yuYc//A+q
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="fsIndex.diff"

--- /home/akuchlin/src/StandaloneZODB/BTrees/Setup	Tue Mar 27 11:37:42 2001
+++ StandaloneZODB/BTrees/Setup	Mon Feb  4 17:41:00 2002
@@ -3,3 +3,4 @@
 _OIBTree _OIBTree.c  -I../../Components/ExtensionClass/src -I../ZODB
 _IIBTree _IIBTree.c  -I../../Components/ExtensionClass/src -I../ZODB -I../../Components
 _IOBTree _IOBTree.c  -I../../Components/ExtensionClass/src -I../ZODB -I../../Components
+_fsBTree _fsBTree.c  -I../../Components/ExtensionClass/src -I../ZODB -I../../Components
--- /home/akuchlin/src/StandaloneZODB/BTrees/_fsBTree.c	Wed Dec 31 19:00:00 1969
+++ StandaloneZODB/BTrees/_fsBTree.c	Mon Feb  4 17:41:25 2002
@@ -0,0 +1,58 @@
+/* fsBTree - FileStorage index BTree
+
+   This BTree implments a mapping from 2-character strings
+   to six-character strings. This allows us to effieciently store
+   a FileStorage index as a nested mapping of 6-character oid prefix
+   to mapping of 2-character oid suffix to 6-character (byte) file
+   positions.
+*/
+
+#include <string.h>
+
+typedef unsigned char char2[2];
+typedef unsigned char char6[6];
+
+
+/* Setup template macros */
+
+#define MASTER_ID "$Id: _fsBTree.c,v 1.1.2.1 2001/11/21 23:04:54 jim Exp $\n"
+
+#define PERSISTENT
+
+#define MOD_NAME_PREFIX "fs"
+#define INITMODULE init_fsBTree
+#define DEFAULT_MAX_BUCKET_SIZE 500
+#define DEFAULT_MAX_BTREE_SIZE 500
+                
+/*#include "intkeymacros.h"*/
+
+#define KEYMACROS_H "$Id: _fsBTree.c,v 1.1.2.1 2001/11/21 23:04:54 jim Exp $\n"
+#define KEY_TYPE char2
+#define KEY_CHECK(K) (PyString_Check(K) && PyString_GET_SIZE(K)==2)
+#define TEST_KEY(K, T) ((*(K) < *(T) || (*(K) == *(T) && (K)[1] < (T)[1])) ? -1 : ((*(K) == *(T) && (K)[1] == (T)[1]) ? 0 : 1))
+#define DECREF_KEY(KEY)
+#define INCREF_KEY(k)
+#define COPY_KEY(KEY, E) (*(KEY)=*(E), (KEY)[1]=(E)[1])
+#define COPY_KEY_TO_OBJECT(O, K) O=PyString_FromStringAndSize(K,2)
+#define COPY_KEY_FROM_ARG(TARGET, ARG, STATUS) \
+  if (KEY_CHECK(ARG)) memcpy(TARGET, PyString_AS_STRING(ARG), 2); else { \
+      PyErr_SetString(PyExc_TypeError, "expected two-character string key"); \
+      (STATUS)=0; } 
+
+/*#include "intvaluemacros.h"*/
+#define VALUEMACROS_H "$Id: _fsBTree.c,v 1.1.2.1 2001/11/21 23:04:54 jim Exp $\n"
+#define VALUE_TYPE char6
+#define TEST_VALUE(K, T) strncmp(K,T,6)
+#define DECLARE_VALUE(NAME) VALUE_TYPE NAME
+#define DECREF_VALUE(k)
+#define INCREF_VALUE(k)
+#define COPY_VALUE(V, E) (memcpy(V, E, 6))
+#define COPY_VALUE_TO_OBJECT(O, K) O=PyString_FromStringAndSize(K,6)
+#define COPY_VALUE_FROM_ARG(TARGET, ARG, STATUS) \
+  if ((PyString_Check(ARG) && PyString_GET_SIZE(ARG)==6)) \
+      memcpy(TARGET, PyString_AS_STRING(ARG), 6); else { \
+      PyErr_SetString(PyExc_TypeError, "expected six-character string key"); \
+      (STATUS)=0; } 
+  
+#define NORMALIZE_VALUE(V, MIN)
+#include "BTreeModuleTemplate.c"
--- /home/akuchlin/src/StandaloneZODB/ZODB/FileStorage.py	Fri Jan 25 11:52:06 2002
+++ StandaloneZODB/ZODB/FileStorage.py	Mon Feb  4 17:46:46 2002
@@ -129,6 +129,12 @@
 from cPickle import Pickler, Unpickler, loads
 import ConflictResolution
 
+try:
+    from fsIndex import fsIndex
+except ImportError:
+    def fsIndex():
+        return {}
+
 try: from posix import fsync
 except: fsync=None
 
@@ -272,7 +278,7 @@
 
     def _newIndexes(self):
         # hook to use something other than builtin dict
-        return {}, {}, {}, {}
+        return fsIndex(), {}, {}, {}
         
     def abortVersion(self, src, transaction):
         return self.commitVersion(src, '', transaction, abort=1)
@@ -1326,7 +1332,7 @@
     
             rootl=[z64]
             pop=rootl.pop
-            pindex={}
+            pindex=fsIndex()
             referenced=pindex.has_key
             _load=self._load
             _loada=self._loada
@@ -1356,7 +1362,7 @@
     
             # Index for non-version data.  This is a temporary structure
             # to reduce I/O during packing
-            nvindex={}
+            nvindex=fsIndex()
     
             # Cache a bunch of methods
             seek=file.seek
--- /home/akuchlin/src/StandaloneZODB/ZODB/fsIndex.py	Wed Dec 31 19:00:00 1969
+++ StandaloneZODB/ZODB/fsIndex.py	Mon Feb  4 17:42:06 2002
@@ -0,0 +1,117 @@
+##############################################################################
+#
+# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
+# 
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+# 
+##############################################################################
+"""Implement an OID to File-position (long integer) mapping
+"""
+# 
+# To save space, we do two things:
+# 
+#     1. We split the keys (OIDS) into 6-byte prefixes and 2-byte suffixes.
+#        We use the prefixes as keys in a mapping from prefix to mappings
+#        of suffix to data:
+# 
+#           data is  {prefix -> {suffix -> data}}
+# 
+#     2. We limit the data size to 48 bits. This should allow databases
+#        as large as 256 terabytes.
+# 
+# Mostof the space is consumed by items in the mappings from 2-byte
+# suffix to 6-byte data. This should reduce the overall memory usage to
+# 8-16 bytes per OID.
+# 
+# We use p64 to convert integers to 8-byte strings and lop off the two
+# high-order bytes when saving. On loading data, we add the leading
+# bytes back before using U64 to convert the data back to (long)
+# integers.
+
+from BTrees._fsBTree import fsBTree as _fsBTree
+
+import struct
+
+# convert between numbers and six-byte strings
+
+_t32 = 1L<< 32
+
+def num2str(n):
+    h, l = divmod(long(n), _t32)
+    return struct.pack(">HI", h, l)
+
+def str2num(s):
+    h, l = struct.unpack(">HI", s)
+    if h:
+        return (long(h) << 32) + l
+    else:
+       return l
+
+class fsIndex:
+
+    def __init__(self):
+        self._data = {}
+
+    def __getitem__(self, key):
+        return str2num(self._data[key[:6]][key[6:]])
+
+    def get(self, key, default=None):
+        tree = self._data.get(key[:6], default)
+        if tree is default:
+            return default
+        v = tree.get(key[6:], default)
+        if v is default:
+            return default
+        return str2num(v)
+
+    def __setitem__(self, key, value):
+        value = num2str(value)
+        treekey = key[:6]
+        tree = self._data.get(treekey)
+        if tree is None:
+            tree = _fsBTree()
+            self._data[treekey] = tree
+        tree[key[6:]] = value
+
+    def __len__(self):
+        r = 0
+        for tree in self._data.values():
+            r += len(tree)
+        return r
+
+    def update(self, mapping):
+        for k, v in mapping.items():
+            self[k] = v
+
+    def has_key(self, key):
+        v=self.get(key, self)
+        return v is not self
+
+    def clear(self):
+        self._data.clear()
+
+    def keys(self):
+        r = []
+        for prefix, tree in self._data.items():
+            for suffix in tree.keys():
+                r.append(prefix + suffix)
+        return r
+
+    def items(self):
+        r = []
+        for prefix, tree in self._data.items():
+            for suffix, v in tree.items():
+                r.append(((prefix + suffix), str2num(v)))
+        return r
+
+    def values(self):
+        r = []
+        for prefix, tree in self._data.items():
+            for v in tree.values():
+                r.append(str2num(v))
+        return r
--- /home/akuchlin/src/StandaloneZODB/setup.py	Mon Jan 21 11:47:01 2002
+++ StandaloneZODB/setup.py	Mon Feb  4 17:54:42 2002
@@ -114,10 +114,16 @@
                 define_macros = [('EXCLUDE_INTSET_SUPPORT', None)],
                 )
 
+fsb = Extension(name = "BTrees._fsBTree",
+                include_dirs = btree_include,
+                sources = ['BTrees/_fsBTree.c'],
+                define_macros = [('EXCLUDE_INTSET_SUPPORT', None)],
+                )
+
 setup(name="BTrees",
       version="?",
       packages=["BTrees", "BTrees.tests"],
-      ext_modules = [oob, oib, iib, iob],
+      ext_modules = [oob, oib, iib, iob, fsb],
       author = zope_corp,
       )
 

--ikeVEW9yuYc//A+q--