[Checkins] SVN: gocept.zeoraid/trunk/ tests for packing and explanation why packing works correctly this way; registerdb cleanup

Christian Theune ct at gocept.com
Wed Jan 16 10:03:44 EST 2008


Log message for revision 82921:
  tests for packing and explanation why packing works correctly this way; registerdb cleanup

Changed:
  U   gocept.zeoraid/trunk/ROADMAP.txt
  U   gocept.zeoraid/trunk/src/gocept/zeoraid/storage.py
  U   gocept.zeoraid/trunk/src/gocept/zeoraid/tests/test_basics.py

-=-
Modified: gocept.zeoraid/trunk/ROADMAP.txt
===================================================================
--- gocept.zeoraid/trunk/ROADMAP.txt	2008-01-16 10:55:36 UTC (rev 82920)
+++ gocept.zeoraid/trunk/ROADMAP.txt	2008-01-16 15:03:44 UTC (rev 82921)
@@ -49,7 +49,7 @@
 
 
 Future
-===
+======
 
 - Support packing?
 
@@ -60,6 +60,8 @@
 - Make the read requests come from different backends to optimize caching and
   distribute IO load.
 
+  Beware of the hard-coded backend priority ordering during packing.
+
 - Allow adding and removing new backend servers while running.
 
 - Incremental backup.
@@ -68,4 +70,5 @@
 
 - Better performance for reading (distribute read load)
 
-- Cleaner compatibility setup
+- Verify parallel/backend invalidations and optimize them so that
+  invalidations get passed on only once.
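
The note above about distributing read requests hints at an interaction
with packing: the packing strategy below assumes that reads come from the
first optimal storage while packing walks the same list front to back. A
minimal, hypothetical sketch of a read selector that respects this (the
names ToyReadSelector and pack_in_progress are illustrative, not part of
gocept.zeoraid):

    import itertools

    class ToyReadSelector(object):
        """Illustrative only: picks a backend storage for read requests."""

        def __init__(self, storages_optimal):
            self.storages_optimal = storages_optimal
            self.pack_in_progress = False
            self._cycle = itertools.cycle(range(len(storages_optimal)))

        def storage_for_read(self):
            if self.pack_in_progress:
                # Packing works through the list front to back, so no storage
                # later in the list can be more packed than the first optimal
                # storage; reads stay pinned to it while a pack is running.
                return self.storages_optimal[0]
            # Otherwise reads may be spread round-robin to distribute IO load.
            return self.storages_optimal[next(self._cycle)]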

Modified: gocept.zeoraid/trunk/src/gocept/zeoraid/storage.py
===================================================================
--- gocept.zeoraid/trunk/src/gocept/zeoraid/storage.py	2008-01-16 10:55:36 UTC (rev 82920)
+++ gocept.zeoraid/trunk/src/gocept/zeoraid/storage.py	2008-01-16 15:03:44 UTC (rev 82921)
@@ -93,6 +93,10 @@
     closed = False
     _transaction = None
 
+    # We store the registered database to be able to re-register storages when
+    # we bring them back into the pool of optimal storages.
+    _db = None
+
     # This flag signals whether any `store` operation should be logged. This
     # is necessary to support the two-phase recovery process. It is set to
     # `true` when a recovery starts and set back to `false` when it is
@@ -219,15 +223,30 @@
         finally:
             self._lock_release()
 
-    # XXX
     @ensure_writable
     def pack(self, t, referencesf):
+        """Pack the storage."""
+        # Packing is an interesting problem when talking to multiple storages,
+        # especially when doing it in parallel: as packing can take a long
+        # time, you can end up with some storages that are already packed and
+        # others that are still packing. As soon as one storage is packed,
+        # you have to prefer reading from that storage.
+        #
+        # Here, we rely on the following behaviour:
+        # a) always read from the first optimal storage
+        # b) pack beginning with the first optimal storage, working our way
+        #    through the list.
+        # This is a simplified way of prioritizing the list of optimal
+        # storages: the storage we read from is always the first to finish
+        # packing.
         self._apply_all_storages('pack', (t, referencesf))
 
-    # XXX
     def registerDB(self, db, limit=None):
-        # XXX Is it safe to register a DB with multiple storages or do we need some kind
-        # of wrapper here?
+        # We can safely register the database with all storages here; the
+        # only effect is that invalidations get sent out multiple times.
+        # Transaction coordination by the StorageServer and set semantics in
+        # ZODB's Connection class make this correct and cheap.
+        self._db = db
         self._apply_all_storages('registerDB', (db,))
 
     # XXX
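
The comment in pack() above boils down to a single invariant: reads and
packing use the same fixed ordering of optimal storages, so the storage
being read from is never less packed than any other optimal storage. A
minimal, hypothetical sketch of that invariant (ToyRAID and its backends
are illustrative names only, not the actual RAIDStorage implementation):

    class ToyRAID(object):
        """Illustrative only: mirrors the read/pack ordering described above."""

        def __init__(self, storages):
            # Fixed priority order; the real storage keeps this list in
            # `storages_optimal`.
            self.storages_optimal = list(storages)

        def load(self, oid, version=''):
            # a) always read from the first optimal storage
            return self.storages_optimal[0].load(oid, version)

        def pack(self, t, referencesf):
            # b) pack beginning with the first optimal storage and work
            #    through the list in the same order that reads use, so the
            #    storage reads come from is always the most-packed one.
            for storage in self.storages_optimal:
                storage.pack(t, referencesf)
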
@@ -510,6 +529,7 @@
 
     @ensure_open_storage
     def _apply_single_storage(self, method_name, args=(), kw={}):
+        """Calls the given method on the first optimal storage."""
         # Try to find a storage that we can talk to. Stop after we found a
         # reliable result.
         for name in self.storages_optimal[:]:
@@ -524,6 +544,7 @@
     @ensure_open_storage
     def _apply_all_storages(self, method_name, args=(), kw={},
                             expect_connected=True):
+        """Calls the given method on all optimal backend storages in order."""
         results = []
         exceptions = []
         for name in self.storages_optimal[:]:
@@ -649,6 +670,11 @@
                 # has caught up by now and we can put it into optimal state
                 # again.
                 self.storages_recovering.remove(name)
+                if self._db:
+                    # We are registered with a database already. We need to
+                    # re-register the recovered storage to make invalidations
+                    # pass through.
+                    self.storages[name].registerDB(self._db)
                 self.storages_optimal.append(name)
                 # We can also stop logging stores now.
                 self._log_stores = False
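
Registering the same database object with every backend storage means each
backend forwards the same invalidations. This stays correct and cheap
because the invalidations are kept with set semantics in ZODB's Connection
class, which collapses the duplicates; the re-registration above simply
makes a recovered backend take part in this again. A tiny, hypothetical
demonstration of the idempotence, not ZODB code:

    # Hypothetical stand-in for a per-connection invalidation buffer.
    invalidated_oids = set()

    def invalidate(tid, oids):
        # Called once per registered backend storage for the same
        # transaction; updating a set makes the repeated calls idempotent.
        invalidated_oids.update(oids)

    oid = '\x00' * 8
    invalidate('some-tid', [oid])  # forwarded by the first backend
    invalidate('some-tid', [oid])  # duplicate from the second backend
    assert invalidated_oids == set([oid])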

Modified: gocept.zeoraid/trunk/src/gocept/zeoraid/tests/test_basics.py
===================================================================
--- gocept.zeoraid/trunk/src/gocept/zeoraid/tests/test_basics.py	2008-01-16 10:55:36 UTC (rev 82920)
+++ gocept.zeoraid/trunk/src/gocept/zeoraid/tests/test_basics.py	2008-01-16 15:03:44 UTC (rev 82921)
@@ -1,6 +1,13 @@
+# vim:fileencoding=utf-8
+# Copyright (c) 2007 gocept gmbh & co. kg
+# See also LICENSE.txt
+# $Id$
+"""Test harness for gocept.zeoraid."""
+
 import unittest
 import tempfile
 import os
+import time
 
 import zope.interface.verify
 
@@ -486,7 +493,68 @@
                           self._storage.new_oid)
         self.assertEquals('failed', self._storage.raid_status())
 
+    def test_pack_degrading1(self):
+        # We store differently sized data for each revision so that packing
+        # definitely yields different file sizes.
+        # We work on the root object to avoid garbage collection
+        # kicking in.
+        oid = ZODB.utils.z64
+        revid = self._dostore(oid=oid, revid=None, data=1)
+        revid2 = self._dostore(oid=oid, revid=revid, data=2)
 
+        self.assertEquals(256, self._backend(0).getSize())
+        self.assertEquals(256, self._backend(1).getSize())
+        self.assertEquals(256, self._storage.getSize())
+
+        self._storage.pack(time.time(), ZODB.serialize.referencesf)
+        self.assertEquals(130, self._backend(0).getSize())
+        self.assertEquals(130, self._backend(1).getSize())
+        self.assertEquals(130, self._storage.getSize())
+
+        revid3 = self._dostore(oid=oid, revid=revid2, data=3)
+        self.assertEquals(256, self._backend(0).getSize())
+        self.assertEquals(256, self._backend(1).getSize())
+        self.assertEquals(256, self._storage.getSize())
+
+        self._disable_storage(0)
+        self._storage.pack(time.time(), ZODB.serialize.referencesf)
+        self.assertEquals(130, self._backend(0).getSize())
+        self.assertEquals(130, self._storage.getSize())
+
+        self._dostore(oid=oid, revid=revid3, data=4)
+        self.assertEquals(256, self._storage.getSize())
+        self._disable_storage(0)
+        self.assertRaises(gocept.zeoraid.interfaces.RAIDError,
+                          self._storage.pack,
+                          time.time(), ZODB.serialize.referencesf)
+
+    def test_pack_degrading2(self):
+        # We store differently sized data for each revision so that packing
+        # definitely yields different file sizes.
+        # We work on the root object to avoid garbage collection
+        # kicking in.
+        oid = ZODB.utils.z64
+        revid = self._dostore(oid=oid, revid=None, data=1)
+        revid2 = self._dostore(oid=oid, revid=revid, data=2)
+        self.assertEquals(256, self._storage.getSize())
+
+        self._backend(0).fail('pack')
+        self._storage.pack(time.time(), ZODB.serialize.referencesf)
+        self.assertEquals(130, self._backend(0).getSize())
+        self.assertEquals(130, self._storage.getSize())
+        self.assertEquals('degraded', self._storage.raid_status())
+
+        revid3 = self._dostore(oid=oid, revid=revid2, data=3)
+        self.assertEquals(256, self._backend(0).getSize())
+        self.assertEquals(256, self._storage.getSize())
+
+        self._backend(0).fail('pack')
+        self.assertRaises(gocept.zeoraid.interfaces.RAIDError,
+                          self._storage.pack,
+                          time.time(), ZODB.serialize.referencesf)
+        self.assertEquals('failed', self._storage.raid_status())
+
+
 class ZEOReplicationStorageTests(ZEOStorageBackendTests,
                                  ReplicationStorageTests,
                                  ThreadTests.ThreadTests):


