[Checkins] SVN: relstorage/trunk/ Detect and handle backward time travel, which can happen after...
Shane Hathaway
shane at hathawaymix.org
Wed Sep 30 04:23:21 EDT 2009
Log message for revision 104642:
Detect and handle backward time travel, which can happen after
failover to an out-of-date asynchronous slave database. For
simplicity, invalidate the whole ZODB cache when this happens.
Changed:
U relstorage/trunk/CHANGES.txt
U relstorage/trunk/relstorage/adapters/poller.py
U relstorage/trunk/relstorage/tests/reltestbase.py
-=-
Modified: relstorage/trunk/CHANGES.txt
===================================================================
--- relstorage/trunk/CHANGES.txt 2009-09-30 07:55:35 UTC (rev 104641)
+++ relstorage/trunk/CHANGES.txt 2009-09-30 08:23:21 UTC (rev 104642)
@@ -34,6 +34,10 @@
- Use the store connection rather than the load connection for OID
allocation.
+- Detect and handle backward time travel, which can happen after
+ failover to an out-of-date asynchronous slave database. For
+ simplicity, invalidate the whole ZODB cache when this happens.
+
1.3.0b1 (2009-09-04)
--------------------
Modified: relstorage/trunk/relstorage/adapters/poller.py
===================================================================
--- relstorage/trunk/relstorage/adapters/poller.py 2009-09-30 07:55:35 UTC (rev 104641)
+++ relstorage/trunk/relstorage/adapters/poller.py 2009-09-30 08:23:21 UTC (rev 104642)
@@ -14,6 +14,8 @@
from relstorage.adapters.interfaces import IPoller
from zope.interface import implements
+import logging
+log = logging.getLogger(__name__)
class Poller:
"""Database change notification poller"""
@@ -51,8 +53,12 @@
# If the previously polled transaction no longer exists,
# the cache is too old and needs to be cleared.
# XXX Do we actually need to detect this condition? I think
- # if we delete this block of code, all the reachable objects
- # will be invalidated anyway.
+ # if we delete this block of code, all the unreachable
+ # objects will be invalidated anyway. So, as a test, I have
+ # not written the equivalent of this block of code for
+ # history-free storage. If something goes wrong, then we'll
+ # know there's some other edge condition we have to account
+ # for.
stmt = "SELECT 1 FROM transaction WHERE tid = %(tid)s"
cursor.execute(intern(stmt % self.runner.script_vars),
{'tid': prev_polled_tid})
@@ -63,23 +69,42 @@
return None, new_polled_tid
# Get the list of changed OIDs and return it.
- if self.keep_history:
- stmt = """
- SELECT zoid
- FROM current_object
- WHERE tid > %(tid)s
- """
+ if new_polled_tid > prev_polled_tid:
+ if self.keep_history:
+ stmt = """
+ SELECT zoid
+ FROM current_object
+ WHERE tid > %(tid)s
+ """
+ else:
+ stmt = """
+ SELECT zoid
+ FROM object_state
+ WHERE tid > %(tid)s
+ """
+ params = {'tid': prev_polled_tid}
+ if ignore_tid is not None:
+ stmt += " AND tid != %(self_tid)s"
+ params['self_tid'] = ignore_tid
+ stmt = intern(stmt % self.runner.script_vars)
+
else:
- stmt = """
- SELECT zoid
- FROM object_state
- WHERE tid > %(tid)s
- """
- params = {'tid': prev_polled_tid}
- if ignore_tid is not None:
- stmt += " AND tid != %(self_tid)s"
- params['self_tid'] = ignore_tid
- stmt = intern(stmt % self.runner.script_vars)
+ # We moved backward in time. This can happen after failover
+ # to an asynchronous slave that is not fully up to date. If
+ # this was not caused by failover, it suggests that
+ # transaction IDs are not being created in order, which can
+ # lead to consistency violations.
+ log.warning(
+ "Detected backward time travel (old tid %d, new tid %d). "
+ "This is acceptable if it was caused by failover to a "
+ "read-only asynchronous slave, but otherwise it may "
+ "indicate a problem.",
+ prev_polled_tid, new_polled_tid)
+ # Although we could handle this situation by looking at the
+ # whole cache and invalidating only certain objects,
+ # invalidating the whole cache is simpler.
+ return None, new_polled_tid
+
cursor.execute(stmt, params)
oids = [oid for (oid,) in cursor]
Modified: relstorage/trunk/relstorage/tests/reltestbase.py
===================================================================
--- relstorage/trunk/relstorage/tests/reltestbase.py 2009-09-30 07:55:35 UTC (rev 104641)
+++ relstorage/trunk/relstorage/tests/reltestbase.py 2009-09-30 08:23:21 UTC (rev 104642)
@@ -421,7 +421,60 @@
self.assertRaises(UnpicklingError, self._storage.pack,
time.time() + 10000, referencesf)
+ def checkBackwardTimeTravel(self):
+ # When a failover event causes the storage to switch to an
+ # asynchronous slave that is not fully up to date, the poller
+ # should notice that backward time travel has occurred and
+ # handle the situation by invalidating all objects that have
+ # changed in the interval. (Currently, we simply invalidate all
+ # objects when backward time travel occurs.)
+ import os
+ import shutil
+ import tempfile
+ from ZODB.FileStorage import FileStorage
+ db = DB(self._storage)
+ try:
+ c = db.open()
+ r = c.root()
+ r['alpha'] = PersistentMapping()
+ transaction.commit()
+ # To simulate failover to an out of date async slave, take
+ # a snapshot of the database at this point, change some
+ # object, then restore the database to its earlier state.
+
+ d = tempfile.mkdtemp()
+ try:
+ fs = FileStorage(os.path.join(d, 'Data.fs'))
+ fs.copyTransactionsFrom(c._storage)
+
+ r['beta'] = PersistentMapping()
+ transaction.commit()
+ self.assertTrue('beta' in r)
+
+ c._storage.zap_all()
+ c._storage.copyTransactionsFrom(fs)
+
+ fs.close()
+ finally:
+ shutil.rmtree(d)
+
+ # r should still be in the cache.
+ self.assertTrue('beta' in r)
+
+ # Now sync, which will call poll_invalidations().
+ c.sync()
+
+ # r should have been invalidated
+ self.assertEqual(r._p_changed, None)
+
+ # r should be reverted to its earlier state.
+ self.assertFalse('beta' in r)
+
+ finally:
+ db.close()
+
+
class DoubleCommitter(Persistent):
"""A crazy persistent class that changes self in __getstate__"""
def __getstate__(self):
More information about the checkins mailing list