[Checkins] SVN: relstorage/trunk/ Still working through an issue that seems to involve Oracle RAC.

Shane Hathaway shane at hathawaymix.org
Fri Sep 17 03:37:03 EDT 2010


Log message for revision 116479:
  Still working through an issue that seems to involve Oracle RAC.
  Latest improvements:
  
  - Use the client time instead of the database server time as a factor
    in the transaction ID.  RelStorage was using the database server time
    to reduce the need for synchronized clocks, but in practice, that
    policy broke tests and did not really avoid the need to synchronize
    clocks.  Also, the effect of unsynchronized clocks is predictable
    and manageable: you'll get bunches of transactions with very similar
    timestamps.
  
  - If the database returns an object from the future (which should never
    happen), generate a ReadConflictError, hopefully giving the application
    a chance to recover.  The most likely causes of this are a broken
    database and threading bugs.
  
  

Changed:
  U   relstorage/trunk/CHANGES.txt
  U   relstorage/trunk/relstorage/adapters/interfaces.py
  U   relstorage/trunk/relstorage/adapters/txncontrol.py
  U   relstorage/trunk/relstorage/cache.py
  U   relstorage/trunk/relstorage/storage.py
  U   relstorage/trunk/relstorage/tests/test_cache.py
  U   relstorage/trunk/setup.py

-=-
Modified: relstorage/trunk/CHANGES.txt
===================================================================
--- relstorage/trunk/CHANGES.txt	2010-09-17 01:53:41 UTC (rev 116478)
+++ relstorage/trunk/CHANGES.txt	2010-09-17 07:37:03 UTC (rev 116479)
@@ -1,4 +1,20 @@
 
+Next Release
+------------
+
+- Use the client time instead of the database server time as a factor
+  in the transaction ID.  RelStorage was using the database server time
+  to reduce the need for synchronized clocks, but in practice, that
+  policy broke tests and did not really avoid the need to synchronize
+  clocks.  Also, the effect of unsynchronized clocks is predictable
+  and manageable: you'll get bunches of transactions with very similar
+  timestamps.
+
+- If the database returns an object from the future (which should never
+  happen), generate a ReadConflictError, hopefully giving the application
+  a chance to recover.  The most likely causes of this are a broken
+  database and threading bugs.
+
 1.4.0c3 (2010-07-31)
 --------------------
 
@@ -12,7 +28,6 @@
   sync method.  This should help the poll-interval option do its job
   better.
 
-
 1.4.0c2 (2010-07-28)
 --------------------
 

Modified: relstorage/trunk/relstorage/adapters/interfaces.py
===================================================================
--- relstorage/trunk/relstorage/adapters/interfaces.py	2010-09-17 01:53:41 UTC (rev 116478)
+++ relstorage/trunk/relstorage/adapters/interfaces.py	2010-09-17 07:37:03 UTC (rev 116479)
@@ -422,12 +422,9 @@
 class ITransactionControl(Interface):
     """Begin, commit, and abort transactions."""
 
-    def get_tid_and_time(cursor):
-        """Returns the most recent tid and the current database time.
+    def get_tid(cursor):
+        """Returns the most recent tid."""
 
-        The database time is the number of seconds since the epoch.
-        """
-
     def add_transaction(cursor, tid, username, description, extension,
             packed=False):
         """Add a transaction."""

Modified: relstorage/trunk/relstorage/adapters/txncontrol.py
===================================================================
--- relstorage/trunk/relstorage/adapters/txncontrol.py	2010-09-17 01:53:41 UTC (rev 116478)
+++ relstorage/trunk/relstorage/adapters/txncontrol.py	2010-09-17 07:37:03 UTC (rev 116479)
@@ -55,14 +55,11 @@
     def __init__(self, keep_history):
         self.keep_history = keep_history
 
-    def get_tid_and_time(self, cursor):
-        """Returns the most recent tid and the current database time.
-
-        The database time is the number of seconds since the epoch.
-        """
+    def get_tid(self, cursor):
+        """Returns the most recent tid."""
         if self.keep_history:
             stmt = """
-            SELECT tid, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
+            SELECT tid
             FROM transaction
             ORDER BY tid DESC
             LIMIT 1
@@ -70,7 +67,7 @@
             cursor.execute(stmt)
         else:
             stmt = """
-            SELECT tid, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
+            SELECT tid
             FROM object_state
             ORDER BY tid DESC
             LIMIT 1
@@ -78,11 +75,10 @@
             cursor.execute(stmt)
             if not cursor.rowcount:
                 # nothing has been stored yet
-                stmt = "SELECT 0, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)"
-                cursor.execute(stmt)
+                return 0
 
         assert cursor.rowcount == 1
-        return cursor.fetchone()
+        return cursor.fetchone()[0]
 
     def add_transaction(self, cursor, tid, username, description, extension,
             packed=False):
@@ -107,15 +103,12 @@
         self.keep_history = keep_history
         self.Binary = Binary
 
-    def get_tid_and_time(self, cursor):
-        """Returns the most recent tid and the current database time.
-
-        The database time is the number of seconds since the epoch.
-        """
+    def get_tid(self, cursor):
+        """Returns the most recent tid."""
         # Lock in share mode to ensure the data being read is up to date.
         if self.keep_history:
             stmt = """
-            SELECT tid, UNIX_TIMESTAMP()
+            SELECT tid
             FROM transaction
             ORDER BY tid DESC
             LIMIT 1
@@ -124,7 +117,7 @@
             cursor.execute(stmt)
         else:
             stmt = """
-            SELECT tid, UNIX_TIMESTAMP()
+            SELECT tid
             FROM object_state
             ORDER BY tid DESC
             LIMIT 1
@@ -133,18 +126,10 @@
             cursor.execute(stmt)
             if not cursor.rowcount:
                 # nothing has been stored yet
-                stmt = "SELECT 0, UNIX_TIMESTAMP()"
-                cursor.execute(stmt)
+                return 0
 
         assert cursor.rowcount == 1
-        tid, timestamp = cursor.fetchone()
-        # MySQL does not provide timestamps with more than one second
-        # precision.  To provide more precision, if the system time is
-        # within one minute of the MySQL time, use the system time instead.
-        now = time.time()
-        if abs(now - timestamp) <= 60.0:
-            timestamp = now
-        return tid, timestamp
+        return cursor.fetchone()[0]
 
     def add_transaction(self, cursor, tid, username, description, extension,
             packed=False):
@@ -189,45 +174,29 @@
         day, hour, min, sec = [float(v) for v in mo.groups()]
         return day * 86400 + hour * 3600 + min * 60 + sec
 
-    def get_tid_and_time(self, cursor):
-        """Returns the most recent tid and the current database time.
-
-        The database time is the number of seconds since the epoch.
+    def get_tid(self, cursor):
+        """Returns the most recent tid.
         """
         if self.keep_history:
             stmt = """
-            SELECT MAX(tid),
-                TO_CHAR(TO_DSINTERVAL(SYSTIMESTAMP - TO_TIMESTAMP_TZ(
-                '1970-01-01 00:00:00 +00:00','YYYY-MM-DD HH24:MI:SS TZH:TZM')))
+            SELECT MAX(tid)
             FROM transaction
             """
             cursor.execute(stmt)
             rows = list(cursor)
         else:
             stmt = """
-            SELECT MAX(tid),
-                TO_CHAR(TO_DSINTERVAL(SYSTIMESTAMP - TO_TIMESTAMP_TZ(
-                '1970-01-01 00:00:00 +00:00','YYYY-MM-DD HH24:MI:SS TZH:TZM')))
+            SELECT MAX(tid)
             FROM object_state
             """
             cursor.execute(stmt)
             rows = list(cursor)
             if not rows:
                 # nothing has been stored yet
-                stmt = """
-                SELECT 0,
-                TO_CHAR(TO_DSINTERVAL(SYSTIMESTAMP - TO_TIMESTAMP_TZ(
-                '1970-01-01 00:00:00 +00:00','YYYY-MM-DD HH24:MI:SS TZH:TZM')))
-                FROM DUAL
-                """
-                cursor.execute(stmt)
-                rows = list(cursor)
+                return 0
 
         assert len(rows) == 1
-        tid, now = rows[0]
-        if tid is None:
-            tid = 0
-        return tid, self._parse_dsinterval(now)
+        return rows[0][0]
 
     def add_transaction(self, cursor, tid, username, description, extension,
             packed=False):

Modified: relstorage/trunk/relstorage/cache.py
===================================================================
--- relstorage/trunk/relstorage/cache.py	2010-09-17 01:53:41 UTC (rev 116478)
+++ relstorage/trunk/relstorage/cache.py	2010-09-17 07:37:03 UTC (rev 116479)
@@ -15,6 +15,8 @@
 from relstorage.autotemp import AutoTemporaryFile
 from ZODB.utils import p64
 from ZODB.utils import u64
+from ZODB.POSException import ReadConflictError
+from ZODB.TimeStamp import TimeStamp
 import logging
 import random
 import threading
@@ -118,6 +120,73 @@
         self.current_tid = 0
         self.commit_count = object()
 
+    def _check_tid_after_load(self, oid_int, actual_tid_int,
+            expect_tid_int=None):
+        """Verify the tid of an object loaded from the database is sane."""
+
+        if actual_tid_int > self.current_tid:
+            # Strangely, the database just gave us data from a future
+            # transaction.  We can't give the data to ZODB because that
+            # would be a consistency violation.  However, the cause is hard
+            # to track down, so issue a ReadConflictError and hope that
+            # the application retries successfully.
+            raise ReadConflictError("Got data for OID 0x%(oid_int)x from "
+                "future transaction %(actual_tid_int)d (%(got_ts)s).  "
+                "Current transaction is %(current_tid)d (%(current_ts)s)."
+                % {
+                    'oid_int': oid_int,
+                    'actual_tid_int': actual_tid_int,
+                    'current_tid': self.current_tid,
+                    'got_ts': str(TimeStamp(p64(actual_tid_int))),
+                    'current_ts': str(TimeStamp(p64(self.current_tid))),
+                })
+
+        if expect_tid_int is not None and actual_tid_int != expect_tid_int:
+            # Uh-oh, the cache is inconsistent with the database.
+            # Possible causes:
+            #
+            # - The database MUST provide a snapshot view for each
+            #   session; this error can occur if that requirement is
+            #   violated. For example, MySQL's MyISAM engine is not
+            #   sufficient for the object_state table because MyISAM
+            #   can not provide a snapshot view. (InnoDB is
+            #   sufficient.)
+            #
+            # - Something could be writing to the database out
+            #   of order, such as a version of RelStorage that
+            #   acquires a different commit lock.
+            #
+            # - A software bug. In the past, there was a subtle bug
+            #   in after_poll() that caused it to ignore the
+            #   transaction order, leading it to sometimes put the
+            #   wrong tid in delta_after*.
+            cp0, cp1 = self.checkpoints
+            import os
+            import thread
+            raise AssertionError("Detected an inconsistency "
+                "between the RelStorage cache and the database "
+                "while loading an object using the delta_after0 dict.  "
+                "Please verify the database is configured for "
+                "ACID compliance and that all clients are using "
+                "the same commit lock.  "
+                "(oid_int=%(oid_int)r, expect_tid_int=%(expect_tid_int)r, "
+                "actual_tid_int=%(actual_tid_int)r, "
+                "current_tid=%(current_tid)r, cp0=%(cp0)r, cp1=%(cp1)r, "
+                "len(delta_after0)=%(lda0)r, len(delta_after1)=%(lda1)r, "
+                "pid=%(pid)r, thread_ident=%(thread_ident)r)"
+                % {
+                    'oid_int': oid_int,
+                    'expect_tid_int': expect_tid_int,
+                    'actual_tid_int': actual_tid_int,
+                    'current_tid': self.current_tid,
+                    'cp0': cp0,
+                    'cp1': cp1,
+                    'lda0': len(self.delta_after0),
+                    'lda1': len(self.delta_after1),
+                    'pid': os.getpid(),
+                    'thread_ident': thread.get_ident(),
+                })
+
     def load(self, cursor, oid_int):
         """Load the given object from cache if possible.
 
@@ -162,53 +231,8 @@
             # Cache miss.
             state, actual_tid_int = self.adapter.mover.load_current(
                 cursor, oid_int)
-            if actual_tid_int != tid_int:
-                # Uh-oh, the cache is inconsistent with the database.
-                # Possible causes:
-                #
-                # - The database MUST provide a snapshot view for each
-                #   session; this error can occur if that requirement is
-                #   violated. For example, MySQL's MyISAM engine is not
-                #   sufficient for the object_state table because MyISAM
-                #   can not provide a snapshot view. (InnoDB is
-                #   sufficient.)
-                #
-                # - Something could be writing to the database out
-                #   of order, such as a version of RelStorage that
-                #   acquires a different commit lock.
-                #
-                # - A software bug. In the past, there was a subtle bug
-                #   in after_poll() that caused it to ignore the
-                #   transaction order, leading it to sometimes put the
-                #   wrong tid in delta_after*.
+            self._check_tid_after_load(oid_int, actual_tid_int, tid_int)
 
-                cp0, cp1 = self.checkpoints
-                import os
-                import thread
-                raise AssertionError("Detected an inconsistency "
-                    "between the RelStorage cache and the database "
-                    "while loading an object using the delta_after0 dict.  "
-                    "Please verify the database is configured for "
-                    "ACID compliance and that all clients are using "
-                    "the same commit lock.  "
-                    "(oid_int=%(oid_int)r, tid_int=%(tid_int)r, "
-                    "actual_tid_int=%(actual_tid_int)r, "
-                    "current_tid=%(current_tid)r, cp0=%(cp0)r, cp1=%(cp1)r, "
-                    "len(delta_after0)=%(lda0)r, len(delta_after1)=%(lda1)r, "
-                    "pid=%(pid)r, thread_ident=%(thread_ident)r)"
-                    % {
-                        'oid_int': oid_int,
-                        'tid_int': tid_int,
-                        'actual_tid_int': actual_tid_int,
-                        'current_tid': self.current_tid,
-                        'cp0': cp0,
-                        'cp1': cp1,
-                        'lda0': len(self.delta_after0),
-                        'lda1': len(self.delta_after1),
-                        'pid': os.getpid(),
-                        'thread_ident': thread.get_ident(),
-                    })
-
             cache_data = '%s%s' % (p64(tid_int), state or '')
             for client in self.clients_local_first:
                 client.set(cachekey, cache_data)
@@ -254,6 +278,7 @@
         # Cache miss.
         state, tid_int = self.adapter.mover.load_current(cursor, oid_int)
         if tid_int:
+            self._check_tid_after_load(oid_int, tid_int)
             cache_data = '%s%s' % (p64(tid_int), state or '')
             for client in self.clients_local_first:
                 client.set(cp0_key, cache_data)

Modified: relstorage/trunk/relstorage/storage.py
===================================================================
--- relstorage/trunk/relstorage/storage.py	2010-09-17 01:53:41 UTC (rev 116478)
+++ relstorage/trunk/relstorage/storage.py	2010-09-17 07:37:03 UTC (rev 116479)
@@ -668,10 +668,11 @@
         user, desc, ext = self._ude
 
         # Choose a transaction ID.
-        # Base the transaction ID on the database time,
-        # while ensuring that the tid of this transaction
+        # Base the transaction ID on the current time,
+        # but ensure that the tid of this transaction
         # is greater than any existing tid.
-        last_tid, now = adapter.txncontrol.get_tid_and_time(cursor)
+        last_tid = adapter.txncontrol.get_tid(cursor)
+        now = time.time()
         stamp = TimeStamp(*(time.gmtime(now)[:5] + (now % 60,)))
         stamp = stamp.laterThan(TimeStamp(p64(last_tid)))
         tid = repr(stamp)

Modified: relstorage/trunk/relstorage/tests/test_cache.py
===================================================================
--- relstorage/trunk/relstorage/tests/test_cache.py	2010-09-17 01:53:41 UTC (rev 116478)
+++ relstorage/trunk/relstorage/tests/test_cache.py	2010-09-17 07:37:03 UTC (rev 116479)
@@ -59,6 +59,7 @@
         from ZODB.utils import p64
         adapter = MockAdapter()
         c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 60
         c.checkpoints = (50, 40)
         c.delta_after0[2] = 55
         data['myprefix:state:55:2'] = p64(55) + 'abc'
@@ -70,6 +71,7 @@
         from ZODB.utils import p64
         adapter = MockAdapter()
         c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 60
         c.checkpoints = (50, 40)
         c.delta_after0[2] = 55
         adapter.mover.data[2] = ('abc', 55)
@@ -81,6 +83,7 @@
         from ZODB.utils import p64
         adapter = MockAdapter()
         c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 60
         c.checkpoints = (50, 40)
         c.delta_after0[2] = 55
         adapter.mover.data[2] = ('abc', 56)
@@ -91,11 +94,29 @@
         else:
             self.fail("Failed to report cache inconsistency")
 
+    def test_load_using_delta_after0_future_error(self):
+        from relstorage.tests.fakecache import data
+        from ZODB.utils import p64
+        adapter = MockAdapter()
+        c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 55
+        c.checkpoints = (50, 40)
+        c.delta_after0[2] = 55
+        adapter.mover.data[2] = ('abc', 56)
+        from ZODB.POSException import ReadConflictError
+        try:
+            c.load(None, 2)
+        except ReadConflictError, e:
+            self.assertTrue('future' in e.message)
+        else:
+            self.fail("Failed to generate a conflict error")
+
     def test_load_using_checkpoint0_hit(self):
         from relstorage.tests.fakecache import data
         from ZODB.utils import p64
         adapter = MockAdapter()
         c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 60
         c.checkpoints = (50, 40)
         data['myprefix:state:50:2'] = p64(45) + 'xyz'
         res = c.load(None, 2)
@@ -106,6 +127,7 @@
         from ZODB.utils import p64
         adapter = MockAdapter()
         c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 60
         c.checkpoints = (50, 40)
         adapter.mover.data[2] = ('xyz', 45)
         res = c.load(None, 2)
@@ -117,6 +139,7 @@
         from ZODB.utils import p64
         adapter = MockAdapter()
         c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 60
         c.checkpoints = (50, 40)
         c.delta_after1[2] = 45
         data['myprefix:state:45:2'] = p64(45) + 'abc'
@@ -129,6 +152,7 @@
         from ZODB.utils import p64
         adapter = MockAdapter()
         c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 60
         c.checkpoints = (50, 40)
         c.delta_after1[2] = 45
         adapter.mover.data[2] = ('abc', 45)
@@ -141,6 +165,7 @@
         from ZODB.utils import p64
         adapter = MockAdapter()
         c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 60
         c.checkpoints = (50, 40)
         data['myprefix:state:40:2'] = p64(35) + '123'
         res = c.load(None, 2)
@@ -152,6 +177,7 @@
         from ZODB.utils import p64
         adapter = MockAdapter()
         c = self.getClass()(adapter, MockOptionsWithFakeCache())
+        c.current_tid = 60
         c.checkpoints = (50, 40)
         adapter.mover.data[2] = ('123', 35)
         res = c.load(None, 2)

Modified: relstorage/trunk/setup.py
===================================================================
--- relstorage/trunk/setup.py	2010-09-17 01:53:41 UTC (rev 116478)
+++ relstorage/trunk/setup.py	2010-09-17 07:37:03 UTC (rev 116479)
@@ -13,7 +13,7 @@
 ##############################################################################
 """A backend for ZODB that stores pickles in a relational database."""
 
-VERSION = "1.4.0c3"
+VERSION = "1.4.0c4"
 
 # The choices for the Trove Development Status line:
 # Development Status :: 5 - Production/Stable



More information about the checkins mailing list