[Checkins] SVN: relstorage/trunk/ more pack optimizations, primarily for MySQL

Shane Hathaway shane at hathawaymix.org
Mon Dec 22 16:16:54 EST 2008


Log message for revision 94251:
  more pack optimizations, primarily for MySQL
  

Changed:
  U   relstorage/trunk/CHANGES.txt
  U   relstorage/trunk/relstorage/adapters/common.py
  U   relstorage/trunk/relstorage/adapters/mysql.py
  U   relstorage/trunk/relstorage/adapters/oracle.py
  U   relstorage/trunk/relstorage/relstorage.py

-=-
Modified: relstorage/trunk/CHANGES.txt
===================================================================
--- relstorage/trunk/CHANGES.txt	2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/CHANGES.txt	2008-12-22 21:16:54 UTC (rev 94251)
@@ -1,4 +1,4 @@
-Version 1.1.1
+Version 1.1.1 (unreleased)
 
 - Worked around MySQL performance bugs in packing.  Used temporary
   tables and another column in the pack_object table.  The
@@ -8,7 +8,7 @@
   bringing write speed back up to where it was in version 1.0.
 
 
-Version 1.1
+Version 1.1 (2008-12-19)
 
 - Normalized poll-invalidation patches as Solaris' patch command would not
   accept the current format. The patches now apply with:

Modified: relstorage/trunk/relstorage/adapters/common.py
===================================================================
--- relstorage/trunk/relstorage/adapters/common.py	2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/relstorage/adapters/common.py	2008-12-22 21:16:54 UTC (rev 94251)
@@ -98,9 +98,43 @@
                         JOIN temp_pack_visit USING (zoid)
                 )
             """,
+
+        'pack_current_object': """
+            DELETE FROM current_object
+            WHERE tid = %(tid)s
+                AND zoid in (
+                    SELECT pack_state.zoid
+                    FROM pack_state
+                    WHERE pack_state.tid = %(tid)s
+                )
+            """,
+
+        'pack_object_state': """
+            DELETE FROM object_state
+            WHERE tid = %(tid)s
+                AND zoid in (
+                    SELECT pack_state.zoid
+                    FROM pack_state
+                    WHERE pack_state.tid = %(tid)s
+                )
+            """,
+
+        'pack_object_ref': """
+            DELETE FROM object_refs_added
+            WHERE tid IN (
+                SELECT tid
+                FROM transaction
+                WHERE empty = %(TRUE)s
+                );
+            DELETE FROM object_ref
+            WHERE tid IN (
+                SELECT tid
+                FROM transaction
+                WHERE empty = %(TRUE)s
+                )
+            """,
     }
 
-
     def _run_script_stmt(self, cursor, generic_stmt, generic_params=()):
         """Execute a statement from a script with the given parameters.
 
@@ -185,7 +219,7 @@
         """Iterate over the transactions in the given range, oldest first.
 
         Includes packed transactions.
-        Yields (tid, packed, username, description, extension)
+        Yields (tid, username, description, extension, packed)
         for each transaction.
         """
         stmt = """
@@ -740,6 +774,10 @@
 
                 log.info("pack: will pack %d transaction(s)", len(tid_rows))
 
+                stmt = self._scripts['create_temp_pack_visit']
+                if stmt:
+                    self._run_script(cursor, stmt)
+
                 # Hold the commit lock while packing to prevent deadlocks.
                 # Pack in small batches of transactions in order to minimize
                 # the interruption of concurrent write operations.
@@ -785,22 +823,18 @@
             has_removable):
         """Pack one transaction.  Requires populated pack tables."""
         log.debug("pack: transaction %d: packing", tid)
-        counters = {}
+        removed_objects = 0
+        removed_states = 0
 
         if has_removable:
-            for _table in ('current_object', 'object_state'):
-                stmt = """
-                DELETE FROM _table
-                WHERE tid = %(tid)s
-                    AND zoid IN (
-                        SELECT pack_state.zoid
-                        FROM pack_state
-                        WHERE pack_state.tid = %(tid)s
-                    )
-                """.replace('_table', _table)
-                self._run_script_stmt(cursor, stmt, {'tid': tid})
-                counters[_table] = cursor.rowcount
+            stmt = self._scripts['pack_current_object']
+            self._run_script_stmt(cursor, stmt, {'tid': tid})
+            removed_objects = cursor.rowcount
 
+            stmt = self._scripts['pack_object_state']
+            self._run_script_stmt(cursor, stmt, {'tid': tid})
+            removed_states = cursor.rowcount
+
             # Terminate prev_tid chains
             stmt = """
             UPDATE object_state SET prev_tid = 0
@@ -828,9 +862,7 @@
 
         log.debug(
             "pack: transaction %d (%s): removed %d object(s) and %d state(s)",
-            tid, state,
-            counters.get('current_object', 0),
-            counters.get('object_state', 0))
+            tid, state, removed_objects, removed_states)
 
 
     def _pack_cleanup(self, conn, cursor):
@@ -839,7 +871,13 @@
         conn.commit()
         self._release_commit_lock(cursor)
         self._hold_commit_lock(cursor)
-        log.info("pack: removing empty packed transactions")
+        log.info("pack: cleaning up")
+
+        log.debug("pack: removing unused object references")
+        stmt = self._scripts['pack_object_ref']
+        self._run_script(cursor, stmt)
+
+        log.debug("pack: removing empty packed transactions")
         stmt = """
         DELETE FROM transaction
         WHERE packed = %(TRUE)s
@@ -856,28 +894,7 @@
             stmt = '%(TRUNCATE)s ' + _table
             self._run_script_stmt(cursor, stmt)
 
-        log.debug("pack: removing unused object references")
-        stmt = """
-        DELETE FROM object_ref
-        WHERE tid IN (
-            SELECT tid
-            FROM transaction
-            WHERE empty = %(TRUE)s
-            )
-        """
-        self._run_script_stmt(cursor, stmt)
 
-        stmt = """
-        DELETE FROM object_refs_added
-        WHERE tid IN (
-            SELECT tid
-            FROM transaction
-            WHERE empty = %(TRUE)s
-            )
-        """
-        self._run_script_stmt(cursor, stmt)
-
-
     def poll_invalidations(self, conn, cursor, prev_polled_tid, ignore_tid):
         """Polls for new transactions.
 
@@ -902,7 +919,8 @@
             return (), new_polled_tid
 
         stmt = "SELECT 1 FROM transaction WHERE tid = %(tid)s"
-        cursor.execute(stmt % self._script_vars, {'tid': prev_polled_tid})
+        cursor.execute(intern(stmt % self._script_vars),
+            {'tid': prev_polled_tid})
         rows = cursor.fetchall()
         if not rows:
             # Transaction not found; perhaps it has been packed.
@@ -916,10 +934,11 @@
         WHERE tid > %(tid)s
         """
         if ignore_tid is None:
-            cursor.execute(stmt % self._script_vars, {'tid': prev_polled_tid})
+            cursor.execute(intern(stmt % self._script_vars),
+                {'tid': prev_polled_tid})
         else:
             stmt += " AND tid != %(self_tid)s"
-            cursor.execute(stmt % self._script_vars,
+            cursor.execute(intern(stmt % self._script_vars),
                 {'tid': prev_polled_tid, 'self_tid': ignore_tid})
         oids = [oid for (oid,) in cursor]
 

Modified: relstorage/trunk/relstorage/adapters/mysql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/mysql.py	2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/relstorage/adapters/mysql.py	2008-12-22 21:16:54 UTC (rev 94251)
@@ -67,33 +67,62 @@
     # Work around a MySQL performance bug by avoiding an expensive subquery.
     # See: http://mail.zope.org/pipermail/zodb-dev/2008-May/011880.html
     #      http://bugs.mysql.com/bug.php?id=28257
-    _scripts['create_temp_pack_visit'] = """
-    CREATE TEMPORARY TABLE temp_pack_visit (
-        zoid BIGINT NOT NULL
-    );
-    CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid);
-    CREATE TEMPORARY TABLE temp_pack_child (
-        zoid BIGINT NOT NULL
-    );
-    CREATE UNIQUE INDEX temp_pack_child_zoid ON temp_pack_child (zoid);
-    """
+    _scripts.update({
+        'create_temp_pack_visit': """
+            CREATE TEMPORARY TABLE temp_pack_visit (
+                zoid BIGINT NOT NULL
+            );
+            CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid);
+            CREATE TEMPORARY TABLE temp_pack_child (
+                zoid BIGINT NOT NULL
+            );
+            CREATE UNIQUE INDEX temp_pack_child_zoid ON temp_pack_child (zoid);
+            """,
 
-    # Note: UPDATE must be the last statement in the script
-    # because it returns a value.
-    _scripts['prepack_follow_child_refs'] = """
-    %(TRUNCATE)s temp_pack_child;
+        # Note: UPDATE must be the last statement in the script
+        # because it returns a value.
+        'prepack_follow_child_refs': """
+            %(TRUNCATE)s temp_pack_child;
 
-    INSERT INTO temp_pack_child
-    SELECT DISTINCT to_zoid
-    FROM object_ref
-        JOIN temp_pack_visit USING (zoid);
+            INSERT INTO temp_pack_child
+            SELECT DISTINCT to_zoid
+            FROM object_ref
+                JOIN temp_pack_visit USING (zoid);
 
-    -- MySQL-specific syntax for table join in update
-    UPDATE pack_object, temp_pack_child SET keep = %(TRUE)s
-    WHERE keep = %(FALSE)s
-        AND pack_object.zoid = temp_pack_child.zoid;
-    """
+            -- MySQL-specific syntax for table join in update
+            UPDATE pack_object, temp_pack_child SET keep = %(TRUE)s
+            WHERE keep = %(FALSE)s
+                AND pack_object.zoid = temp_pack_child.zoid;
+            """,
 
+        # MySQL optimizes deletion far better when using a join syntax.
+        'pack_current_object': """
+            DELETE FROM current_object
+            USING current_object
+                JOIN pack_state USING (zoid, tid)
+            WHERE current_object.tid = %(tid)s
+            """,
+
+        'pack_object_state': """
+            DELETE FROM object_state
+            USING object_state
+                JOIN pack_state USING (zoid, tid)
+            WHERE object_state.tid = %(tid)s
+            """,
+
+        'pack_object_ref': """
+            DELETE FROM object_refs_added
+            USING object_refs_added
+                JOIN transaction USING (tid)
+            WHERE transaction.empty = true;
+
+            DELETE FROM object_ref
+            USING object_ref
+                JOIN transaction USING (tid)
+            WHERE transaction.empty = true
+            """,
+        })
+
     def __init__(self, **params):
         self._params = params.copy()
 
@@ -142,6 +171,7 @@
             tid         BIGINT NOT NULL,
             FOREIGN KEY (zoid, tid) REFERENCES object_state (zoid, tid)
         ) ENGINE = InnoDB;
+        CREATE INDEX current_object_tid ON current_object (tid);
 
         -- A list of referenced OIDs from each object_state.
         -- This table is populated as needed during packing.

Modified: relstorage/trunk/relstorage/adapters/oracle.py
===================================================================
--- relstorage/trunk/relstorage/adapters/oracle.py	2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/relstorage/adapters/oracle.py	2008-12-22 21:16:54 UTC (rev 94251)
@@ -61,7 +61,8 @@
         'max_tid':      ':max_tid',
     }
 
-    _scripts = {
+    _scripts = Adapter._scripts.copy()
+    _scripts.update({
         'choose_pack_transaction': """
             SELECT MAX(tid)
             FROM transaction
@@ -79,11 +80,8 @@
             FROM object_state
             WHERE tid = %(tid)s
             """,
+    })
 
-        'prepack_follow_child_refs':
-            Adapter._scripts['prepack_follow_child_refs'],
-    }
-
     def __init__(self, user, password, dsn, twophase=False, arraysize=64,
             use_inline_lobs=None):
         """Create an Oracle adapter.

Modified: relstorage/trunk/relstorage/relstorage.py
===================================================================
--- relstorage/trunk/relstorage/relstorage.py	2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/relstorage/relstorage.py	2008-12-22 21:16:54 UTC (rev 94251)
@@ -834,6 +834,9 @@
                     # Nothing needs to be packed.
                     return
 
+                s = time.ctime(TimeStamp(p64(tid_int)).timeTime())
+                log.info("packing transactions committed %s or before", s)
+
                 # In pre_pack, the adapter fills tables with
                 # information about what to pack.  The adapter
                 # should not actually pack anything yet.



More information about the Checkins mailing list