[Checkins] SVN: relstorage/trunk/ Added logging and periodic commits to the pack code. This should help diagnose packing speed issues.

Shane Hathaway shane at hathawaymix.org
Sat Mar 29 18:13:49 EDT 2008


Log message for revision 85014:
  Added logging and periodic commits to the pack code.  This should help diagnose packing speed issues.
  

Changed:
  U   relstorage/trunk/CHANGELOG.txt
  U   relstorage/trunk/relstorage/adapters/common.py
  U   relstorage/trunk/relstorage/adapters/mysql.py

-=-
Modified: relstorage/trunk/CHANGELOG.txt
===================================================================
--- relstorage/trunk/CHANGELOG.txt	2008-03-29 18:50:37 UTC (rev 85013)
+++ relstorage/trunk/CHANGELOG.txt	2008-03-29 22:13:49 UTC (rev 85014)
@@ -6,7 +6,12 @@
   an issue with converting databases.  Thanks to Kevin Smith for
   discovering this.
 
+- Added logging to the pack code to help diagnose performance issues.
 
+- Additions to the object_ref table are now periodically committed
+  during pre_pack so that the work is not lost if pre_pack fails.
+
+
 RelStorage 1.0.1
 
 - The speedtest script failed if run on a test database that has no tables.

Modified: relstorage/trunk/relstorage/adapters/common.py
===================================================================
--- relstorage/trunk/relstorage/adapters/common.py	2008-03-29 18:50:37 UTC (rev 85013)
+++ relstorage/trunk/relstorage/adapters/common.py	2008-03-29 22:13:49 UTC (rev 85014)
@@ -362,11 +362,18 @@
             self.close(conn, cursor)
 
 
+    def open_for_pre_pack(self):
+        """Open a connection to be used for the pre-pack phase.
+        Returns (conn, cursor).
+
+        Subclasses may override this.
+        """
+        return self.open()
+
+
     def pre_pack(self, pack_tid, get_references, gc):
         """Decide what to pack.
 
-        Subclasses may override this.
-
         tid specifies the most recent transaction to pack.
 
         get_references is a function that accepts a pickled state and
@@ -377,23 +384,29 @@
         even if nothing refers to it.  Packing with gc disabled can be
         much faster.
         """
-        conn, cursor = self.open()
+        conn, cursor = self.open_for_pre_pack()
         try:
             try:
                 if gc:
-                    self._pre_pack_with_gc(cursor, pack_tid, get_references)
+                    log.info("pre_pack: start with gc enabled")
+                    self._pre_pack_with_gc(
+                        conn, cursor, pack_tid, get_references)
                 else:
-                    self._pre_pack_without_gc(cursor, pack_tid)
+                    log.info("pre_pack: start without gc")
+                    self._pre_pack_without_gc(
+                        conn, cursor, pack_tid)
             except:
+                log.exception("pre_pack: failed")
                 conn.rollback()
                 raise
             else:
+                log.info("pre_pack: finished successfully")
                 conn.commit()
         finally:
             self.close(conn, cursor)
 
 
-    def _pre_pack_without_gc(self, cursor, pack_tid):
+    def _pre_pack_without_gc(self, conn, cursor, pack_tid):
         """Determine what to pack, without garbage collection.
 
         With garbage collection disabled, there is no need to follow
@@ -401,6 +414,8 @@
         """
         # Fill the pack_object table with OIDs, but configure them
         # all to be kept by setting keep and keep_tid.
+        log.debug("pre_pack: populating pack_object")
+        subselect = self._scripts['select_keep_tid']
         stmt = """
         DELETE FROM pack_object;
 
@@ -409,20 +424,20 @@
         FROM object_state
         WHERE tid <= %(pack_tid)s;
 
-        UPDATE pack_object SET keep_tid = (@select_keep_tid@)
+        UPDATE pack_object SET keep_tid = (""" + subselect + """)
         """
-        stmt = stmt.replace(
-            '@select_keep_tid@', self._scripts['select_keep_tid'])
         self._run_script(cursor, stmt, {'pack_tid': pack_tid})
 
 
-    def _pre_pack_with_gc(self, cursor, pack_tid, get_references):
+    def _pre_pack_with_gc(self, conn, cursor, pack_tid, get_references):
         """Determine what to pack, with garbage collection.
         """
+        log.info("pre_pack: following references after the pack point")
         # Fill object_ref with references from object states
         # in transactions that will not be packed.
-        self._fill_nonpacked_refs(cursor, pack_tid, get_references)
+        self._fill_nonpacked_refs(conn, cursor, pack_tid, get_references)
 
+        log.debug("pre_pack: populating pack_object")
         # Fill the pack_object table with OIDs that either will be
         # removed (if nothing references the OID) or whose history will
         # be cut.
@@ -468,13 +483,17 @@
         # include objects currently set to be removed, keep
         # those objects as well.  Do this
         # repeatedly until all references have been satisfied.
+        pass_num = 1
         while True:
+            log.info("pre_pack: following references before the pack point, "
+                "pass %d", pass_num)
 
             # Make a list of all parent objects that still need
             # to be visited.  Then set keep_tid for all pack_object
             # rows with keep = true.
             # keep_tid must be set before _fill_pack_object_refs examines
             # references.
+            subselect = self._scripts['select_keep_tid']
             stmt = """
             DELETE FROM temp_pack_visit;
 
@@ -484,14 +503,17 @@
             WHERE keep = %(TRUE)s
                 AND keep_tid IS NULL;
 
-            UPDATE pack_object SET keep_tid = (@select_keep_tid@)
-            WHERE keep = %(TRUE)s AND keep_tid IS NULL
+            UPDATE pack_object SET keep_tid = (""" + subselect + """)
+            WHERE keep = %(TRUE)s AND keep_tid IS NULL;
+
+            SELECT COUNT(1) FROM temp_pack_visit
             """
-            stmt = stmt.replace(
-                '@select_keep_tid@', self._scripts['select_keep_tid'])
             self._run_script(cursor, stmt, {'pack_tid': pack_tid})
+            visit_count = cursor.fetchone()[0]
+            log.debug("pre_pack: checking references from %d object(s)",
+                visit_count)
 
-            self._fill_pack_object_refs(cursor, get_references)
+            self._fill_pack_object_refs(conn, cursor, get_references)
 
             # Visit the children of all parent objects that were
             # just visited.
@@ -505,12 +527,18 @@
                 )
             """
             self._run_script_stmt(cursor, stmt)
-            if not cursor.rowcount:
+            found_count = cursor.rowcount
+
+            log.info("pre_pack: found %d more referenced object(s) in "
+                "pass %d", found_count, pass_num)
+            if not found_count:
                 # No new references detected.
                 break
+            else:
+                pass_num += 1
 
 
-    def _fill_nonpacked_refs(self, cursor, pack_tid, get_references):
+    def _fill_nonpacked_refs(self, conn, cursor, pack_tid, get_references):
         """Fill object_ref for all transactions that will not be packed."""
         stmt = """
         SELECT DISTINCT tid
@@ -523,11 +551,11 @@
             )
         """
         self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
-        for (tid,) in cursor.fetchall():
-            self._add_refs_for_tid(cursor, tid, get_references)
+        tids = [tid for (tid,) in cursor]
+        self._add_refs_for_tids(conn, cursor, tids, get_references)
 
 
-    def _fill_pack_object_refs(self, cursor, get_references):
+    def _fill_pack_object_refs(self, conn, cursor, get_references):
         """Fill object_ref for all pack_object rows that have keep_tid."""
         stmt = """
         SELECT DISTINCT keep_tid
@@ -540,8 +568,8 @@
             )
         """
         cursor.execute(stmt)
-        for (tid,) in cursor.fetchall():
-            self._add_refs_for_tid(cursor, tid, get_references)
+        tids = [tid for (tid,) in cursor]
+        self._add_refs_for_tids(conn, cursor, tids, get_references)
 
 
     def _add_object_ref_rows(self, cursor, add_rows):
@@ -560,7 +588,12 @@
 
     def _add_refs_for_tid(self, cursor, tid, get_references):
         """Fill object_refs with all states for a transaction.
+
+        Returns the number of references added.
         """
+        log.debug("pre_pack: transaction %d: computing references ", tid)
+        from_count = 0
+
         stmt = """
         SELECT zoid, state
         FROM object_state
@@ -574,6 +607,7 @@
                 # Oracle
                 state = state.read()
             if state:
+                from_count += 1
                 to_oids = get_references(str(state))
                 for to_oid in to_oids:
                     add_rows.append((from_oid, tid, to_oid))
@@ -588,7 +622,26 @@
         """
         self._run_script_stmt(cursor, stmt, {'tid': tid})
 
+        to_count = len(add_rows)
+        log.debug("pre_pack: transaction %d: has %d reference(s) "
+            "from %d object(s)", tid, to_count, from_count)
+        return to_count
 
+
+    def _add_refs_for_tids(self, conn, cursor, tids, get_references):
+        """Fill object_refs with all states for multiple transactions."""
+        if tids:
+            added = 0
+            log.info("pre_pack: examining all references from objects in %d "
+                "transaction(s)" % len(tids))
+            for tid in tids:
+                added += self._add_refs_for_tid(cursor, tid, get_references)
+                if added >= 1000:
+                    # save the work done so far
+                    conn.commit()
+                    added = 0
+
+
     def _hold_commit_lock(self, cursor):
         """Hold the commit lock for packing"""
         cursor.execute("LOCK TABLE commit_lock IN EXCLUSIVE MODE")
@@ -601,26 +654,61 @@
         conn, cursor = self.open()
         try:
             try:
+                log.info("pack: start, pack_tid = %d", pack_tid)
+
+                stmt = """
+                select COUNT(1)
+                FROM transaction
+                WHERE tid > 0
+                    AND tid <= %(pack_tid)s
+                """
+                self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
+                transaction_count = cursor.fetchone()[0]
+
+                stmt = """
+                SELECT COUNT(1)
+                FROM pack_object
+                WHERE keep = %(FALSE)s
+                """
+                self._run_script_stmt(cursor, stmt)
+                delete_count = cursor.fetchone()[0]
+
+                stmt = """
+                SELECT COUNT(1)
+                FROM pack_object
+                WHERE keep = %(TRUE)s
+                """
+                self._run_script_stmt(cursor, stmt)
+                trim_count = cursor.fetchone()[0]
+
+                log.info(
+                    "pack: will pack %d transaction(s), delete %s object(s),"
+                        " and trim %s old object(s)",
+                        transaction_count, delete_count, trim_count)
+
                 # hold the commit lock for a moment to prevent deadlocks.
                 self._hold_commit_lock(cursor)
 
                 for table in ('object_ref', 'current_object', 'object_state'):
 
-                    # Remove objects that are in pack_object and have keep
-                    # set to false.
-                    stmt = """
-                    DELETE FROM %s
-                    WHERE zoid IN (
-                            SELECT zoid
-                            FROM pack_object
-                            WHERE keep = %%(FALSE)s
-                        )
-                    """ % table
-                    self._run_script_stmt(cursor, stmt)
+                    if delete_count > 0:
+                        # Remove objects that are in pack_object and have keep
+                        # set to false.
+                        log.debug("pack: deleting objects from %s", table)
+                        stmt = """
+                        DELETE FROM %s
+                        WHERE zoid IN (
+                                SELECT zoid
+                                FROM pack_object
+                                WHERE keep = %%(FALSE)s
+                            )
+                        """ % table
+                        self._run_script_stmt(cursor, stmt)
 
-                    if table != 'current_object':
+                    if trim_count > 0 and table != 'current_object':
                         # Cut the history of objects in pack_object that
                         # have keep set to true.
+                        log.debug("pack: trimming objects in %s", table)
                         stmt = """
                         DELETE FROM %s
                         WHERE zoid IN (
@@ -636,14 +724,18 @@
                         """ % (table, table)
                         self._run_script_stmt(cursor, stmt)
 
+                log.debug("pack: terminating prev_tid chains")
                 stmt = """
-                -- Terminate prev_tid chains
                 UPDATE object_state SET prev_tid = 0
                 WHERE tid <= %(pack_tid)s
-                    AND prev_tid != 0;
+                    AND prev_tid != 0
+                """
+                self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
 
-                -- For each tid to be removed, delete the corresponding row in
-                -- object_refs_added.
+                # For each tid to be removed, delete the corresponding row in
+                # object_refs_added.
+                log.debug("pack: deleting from object_refs_added")
+                stmt = """
                 DELETE FROM object_refs_added
                 WHERE tid > 0
                     AND tid <= %(pack_tid)s
@@ -651,9 +743,12 @@
                         SELECT 1
                         FROM object_state
                         WHERE tid = object_refs_added.tid
-                    );
+                    )
+                """
+                self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
 
-                -- Delete transactions no longer used.
+                log.debug("pack: deleting transactions")
+                stmt = """
                 DELETE FROM transaction
                 WHERE tid > 0
                     AND tid <= %(pack_tid)s
@@ -661,24 +756,29 @@
                         SELECT 1
                         FROM object_state
                         WHERE tid = transaction.tid
-                    );
+                    )
+                """
+                self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
 
-                -- Mark the remaining packable transactions as packed
+                log.debug("pack: marking transactions as packed")
+                stmt = """
                 UPDATE transaction SET packed = %(TRUE)s
                 WHERE tid > 0
                     AND tid <= %(pack_tid)s
-                    AND packed = %(FALSE)s;
-
-                -- Clean up.
-                DELETE FROM pack_object;
+                    AND packed = %(FALSE)s
                 """
-                self._run_script(cursor, stmt, {'pack_tid': pack_tid})
+                self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
 
+                log.debug("pack: clearing pack_object")
+                cursor.execute("DELETE FROM pack_object")
+
             except:
+                log.exception("pack: failed")
                 conn.rollback()
                 raise
 
             else:
+                log.info("pack: finished successfully")
                 conn.commit()
 
         finally:

Modified: relstorage/trunk/relstorage/adapters/mysql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/mysql.py	2008-03-29 18:50:37 UTC (rev 85013)
+++ relstorage/trunk/relstorage/adapters/mysql.py	2008-03-29 22:13:49 UTC (rev 85014)
@@ -543,8 +543,9 @@
         cursor.execute(stmt)
 
 
-    def pre_pack(self, pack_tid, get_references, gc):
-        """Decide what to pack.
+    def open_for_pre_pack(self):
+        """Open a connection to be used for the pre-pack phase.
+        Returns (conn, cursor).
 
         This overrides the method by the same name in common.Adapter.
         """
@@ -553,12 +554,10 @@
             # This phase of packing works best with transactions
             # disabled.  It changes no user-facing data.
             conn.autocommit(True)
-            if gc:
-                self._pre_pack_with_gc(cursor, pack_tid, get_references)
-            else:
-                self._pre_pack_without_gc(cursor, pack_tid)
-        finally:
+            return conn, cursor
+        except:
             self.close(conn, cursor)
+            raise
 
 
     def _hold_commit_lock(self, cursor):



More information about the Checkins mailing list