[Checkins] SVN: relstorage/trunk/ The new graph traversal using Python sets is much faster. Use it exclusively.

Shane Hathaway shane at hathawaymix.org
Fri Feb 4 02:31:47 EST 2011


Log message for revision 120089:
  The new graph traversal using Python sets is much faster.  Use it exclusively.
  

Changed:
  U   relstorage/trunk/CHANGES.txt
  U   relstorage/trunk/relstorage/adapters/packundo.py
  U   relstorage/trunk/relstorage/options.py

-=-
Modified: relstorage/trunk/CHANGES.txt
===================================================================
--- relstorage/trunk/CHANGES.txt	2011-02-03 16:28:56 UTC (rev 120088)
+++ relstorage/trunk/CHANGES.txt	2011-02-04 07:31:46 UTC (rev 120089)
@@ -10,9 +10,12 @@
 - Added a --dry-run option to zodbpack.  Set it to force a dry run
   of packing.
 
-- Improved the efficiency of packing databases that are both broad
-  and deep.
+- Replaced the object traversal portion of the pack code with
+  a more efficient implementation using Python sets (instead of SQL).
+  The new code is much faster for packing databases with deeply
+  nested objects.
 
+
 1.5.0a1 (2010-10-21)
 --------------------
 

Modified: relstorage/trunk/relstorage/adapters/packundo.py
===================================================================
--- relstorage/trunk/relstorage/adapters/packundo.py	2011-02-03 16:28:56 UTC (rev 120088)
+++ relstorage/trunk/relstorage/adapters/packundo.py	2011-02-04 07:31:46 UTC (rev 120089)
@@ -52,20 +52,10 @@
             self.connmanager.close(conn, cursor)
 
     def _traverse_graph(self, cursor):
-        """Visit the entire object graph and set the pack_object.keep flags.
+        """Visit the entire object graph to find out what should be kept.
 
-        Dispatches to the implementation specified in
-        self.options.pack_gc_traversal.
+        Sets the pack_object.keep flags.
         """
-        m = getattr(self,
-            '_traverse_graph_%s' % self.options.pack_gc_traversal)
-        m(cursor)
-
-    def _traverse_graph_python(self, cursor):
-        """Visit the entire object graph and set the pack_object.keep flags.
-
-        Python implementation.
-        """
         log.info("pre_pack: downloading pack_object and object_ref.")
 
         # Download the list of root objects to keep from pack_object.
@@ -104,7 +94,8 @@
 
         # Traverse the object graph.  Add all of the reachable OIDs
         # to keep_set.
-        log.info("pre_pack: traversing the object graph.")
+        log.info("pre_pack: traversing the object graph "
+            "to find reachable objects.")
         parents = set()
         parents.update(keep_set)
         pass_num = 0
@@ -143,67 +134,6 @@
         if batch:
             upload_batch()
 
-    def _traverse_graph_sql(self, cursor):
-        """Visit the entire object graph and set the pack_object.keep flags.
-
-        SQL implementation.
-        """
-        # Each of the objects to be kept might refer to other objects.
-        # Mark the referenced objects to be kept as well. Do this
-        # repeatedly until all references have been satisfied.
-        pass_num = 1
-        while True:
-            log.info("pre_pack: following references, pass %d", pass_num)
-
-            # Make a list of all parent objects that still need to be
-            # visited. Then set pack_object.visited for all pack_object
-            # rows with keep = true.
-            stmt = """
-            %(TRUNCATE)s temp_pack_visit;
-
-            INSERT INTO temp_pack_visit (zoid, keep_tid)
-            SELECT zoid, keep_tid
-            FROM pack_object
-            WHERE keep = %(TRUE)s
-                AND visited = %(FALSE)s;
-
-            UPDATE pack_object SET visited = %(TRUE)s
-            WHERE keep = %(TRUE)s
-                AND visited = %(FALSE)s
-            """
-            self.runner.run_script(cursor, stmt)
-            visit_count = cursor.rowcount
-
-            if self.verify_sane_database:
-                # Verify the update actually worked.
-                # MySQL 5.1.23 fails this test; 5.1.24 passes.
-                stmt = """
-                SELECT 1
-                FROM pack_object
-                WHERE keep = %(TRUE)s AND visited = %(FALSE)s
-                """
-                self.runner.run_script_stmt(cursor, stmt)
-                if list(cursor):
-                    raise AssertionError(
-                        "database failed to update pack_object")
-
-            log.debug("pre_pack: checking references from %d object(s)",
-                visit_count)
-
-            # Visit the children of all parent objects that were
-            # just visited.
-            stmt = self._script_pre_pack_follow_child_refs
-            self.runner.run_script(cursor, stmt)
-            found_count = cursor.rowcount
-
-            log.debug("pre_pack: found %d more referenced object(s) in "
-                "pass %d", found_count, pass_num)
-            if not found_count:
-                # No new references detected.
-                break
-            else:
-                pass_num += 1
-
     def _pause_pack(self, sleep, start):
         """Pause packing to allow concurrent commits."""
         if sleep is None:
@@ -246,17 +176,6 @@
         CREATE INDEX temp_pack_keep_tid ON temp_pack_visit (keep_tid)
         """
 
-    _script_pre_pack_follow_child_refs = """
-        UPDATE pack_object SET keep = %(TRUE)s
-        WHERE keep = %(FALSE)s
-            AND zoid IN (
-                SELECT DISTINCT to_zoid
-                FROM object_ref
-                    JOIN temp_pack_visit USING (zoid)
-                WHERE object_ref.tid >= temp_pack_visit.keep_tid
-            )
-        """
-
     _script_create_temp_undo = """
         CREATE TEMPORARY TABLE temp_undo (
             zoid BIGINT NOT NULL,
@@ -447,7 +366,7 @@
             self.on_filling_object_refs()
             tid_count = len(tids)
             txns_done = 0
-            log.info("analyzing references from objects in %d "
+            log.info("analyzing references from objects in %d new "
                 "transaction(s)", tid_count)
             for tid in tids:
                 self._add_refs_for_tid(cursor, tid, get_references)
@@ -856,29 +775,8 @@
         );
         CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid);
         CREATE INDEX temp_pack_keep_tid ON temp_pack_visit (keep_tid);
-        CREATE TEMPORARY TABLE temp_pack_child (
-            zoid BIGINT UNSIGNED NOT NULL
-        );
-        CREATE UNIQUE INDEX temp_pack_child_zoid ON temp_pack_child (zoid);
         """
 
-    # Note: UPDATE must be the last statement in the script
-    # because it returns a value.
-    _script_pre_pack_follow_child_refs = """
-        %(TRUNCATE)s temp_pack_child;
-
-        INSERT IGNORE INTO temp_pack_child
-        SELECT to_zoid
-        FROM object_ref
-            JOIN temp_pack_visit USING (zoid)
-        WHERE object_ref.tid >= temp_pack_visit.keep_tid;
-
-        -- MySQL-specific syntax for table join in update
-        UPDATE pack_object, temp_pack_child SET keep = %(TRUE)s
-        WHERE keep = %(FALSE)s
-            AND pack_object.zoid = temp_pack_child.zoid;
-        """
-
     # MySQL optimizes deletion far better when using a join syntax.
     _script_pack_current_object = """
         DELETE FROM current_object
@@ -959,16 +857,6 @@
         CREATE INDEX temp_pack_keep_tid ON temp_pack_visit (keep_tid)
         """
 
-    _script_pre_pack_follow_child_refs = """
-        UPDATE pack_object SET keep = %(TRUE)s
-        WHERE keep = %(FALSE)s
-            AND zoid IN (
-                SELECT DISTINCT to_zoid
-                FROM object_ref
-                    JOIN temp_pack_visit USING (zoid)
-            )
-        """
-
     def verify_undoable(self, cursor, undo_tid):
         """Raise UndoError if it is not safe to undo the specified txn."""
         raise UndoError("Undo is not supported by this storage")
@@ -1259,29 +1147,9 @@
             keep_tid BIGINT UNSIGNED NOT NULL
         );
         CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid);
-        CREATE TEMPORARY TABLE temp_pack_child (
-            zoid BIGINT UNSIGNED NOT NULL
-        );
-        CREATE UNIQUE INDEX temp_pack_child_zoid ON temp_pack_child (zoid);
         """
 
-    # Note: UPDATE must be the last statement in the script
-    # because it returns a value.
-    _script_pre_pack_follow_child_refs = """
-        %(TRUNCATE)s temp_pack_child;
 
-        INSERT IGNORE INTO temp_pack_child
-        SELECT to_zoid
-        FROM object_ref
-            JOIN temp_pack_visit USING (zoid);
-
-        -- MySQL-specific syntax for table join in update
-        UPDATE pack_object, temp_pack_child SET keep = %(TRUE)s
-        WHERE keep = %(FALSE)s
-            AND pack_object.zoid = temp_pack_child.zoid;
-        """
-
-
 class OracleHistoryFreePackUndo(HistoryFreePackUndo):
 
     _script_choose_pack_transaction = """

Modified: relstorage/trunk/relstorage/options.py
===================================================================
--- relstorage/trunk/relstorage/options.py	2011-02-03 16:28:56 UTC (rev 120088)
+++ relstorage/trunk/relstorage/options.py	2011-02-04 07:31:46 UTC (rev 120089)
@@ -37,7 +37,6 @@
         self.replica_timeout = 600.0
         self.poll_interval = 0
         self.pack_gc = True
-        self.pack_gc_traversal = 'python'
         self.pack_dry_run = False
         self.pack_batch_timeout = 5.0
         self.pack_duty_cycle = 0.5



More information about the checkins mailing list