[Checkins] SVN: relstorage/trunk/ The pack code was not removing as many objects as it should with GC on.

Shane Hathaway shane at hathawaymix.org
Sat Jan 24 15:44:19 EST 2009


Log message for revision 94984:
  The pack code was not removing as many objects as it should with GC on.
  
  This happened because the pack code was following references from all 
  object states in the object_ref table, when it should have been 
  following references only from states that will exist after the pack.
  
  The existence of this bug was obscured by an optimization that 
  populated only the necessary portion of the object_ref table.  This 
  suggests the optimization was a bad choice.  As it turns out, the
  object_ref table could be very useful for debugging, and 
  populating it is pretty easy, so now we populate the entire object_ref 
  table before packing.
  
  

Changed:
  U   relstorage/trunk/CHANGES.txt
  A   relstorage/trunk/notes/migrate-to-1.1.2.txt
  U   relstorage/trunk/relstorage/adapters/common.py
  U   relstorage/trunk/relstorage/adapters/mysql.py
  U   relstorage/trunk/relstorage/adapters/oracle.py
  U   relstorage/trunk/relstorage/adapters/postgresql.py
  U   relstorage/trunk/relstorage/tests/reltestbase.py

-=-
Modified: relstorage/trunk/CHANGES.txt
===================================================================
--- relstorage/trunk/CHANGES.txt	2009-01-24 15:24:05 UTC (rev 94983)
+++ relstorage/trunk/CHANGES.txt	2009-01-24 20:44:19 UTC (rev 94984)
@@ -1,5 +1,10 @@
 Next Release
 
+- Refined the pack algorithm.  It was not removing as many object states
+  as it should have.  As a bonus, there is now a method of adapters called
+  fill_object_refs(), which could be useful for debugging.  It ensures the
+  object_ref table is fully populated.
+
 - Began using zc.buildout for development.
 
 

Added: relstorage/trunk/notes/migrate-to-1.1.2.txt
===================================================================
--- relstorage/trunk/notes/migrate-to-1.1.2.txt	                        (rev 0)
+++ relstorage/trunk/notes/migrate-to-1.1.2.txt	2009-01-24 20:44:19 UTC (rev 94984)
@@ -0,0 +1,13 @@
+
+Migrating from RelStorage version 1.1.1 to version 1.1.2
+
+Before following these directions, first upgrade to the schema of
+RelStorage version 1.1.1 by following the directions in "migrate-to-1.1.1.txt".
+
+Only Oracle needs a schema update for this release:
+
+    DROP TABLE temp_pack_visit;
+    CREATE GLOBAL TEMPORARY TABLE temp_pack_visit (
+        zoid        NUMBER(20) NOT NULL PRIMARY KEY,
+        keep_tid    NUMBER(20)
+    );

Modified: relstorage/trunk/relstorage/adapters/common.py
===================================================================
--- relstorage/trunk/relstorage/adapters/common.py	2009-01-24 15:24:05 UTC (rev 94983)
+++ relstorage/trunk/relstorage/adapters/common.py	2009-01-24 20:44:19 UTC (rev 94984)
@@ -67,7 +67,8 @@
 
         'create_temp_pack_visit': """
             CREATE TEMPORARY TABLE temp_pack_visit (
-                zoid BIGINT NOT NULL
+                zoid BIGINT NOT NULL,
+                keep_tid BIGINT
             );
             CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid)
             """,
@@ -96,6 +97,7 @@
                     SELECT DISTINCT to_zoid
                     FROM object_ref
                         JOIN temp_pack_visit USING (zoid)
+                    WHERE object_ref.tid >= temp_pack_visit.keep_tid
                 )
             """,
 
@@ -528,12 +530,9 @@
         if stmt:
             self._run_script(cursor, stmt)
 
-        log.info("pre_pack: following references after the pack point")
-        # Fill object_ref with references from object states
-        # in transactions that will not be packed.
-        self._fill_nonpacked_refs(conn, cursor, pack_tid, get_references)
+        self.fill_object_refs(conn, cursor, get_references)
 
-        log.debug("pre_pack: populating pack_object")
+        log.info("pre_pack: filling the pack_object table")
         # Fill the pack_object table with OIDs that either will be
         # removed (if nothing references the OID) or whose history will
         # be cut.
@@ -561,7 +560,7 @@
         -- Keep objects that are still referenced by object states in
         -- transactions that will not be packed.
         -- Use temp_pack_visit for temporary state; otherwise MySQL 5 chokes.
-        INSERT INTO temp_pack_visit
+        INSERT INTO temp_pack_visit (zoid)
         SELECT DISTINCT to_zoid
         FROM object_ref
         WHERE tid > %(pack_tid)s;
@@ -583,8 +582,7 @@
         # repeatedly until all references have been satisfied.
         pass_num = 1
         while True:
-            log.info("pre_pack: following references before the pack point, "
-                "pass %d", pass_num)
+            log.info("pre_pack: following references, pass %d", pass_num)
 
             # Make a list of all parent objects that still need
             # to be visited.  Then set pack_object.visited for all pack_object
@@ -592,8 +590,8 @@
             stmt = """
             %(TRUNCATE)s temp_pack_visit;
 
-            INSERT INTO temp_pack_visit (zoid)
-            SELECT zoid
+            INSERT INTO temp_pack_visit (zoid, keep_tid)
+            SELECT zoid, keep_tid
             FROM pack_object
             WHERE keep = %(TRUE)s
                 AND visited = %(FALSE)s;
@@ -621,8 +619,6 @@
             log.debug("pre_pack: checking references from %d object(s)",
                 visit_count)
 
-            self._fill_pack_object_refs(conn, cursor, get_references)
-
             # Visit the children of all parent objects that were
             # just visited.
             stmt = self._scripts['prepack_follow_child_refs']
@@ -638,37 +634,6 @@
                 pass_num += 1
 
 
-    def _fill_nonpacked_refs(self, conn, cursor, pack_tid, get_references):
-        """Fill object_ref for all transactions that will not be packed."""
-        stmt = """
-        SELECT DISTINCT tid
-        FROM object_state
-        WHERE tid > %(pack_tid)s
-            AND NOT EXISTS (
-                SELECT 1
-                FROM object_refs_added
-                WHERE tid = object_state.tid
-            )
-        """
-        self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
-        tids = [tid for (tid,) in cursor]
-        self._add_refs_for_tids(conn, cursor, tids, get_references)
-
-
-    def _fill_pack_object_refs(self, conn, cursor, get_references):
-        """Fill object_ref for all objects that are to be kept."""
-        stmt = """
-        SELECT DISTINCT keep_tid
-        FROM pack_object
-            LEFT JOIN object_refs_added ON (keep_tid = tid)
-        WHERE keep = %(TRUE)s
-            AND object_refs_added.tid IS NULL
-        """
-        self._run_script_stmt(cursor, stmt)
-        tids = [tid for (tid,) in cursor]
-        self._add_refs_for_tids(conn, cursor, tids, get_references)
-
-
     def _add_object_ref_rows(self, cursor, add_rows):
         """Add rows to object_ref.
 
@@ -725,18 +690,29 @@
         return to_count
 
 
-    def _add_refs_for_tids(self, conn, cursor, tids, get_references):
-        """Fill object_refs with all states for multiple transactions."""
+    def fill_object_refs(self, conn, cursor, get_references):
+        """Update the object_refs table by analyzing new transactions."""
+        stmt = """
+        SELECT transaction.tid
+        FROM transaction
+            LEFT JOIN object_refs_added
+                ON (transaction.tid = object_refs_added.tid)
+        WHERE object_refs_added.tid IS NULL
+        """
+        cursor.execute(stmt)
+        tids = [tid for (tid,) in cursor]
         if tids:
             added = 0
-            log.info("pre_pack: discovering references from objects in %d "
+            log.info("discovering references from objects in %d "
                 "transaction(s)" % len(tids))
             for tid in tids:
                 added += self._add_refs_for_tid(cursor, tid, get_references)
-                if added >= 1000:
+                if added >= 10000:
                     # save the work done so far
                     conn.commit()
                     added = 0
+            if added:
+                conn.commit()
 
 
     def _hold_commit_lock(self, cursor):

Modified: relstorage/trunk/relstorage/adapters/mysql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/mysql.py	2009-01-24 15:24:05 UTC (rev 94983)
+++ relstorage/trunk/relstorage/adapters/mysql.py	2009-01-24 20:44:19 UTC (rev 94984)
@@ -70,7 +70,8 @@
     _scripts.update({
         'create_temp_pack_visit': """
             CREATE TEMPORARY TABLE temp_pack_visit (
-                zoid BIGINT NOT NULL
+                zoid BIGINT NOT NULL,
+                keep_tid BIGINT
             );
             CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid);
             CREATE TEMPORARY TABLE temp_pack_child (
@@ -87,7 +88,8 @@
             INSERT INTO temp_pack_child
             SELECT DISTINCT to_zoid
             FROM object_ref
-                JOIN temp_pack_visit USING (zoid);
+                JOIN temp_pack_visit USING (zoid)
+            WHERE object_ref.tid >= temp_pack_visit.keep_tid;
 
             -- MySQL-specific syntax for table join in update
             UPDATE pack_object, temp_pack_child SET keep = %(TRUE)s
@@ -203,8 +205,8 @@
         -- the object and all its revisions will be removed.
         -- If keep is true, instead of removing the object,
         -- the pack operation will cut the object's history.
-        -- The keep_tid field specifies which revision to keep within
-        -- the list of packable transactions.
+        -- The keep_tid field specifies the oldest revision
+        -- of the object to keep.
         -- The visited flag is set when pre_pack is visiting an object's
         -- references, and remains set.
         CREATE TABLE pack_object (

Modified: relstorage/trunk/relstorage/adapters/oracle.py
===================================================================
--- relstorage/trunk/relstorage/adapters/oracle.py	2009-01-24 15:24:05 UTC (rev 94983)
+++ relstorage/trunk/relstorage/adapters/oracle.py	2009-01-24 20:44:19 UTC (rev 94984)
@@ -223,9 +223,10 @@
         -- the object and all its revisions will be removed.
         -- If keep is 'Y', instead of removing the object,
         -- the pack operation will cut the object's history.
-        -- If keep is 'Y' then the keep_tid field must also be set.
-        -- The keep_tid field specifies which revision to keep within
-        -- the list of packable transactions.
+        -- The keep_tid field specifies the oldest revision
+        -- of the object to keep.
+        -- The visited flag is set when pre_pack is visiting an object's
+        -- references, and remains set.
         CREATE TABLE pack_object (
             zoid        NUMBER(20) NOT NULL PRIMARY KEY,
             keep        CHAR NOT NULL CHECK (keep IN ('N', 'Y')),
@@ -250,7 +251,8 @@
         -- Temporary state during packing: a list of objects
         -- whose references need to be examined.
         CREATE GLOBAL TEMPORARY TABLE temp_pack_visit (
-            zoid        NUMBER(20) NOT NULL PRIMARY KEY
+            zoid        NUMBER(20) NOT NULL PRIMARY KEY,
+            keep_tid    NUMBER(20)
         );
 
         -- Temporary state during undo: a list of objects

Modified: relstorage/trunk/relstorage/adapters/postgresql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/postgresql.py	2009-01-24 15:24:05 UTC (rev 94983)
+++ relstorage/trunk/relstorage/adapters/postgresql.py	2009-01-24 20:44:19 UTC (rev 94984)
@@ -106,8 +106,8 @@
         -- the object and all its revisions will be removed.
         -- If keep is true, instead of removing the object,
         -- the pack operation will cut the object's history.
-        -- The keep_tid field specifies which revision to keep within
-        -- the list of packable transactions.
+        -- The keep_tid field specifies the oldest revision
+        -- of the object to keep.
         -- The visited flag is set when pre_pack is visiting an object's
         -- references, and remains set.
         CREATE TABLE pack_object (

Modified: relstorage/trunk/relstorage/tests/reltestbase.py
===================================================================
--- relstorage/trunk/relstorage/tests/reltestbase.py	2009-01-24 15:24:05 UTC (rev 94983)
+++ relstorage/trunk/relstorage/tests/reltestbase.py	2009-01-24 20:44:19 UTC (rev 94984)
@@ -449,7 +449,38 @@
         self._storage._options.pack_gc = False
         self.checkPackGC(gc_enabled=False)
 
+    def checkPackOldUnreferenced(self):
+        db = DB(self._storage)
+        try:
+            c1 = db.open()
+            r1 = c1.root()
+            r1['A'] = PersistentMapping()
+            B = PersistentMapping()
+            r1['A']['B'] = B
+            transaction.get().note('add A then add B to A')
+            transaction.commit()
 
+            del r1['A']['B']
+            transaction.get().note('remove B from A')
+            transaction.commit()
+
+            r1['A']['C'] = ''
+            transaction.get().note('add C to A')
+            transaction.commit()
+
+            now = packtime = time.time()
+            while packtime <= now:
+                packtime = time.time()
+            self._storage.pack(packtime, referencesf)
+
+            # B should be gone, since nothing refers to it.
+            self.assertRaises(KeyError, self._storage.load, B._p_oid, '')
+
+        finally:
+            db.close()
+        
+
+
 class IteratorDeepCompareUnordered:
     # Like IteratorDeepCompare, but compensates for OID order
     # differences in transactions.



More information about the Checkins mailing list