[Checkins] SVN: relstorage/trunk/ Make it possible to run just the pre-pack or the pack phase individually

Martijn Pieters mj at zopatista.com
Fri Feb 25 08:14:01 EST 2011


Log message for revision 120572:
  Make it possible to run just the pre-pack or the pack phase individually

Changed:
  U   relstorage/trunk/README.txt
  U   relstorage/trunk/relstorage/adapters/packundo.py
  U   relstorage/trunk/relstorage/component.xml
  U   relstorage/trunk/relstorage/options.py
  U   relstorage/trunk/relstorage/storage.py
  U   relstorage/trunk/relstorage/tests/hptestbase.py
  U   relstorage/trunk/relstorage/zodbpack.py

-=-
Modified: relstorage/trunk/README.txt
===================================================================
--- relstorage/trunk/README.txt	2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/README.txt	2011-02-25 13:14:00 UTC (rev 120572)
@@ -453,15 +453,26 @@
         Disabling garbage collection is also a hack that ensures
         inter-database references never break.
 
-``pack-dry-run``
-        If pack-dry-run is true, pack operations perform a full analysis
-        of what to pack, but no data is actually removed.  After a dry run,
+``pack-prepack-only``
+        If pack-prepack-only is true, pack operations perform a full analysis
+        of what to pack, but no data is actually removed.  After a pre-pack,
         the pack_object, pack_state, and pack_state_tid tables are filled
         with the list of object states and objects that would have been
-        removed.  The object_ref table will also be fully populated.
-        The object_ref table can be queried to discover references
+        removed.  If pack-gc is true, the object_ref table will also be fully 
+        populated. The object_ref table can be queried to discover references
         between stored objects.
 
+``pack-skip-prepack``
+        If pack-skip-prepack is true, the pre-pack phase is skipped and it
+        is assumed the pack_object, pack_state and pack_state_tid tables have
+        been filled already. Thus packing will only affect records already
+        targeted for packing by a previous pre-pack analysis run.
+
+        Use this option together with pack-prepack-only to split packing into
+        distinct phases, where each phase can be run during different
+        timeslots, or where a pre-pack analysis is run on a copy of the
+        database to alleviate a production database load.
+
 ``pack-batch-timeout``
         Packing occurs in batches of transactions; this specifies the
         timeout in seconds for each batch.  Note that some database
@@ -635,11 +646,16 @@
     history-free storages, since unreferenced objects are not removed
     from the database until the specified number of days have passed.
 
-  ``--dry-run``
-    Instructs the storage to run a dry run of the pack but not actually
-    delete anything.  This is equivalent to specifying ``pack-dry-run true``
-    in the storage options.
+  ``--prepack``
+    Instructs the storage to only run the pre-pack phase of the pack but not
+    actually delete anything.  This is equivalent to specifying 
+    ``pack-prepack-only true`` in the storage options.
 
+  ``--use-prepack-state``
+    Instructs the storage to only run the deletion (packing) phase, skipping
+    the pre-pack analysis phase. This is equivalent to specifying
+    ``pack-skip-prepack true`` in the storage options.
+
 Development
 ===========
 

Modified: relstorage/trunk/relstorage/adapters/packundo.py
===================================================================
--- relstorage/trunk/relstorage/adapters/packundo.py	2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/adapters/packundo.py	2011-02-25 13:14:00 UTC (rev 120572)
@@ -186,6 +186,13 @@
 
     _script_reset_temp_undo = "DROP TABLE temp_undo"
 
+    _script_find_pack_tid = """
+        SELECT keep_tid
+        FROM pack_object
+        ORDER BY keep_tid DESC
+        LIMIT 1
+        """
+
     _script_transaction_has_data = """
         SELECT tid
         FROM object_state
@@ -609,6 +616,16 @@
         self._traverse_graph(cursor)
 
 
+    def _find_pack_tid(self):
+        """If pack was not completed, find our pack tid again"""
+
+        conn, cursor = self.connmanager.open_for_pre_pack()
+        stmt = self._script_find_pack_tid
+        self.runner.run_script_stmt(cursor, stmt)
+        res = [tid for (tid,) in cursor]
+        return res and res[0] or 0
+
+
     def pack(self, pack_tid, sleep=None, packed_func=None):
         """Pack.  Requires the information provided by pre_pack."""
 
@@ -840,6 +857,11 @@
     _script_create_temp_undo = None
     _script_reset_temp_undo = "DELETE FROM temp_undo"
 
+    _script_find_pack_tid = """
+        SELECT MAX(keep_tid)
+        FROM pack_object
+        """
+
     _script_transaction_has_data = """
         SELECT DISTINCT tid
         FROM object_state
@@ -1058,6 +1080,14 @@
         self._traverse_graph(cursor)
 
 
+    def _find_pack_tid(self):
+        """If pack was not completed, find our pack tid again"""
+
+        # pack (below) ignores its pack_tid argument, so we can safely
+        # return None here
+        return None
+
+
     def pack(self, pack_tid, sleep=None, packed_func=None):
         """Run garbage collection.
 

Modified: relstorage/trunk/relstorage/component.xml
===================================================================
--- relstorage/trunk/relstorage/component.xml	2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/component.xml	2011-02-25 13:14:00 UTC (rev 120572)
@@ -44,9 +44,12 @@
     <key name="pack-gc" datatype="boolean" default="true">
       <description>See the RelStorage README.txt file.</description>
     </key>
-    <key name="pack-dry-run" datatype="boolean" default="false">
+    <key name="pack-prepack-only" datatype="boolean" default="false">
       <description>See the RelStorage README.txt file.</description>
     </key>
+    <key name="pack-skip-prepack" datatype="boolean" default="false">
+      <description>See the RelStorage README.txt file.</description>
+    </key>
     <key name="pack-batch-timeout" datatype="float" required="no">
       <description>See the RelStorage README.txt file.</description>
     </key>

Modified: relstorage/trunk/relstorage/options.py
===================================================================
--- relstorage/trunk/relstorage/options.py	2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/options.py	2011-02-25 13:14:00 UTC (rev 120572)
@@ -37,7 +37,8 @@
         self.replica_timeout = 600.0
         self.poll_interval = 0
         self.pack_gc = True
-        self.pack_dry_run = False
+        self.pack_prepack_only = False
+        self.pack_skip_prepack = False
         self.pack_batch_timeout = 5.0
         self.pack_duty_cycle = 0.5
         self.pack_max_delay = 20.0

Modified: relstorage/trunk/relstorage/storage.py
===================================================================
--- relstorage/trunk/relstorage/storage.py	2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/storage.py	2011-02-25 13:14:00 UTC (rev 120572)
@@ -1060,14 +1060,16 @@
         finally:
             self._lock_release()
 
-    def pack(self, t, referencesf, dry_run=False, sleep=None):
+    def pack(self, t, referencesf, prepack_only=False, skip_prepack=False,
+             sleep=None):
         if self._is_read_only:
             raise POSException.ReadOnlyError()
 
-        dry_run = dry_run or self._options.pack_dry_run
+        prepack_only = prepack_only or self._options.pack_prepack_only
+        skip_prepack = skip_prepack or self._options.pack_skip_prepack
 
-        pack_point = repr(TimeStamp(*time.gmtime(t)[:5] + (t % 60,)))
-        pack_point_int = u64(pack_point)
+        if prepack_only and skip_prepack:
+            raise ValueError('Pick either prepack_only or skip_prepack.')
 
         def get_references(state):
             """Return the set of OIDs the given state refers to."""
@@ -1086,28 +1088,34 @@
         try:
             adapter.locker.hold_pack_lock(lock_cursor)
             try:
-                # Find the latest commit before or at the pack time.
-                tid_int = adapter.packundo.choose_pack_transaction(
-                    pack_point_int)
-                if tid_int is None:
-                    log.debug("all transactions before %s have already "
-                        "been packed", time.ctime(t))
-                    return
+                if not skip_prepack:
+                    # Find the latest commit before or at the pack time.
+                    pack_point = repr(
+                        TimeStamp(*time.gmtime(t)[:5] + (t % 60,)))
+                    tid_int = adapter.packundo.choose_pack_transaction(
+                        u64(pack_point))
+                    if tid_int is None:
+                        log.debug("all transactions before %s have already "
+                            "been packed", time.ctime(t))
+                        return
 
-                if dry_run:
-                    log.info("pack: beginning dry run")
+                    if prepack_only:
+                        log.info("pack: beginning pre-pack")
 
-                s = time.ctime(TimeStamp(p64(tid_int)).timeTime())
-                log.info("pack: analyzing transactions committed "
-                    "%s or before", s)
+                    s = time.ctime(TimeStamp(p64(tid_int)).timeTime())
+                    log.info("pack: analyzing transactions committed "
+                        "%s or before", s)
 
-                # In pre_pack, the adapter fills tables with
-                # information about what to pack.  The adapter
-                # must not actually pack anything yet.
-                adapter.packundo.pre_pack(tid_int, get_references)
+                    # In pre_pack, the adapter fills tables with
+                    # information about what to pack.  The adapter
+                    # must not actually pack anything yet.
+                    adapter.packundo.pre_pack(tid_int, get_references)
+                else:
+                    # Need to determine the tid_int from the pack_object table
+                    tid_int = adapter.packundo._find_pack_tid()
 
-                if dry_run:
-                    log.info("pack: dry run complete")
+                if prepack_only:
+                    log.info("pack: pre-pack complete")
                 else:
                     # Now pack.
                     if self.blobhelper is not None:

Modified: relstorage/trunk/relstorage/tests/hptestbase.py
===================================================================
--- relstorage/trunk/relstorage/tests/hptestbase.py	2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/tests/hptestbase.py	2011-02-25 13:14:00 UTC (rev 120572)
@@ -190,15 +190,25 @@
                 self._storage.load(oid, '')
         finally:
             db.close()
+        return oid
 
     def checkPackGCDisabled(self):
         self._storage._adapter.packundo.options.pack_gc = False
         self.checkPackGC(expect_object_deleted=False)
 
-    def checkPackGCDryRun(self):
-        self._storage._options.pack_dry_run = True
+    def checkPackGCPrePackOnly(self):
+        self._storage._options.pack_prepack_only = True
         self.checkPackGC(expect_object_deleted=False)
 
+    def checkPackGCReusePrePackData(self):
+        self._storage._options.pack_prepack_only = True
+        oid = self.checkPackGC(expect_object_deleted=False)
+        # We now have pre-pack analysis data
+        self._storage._options.pack_prepack_only = False
+        self._storage.pack(0, referencesf, skip_prepack=True)
+        # The object should now be gone
+        self.assertRaises(KeyError, self._storage.load, oid, '')
+
     def checkPackOldUnreferenced(self):
         db = DB(self._storage)
         try:

Modified: relstorage/trunk/relstorage/zodbpack.py
===================================================================
--- relstorage/trunk/relstorage/zodbpack.py	2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/zodbpack.py	2011-02-25 13:14:00 UTC (rev 120572)
@@ -42,11 +42,18 @@
         help="Days of history to keep (default 0)",
     )
     parser.add_option(
-        "--dry-run", dest="dry_run", default=False,
+        "--prepack", dest="prepack", default=False,
         action="store_true",
-        help="Perform a dry run of the pack. "
+        help="Perform only the pre-pack preparation stage of a pack. "
         "(Only works with some storage types)",
     )
+    parser.add_option(
+        "--use-prepack-state", dest="reuse_prepack", default=False,
+        action="store_true",
+        help="Skip the preparation stage and go straight to packing. "
+        "Requires that a pre-pack has been run, or that packing was aborted "
+        "before it was completed.",
+    )
     options, args = parser.parse_args(argv[1:])
 
     if len(args) != 1:
@@ -65,9 +72,12 @@
         log.info("Opening %s...", name)
         storage = s.open()
         log.info("Packing %s.", name)
-        if options.dry_run:
-            storage.pack(t, ZODB.serialize.referencesf, dry_run=True)
+        if options.prepack or options.reuse_prepack:
+            storage.pack(t, ZODB.serialize.referencesf,
+                prepack_only=options.prepack,
+                skip_prepack=options.reuse_prepack)
         else:
+            # Be friendly to non-RelStorage storages
             storage.pack(t, ZODB.serialize.referencesf)
         storage.close()
         log.info("Packed %s.", name)



More information about the checkins mailing list