[Checkins] SVN: relstorage/trunk/ Make it possible to run just the pre-pack or the pack phase individually
Martijn Pieters
mj at zopatista.com
Fri Feb 25 08:14:01 EST 2011
Log message for revision 120572:
Make it possible to run just the pre-pack or the pack phase individually
Changed:
U relstorage/trunk/README.txt
U relstorage/trunk/relstorage/adapters/packundo.py
U relstorage/trunk/relstorage/component.xml
U relstorage/trunk/relstorage/options.py
U relstorage/trunk/relstorage/storage.py
U relstorage/trunk/relstorage/tests/hptestbase.py
U relstorage/trunk/relstorage/zodbpack.py
-=-
Modified: relstorage/trunk/README.txt
===================================================================
--- relstorage/trunk/README.txt 2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/README.txt 2011-02-25 13:14:00 UTC (rev 120572)
@@ -453,15 +453,26 @@
Disabling garbage collection is also a hack that ensures
inter-database references never break.
-``pack-dry-run``
- If pack-dry-run is true, pack operations perform a full analysis
- of what to pack, but no data is actually removed. After a dry run,
+``pack-prepack-only``
+ If pack-prepack-only is true, pack operations perform a full analysis
+ of what to pack, but no data is actually removed. After a pre-pack,
the pack_object, pack_state, and pack_state_tid tables are filled
with the list of object states and objects that would have been
- removed. The object_ref table will also be fully populated.
- The object_ref table can be queried to discover references
+ removed. If pack-gc is true, the object_ref table will also be fully
+ populated. The object_ref table can be queried to discover references
between stored objects.
+``pack-skip-prepack``
+ If pack-skip-prepack is true, the pre-pack phase is skipped and it
+ is assumed the pack_object, pack_state and pack_state_tid tables have
+ been filled already. Thus packing will only affect records already
+ targeted for packing by a previous pre-pack analysis run.
+
+ Use this option together with pack-prepack-only to split packing into
+ distinct phases, where each phase can be run during different
+ timeslots, or where a pre-pack analysis is run on a copy of the
+ database to alleviate a production database load.
+
``pack-batch-timeout``
Packing occurs in batches of transactions; this specifies the
timeout in seconds for each batch. Note that some database
@@ -635,11 +646,16 @@
history-free storages, since unreferenced objects are not removed
from the database until the specified number of days have passed.
- ``--dry-run``
- Instructs the storage to run a dry run of the pack but not actually
- delete anything. This is equivalent to specifying ``pack-dry-run true``
- in the storage options.
+ ``--prepack``
+ Instructs the storage to only run the pre-pack phase of the pack but not
+ actually delete anything. This is equivalent to specifying
+ ``pack-prepack-only true`` in the storage options.
+ ``--use-prepack-state``
+ Instructs the storage to only run the deletion (packing) phase, skipping
+ the pre-pack analysis phase. This is equivalent to specifying
+ ``pack-skip-prepack true`` in the storage options.
+
Development
===========
Modified: relstorage/trunk/relstorage/adapters/packundo.py
===================================================================
--- relstorage/trunk/relstorage/adapters/packundo.py 2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/adapters/packundo.py 2011-02-25 13:14:00 UTC (rev 120572)
@@ -186,6 +186,13 @@
_script_reset_temp_undo = "DROP TABLE temp_undo"
+ _script_find_pack_tid = """
+ SELECT keep_tid
+ FROM pack_object
+ ORDER BY keep_tid DESC
+ LIMIT 1
+ """
+
_script_transaction_has_data = """
SELECT tid
FROM object_state
@@ -609,6 +616,16 @@
self._traverse_graph(cursor)
+ def _find_pack_tid(self):
+ """If pack was not completed, find our pack tid again"""
+
+ conn, cursor = self.connmanager.open_for_pre_pack()
+ stmt = self._script_find_pack_tid
+ self.runner.run_script_stmt(cursor, stmt)
+ res = [tid for (tid,) in cursor]
+ return res and res[0] or 0
+
+
def pack(self, pack_tid, sleep=None, packed_func=None):
"""Pack. Requires the information provided by pre_pack."""
@@ -840,6 +857,11 @@
_script_create_temp_undo = None
_script_reset_temp_undo = "DELETE FROM temp_undo"
+ _script_find_pack_tid = """
+ SELECT MAX(keep_tid)
+ FROM pack_object
+ """
+
_script_transaction_has_data = """
SELECT DISTINCT tid
FROM object_state
@@ -1058,6 +1080,14 @@
self._traverse_graph(cursor)
+ def _find_pack_tid(self):
+ """If pack was not completed, find our pack tid again"""
+
+ # pack (below) ignores its pack_tid argument, so we can safely
+ # return None here
+ return None
+
+
def pack(self, pack_tid, sleep=None, packed_func=None):
"""Run garbage collection.
Modified: relstorage/trunk/relstorage/component.xml
===================================================================
--- relstorage/trunk/relstorage/component.xml 2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/component.xml 2011-02-25 13:14:00 UTC (rev 120572)
@@ -44,9 +44,12 @@
<key name="pack-gc" datatype="boolean" default="true">
<description>See the RelStorage README.txt file.</description>
</key>
- <key name="pack-dry-run" datatype="boolean" default="false">
+ <key name="pack-prepack-only" datatype="boolean" default="false">
<description>See the RelStorage README.txt file.</description>
</key>
+ <key name="pack-skip-prepack" datatype="boolean" default="false">
+ <description>See the RelStorage README.txt file.</description>
+ </key>
<key name="pack-batch-timeout" datatype="float" required="no">
<description>See the RelStorage README.txt file.</description>
</key>
Modified: relstorage/trunk/relstorage/options.py
===================================================================
--- relstorage/trunk/relstorage/options.py 2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/options.py 2011-02-25 13:14:00 UTC (rev 120572)
@@ -37,7 +37,8 @@
self.replica_timeout = 600.0
self.poll_interval = 0
self.pack_gc = True
- self.pack_dry_run = False
+ self.pack_prepack_only = False
+ self.pack_skip_prepack = False
self.pack_batch_timeout = 5.0
self.pack_duty_cycle = 0.5
self.pack_max_delay = 20.0
Modified: relstorage/trunk/relstorage/storage.py
===================================================================
--- relstorage/trunk/relstorage/storage.py 2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/storage.py 2011-02-25 13:14:00 UTC (rev 120572)
@@ -1060,14 +1060,16 @@
finally:
self._lock_release()
- def pack(self, t, referencesf, dry_run=False, sleep=None):
+ def pack(self, t, referencesf, prepack_only=False, skip_prepack=False,
+ sleep=None):
if self._is_read_only:
raise POSException.ReadOnlyError()
- dry_run = dry_run or self._options.pack_dry_run
+ prepack_only = prepack_only or self._options.pack_prepack_only
+ skip_prepack = skip_prepack or self._options.pack_skip_prepack
- pack_point = repr(TimeStamp(*time.gmtime(t)[:5] + (t % 60,)))
- pack_point_int = u64(pack_point)
+ if prepack_only and skip_prepack:
+ raise ValueError('Pick either prepack_only or skip_prepack.')
def get_references(state):
"""Return the set of OIDs the given state refers to."""
@@ -1086,28 +1088,34 @@
try:
adapter.locker.hold_pack_lock(lock_cursor)
try:
- # Find the latest commit before or at the pack time.
- tid_int = adapter.packundo.choose_pack_transaction(
- pack_point_int)
- if tid_int is None:
- log.debug("all transactions before %s have already "
- "been packed", time.ctime(t))
- return
+ if not skip_prepack:
+ # Find the latest commit before or at the pack time.
+ pack_point = repr(
+ TimeStamp(*time.gmtime(t)[:5] + (t % 60,)))
+ tid_int = adapter.packundo.choose_pack_transaction(
+ u64(pack_point))
+ if tid_int is None:
+ log.debug("all transactions before %s have already "
+ "been packed", time.ctime(t))
+ return
- if dry_run:
- log.info("pack: beginning dry run")
+ if prepack_only:
+ log.info("pack: beginning pre-pack")
- s = time.ctime(TimeStamp(p64(tid_int)).timeTime())
- log.info("pack: analyzing transactions committed "
- "%s or before", s)
+ s = time.ctime(TimeStamp(p64(tid_int)).timeTime())
+ log.info("pack: analyzing transactions committed "
+ "%s or before", s)
- # In pre_pack, the adapter fills tables with
- # information about what to pack. The adapter
- # must not actually pack anything yet.
- adapter.packundo.pre_pack(tid_int, get_references)
+ # In pre_pack, the adapter fills tables with
+ # information about what to pack. The adapter
+ # must not actually pack anything yet.
+ adapter.packundo.pre_pack(tid_int, get_references)
+ else:
+ # Need to determine the tid_int from the pack_object table
+ tid_int = adapter.packundo._find_pack_tid()
- if dry_run:
- log.info("pack: dry run complete")
+ if prepack_only:
+ log.info("pack: pre-pack complete")
else:
# Now pack.
if self.blobhelper is not None:
Modified: relstorage/trunk/relstorage/tests/hptestbase.py
===================================================================
--- relstorage/trunk/relstorage/tests/hptestbase.py 2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/tests/hptestbase.py 2011-02-25 13:14:00 UTC (rev 120572)
@@ -190,15 +190,25 @@
self._storage.load(oid, '')
finally:
db.close()
+ return oid
def checkPackGCDisabled(self):
self._storage._adapter.packundo.options.pack_gc = False
self.checkPackGC(expect_object_deleted=False)
- def checkPackGCDryRun(self):
- self._storage._options.pack_dry_run = True
+ def checkPackGCPrePackOnly(self):
+ self._storage._options.pack_prepack_only = True
self.checkPackGC(expect_object_deleted=False)
+ def checkPackGCReusePrePackData(self):
+ self._storage._options.pack_prepack_only = True
+ oid = self.checkPackGC(expect_object_deleted=False)
+ # We now have pre-pack analysis data
+ self._storage._options.pack_prepack_only = False
+ self._storage.pack(0, referencesf, skip_prepack=True)
+ # The object should now be gone
+ self.assertRaises(KeyError, self._storage.load, oid, '')
+
def checkPackOldUnreferenced(self):
db = DB(self._storage)
try:
Modified: relstorage/trunk/relstorage/zodbpack.py
===================================================================
--- relstorage/trunk/relstorage/zodbpack.py 2011-02-25 13:13:48 UTC (rev 120571)
+++ relstorage/trunk/relstorage/zodbpack.py 2011-02-25 13:14:00 UTC (rev 120572)
@@ -42,11 +42,18 @@
help="Days of history to keep (default 0)",
)
parser.add_option(
- "--dry-run", dest="dry_run", default=False,
+ "--prepack", dest="prepack", default=False,
action="store_true",
- help="Perform a dry run of the pack. "
+ help="Perform only the pre-pack preparation stage of a pack. "
"(Only works with some storage types)",
)
+ parser.add_option(
+ "--use-prepack-state", dest="reuse_prepack", default=False,
+ action="store_true",
+ help="Skip the preparation stage and go straight to packing. "
+ "Requires that a pre-pack has been run, or that packing was aborted "
+ "before it was completed.",
+ )
options, args = parser.parse_args(argv[1:])
if len(args) != 1:
@@ -65,9 +72,12 @@
log.info("Opening %s...", name)
storage = s.open()
log.info("Packing %s.", name)
- if options.dry_run:
- storage.pack(t, ZODB.serialize.referencesf, dry_run=True)
+ if options.prepack or options.reuse_prepack:
+ storage.pack(t, ZODB.serialize.referencesf,
+ prepack_only=options.prepack,
+ skip_prepack=options.reuse_prepack)
else:
+ # Be non-relstorage Storages friendly
storage.pack(t, ZODB.serialize.referencesf)
storage.close()
log.info("Packed %s.", name)
More information about the checkins
mailing list