[Checkins] SVN: relstorage/ Backported the history-free pack fix to 1.4.
Shane Hathaway
shane at hathawaymix.org
Tue Feb 1 01:18:00 EST 2011
Log message for revision 120032:
Backported the history-free pack fix to 1.4.
Changed:
U relstorage/branches/1.4/CHANGES.txt
U relstorage/branches/1.4/notes/migrate-to-1.4.txt
U relstorage/branches/1.4/relstorage/adapters/packundo.py
U relstorage/branches/1.4/relstorage/adapters/schema.py
U relstorage/branches/1.4/relstorage/tests/reltestbase.py
U relstorage/branches/1.4/relstorage/zodbpack.py
U relstorage/trunk/CHANGES.txt
U relstorage/trunk/notes/migrate-to-1.4.txt
-=-
Modified: relstorage/branches/1.4/CHANGES.txt
===================================================================
--- relstorage/branches/1.4/CHANGES.txt 2011-02-01 05:44:15 UTC (rev 120031)
+++ relstorage/branches/1.4/CHANGES.txt 2011-02-01 06:18:00 UTC (rev 120032)
@@ -5,6 +5,14 @@
there was a problem with conflict errors. The RelStorage patch of the
sync() method now works with ZODB 3.10.
+- Fixed a bug in packing history-free databases. If changes were
+ made to the database during the pack, the pack code could delete
+ too many objects. Thanks to Chris Withers for writing test code
+ that revealed the bug. A schema migration is required for history-free
+ databases; see notes/migrate-to-1.4.txt.
+
+- Enabled logging to stderr in zodbpack.
+
1.4.1 (2010-10-21)
------------------
Modified: relstorage/branches/1.4/notes/migrate-to-1.4.txt
===================================================================
--- relstorage/branches/1.4/notes/migrate-to-1.4.txt 2011-02-01 05:44:15 UTC (rev 120031)
+++ relstorage/branches/1.4/notes/migrate-to-1.4.txt 2011-02-01 06:18:00 UTC (rev 120032)
@@ -1,4 +1,39 @@
+Migrating to RelStorage version 1.4.2
+-------------------------------------
+
+If you are using a history-free storage, you need to drop and re-create
+the object_refs_added table. It contains only temporary state used during
+packing, so it is safe to drop and create the table at any time except while
+packing.
+
+Do not make these changes to history-preserving databases.
+
+PostgreSQL:
+
+ DROP TABLE object_refs_added;
+ CREATE TABLE object_refs_added (
+ zoid BIGINT NOT NULL PRIMARY KEY,
+ tid BIGINT NOT NULL
+ );
+
+MySQL:
+
+ DROP TABLE object_refs_added;
+ CREATE TABLE object_refs_added (
+ zoid BIGINT NOT NULL PRIMARY KEY,
+ tid BIGINT NOT NULL
+ ) ENGINE = MyISAM;
+
+Oracle:
+
+ DROP TABLE object_refs_added;
+ CREATE TABLE object_refs_added (
+ zoid NUMBER(20) NOT NULL PRIMARY KEY,
+ tid NUMBER(20) NOT NULL
+ );
+
+
Migrating to RelStorage version 1.4
-----------------------------------
Modified: relstorage/branches/1.4/relstorage/adapters/packundo.py
===================================================================
--- relstorage/branches/1.4/relstorage/adapters/packundo.py 2011-02-01 05:44:15 UTC (rev 120031)
+++ relstorage/branches/1.4/relstorage/adapters/packundo.py 2011-02-01 06:18:00 UTC (rev 120032)
@@ -50,99 +50,6 @@
finally:
self.connmanager.close(conn, cursor)
- def fill_object_refs(self, conn, cursor, get_references):
- """Update the object_refs table by analyzing new transactions."""
- if self.keep_history:
- stmt = """
- SELECT transaction.tid
- FROM transaction
- LEFT JOIN object_refs_added
- ON (transaction.tid = object_refs_added.tid)
- WHERE object_refs_added.tid IS NULL
- ORDER BY transaction.tid
- """
- else:
- stmt = """
- SELECT transaction.tid
- FROM (SELECT DISTINCT tid FROM object_state) transaction
- LEFT JOIN object_refs_added
- ON (transaction.tid = object_refs_added.tid)
- WHERE object_refs_added.tid IS NULL
- ORDER BY transaction.tid
- """
-
- self.runner.run_script_stmt(cursor, stmt)
- tids = [tid for (tid,) in cursor]
- if tids:
- added = 0
- log.info("discovering references from objects in %d "
- "transaction(s)" % len(tids))
- for tid in tids:
- added += self._add_refs_for_tid(cursor, tid, get_references)
- if added >= 10000:
- # save the work done so far
- conn.commit()
- added = 0
- if added:
- conn.commit()
-
- def _add_refs_for_tid(self, cursor, tid, get_references):
- """Fill object_refs with all states for a transaction.
-
- Returns the number of references added.
- """
- log.debug("pre_pack: transaction %d: computing references ", tid)
- from_count = 0
-
- stmt = """
- SELECT zoid, state
- FROM object_state
- WHERE tid = %(tid)s
- """
- self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
-
- replace_rows = []
- add_rows = [] # [(from_oid, tid, to_oid)]
- for from_oid, state in cursor:
- replace_rows.append((from_oid,))
- if hasattr(state, 'read'):
- # Oracle
- state = state.read()
- if state:
- from_count += 1
- try:
- to_oids = get_references(str(state))
- except:
- log.error("pre_pack: can't unpickle "
- "object %d in transaction %d; state length = %d" % (
- from_oid, tid, len(state)))
- raise
- for to_oid in to_oids:
- add_rows.append((from_oid, tid, to_oid))
-
- if not self.keep_history:
- stmt = "DELETE FROM object_ref WHERE zoid = %s"
- self.runner.run_many(cursor, stmt, replace_rows)
-
- stmt = """
- INSERT INTO object_ref (zoid, tid, to_zoid)
- VALUES (%s, %s, %s)
- """
- self.runner.run_many(cursor, stmt, add_rows)
-
- # The references have been computed for this transaction.
- stmt = """
- INSERT INTO object_refs_added (tid)
- VALUES (%(tid)s)
- """
- self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
-
- to_count = len(add_rows)
- log.debug("pre_pack: transaction %d: has %d reference(s) "
- "from %d object(s)", tid, to_count, from_count)
- return to_count
-
-
def _visit_all_references(self, cursor):
"""Visit all references in pack_object and set the keep flags.
"""
@@ -416,6 +323,91 @@
return res
+ def on_filling_object_refs(self):
+ """Test injection point"""
+
+ def fill_object_refs(self, conn, cursor, get_references):
+ """Update the object_refs table by analyzing new transactions."""
+ stmt = """
+ SELECT transaction.tid
+ FROM transaction
+ LEFT JOIN object_refs_added
+ ON (transaction.tid = object_refs_added.tid)
+ WHERE object_refs_added.tid IS NULL
+ ORDER BY transaction.tid
+ """
+ self.runner.run_script_stmt(cursor, stmt)
+ tids = [tid for (tid,) in cursor]
+ if tids:
+ self.on_filling_object_refs()
+ added = 0
+ log.info("discovering references from objects in %d "
+ "transaction(s)" % len(tids))
+ for tid in tids:
+ added += self._add_refs_for_tid(cursor, tid, get_references)
+ if added >= 10000:
+ # save the work done so far
+ conn.commit()
+ added = 0
+ if added:
+ conn.commit()
+
+ def _add_refs_for_tid(self, cursor, tid, get_references):
+ """Fill object_refs with all states for a transaction.
+
+ Returns the number of references added.
+ """
+ log.debug("pre_pack: transaction %d: computing references ", tid)
+ from_count = 0
+
+ stmt = """
+ SELECT zoid, state
+ FROM object_state
+ WHERE tid = %(tid)s
+ """
+ self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
+
+ replace_rows = []
+ add_rows = [] # [(from_oid, tid, to_oid)]
+ for from_oid, state in cursor:
+ replace_rows.append((from_oid,))
+ if hasattr(state, 'read'):
+ # Oracle
+ state = state.read()
+ if state:
+ from_count += 1
+ try:
+ to_oids = get_references(str(state))
+ except:
+ log.error("pre_pack: can't unpickle "
+ "object %d in transaction %d; state length = %d" % (
+ from_oid, tid, len(state)))
+ raise
+ for to_oid in to_oids:
+ add_rows.append((from_oid, tid, to_oid))
+
+ if not self.keep_history:
+ stmt = "DELETE FROM object_ref WHERE zoid = %s"
+ self.runner.run_many(cursor, stmt, replace_rows)
+
+ stmt = """
+ INSERT INTO object_ref (zoid, tid, to_zoid)
+ VALUES (%s, %s, %s)
+ """
+ self.runner.run_many(cursor, stmt, add_rows)
+
+ # The references have been computed for this transaction.
+ stmt = """
+ INSERT INTO object_refs_added (tid)
+ VALUES (%(tid)s)
+ """
+ self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
+
+ to_count = len(add_rows)
+ log.debug("pre_pack: transaction %d: has %d reference(s) "
+ "from %d object(s)", tid, to_count, from_count)
+ return to_count
+
def pre_pack(self, pack_tid, get_references, options):
"""Decide what to pack.
@@ -869,6 +861,88 @@
"""
raise UndoError("Undo is not supported by this storage")
+ def on_filling_object_refs(self):
+ """Test injection point"""
+
+ def fill_object_refs(self, conn, cursor, get_references):
+ """Update the object_refs table by analyzing new object states."""
+ stmt = """
+ SELECT object_state.zoid FROM object_state
+ LEFT JOIN object_refs_added USING (zoid)
+ WHERE object_refs_added.tid IS NULL
+ OR object_refs_added.tid != object_state.tid
+ ORDER BY object_state.zoid
+ """
+ self.runner.run_script_stmt(cursor, stmt)
+ oids = [oid for (oid,) in cursor]
+ if oids:
+ self.on_filling_object_refs()
+ added = 0
+ log.info("discovering references from %d objects", len(oids))
+ while oids:
+ batch = oids[:100]
+ oids = oids[100:]
+ added += self._add_refs_for_oids(cursor, batch, get_references)
+ if added >= 10000:
+ # save the work done so far
+ conn.commit()
+ added = 0
+ if added:
+ conn.commit()
+
+ def _add_refs_for_oids(self, cursor, oids, get_references):
+ """Fill object_refs with the states for some objects.
+
+ Returns the number of references added.
+ """
+ to_count = 0
+ oid_list = ','.join(str(oid) for oid in oids)
+
+ stmt = """
+ SELECT zoid, tid, state
+ FROM object_state
+ WHERE zoid IN (%s)
+ """ % oid_list
+ self.runner.run_script_stmt(cursor, stmt)
+ rows = list(cursor)
+ if not rows:
+ return 0
+
+ add_objects = []
+ add_refs = []
+ for from_oid, tid, state in rows:
+ if hasattr(state, 'read'):
+ # Oracle
+ state = state.read()
+ add_objects.append((from_oid, tid))
+ if state:
+ try:
+ to_oids = get_references(str(state))
+ except:
+ log.error("pre_pack: can't unpickle "
+ "object %d in transaction %d; state length = %d" % (
+ from_oid, tid, len(state)))
+ raise
+ for to_oid in to_oids:
+ add_refs.append((from_oid, tid, to_oid))
+
+ stmt = "DELETE FROM object_refs_added WHERE zoid IN (%s)" % oid_list
+ self.runner.run_script_stmt(cursor, stmt)
+ stmt = "DELETE FROM object_ref WHERE zoid IN (%s)" % oid_list
+ self.runner.run_script_stmt(cursor, stmt)
+
+ stmt = """
+ INSERT INTO object_ref (zoid, tid, to_zoid) VALUES (%s, %s, %s)
+ """
+ self.runner.run_many(cursor, stmt, add_refs)
+
+ stmt = """
+ INSERT INTO object_refs_added (zoid, tid) VALUES (%s, %s)
+ """
+ self.runner.run_many(cursor, stmt, add_objects)
+
+ return len(add_refs)
+
def pre_pack(self, pack_tid, get_references, options):
"""Decide what the garbage collector should delete.
@@ -937,7 +1011,7 @@
def pack(self, pack_tid, options, sleep=None, packed_func=None):
"""Run garbage collection.
- Requires the information provided by _pre_gc.
+ Requires the information provided by pre_pack.
"""
# Read committed mode is sufficient.
conn, cursor = self.connmanager.open()
@@ -1010,9 +1084,10 @@
stmt = """
DELETE FROM object_refs_added
- WHERE tid NOT IN (
- SELECT DISTINCT tid
- FROM object_state
+ WHERE zoid IN (
+ SELECT zoid
+ FROM pack_object
+ WHERE keep = %(FALSE)s
);
DELETE FROM object_ref
Modified: relstorage/branches/1.4/relstorage/adapters/schema.py
===================================================================
--- relstorage/branches/1.4/relstorage/adapters/schema.py 2011-02-01 05:44:15 UTC (rev 120031)
+++ relstorage/branches/1.4/relstorage/adapters/schema.py 2011-02-01 06:18:00 UTC (rev 120032)
@@ -476,23 +476,25 @@
PRIMARY KEY (zoid, to_zoid)
);
-# The object_refs_added table tracks whether object_refs has been
-# populated for all states in a given transaction. An entry is added
-# only when the work is finished.
+# The object_refs_added table tracks which state of each object
+# has been analyzed for references to other objects.
postgresql:
CREATE TABLE object_refs_added (
- tid BIGINT NOT NULL PRIMARY KEY
+ zoid BIGINT NOT NULL PRIMARY KEY,
+ tid BIGINT NOT NULL
);
mysql:
CREATE TABLE object_refs_added (
- tid BIGINT NOT NULL PRIMARY KEY
+ zoid BIGINT NOT NULL PRIMARY KEY,
+ tid BIGINT NOT NULL
) ENGINE = MyISAM;
oracle:
CREATE TABLE object_refs_added (
- tid NUMBER(20) NOT NULL PRIMARY KEY
+ zoid NUMBER(20) NOT NULL PRIMARY KEY,
+ tid NUMBER(20) NOT NULL
);
# pack_object contains temporary state during garbage collection: The
Modified: relstorage/branches/1.4/relstorage/tests/reltestbase.py
===================================================================
--- relstorage/branches/1.4/relstorage/tests/reltestbase.py 2011-02-01 05:44:15 UTC (rev 120031)
+++ relstorage/branches/1.4/relstorage/tests/reltestbase.py 2011-02-01 06:18:00 UTC (rev 120032)
@@ -544,6 +544,40 @@
finally:
db.close()
+ def checkPackWhileReferringObjectChanges(self):
+ # Packing should not remove objects referenced by an
+ # object that changes during packing.
+ db = DB(self._storage)
+ try:
+ # add some data to be packed
+ c = db.open()
+ root = c.root()
+ child = PersistentMapping()
+ root['child'] = child
+ transaction.commit()
+ expect_oids = [child._p_oid]
+
+ def inject_changes():
+ # Change the database just after the list of objects
+ # to analyze has been determined.
+ child2 = PersistentMapping()
+ root['child2'] = child2
+ transaction.commit()
+ expect_oids.append(child2._p_oid)
+
+ adapter = self._storage._adapter
+ adapter.packundo.on_filling_object_refs = inject_changes
+ packtime = time.time()
+ self._storage.pack(packtime, referencesf)
+
+ self.assertEqual(len(expect_oids), 2,
+ "The on_filling_object_refs hook should have been called once")
+ # Both children should still exist.
+ self._storage.load(expect_oids[0], '')
+ self._storage.load(expect_oids[1], '')
+ finally:
+ db.close()
+
def checkPackBrokenPickle(self):
# Verify the pack stops with the right exception if it encounters
# a broken pickle.
Modified: relstorage/branches/1.4/relstorage/zodbpack.py
===================================================================
--- relstorage/branches/1.4/relstorage/zodbpack.py 2011-02-01 05:44:15 UTC (rev 120031)
+++ relstorage/branches/1.4/relstorage/zodbpack.py 2011-02-01 06:18:00 UTC (rev 120032)
@@ -16,6 +16,7 @@
"""
from StringIO import StringIO
+import logging
import optparse
import sys
import time
@@ -30,6 +31,9 @@
</schema>
"""
+log = logging.getLogger("zodbpack")
+
+
def main(argv=sys.argv):
parser = optparse.OptionParser(description=__doc__,
usage="%prog [options] config_file")
@@ -42,14 +46,22 @@
if len(args) != 1:
parser.error("The name of one configuration file is required.")
+ logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
+
schema = ZConfig.loadSchemaFile(StringIO(schema_xml))
config, handler = ZConfig.loadConfig(schema, args[0])
t = time.time() - float(options.days) * 86400.0
for s in config.storages:
+ name = '%s (%s)' % ((s.name or 'storage'), s.__class__.__name__)
+ log.info("Opening %s...", name)
storage = s.open()
+ log.info("Packing %s.", name)
storage.pack(t, ZODB.serialize.referencesf)
storage.close()
+ log.info("Packed %s.", name)
if __name__ == '__main__':
main()
Modified: relstorage/trunk/CHANGES.txt
===================================================================
--- relstorage/trunk/CHANGES.txt 2011-02-01 05:44:15 UTC (rev 120031)
+++ relstorage/trunk/CHANGES.txt 2011-02-01 06:18:00 UTC (rev 120032)
@@ -2,16 +2,9 @@
Next Release
------------
-- Fixed a bug in packing history-free databases. If changes were
- made to the database during the pack, the pack code could delete
- too many objects. Thanks to Chris Withers for writing test code
- that revealed the bug.
-
- Added more logging during zodbconvert to show that something is
happening and give an indication of how far along the process is.
-- Enabled logging to stderr in zodbpack.
-
- Fixed a missing import in the blob cache cleanup code.
1.5.0a1 (2010-10-21)
@@ -31,6 +24,14 @@
there was a problem with conflict errors. The RelStorage patch of the
sync() method now works with ZODB 3.10.
+- Fixed a bug in packing history-free databases. If changes were
+ made to the database during the pack, the pack code could delete
+ too many objects. Thanks to Chris Withers for writing test code
+ that revealed the bug. A schema migration is required for history-free
+ databases; see notes/migrate-to-1.4.txt.
+
+- Enabled logging to stderr in zodbpack.
+
1.4.1 (2010-10-21)
------------------
Modified: relstorage/trunk/notes/migrate-to-1.4.txt
===================================================================
--- relstorage/trunk/notes/migrate-to-1.4.txt 2011-02-01 05:44:15 UTC (rev 120031)
+++ relstorage/trunk/notes/migrate-to-1.4.txt 2011-02-01 06:18:00 UTC (rev 120032)
@@ -1,4 +1,39 @@
+Migrating to RelStorage version 1.4.2
+-------------------------------------
+
+If you are using a history-free storage, you need to drop and re-create
+the object_refs_added table. It contains only temporary state used during
+packing, so it is safe to drop and create the table at any time except while
+packing.
+
+Do not make these changes to history-preserving databases.
+
+PostgreSQL:
+
+ DROP TABLE object_refs_added;
+ CREATE TABLE object_refs_added (
+ zoid BIGINT NOT NULL PRIMARY KEY,
+ tid BIGINT NOT NULL
+ );
+
+MySQL:
+
+ DROP TABLE object_refs_added;
+ CREATE TABLE object_refs_added (
+ zoid BIGINT NOT NULL PRIMARY KEY,
+ tid BIGINT NOT NULL
+ ) ENGINE = MyISAM;
+
+Oracle:
+
+ DROP TABLE object_refs_added;
+ CREATE TABLE object_refs_added (
+ zoid NUMBER(20) NOT NULL PRIMARY KEY,
+ tid NUMBER(20) NOT NULL
+ );
+
+
Migrating to RelStorage version 1.4
-----------------------------------
More information about the checkins
mailing list