[Checkins] SVN: zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/ Fix conflict resolution by adding PersistentReferenceSet..
Andrew Sung
asung at zope.com
Mon Jan 9 23:44:06 UTC 2012
Log message for revision 124007:
Fix conflict resolution by adding PersistentReferenceSet..
Changed:
U zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/_queue.py
U zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/queue.txt
U zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/tests.py
-=-
Modified: zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/_queue.py
===================================================================
--- zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/_queue.py 2012-01-09 20:48:57 UTC (rev 124006)
+++ zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/_queue.py 2012-01-09 23:44:05 UTC (rev 124007)
@@ -1,4 +1,5 @@
from persistent import Persistent
+from ZODB.ConflictResolution import PersistentReference
from ZODB.POSException import ConflictError
from zope import interface
@@ -51,6 +52,104 @@
PersistentQueue = BucketQueue # for legacy instances, be conservative
+class PersistentReferenceSet(object):
+ """PersistentReferenceSet
+
+ `ZODB.ConflictResolution.PersistentReference` doesn't get handled correctly
+ in the resolveQueueConflict function due to lack of the `__hash__` method.
+ So we make workaround here to utilize `__cmp__` method of
+ `PersistentReference`.
+
+ """
+ def __init__(self, seq):
+ assert isinstance(seq, tuple)
+ self._data = self._dedup(seq)
+
+ def _dedup(self, seq):
+ seq = list(seq)
+ cnt = 0
+ while len(seq) > cnt:
+ remove = []
+ for idx, item in enumerate(seq[cnt + 1:]):
+ try:
+ if item == seq[cnt]:
+ remove.append(cnt + idx + 1)
+ except ValueError:
+ pass
+ for idx in reversed(remove):
+ seq.pop(idx)
+ cnt += 1
+ return tuple(seq)
+
+ def __cmp__(self, other):
+ if len(self._data) == len(other._data):
+ other_data = list(other._data[:])
+ for item in self._data:
+ for index, other_item in enumerate(other_data):
+ try:
+ if item == other_item:
+ other_data.pop(index)
+ except ValueError:
+ pass
+ else:
+ break
+ else:
+ break
+ else:
+ assert len(other_data) == 0
+ return 0
+ raise ValueError(
+ "can't reliably compare against different "
+ "PersistentReferences")
+
+ def __sub__(self, other):
+ self_data = list(self._data[:])
+ for other_item in other._data:
+ for index, item in enumerate(self_data):
+ try:
+ if other_item == item:
+ self_data.pop(index)
+ except ValueError:
+ pass
+ else:
+ break
+ return PersistentReferenceSet(tuple(self_data))
+
+ def __and__(self, other):
+ self_data = list(self._data[:])
+ intersection = []
+ for other_item in other._data:
+ for index, item in enumerate(self_data):
+ try:
+ if other_item == item:
+ self_data.pop(index)
+ intersection.append(item)
+ except ValueError:
+ pass
+ else:
+ break
+ return PersistentReferenceSet(tuple(intersection))
+
+ def __iter__(self):
+ for item in self._data:
+ yield item
+
+ def __len__(self):
+ return len(self._data)
+
+ def __repr__(self):
+ return "PRSet(%s)" % str(self._data)
+
+ def __contains__(self, item):
+ for data in self._data:
+ try:
+ if item == data:
+ return True
+ except ValueError:
+ pass
+ return False
+
+
def resolveQueueConflict(oldstate, committedstate, newstate, bucket=False):
# we only know how to merge _data. If anything else is different,
# puke.
@@ -66,10 +165,19 @@
committed = committedstate['_data']
new = newstate['_data']
- old_set = set(old)
- committed_set = set(committed)
- new_set = set(new)
+ # If items in the queue are persistent object, we can't use set().
+ # see 'queue.txt'
+ for item in (old + committed + new):
+ if not isinstance(item, PersistentReference):
+ Set = set
+ break
+ else:
+ Set = PersistentReferenceSet
+ old_set = Set(old)
+ committed_set = Set(committed)
+ new_set = Set(new)
+
if bucket and bool(old_set) and (bool(committed_set) ^ bool(new_set)):
# This is a bucket, part of a CompositePersistentQueue. The old set
# of this bucket had items, and one of the two transactions cleaned
@@ -98,7 +206,7 @@
mod_committed.append(v)
if new_added:
ordered_new_added = new[-len(new_added):]
- assert set(ordered_new_added) == new_added
+ assert Set(ordered_new_added) == new_added
mod_committed.extend(ordered_new_added)
committedstate['_data'] = tuple(mod_committed)
return committedstate
Modified: zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/queue.txt
===================================================================
--- zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/queue.txt 2012-01-09 20:48:57 UTC (rev 124006)
+++ zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/queue.txt 2012-01-09 23:44:05 UTC (rev 124007)
@@ -394,6 +394,132 @@
>>> db.close() # cleanup
+======================
+PersistentReferenceSet
+======================
+
+As `ZODB.ConflictResolution.PersistentReference` doesn't get handled
+properly in `set` due to lack of `__hash__` method, we define a class
+utilizing `__cmp__` method of contained items [#workaround]_.
+
+
+
+Let's make some Stubbed persistent reference object.
+
+ >>> from zc.queue.tests import StubPersistentReference
+ >>> pr1 = StubPersistentReference(1)
+ >>> pr2 = StubPersistentReference(2)
+ >>> pr3 = StubPersistentReference(3)
+
+Let's make copy of them to simulate different transaction states.
+
+ >>> import copy
+ >>> pr_c1 = copy.copy(pr1)
+ >>> pr_c2 = copy.copy(pr2)
+ >>> pr_c3 = copy.copy(pr3)
+ >>> pr1 == pr_c1
+ True
+ >>> pr2 == pr_c2
+ True
+ >>> pr3 == pr_c3
+ True
+ >>> id(pr1) == id(pr_c1)
+ False
+ >>> id(pr2) == id(pr_c2)
+ False
+ >>> id(pr3) == id(pr_c3)
+ False
+
+`set` doesn't handle persistent reference objects properly. All
+following set operations produce wrong results.
+
+ >>> set1 = set((pr1, pr2))
+ >>> len(set1)
+ 2
+ >>> set2 = set((pr_c1, pr_c3))
+ >>> len(set2)
+ 2
+ >>> set_c1 = set((pr_c1, pr_c2))
+ >>> len(set_c1)
+ 2
+
+Minus operation:
+
+ >>> set3 = set1 - set2
+ >>> len(set3)
+ 2
+ >>> set3
+ set([SPR (1), SPR (2)])
+
+Deduplication:
+
+ >>> set4 = set((pr1, pr2, pr3, pr_c1, pr_c2, pr_c3))
+ >>> len(set4)
+ 6
+
+Contains:
+
+ >>> pr3 in set2
+ False
+
+Intersection:
+
+ >>> set1 & set2
+ set([])
+
+Compare:
+
+ >>> set1 == set_c1
+ False
+
+So we made `PersistentReferenceSet` acting as `set`.
+
+ >>> from zc.queue._queue import PersistentReferenceSet
+ >>> pr_set1 = PersistentReferenceSet((pr1, pr2))
+ >>> len(pr_set1)
+ 2
+ >>> pr_set2 = PersistentReferenceSet((pr_c1, pr_c3))
+ >>> len(pr_set1)
+ 2
+ >>> pr_set_c1 = PersistentReferenceSet((pr_c1, pr_c2))
+ >>> len(pr_set1)
+ 2
+
+Minus operation:
+
+ >>> pr_set3 = pr_set1 - pr_set2
+ >>> len(pr_set3)
+ 1
+ >>> pr_set3
+ PRSet((SPR (2),))
+
+Deduplication:
+
+ >>> pr_set4 = PersistentReferenceSet(
+ ... (pr1, pr2, pr3, pr_c1, pr_c2, pr_c3))
+ >>> len(pr_set4)
+ 3
+ >>> pr_set4
+ PRSet((SPR (1), SPR (2), SPR (3)))
+
+Contains:
+
+ >>> pr3 in pr_set2
+ True
+
+ >>> pr2 not in pr_set2
+ True
+
+Intersection:
+
+ >>> pr_set1 & pr_set2
+ PRSet((SPR (1),))
+
+Compare:
+
+ >>> pr_set1 == pr_set_c1
+ True
+
-----
.. [#why] The queue's `pull` method is actually the interesting part in why
@@ -404,7 +530,7 @@
state, it's a conflict error. Ideally you don't enter another equal
item in that queue again, or else in fact this is still an
error-prone heuristic:
-
+
- start queue == [X];
- begin transactions A and B;
- B removes X and commits;
@@ -424,7 +550,7 @@
a better confidence of doing the right thing.
Also, FWIW, this is policy I want: for my use cases, it would be
- possible to put in two items in a queue that handle the same issue.
+ possible to put in two items in a queue that handle the same issue.
With the right equality code, this can be avoided with the policy
the queue has now.
@@ -435,13 +561,10 @@
objects in a queue (or any other collection with conflict resolution
code, such as a BTree), the collection will get a placeholder object
(ZODB.ConflictResolution.PersistentReference), rather than the real
- contained object. This object has a standard Python __cmp__ based
- on identity in memory (e.g. `id(obj)`). The same oid will get the
- same placeholder in the different states for conflict resolution, so
- that may be sufficient for reasonable behavior in a queue as long as
- only objects with the same oid may be considered equal. (This is very
- problematic behavior if your persistent object is a key in a BTree,
- however.)
+ contained object. This object has __cmp__ method, but doesn't have
+ __hash__ method, The same oid will get different placeholder in the
+ different states because of different identity in memory (e.g. `id(obj)`)
+ for conflict resolution, which is wrong behavior in a queue.
Another is that, in ZEO, conflict resolution is currently done on
the server, so the ZEO server must have a copy of the classes
@@ -456,3 +579,8 @@
the information it needs to do the comparison on itself, so the
absence of the persistent object during conflict resolution is
unimportant.
+
+.. [#workaround] The reason why we defined
+ `PersistentReferenceSet` is that there would be a significant risk
+ of unintended consequenses for some ZODB users if we add __hash__
+ method to PersistentReference.
Modified: zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/tests.py
===================================================================
--- zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/tests.py 2012-01-09 20:48:57 UTC (rev 124006)
+++ zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/tests.py 2012-01-09 23:44:05 UTC (rev 124007)
@@ -168,7 +168,20 @@
"""
+class StubPersistentReference(object):
+ def __init__(self, oid):
+ self.oid = oid
+ def __cmp__(self, other):
+ if self.oid == other.oid:
+ return 0
+ else:
+ raise ValueError("Can't compare")
+
+ def __repr__(self):
+ return "SPR (%d)" % self.oid
+
+
def test_suite():
return unittest.TestSuite((
doctest.DocFileSuite(
More information about the checkins
mailing list