[Checkins] SVN: zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/ Fix conflict resolution by adding PersistentReferenceSet..

Mon Jan 9 23:44:06 UTC 2012

Log message for revision 124007:
  Fix conflict resolution by adding PersistentReferenceSet..
  
  

Changed:
  U   zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/_queue.py
  U   zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/queue.txt
  U   zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/tests.py

-=-
Modified: zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/_queue.py
===================================================================

--- zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/_queue.py	2012-01-09 20:48:57 UTC (rev 124006)
+++ zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/_queue.py	2012-01-09 23:44:05 UTC (rev 124007)
@@ -1,4 +1,5 @@
 from persistent import Persistent
+from ZODB.ConflictResolution import PersistentReference
 from ZODB.POSException import ConflictError
 from zope import interface
 
@@ -51,6 +52,104 @@
 PersistentQueue = BucketQueue  # for legacy instances, be conservative
 
 
+class PersistentReferenceSet(object):
+    """PersistentReferenceSet
+
+    `ZODB.ConflictResolution.PersistentReference` doesn't get handled correctly
+    in the resolveQueueConflict function due to lack of the `__hash__` method.
+    So we make workaround here to utilize `__cmp__` method of
+    `PersistentReference`.
+
+    """
+    def __init__(self, seq):
+        assert isinstance(seq, tuple)
+        self._data = self._dedup(seq)
+
+    def _dedup(self, seq):
+        seq = list(seq)
+        cnt = 0
+        while len(seq) > cnt:
+            remove = []
+            for idx, item in enumerate(seq[cnt + 1:]):
+                try:
+                    if item == seq[cnt]:
+                        remove.append(cnt + idx + 1)
+                except ValueError:
+                    pass
+            for idx in reversed(remove):
+                seq.pop(idx)
+            cnt += 1
+        return tuple(seq)
+
+    def __cmp__(self, other):
+        if len(self._data) == len(other._data):
+            other_data = list(other._data[:])
+            for item in self._data:
+                for index, other_item in enumerate(other_data):
+                    try:
+                        if item == other_item:
+                            other_data.pop(index)
+                    except ValueError:
+                        pass
+                    else:
+                        break
+                else:
+                    break
+            else:
+                assert len(other_data) == 0
+                return 0
+        raise ValueError(
+            "can't reliably compare against different "
+            "PersistentReferences")
+
+    def __sub__(self, other):
+        self_data = list(self._data[:])
+        for other_item in other._data:
+            for index, item in enumerate(self_data):
+                try:
+                    if other_item == item:
+                        self_data.pop(index)
+                except ValueError:
+                    pass
+                else:
+                    break
+        return PersistentReferenceSet(tuple(self_data))
+
+    def __and__(self, other):
+        self_data = list(self._data[:])
+        intersection = []
+        for other_item in other._data:
+            for index, item in enumerate(self_data):
+                try:
+                    if other_item == item:
+                        self_data.pop(index)
+                        intersection.append(item)
+                except ValueError:
+                    pass
+                else:
+                    break
+        return PersistentReferenceSet(tuple(intersection))
+
+    def __iter__(self):
+        for item in self._data:
+            yield item
+
+    def __len__(self):
+        return len(self._data)
+
+    def __repr__(self):
+        return "PRSet(%s)" % str(self._data)
+
+    def __contains__(self, item):
+        for data in self._data:
+            try:
+                if item == data:
+                    return True
+            except ValueError:
+                pass
+        return False
+
+
 def resolveQueueConflict(oldstate, committedstate, newstate, bucket=False):
     # we only know how to merge _data.  If anything else is different,
     # puke.
@@ -66,10 +165,19 @@
     committed = committedstate['_data']
     new = newstate['_data']
 
-    old_set = set(old)
-    committed_set = set(committed)
-    new_set = set(new)
+    # If items in the queue are persistent object, we can't use set().
+    # see 'queue.txt'
+    for item in (old + committed + new):
+        if not isinstance(item, PersistentReference):
+            Set = set
+            break
+    else:
+        Set = PersistentReferenceSet
 
+    old_set = Set(old)
+    committed_set = Set(committed)
+    new_set = Set(new)
+
     if bucket and bool(old_set) and (bool(committed_set) ^ bool(new_set)):
         # This is a bucket, part of a CompositePersistentQueue.  The old set
         # of this bucket had items, and one of the two transactions cleaned
@@ -98,7 +206,7 @@
             mod_committed.append(v)
     if new_added:
         ordered_new_added = new[-len(new_added):]
-        assert set(ordered_new_added) == new_added
+        assert Set(ordered_new_added) == new_added
         mod_committed.extend(ordered_new_added)
     committedstate['_data'] = tuple(mod_committed)
     return committedstate

Modified: zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/queue.txt
===================================================================
--- zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/queue.txt	2012-01-09 20:48:57 UTC (rev 124006)
+++ zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/queue.txt	2012-01-09 23:44:05 UTC (rev 124007)
@@ -394,6 +394,132 @@
     >>> db.close() # cleanup
 
 
+======================
+PersistentReferenceSet
+======================
+
+As `ZODB.ConflictResolution.PersistentReference` doesn't get handled
+properly in `set` due to lack of `__hash__` method, we define a class
+utilizing `__cmp__` method of contained items [#workaround]_.
+
+
+
+Let's make some Stubbed persistent reference object.
+
+    >>> from zc.queue.tests import StubPersistentReference
+    >>> pr1 = StubPersistentReference(1)
+    >>> pr2 = StubPersistentReference(2)
+    >>> pr3 = StubPersistentReference(3)
+
+Let's make copy of them to simulate different transaction states.
+
+    >>> import copy
+    >>> pr_c1 = copy.copy(pr1)
+    >>> pr_c2 = copy.copy(pr2)
+    >>> pr_c3 = copy.copy(pr3)
+    >>> pr1 == pr_c1
+    True
+    >>> pr2 == pr_c2
+    True
+    >>> pr3 == pr_c3
+    True
+    >>> id(pr1) == id(pr_c1)
+    False
+    >>> id(pr2) == id(pr_c2)
+    False
+    >>> id(pr3) == id(pr_c3)
+    False
+
+`set` doesn't handle persistent reference objects properly. All
+following set operations produce wrong results.
+
+    >>> set1 = set((pr1, pr2))
+    >>> len(set1)
+    2
+    >>> set2 = set((pr_c1, pr_c3))
+    >>> len(set2)
+    2
+    >>> set_c1 = set((pr_c1, pr_c2))
+    >>> len(set_c1)
+    2
+
+Minus operation:
+
+    >>> set3 = set1 - set2
+    >>> len(set3)
+    2
+    >>> set3
+    set([SPR (1), SPR (2)])
+
+Deduplication:
+
+    >>> set4 = set((pr1, pr2, pr3, pr_c1, pr_c2, pr_c3))
+    >>> len(set4)
+    6
+
+Contains:
+
+    >>> pr3 in set2
+    False
+
+Intersection:
+
+    >>> set1 & set2
+    set([])
+
+Compare:
+
+    >>> set1 == set_c1
+    False
+
+So we made `PersistentReferenceSet` acting as `set`.
+
+    >>> from zc.queue._queue import PersistentReferenceSet
+    >>> pr_set1 = PersistentReferenceSet((pr1, pr2))
+    >>> len(pr_set1)
+    2
+    >>> pr_set2 = PersistentReferenceSet((pr_c1, pr_c3))
+    >>> len(pr_set1)
+    2
+    >>> pr_set_c1 = PersistentReferenceSet((pr_c1, pr_c2))
+    >>> len(pr_set1)
+    2
+
+Minus operation:
+
+    >>> pr_set3 = pr_set1 - pr_set2
+    >>> len(pr_set3)
+    1
+    >>> pr_set3
+    PRSet((SPR (2),))
+
+Deduplication:
+
+    >>> pr_set4 = PersistentReferenceSet(
+    ...     (pr1, pr2, pr3, pr_c1, pr_c2, pr_c3))
+    >>> len(pr_set4)
+    3
+    >>> pr_set4
+    PRSet((SPR (1), SPR (2), SPR (3)))
+
+Contains:
+
+    >>> pr3 in pr_set2
+    True
+
+    >>> pr2 not in pr_set2
+    True
+
+Intersection:
+
+    >>> pr_set1 & pr_set2
+    PRSet((SPR (1),))
+
+Compare:
+
+    >>> pr_set1 == pr_set_c1
+    True
+
 -----
 
 .. [#why] The queue's `pull` method is actually the interesting part in why
@@ -404,7 +530,7 @@
     state, it's a conflict error.  Ideally you don't enter another equal
     item in that queue again, or else in fact this is still an
     error-prone heuristic:
-    
+
       - start queue == [X];
       - begin transactions A and B;
       - B removes X and commits;
@@ -424,7 +550,7 @@
     a better confidence of doing the right thing.
 
     Also, FWIW, this is policy I want: for my use cases, it would be
-    possible to put in two items in a queue that handle the same issue. 
+    possible to put in two items in a queue that handle the same issue.
     With the right equality code, this can be avoided with the policy
     the queue has now.
 
@@ -435,13 +561,10 @@
     objects in a queue (or any other collection with conflict resolution
     code, such as a BTree), the collection will get a placeholder object
     (ZODB.ConflictResolution.PersistentReference), rather than the real
-    contained object.  This object has a standard Python __cmp__ based
-    on identity in memory (e.g. `id(obj)`).  The same oid will get the
-    same placeholder in the different states for conflict resolution, so
-    that may be sufficient for reasonable behavior in a queue as long as
-    only objects with the same oid may be considered equal.  (This is very
-    problematic behavior if your persistent object is a key in a BTree,
-    however.)
+    contained object.  This object has __cmp__ method, but doesn't have
+    __hash__ method, The same oid will get different placeholder in the
+    different states because of different identity in memory (e.g. `id(obj)`)
+    for conflict resolution, which is wrong behavior in a queue.
 
     Another is that, in ZEO, conflict resolution is currently done on
     the server, so the ZEO server must have a copy of the classes
@@ -456,3 +579,8 @@
     the information it needs to do the comparison on itself, so the
     absence of the persistent object during conflict resolution is
     unimportant.
+
+.. [#workaround] The reason why we defined
+    `PersistentReferenceSet` is that there would be a significant risk
+    of unintended consequenses for some ZODB users if we add __hash__
+    method to PersistentReference.

Modified: zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/tests.py
===================================================================
--- zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/tests.py	2012-01-09 20:48:57 UTC (rev 124006)
+++ zc.queue/branches/andrew-conflict-resolution-persistent/src/zc/queue/tests.py	2012-01-09 23:44:05 UTC (rev 124007)
@@ -168,7 +168,20 @@
 
     """
 
+class StubPersistentReference(object):
+    def __init__(self, oid):
+        self.oid = oid
 
+    def __cmp__(self, other):
+        if self.oid == other.oid:
+            return 0
+        else:
+            raise ValueError("Can't compare")
+
+    def __repr__(self):
+        return "SPR (%d)" % self.oid
+
+
 def test_suite():
     return unittest.TestSuite((
         doctest.DocFileSuite(