[Checkins] SVN: zc.queue/trunk/src/zc/queue/ Merge andrew-conflict-resolution-persistent branch back to trunk

Andrew Sung asung at zope.com
Wed Jan 11 23:16:53 UTC 2012


Log message for revision 124031:
  Merge andrew-conflict-resolution-persistent branch back to trunk
  
  svn merge -r123976:HEAD svn+ssh://svn.zope.org/repos/main/zc.queue/branches/andrew-conflict-resolution-persistent
  
  

Changed:
  U   zc.queue/trunk/src/zc/queue/_queue.py
  U   zc.queue/trunk/src/zc/queue/queue.txt
  U   zc.queue/trunk/src/zc/queue/tests.py

-=-
Modified: zc.queue/trunk/src/zc/queue/_queue.py
===================================================================
--- zc.queue/trunk/src/zc/queue/_queue.py	2012-01-11 23:02:27 UTC (rev 124030)
+++ zc.queue/trunk/src/zc/queue/_queue.py	2012-01-11 23:16:53 UTC (rev 124031)
@@ -1,4 +1,5 @@
 from persistent import Persistent
+from ZODB.ConflictResolution import PersistentReference
 from ZODB.POSException import ConflictError
 from zope import interface
 
@@ -51,6 +52,32 @@
 PersistentQueue = BucketQueue  # for legacy instances, be conservative
 
 
+class PersistentReferenceProxy(object):
+    """PersistentReferenceProxy
+
+    `ZODB.ConflictResolution.PersistentReference` doesn't get handled correctly
+    in the resolveQueueConflict function due to lack of the `__hash__` method.
+    So we make workaround here to utilize `__cmp__` method of
+    `PersistentReference`.
+
+    """
+    def __init__(self, pr):
+        assert isinstance(pr, PersistentReference)
+        self.pr = pr
+
+    def __hash__(self):
+        return 1
+
+    def __eq__(self, other):
+        try:
+            return self.pr == other.pr
+        except ValueError:
+            return False
+
+    def __repr__(self):
+        return self.pr.__repr__()
+
+
 def resolveQueueConflict(oldstate, committedstate, newstate, bucket=False):
     # we only know how to merge _data.  If anything else is different,
     # puke.
@@ -62,10 +89,15 @@
     # basically, we are ok with anything--willing to merge--
     # unless committedstate and newstate have one or more of the
     # same deletions or additions in comparison to the oldstate.
-    old = oldstate['_data']
-    committed = committedstate['_data']
-    new = newstate['_data']
 
+    # If items in the queue are persistent object, we need to wrap
+    # PersistentReference objects. See 'queue.txt'
+    wrap = lambda x: (
+        PersistentReferenceProxy(x) if isinstance(x, PersistentReference) else x)
+    old = map(wrap, oldstate['_data'])
+    committed = map(wrap, committedstate['_data'])
+    new = map(wrap, newstate['_data'])
+
     old_set = set(old)
     committed_set = set(committed)
     new_set = set(new)
@@ -93,13 +125,14 @@
     # Now we do the merge.  We'll merge into the committed state and
     # return it.
     mod_committed = []
+    unwrap = lambda x: x.pr if isinstance(x, PersistentReferenceProxy) else x
     for v in committed:
         if v not in new_removed:
-            mod_committed.append(v)
+            mod_committed.append(unwrap(v))
     if new_added:
         ordered_new_added = new[-len(new_added):]
         assert set(ordered_new_added) == new_added
-        mod_committed.extend(ordered_new_added)
+        mod_committed.extend(map(unwrap, ordered_new_added))
     committedstate['_data'] = tuple(mod_committed)
     return committedstate
 

Modified: zc.queue/trunk/src/zc/queue/queue.txt
===================================================================
--- zc.queue/trunk/src/zc/queue/queue.txt	2012-01-11 23:02:27 UTC (rev 124030)
+++ zc.queue/trunk/src/zc/queue/queue.txt	2012-01-11 23:16:53 UTC (rev 124031)
@@ -33,12 +33,13 @@
 
 The basic API is simple: use `put` to add items to the back of the queue, and
 `pull` to pull things off the queue, defaulting to the front of the queue.
+Note that `Item` could be either persistent or non persistent object.
 
-    >>> q.put(1)
-    >>> q.put(2)
+    >>> q.put(Item(1))
+    >>> q.put(Item(2))
     >>> q.pull()
     1
-    >>> q.put(3)
+    >>> q.put(Item(3))
     >>> q.pull()
     2
     >>> q.pull()
@@ -47,9 +48,9 @@
 The `pull` method takes an optional zero-based index argument, and can accept
 negative values.
 
-    >>> q.put(4)
-    >>> q.put(5)
-    >>> q.put(6)
+    >>> q.put(Item(4))
+    >>> q.put(Item(5))
+    >>> q.put(Item(6))
     >>> q.pull(-1)
     6
     >>> q.pull(1)
@@ -66,8 +67,8 @@
 
 Requesting an invalid index value does the same.
 
-    >>> q.put(7)
-    >>> q.put(8)
+    >>> q.put(Item(7))
+    >>> q.put(Item(8))
     >>> q.pull(2) # doctest: +ELLIPSIS
     Traceback (most recent call last):
     ...
@@ -92,9 +93,9 @@
     Traceback (most recent call last):
     ...
     StopIteration
-    >>> q.put(9)
-    >>> q.put(10)
-    >>> q.put(11)
+    >>> q.put(Item(9))
+    >>> q.put(Item(10))
+    >>> q.put(Item(11))
     >>> iter(q).next()
     9
     >>> [i for i in q]
@@ -117,7 +118,7 @@
 
 ...and list-like bracket access (which again does *not* empty the queue).
 
-    >>> q.put(12)
+    >>> q.put(Item(12))
     >>> q[0]
     12
     >>> q.pull()
@@ -127,7 +128,7 @@
     ...
     IndexError: ...
     >>> for i in range (13, 23):
-    ...     q.put(i)
+    ...     q.put(Item(i))
     ...
     >>> q[0]
     13
@@ -184,8 +185,8 @@
 In this example, even though q_1 is modified first, q_2's transaction is
 committed first, so q_2's addition is first after the merge.
 
-    >>> q_1.put(1001)
-    >>> q_2.put(1000)
+    >>> q_1.put(Item(1001))
+    >>> q_2.put(Item(1000))
     >>> transactionmanager_2.commit()
     >>> transactionmanager_1.commit()
     >>> connection_1.sync()
@@ -203,9 +204,9 @@
     >>> from zc import queue
     >>> if isinstance(q_1, queue.Queue):
     ...     for i in range(5):
-    ...         q_1.put(i)
+    ...         q_1.put(Item(i))
     ...     for i in range(1002, 1005):
-    ...         q_2.put(i)
+    ...         q_2.put(Item(i))
     ...     transactionmanager_2.commit()
     ...     transactionmanager_1.commit()
     ...     connection_1.sync()
@@ -223,8 +224,8 @@
 
     >>> if isinstance(q_1, queue.CompositeQueue):
     ...     for i1, i2 in ((1002, 1003), (1004, 0), (1, 2), (3, 4)):
-    ...         q_1.put(i1)
-    ...         q_2.put(i2)
+    ...         q_1.put(Item(i1))
+    ...         q_2.put(Item(i2))
     ...         transactionmanager_1.commit()
     ...         transactionmanager_2.commit()
     ...         connection_1.sync()
@@ -240,13 +241,16 @@
 
 If two users try to add the same item, then a conflict error is raised.
 
-    >>> q_1.put(5)
-    >>> q_2.put(5)
+    >>> five = Item(5)
+    >>> q_1.put(five)
+    >>> q_2.put(five)
     >>> transactionmanager_1.commit()
-    >>> transactionmanager_2.commit() # doctest: +ELLIPSIS
-    Traceback (most recent call last):
-    ...
-    ConflictError: ...
+    >>> from ZODB.POSException import ConflictError, InvalidObjectReference
+    >>> try:
+    ...     transactionmanager_2.commit() # doctest: +ELLIPSIS
+    ... except (ConflictError, InvalidObjectReference):
+    ...     print "Conflict Error"
+    Conflict Error
     >>> transactionmanager_2.abort()
     >>> connection_1.sync()
     >>> connection_2.sync()
@@ -314,8 +318,8 @@
     1003
     >>> q_1.pull()
     1004
-    >>> q_2.put(6)
-    >>> q_2.put(7)
+    >>> q_2.put(Item(6))
+    >>> q_2.put(Item(7))
     >>> transactionmanager_1.commit()
     >>> transactionmanager_2.commit()
     >>> connection_1.sync()
@@ -341,10 +345,10 @@
     >>> res_2
     [6, 4, 2]
     >>> for i in range(8, 12):
-    ...     q_1.put(i)
+    ...     q_1.put(Item(i))
     ...
     >>> for i in range(12, 16):
-    ...     q_2.put(i)
+    ...     q_2.put(Item(i))
     ...
     >>> list(q_1)
     [2, 4, 6, 8, 9, 10, 11]
@@ -378,11 +382,11 @@
     ...     secondsrc_1 = firstsrc_1[:]
     ...     secondsrc_2 = firstsrc_2[:]
     ...     for val in [12, 13, 14, 15]:
-    ...         firstsrc_1.remove(val)
-    ...         firstsrc_2.remove(val)
+    ...         firstsrc_1.remove(Item(val))
+    ...         firstsrc_2.remove(Item(val))
     ...     for val in [8, 9, 10, 11]:
-    ...         secondsrc_1.remove(val)
-    ...         secondsrc_2.remove(val)
+    ...         secondsrc_1.remove(Item(val))
+    ...         secondsrc_2.remove(Item(val))
     ...     res_1 = firstsrc_1 + secondsrc_1
     ...     res_2 = firstsrc_2 + secondsrc_2
     ...
@@ -394,6 +398,191 @@
     >>> db.close() # cleanup
 
 
+========================
+PersistentReferenceProxy
+========================
+
+As `ZODB.ConflictResolution.PersistentReference` doesn't get handled
+properly in `set` due to lack of `__hash__` method, we define a class
+utilizing `__cmp__` method of contained items [#workaround]_.
+
+
+Let's make some stub persistent reference objects. Also make some
+objects that have same oid to simulate different transaction states.
+
+    >>> from zc.queue.tests import StubPersistentReference
+    >>> pr1 = StubPersistentReference(1)
+    >>> pr2 = StubPersistentReference(2)
+    >>> pr3 = StubPersistentReference(3)
+    >>> pr_c1 = StubPersistentReference(1)
+    >>> pr_c2 = StubPersistentReference(2)
+    >>> pr_c3 = StubPersistentReference(3)
+
+    >>> pr1 == pr_c1
+    True
+    >>> pr2 == pr_c2
+    True
+    >>> pr3 == pr_c3
+    True
+    >>> id(pr1) == id(pr_c1)
+    False
+    >>> id(pr2) == id(pr_c2)
+    False
+    >>> id(pr3) == id(pr_c3)
+    False
+
+    >>> set1 = set((pr1, pr2))
+    >>> set1
+    set([SPR (1), SPR (2)])
+    >>> len(set1)
+    2
+    >>> set2 = set((pr_c1, pr_c3))
+    >>> set2
+    set([SPR (1), SPR (3)])
+    >>> len(set2)
+    2
+    >>> set_c1 = set((pr_c1, pr_c2))
+    >>> set_c1
+    set([SPR (1), SPR (2)])
+    >>> len(set_c1)
+    2
+
+`set` doesn't handle persistent reference objects properly. All
+following set operations produce wrong results.
+
+Deduplication:
+
+    >>> set((pr1, pr_c1))
+    set([SPR (1), SPR (1)])
+    >>> set((pr2, pr_c2))
+    set([SPR (2), SPR (2)])
+    >>> set((pr1, pr_c1, pr2))
+    set([SPR (1), SPR (1), SPR (2)])
+    >>> set4 = set((pr1, pr2, pr3, pr_c1, pr_c2, pr_c3))
+    >>> len(set4)
+    6
+
+Minus operation:
+
+    >>> set3 = set1 - set2
+    >>> len(set3)
+    2
+    >>> set3
+    set([SPR (1), SPR (2)])
+
+Contains:
+
+    >>> pr3 in set2
+    False
+
+Intersection:
+
+    >>> set1 & set2
+    set([])
+
+Compare:
+
+    >>> set1 == set_c1
+    False
+
+So we made `PersistentReferenceProxy` wrapping `PersistentReference`
+to work with sets.
+
+    >>> from zc.queue._queue import PersistentReferenceProxy
+    >>> prp1 = PersistentReferenceProxy(pr1)
+    >>> prp2 = PersistentReferenceProxy(pr2)
+    >>> prp3 = PersistentReferenceProxy(pr3)
+    >>> prp_c1 = PersistentReferenceProxy(pr_c1)
+    >>> prp_c2 = PersistentReferenceProxy(pr_c2)
+    >>> prp_c3 = PersistentReferenceProxy(pr_c3)
+    >>> prp1 == prp_c1
+    True
+    >>> prp2 == prp_c2
+    True
+    >>> prp3 == prp_c3
+    True
+    >>> id(prp1) == id(prp_c1)
+    False
+    >>> id(prp2) == id(prp_c2)
+    False
+    >>> id(prp3) == id(prp_c3)
+    False
+
+    >>> set1 = set((prp1, prp2))
+    >>> set1
+    set([SPR (1), SPR (2)])
+    >>> len(set1)
+    2
+    >>> set2 = set((prp_c1, prp_c3))
+    >>> set2
+    set([SPR (1), SPR (3)])
+    >>> len(set2)
+    2
+    >>> set_c1 = set((prp_c1, prp_c2))
+    >>> set_c1
+    set([SPR (1), SPR (2)])
+    >>> len(set_c1)
+    2
+
+`set` handles persistent reference properly now. All following set
+operations produce correct results.
+
+Deduplication:
+
+    >>> set4 = set((prp1, prp2, prp3, prp_c1, prp_c2, prp_c3))
+    >>> len(set4)
+    3
+    >>> set((prp1, prp_c1))
+    set([SPR (1)])
+    >>> set((prp2, prp_c2))
+    set([SPR (2)])
+    >>> set((prp1, prp_c1, prp2))
+    set([SPR (1), SPR (2)])
+
+Minus operation:
+
+    >>> set3 = set1 - set2
+    >>> len(set3)
+    1
+    >>> set3
+    set([SPR (2)])
+    >>> set1 - set1
+    set([])
+    >>> set2 - set3
+    set([SPR (1), SPR (3)])
+    >>> set3 - set2
+    set([SPR (2)])
+
+Contains:
+
+    >>> prp3 in set2
+    True
+    >>> prp1 in set2
+    True
+    >>> prp_c1 in set2
+    True
+    >>> prp2 not in set2
+    True
+
+Intersection:
+
+    >>> set1 & set2
+    set([SPR (1)])
+    >>> set1 & set_c1
+    set([SPR (1), SPR (2)])
+    >>> set2 & set3
+    set([])
+
+Compare:
+
+    >>> set1 == set_c1
+    True
+    >>> set1 == set2
+    False
+    >>> set1 == set4
+    False
+
+
 -----
 
 .. [#why] The queue's `pull` method is actually the interesting part in why
@@ -404,7 +593,7 @@
     state, it's a conflict error.  Ideally you don't enter another equal
     item in that queue again, or else in fact this is still an
     error-prone heuristic:
-    
+
       - start queue == [X];
       - begin transactions A and B;
       - B removes X and commits;
@@ -424,7 +613,7 @@
     a better confidence of doing the right thing.
 
     Also, FWIW, this is policy I want: for my use cases, it would be
-    possible to put in two items in a queue that handle the same issue. 
+    possible to put in two items in a queue that handle the same issue.
     With the right equality code, this can be avoided with the policy
     the queue has now.
 
@@ -435,13 +624,10 @@
     objects in a queue (or any other collection with conflict resolution
     code, such as a BTree), the collection will get a placeholder object
     (ZODB.ConflictResolution.PersistentReference), rather than the real
-    contained object.  This object has a standard Python __cmp__ based
-    on identity in memory (e.g. `id(obj)`).  The same oid will get the
-    same placeholder in the different states for conflict resolution, so
-    that may be sufficient for reasonable behavior in a queue as long as
-    only objects with the same oid may be considered equal.  (This is very
-    problematic behavior if your persistent object is a key in a BTree,
-    however.)
+    contained object.  This object has __cmp__ method, but doesn't have
+    __hash__ method, The same oid will get different placeholder in the
+    different states because of different identity in memory (e.g. `id(obj)`)
+    for conflict resolution, which is wrong behavior in a queue.
 
     Another is that, in ZEO, conflict resolution is currently done on
     the server, so the ZEO server must have a copy of the classes
@@ -456,3 +642,8 @@
     the information it needs to do the comparison on itself, so the
     absence of the persistent object during conflict resolution is
     unimportant.
+
+.. [#workaround] The reason why we defined
+    `PersistentReferenceProxy` is that there would be a significant risk
+    of unintended consequenses for some ZODB users if we add __hash__
+    method to PersistentReference.

Modified: zc.queue/trunk/src/zc/queue/tests.py
===================================================================
--- zc.queue/trunk/src/zc/queue/tests.py	2012-01-11 23:02:27 UTC (rev 124030)
+++ zc.queue/trunk/src/zc/queue/tests.py	2012-01-11 23:16:53 UTC (rev 124031)
@@ -1,7 +1,8 @@
 from ZODB import ConflictResolution, MappingStorage, POSException
-import zc.queue
+from persistent import Persistent
 import doctest
 import unittest
+import zc.queue
 
 # TODO: this approach is useful, but fragile.  It also puts a dependency in
 # this package on the ZODB, when otherwise it would only depend on persistent.
@@ -169,13 +170,53 @@
     """
 
 
+class StubPersistentReference(ConflictResolution.PersistentReference):
+    def __init__(self, oid):
+        self.oid = oid
+
+    def __cmp__(self, other):
+        if self.oid == other.oid:
+            return 0
+        else:
+            raise ValueError("Can't compare")
+
+    def __repr__(self):
+        return "SPR (%d)" % self.oid
+
+
+class PersistentObject(Persistent):
+    def __init__(self, value):
+        self.value = value
+
+    def __eq__(self, other):
+        return self.value == other.value
+
+    def __repr__(self):
+        return "%s" % self.value
+
+
 def test_suite():
     return unittest.TestSuite((
         doctest.DocFileSuite(
-            'queue.txt', globs={'Queue': zc.queue.Queue}),
+            'queue.txt',
+            globs={
+                'Queue': zc.queue.Queue,
+                'Item': PersistentObject}),
         doctest.DocFileSuite(
             'queue.txt',
-            globs={'Queue': lambda: zc.queue.CompositeQueue(2)}),
+            globs={
+                'Queue': lambda: zc.queue.CompositeQueue(2),
+                'Item': PersistentObject}),
+        doctest.DocFileSuite(
+            'queue.txt',
+            globs={
+                'Queue': zc.queue.Queue,
+                'Item': lambda x: x}),
+        doctest.DocFileSuite(
+            'queue.txt',
+            globs={
+                'Queue': lambda: zc.queue.CompositeQueue(2),
+                'Item': lambda x: x}),
         doctest.DocTestSuite()
         ))
 



More information about the checkins mailing list