[Checkins] SVN: Sandbox/poster/zc.blist/trunk/src/zc/blist/ another
edge case; more canary tests and debugging helpers.
Gary Poster
gary at zope.com
Thu May 22 23:13:16 EDT 2008
Log message for revision 86912:
another edge case; more canary tests and debugging helpers.
Changed:
U Sandbox/poster/zc.blist/trunk/src/zc/blist/__init__.py
U Sandbox/poster/zc.blist/trunk/src/zc/blist/testing.py
U Sandbox/poster/zc.blist/trunk/src/zc/blist/tests.py
-=-
Modified: Sandbox/poster/zc.blist/trunk/src/zc/blist/__init__.py
===================================================================
--- Sandbox/poster/zc.blist/trunk/src/zc/blist/__init__.py 2008-05-22 23:46:49 UTC (rev 86911)
+++ Sandbox/poster/zc.blist/trunk/src/zc/blist/__init__.py 2008-05-23 03:13:14 UTC (rev 86912)
@@ -642,7 +642,6 @@
del bucket[ix:stop_ix]
elif (stop_bucket_id != start_bucket_id and
bucket.previous is not None):
- # TODO this logic could maybe be moved earlier?
stop_bucket_id = bucket.previous
break
removed = bucket[ix:]
@@ -682,7 +681,7 @@
# B+ tree algorithm descriptions go
# one-item-at-a-time, while we may have swaths of
# changes to which we need to adjust.
- #
+
# Key adjustments are different than the standard B+
# tree story because this is a sequence, and our keys
# are indices that we need to adjust to accomodate the
@@ -694,6 +693,7 @@
fill_minimum = fill_maximum // 2
original_stop_bucket_id = stop_bucket_id
+ check_next = False
while start_bucket_id is not None:
@@ -705,103 +705,177 @@
# anything in to; we'll want to look at it and the
# `stop_bucket` (if different) to see if we need to
# adjust.
- #
- # if bucket and stop_bucket are different and
- # stop_bucket is not empty and either are below the
- # fill_minimum...
- # if the combination is less than the fill_maximum,
- # put in bucket and empty stop_bucket
- # else redistribute across so both are above
- # fill_minimum
len_bucket = len(bucket)
- stop_bucket = self._mapping[stop_bucket_id]
- len_stop = len(stop_bucket)
- if (bucket is not stop_bucket and
- len_stop and (
- len_bucket < fill_minimum or
- len_stop < fill_minimum)):
- bucket = self._mutable(bucket)
- stop_bucket = self._mutable(stop_bucket)
- bucket.rotate(stop_bucket)
- len_bucket = len(bucket)
- len_stop = len(stop_bucket)
- # if (bucket is stop_bucket or stop_bucket is empty)
- # and bucket.previous is None and stop_bucket.next is
- # None, shortcut: just make sure this is the top
- # bucket and break.
-
- if ((bucket is stop_bucket or not len_stop) and
- bucket.previous is None and stop_bucket.next is None):
- bucket = self._mutable(bucket)
- if bucket.identifier != self._top_index:
- # get rid of unnecessary parents
- p = bucket
- while p.identifier != self._top_index:
- p = self._mapping[p.parent]
- if p.shared:
- p.collections.remove(self)
- del self._mapping[p.identifier]
- # this is now the top of the tree
- self._top_index = bucket.identifier
- bucket.parent = None
+ # Usually we only care about two buckets here: ``bucket``
+ # and ``stop_bucket``. However, the first thing we do here
+ # is handle an exceptional case. We might be working with
+ # an index row in which a lower row had to include three
+ # buckets in the calculation. See where ``check_next =
+ # True`` occurs (pretty far down) to understand the
+ # condition.
+ next = None
+ if check_next:
+ if bucket.next is None:
+ check_next = False
else:
- assert bucket.parent is None
- bucket.next = None
- stop_bucket = stop_bucket_id = None
- break
+ assert len_bucket >= fill_minimum
+ next = self._mapping[bucket.next]
+ if 0 < len(next) < fill_minimum:
+ bucket = self._mutable(bucket)
+ next = self._mutable(next)
+ bucket.rotate(next)
+ len_bucket = len(bucket) # reset; it has
+ # changed if next needs to be removed from
+ # parent, it will be done later (below)
+ if not next: # (if empty)
+ # next's parent, if different than
+ # bucket's, should not have to be
+ # explicitly balanced in this way, because
+ # it is either empty (and will be removed)
+ # or the stop_bucket's parent (and will be
+ # balanced anyway).
+ check_next = False
+ else:
+ # We are not mutating next bucket, so next's
+ # parent should not have to be balanced
+ check_next = False
- # now these are the possible states:
- # - bucket is stop_bucket and is empty
- # - bucket is stop_bucket and is too small
- # - bucket is stop_bucket and is ok
- # - bucket and stop_bucket are both empty
- # - bucket is ok and stop_bucket is empty
- # - bucket is too small and stop_bucket is empty
- # - bucket is ok and stop_bucket is ok
- #
- # Therefore,
- # - if the stop_bucket is ok or the bucket is empty,
- # we're ok with this step, and can move on to
- # adjusting the indexes and pointers.
- # - otherwise the bucket is too small, and there is
- # another bucket to rotate with. Find the bucket
- # and adjust so that no non-empty buckets are
- # beneath the fill_minimum. Make sure to adjust the
- # start_bucket or stop_bucket to include the altered
- # bucket.
+ stop_bucket = self._mapping[stop_bucket_id]
+ len_stop = len(stop_bucket)
+ if next:
+ # We're still handing the exceptional case. ``next`` is
+ # not None and not empty. This means that we are
+ # working with indexes on this level. It also means
+ # that bucket and next are above the minimum fill
+ # level. The only thing we don't know is if the stop
+ # bucket is not next and the stop bucket is not empty
+ # and below the fill level. If that's the case, we need
+ # to balance next and stop.
+ if len_stop and len_stop < fill_minimum:
+ assert stop_bucket_id != next.identifier
+ next = self._mutable(next)
+ next.rotate(stop_bucket)
+ len_stop = len(stop_bucket)
+ # now, bucket is not empty and above the minimum,
+ # next is not empty and above the minimum, and
+ # stop_bucket is either empty, or not empty and
+ # above the minimum.
+ assert len_bucket >= fill_minimum
+ assert len(next) >= fill_minimum
+ assert not len_stop or len_stop >= fill_minimum
+ # this means we are balanced, and that we have two
+ # buckets, so neither of them is the top bucket.
+ else:
+ # this is the regular code path. We only need to look
+ # at the start bucket and the stop bucket.
- if len_bucket < fill_minimum:
- previous = bucket.previous
- next = stop_bucket.next
- assert previous is not None or next is not None
- assert bucket is stop_bucket or not len_stop
- if next is not None:
- next = self._mapping[next]
- if (next is None or
- previous is not None and
- len(next) + len_bucket > fill_maximum):
- # work with previous
- previous = self._mutable(self._mapping[previous])
+ # if bucket and stop_bucket are different and
+ # stop_bucket is not empty and either are below the
+ # fill_minimum...
+ # if the combination is less than the fill_maximum,
+ # put in bucket and empty stop_bucket
+ # else redistribute across so both are above
+ # fill_minimum
+ if (bucket is not stop_bucket and
+ len_stop and (
+ len_bucket < fill_minimum or
+ len_stop < fill_minimum)):
bucket = self._mutable(bucket)
- previous.rotate(bucket)
- if bucket.identifier == start_bucket_id:
- bucket = previous
- start_bucket_id = previous.identifier
- if not bucket:
- bucket = previous
- assert bucket
- else:
- # work with next
+ stop_bucket = self._mutable(stop_bucket)
+ bucket.rotate(stop_bucket)
+ len_bucket = len(bucket)
+ len_stop = len(stop_bucket)
+
+ # if (bucket is stop_bucket or stop_bucket is empty)
+ # and bucket.previous is None and stop_bucket.next is
+ # None, shortcut: just make sure this is the top
+ # bucket and break.
+
+ if ((bucket is stop_bucket or not len_stop) and
+ bucket.previous is None and
+ stop_bucket.next is None):
bucket = self._mutable(bucket)
- next = self._mutable(next)
- bucket.rotateRight(next)
- stop_bucket_id = next.identifier
+ if bucket.identifier != self._top_index:
+ # get rid of unnecessary parents
+ p = bucket
+ while p.identifier != self._top_index:
+ p = self._mapping[p.parent]
+ if p.shared:
+ p.collections.remove(self)
+ del self._mapping[p.identifier]
+ # this is now the top of the tree
+ self._top_index = bucket.identifier
+ bucket.parent = None
+ else:
+ assert bucket.parent is None
+ bucket.next = None
+ stop_bucket = stop_bucket_id = None
+ break
- # OK, now we need to adjust pointers and get rid of
- # empty buckets. We'll go level-by-level.
+ # now these are the possible states:
+ # - bucket is stop_bucket and is empty
+ # - bucket is stop_bucket and is too small
+ # - bucket is stop_bucket and is ok
+ # - bucket and stop_bucket are both empty
+ # - bucket is ok and stop_bucket is empty
+ # - bucket is too small and stop_bucket is empty
+ # - bucket is ok and stop_bucket is ok
+ #
+ # Therefore,
+ # - if the stop_bucket is ok or the bucket is empty,
+ # we're ok with this step, and can move on to
+ # adjusting the indexes and pointers.
+ # - otherwise the bucket is too small, and there is
+ # another bucket to rotate with. Find the bucket
+ # and adjust so that no non-empty buckets are
+ # beneath the fill_minimum. Make sure to adjust the
+ # start_bucket or stop_bucket to include the altered
+ # bucket.
+
+ if len_bucket < fill_minimum:
+ previous = bucket.previous
+ next = stop_bucket.next
+ assert previous is not None or next is not None
+ assert bucket is stop_bucket or not len_stop
+ if next is not None:
+ next = self._mapping[next]
+ if (next is None or
+ previous is not None and
+ len(next) + len_bucket > fill_maximum):
+ # work with previous
+ previous = self._mutable(
+ self._mapping[previous])
+ bucket = self._mutable(bucket)
+ previous.rotate(bucket)
+ if bucket.identifier == start_bucket_id:
+ if bucket:
+ # now bucket may have a parent that
+ # needs to be balanced that is *not*
+ # the parent of start bucket or of stop
+ # bucket. We'll check this the next
+ # time we go through this loop, with
+ # the parents of this level.
+ check_next = True
+ bucket = previous
+ start_bucket_id = previous.identifier
+ if not bucket:
+ bucket = previous
+ assert bucket
+ else:
+ # work with next
+ bucket = self._mutable(bucket)
+ next = self._mutable(next)
+ bucket.rotateRight(next)
+ stop_bucket_id = next.identifier
+ # We do this part of the code both in the exceptional
+ # ``check_next`` case and in the usual case.
+
+ # Now we need to adjust pointers and get rid of empty
+ # buckets. We'll go level-by-level.
+
# we keep track of the bucket to reindex on separately from
# the start_index because we may have to move it to a
# previous bucket if the start_bucket is deleted to make
Modified: Sandbox/poster/zc.blist/trunk/src/zc/blist/testing.py
===================================================================
--- Sandbox/poster/zc.blist/trunk/src/zc/blist/testing.py 2008-05-22 23:46:49 UTC (rev 86911)
+++ Sandbox/poster/zc.blist/trunk/src/zc/blist/testing.py 2008-05-23 03:13:14 UTC (rev 86912)
@@ -1,3 +1,5 @@
+import random
+
import zc.blist
def checkIndex(ix, b, previous, previous_ix=0):
@@ -33,6 +35,7 @@
def matches(b, result):
assert list(b) == result, repr(list(b)) + ' != ' + repr(result)
assert len(b) == len(result)
+ assert list(b[::-1]) == list(reversed(result))
res = []
bad = [(i, (b_i, r_i)) for (i, b_i, r_i) in
((i, b[i], result[i]) for i in range(len(b)))
@@ -73,3 +76,21 @@
two not in v.collections)
diff.append((k, v, alt))
return one_diff, two_diff, diff
+
+def RandomGenerator():
+ while 1:
+ yield random.randint(-sys.maxint, sys.maxint)
+
+def StringGenerator(src='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'):
+ "infinite-ish unique string generator"
+ for el in src:
+ yield el
+ for pre in StringGenerator(src):
+ for el in src:
+ yield pre + el
+
+def NumberGenerator(number=0, interval=1):
+ "infinite-ish unique number generator"
+ while 1:
+ yield number
+ number += interval
\ No newline at end of file
Modified: Sandbox/poster/zc.blist/trunk/src/zc/blist/tests.py
===================================================================
--- Sandbox/poster/zc.blist/trunk/src/zc/blist/tests.py 2008-05-22 23:46:49 UTC (rev 86911)
+++ Sandbox/poster/zc.blist/trunk/src/zc/blist/tests.py 2008-05-23 03:13:14 UTC (rev 86912)
@@ -12,6 +12,7 @@
#
##############################################################################
import os
+import pickle
import unittest
import random
import sys
@@ -213,12 +214,16 @@
[56, 7, 10]
"""
-class Canary(object):
- def __init__(self, comp, blist = None):
+class AbstractCanary(object):
+ def __init__(self, comp, blist=None, generator=None):
+ if generator is None:
+ generator = zc.blist.testing.StringGenerator()
+ self.generator = generator
if blist is None:
blist = zc.blist.BList(comp)
self.blist = blist
self.comp = comp
+ zc.blist.testing.matches(self.blist, self.comp)
self.ops = [getattr(self, n) for n in dir(self) if n.startswith('t_')]
self.empty_ops = [getattr(self, n) for n in dir(self) if n.startswith('e_')]
self.ops.extend(self.empty_ops)
@@ -235,18 +240,28 @@
orig = self.blist.copy()
orig_comp = self.comp[:]
try:
- zc.blist.testing.checkCopies(self.blist, orig) # initial
- args = call()
+ zc.blist.testing.checkCopies(self.blist, orig) # pre
+ c, args = call()
+ c()
zc.blist.testing.matches(self.blist, self.comp)
zc.blist.testing.matches(orig, orig_comp)
return zc.blist.testing.checkCopies(self.blist, orig) # post
except Exception, e:
traceback.print_exc()
import pdb; pdb.post_mortem(sys.exc_info()[2])
+ orig.bad = self.blist
+ orig.args = args
+ for i in range(1000):
+ nm = 'bad_op_%d_%r.pickle' % (random.randint(1, 1000), call)
+ if not os.path.exists(nm):
+ f = open(nm, 'w')
+ pickle.dump(orig, f)
+ f.close()
+ break
raise
def _getval(self):
- return random.randint(-sys.maxint, sys.maxint)
+ return self.generator.next()
def _getloc(self, adjustment=-1):
max = len(self.comp)+adjustment
@@ -293,120 +308,145 @@
def _getvals(self):
return [self._getval() for i in range(random.randint(1, 100))]
- def e_append(self):
- val = self._getval()
- self.comp.append(val)
- self.blist.append(val)
- return (val,)
+class BigOperationCanary(AbstractCanary):
def e_extend(self):
new = self._getvals()
- self.comp.extend(new)
- self.blist.extend(new)
- return (new,)
+ def test():
+ self.comp.extend(new)
+ self.blist.extend(new)
+ return test, (new,)
+ def t_delslice(self):
+ start, stop = self._get_start_stop()
+ def test():
+ del self.comp[start:stop]
+ del self.blist[start:stop]
+ return test, (start, stop)
+
+ def e_setitem_slice(self):
+ start, stop = self._get_start_stop()
+ vals = self._getvals()
+ def test():
+ self.comp[start:stop] = vals
+ self.blist[start:stop] = vals
+ return test, (start, stop, vals)
+
+ def e_setitem_slice_step(self):
+ start, stop, step = self._get_start_stop_step()
+ vals = [self._getval() for i in range(len(self.comp[start:stop:step]))]
+ def test():
+ self.comp[start:stop:step] = vals
+ self.blist[start:stop:step] = vals
+ return test, (start, stop, step, vals)
+
+ def t_delslice_step(self):
+ start, stop, step = self._get_start_stop_step()
+ def test():
+ del self.comp[start:stop:step]
+ del self.blist[start:stop:step]
+ return test, (start, stop, step)
+
+class Canary(BigOperationCanary):
+
+ def e_append(self):
+ val = self._getval()
+ def test():
+ self.comp.append(val)
+ self.blist.append(val)
+ return test, (val,)
+
def e_iadd(self):
new = self._getvals()
- self.comp += new
- self.blist += new
- return (new,)
+ def test():
+ self.comp += new
+ self.blist += new
+ return test, (new,)
def e_insert(self):
val = self._getval()
location = self._getloc(0) # can insert after last item
- self.comp.insert(location, val)
- self.blist.insert(location, val)
- return location, val
+ def test():
+ self.comp.insert(location, val)
+ self.blist.insert(location, val)
+ return test, (location, val)
def t_delitem(self):
location = self._getloc()
- del self.comp[location]
- del self.blist[location]
- return (location,)
+ def test():
+ del self.comp[location]
+ del self.blist[location]
+ return test, (location,)
- def t_delslice(self):
- start, stop = self._get_start_stop()
- del self.comp[start:stop]
- del self.blist[start:stop]
- return start, stop
-
- def t_delslice_step(self):
- start, stop, step = self._get_start_stop_step()
- del self.comp[start:stop:step]
- del self.blist[start:stop:step]
- return start, stop, step
-
def e_delslice_noop(self):
stop, start = self._get_start_stop()
- del self.comp[start:stop]
- del self.blist[start:stop]
- return start, stop
+ def test():
+ del self.comp[start:stop]
+ del self.blist[start:stop]
+ return test, (start, stop)
def e_delslice_step_noop(self):
stop, start, step = self._get_start_stop_step()
- del self.comp[start:stop:step]
- del self.blist[start:stop:step]
- return start, stop, step
+ def test():
+ del self.comp[start:stop:step]
+ del self.blist[start:stop:step]
+ return test, (start, stop, step)
def t_pop(self):
location = self._getloc()
- assert self.comp.pop(location) == self.blist.pop(location)
- return (location,)
+ def test():
+ assert self.comp.pop(location) == self.blist.pop(location)
+ return test, (location,)
def t_remove(self):
val = self.comp[self._getloc()]
- self.comp.remove(val)
- self.blist.remove(val)
- return (val,)
+ def test():
+ self.comp.remove(val)
+ self.blist.remove(val)
+ return test, (val,)
def t_reverse(self):
- self.comp.reverse()
- self.blist.reverse()
- return ()
+ def test():
+ self.comp.reverse()
+ self.blist.reverse()
+ return test, ()
def t_sort(self):
- self.comp.sort()
- self.blist.sort()
- return ()
+ def test():
+ self.comp.sort()
+ self.blist.sort()
+ return test, ()
def t_sort_cmp(self):
- self.comp.sort(lambda s, o: cmp(str(o), str(s))) # reverse, by
- self.blist.sort(lambda s, o: cmp(str(o), str(s))) # string
- return ()
+ def test():
+ self.comp.sort(lambda s, o: cmp(str(o), str(s))) # reverse, by
+ self.blist.sort(lambda s, o: cmp(str(o), str(s))) # string
+ return test, ()
def t_sort_key(self):
- self.comp.sort(key=lambda v: str(v))
- self.blist.sort(key=lambda v: str(v))
- return ()
+ def test():
+ self.comp.sort(key=lambda v: str(v))
+ self.blist.sort(key=lambda v: str(v))
+ return test, ()
def t_sort_reverse(self):
- self.comp.sort(reverse = True)
- self.blist.sort(reverse = True)
- return ()
+ def test():
+ self.comp.sort(reverse = True)
+ self.blist.sort(reverse = True)
+ return test, ()
def t_setitem(self):
location = self._getloc()
val = self._getval()
- self.comp[location] = val
- self.blist[location] = val
- return location, val
+ def test():
+ self.comp[location] = val
+ self.blist[location] = val
+ return test, (location, val)
- def e_setitem_slice(self):
- start, stop = self._get_start_stop()
- vals = self._getvals()
- self.comp[start:stop] = vals
- self.blist[start:stop] = vals
- return start, stop, vals
- def e_setitem_slice_step(self):
- start, stop, step = self._get_start_stop_step()
- vals = [self._getval() for i in range(len(self.comp[start:stop:step]))]
- self.comp[start:stop:step] = vals
- self.blist[start:stop:step] = vals
- return start, stop, step, vals
class CanaryTestCase(unittest.TestCase):
- def test_empty_canary(self):
+ def test_canary(self):
c = Canary([])
c()
@@ -414,6 +454,29 @@
c = Canary([], zc.blist.BList(bucket_size=5, index_size=4))
c()
+ def test_big_canary(self):
+ for i in range(32):
+ c = Canary([], zc.blist.BList(bucket_size=5, index_size=4))
+ c(1000)
+
+ def test_several_small_canaries(self):
+ for i in range(128):
+ c = Canary([], zc.blist.BList(bucket_size=5, index_size=4))
+ c(10)
+
+ def test_big_bigop_canary(self):
+ for i in range(32):
+ c = BigOperationCanary(
+ [], zc.blist.BList(bucket_size=5, index_size=4))
+ c(1000)
+
+ def test_big_big_bigop_canary(self):
+ for i in range(32):
+ c = BigOperationCanary(
+ range(10000),
+ zc.blist.BList(range(10000), bucket_size=5, index_size=4))
+ c(2000)
+
def test_suite():
return unittest.TestSuite((
doctest.DocFileSuite(
More information about the Checkins
mailing list