[Checkins] SVN: zc.twist/trunk/ make failures have much smaller pickles; prepare for 1.2

Gary Poster gary at zope.com
Wed Apr 9 11:52:06 EDT 2008


Log message for revision 85194:
  make failures have much smaller pickles; prepare for 1.2

Changed:
  U   zc.twist/trunk/CHANGES.txt
  U   zc.twist/trunk/setup.py
  U   zc.twist/trunk/src/zc/twist/README.txt
  U   zc.twist/trunk/src/zc/twist/__init__.py

-=-
Modified: zc.twist/trunk/CHANGES.txt
===================================================================
--- zc.twist/trunk/CHANGES.txt	2008-04-09 15:33:31 UTC (rev 85193)
+++ zc.twist/trunk/CHANGES.txt	2008-04-09 15:52:06 UTC (rev 85194)
@@ -1,3 +1,12 @@
+1.2 (2008-4-9)
+--------------
+
+* New subclass of twisted.python.failure.Failure begins with only reprs,
+  and it pickles to exclude the stack, exclude the global vars in the frames,
+  and truncate the reprs of the local vars in the frames.  The goal is to
+  keep the pickle size of Failures down to a manageable size.  ``sanitize``
+  now uses this class.
+
 1.1 (2008-3-27)
 ---------------
 

Modified: zc.twist/trunk/setup.py
===================================================================
--- zc.twist/trunk/setup.py	2008-04-09 15:33:31 UTC (rev 85193)
+++ zc.twist/trunk/setup.py	2008-04-09 15:52:06 UTC (rev 85194)
@@ -13,7 +13,7 @@
 
 setup(
     name='zc.twist',
-    version='1.1',
+    version='1.2',
     packages=find_packages('src'),
     package_dir={'':'src'},
     zip_safe=False,

Modified: zc.twist/trunk/src/zc/twist/README.txt
===================================================================
--- zc.twist/trunk/src/zc/twist/README.txt	2008-04-09 15:33:31 UTC (rev 85193)
+++ zc.twist/trunk/src/zc/twist/README.txt	2008-04-09 15:52:06 UTC (rev 85194)
@@ -197,6 +197,11 @@
     ...
     ...TypeError: unsupported operand type(s) for +=: 'int' and 'str'
 
+The failure is sanitized in that the traceback is gone and the frame values
+are turned into reprs.  If you pickle the failure, the reprs are truncated
+to a maximum of 21 characters plus "[...]" to indicate the
+truncation[#show_sanitation]_.
+
 The call tries to be a good connection citizen, waiting for a connection
 if the pool is at its maximum size.  This code relies on the twisted
 reactor; we'll use a `time_flies` function, which takes seconds to move
@@ -416,6 +421,107 @@
 
     >>> demo = root['demo']
 
+.. [#show_sanitation] Before pickling, the failure includes full information
+    about the frames both before and after the point where the exception was
+    caught, as well as their locals and globals.  Everything has been
+    repr'd, though, and the traceback object removed.
+    
+    >>> print res.getTraceback() # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+      File ".../zc/twist/__init__.py", line ..., in __call__
+        get_connection(db, reactor=self.getReactor()).addCallback(
+      File ".../twisted/internet/defer.py", line ..., in addCallback
+        callbackKeywords=kw)
+      File ".../twisted/internet/defer.py", line ..., in addCallbacks
+        self._runCallbacks()
+      File ".../twisted/internet/defer.py", line ..., in _runCallbacks
+        self.result = callback(self.result, *args, **kw)
+    --- <exception caught here> ---
+      File ".../zc/twist/__init__.py", line ..., in _call
+        res = call(*args, **kwargs)
+      File "<doctest README.txt[...]>", line ..., in __call__
+        self.count += amount
+    exceptions.TypeError: unsupported operand type(s) for +=: 'int' and 'str'
+    <BLANKLINE>
+
+    (The failure traceback at "verbose" detail is wildly verbose--this example
+    takes out more than 90% of the text, just so you know.)
+
+    >>> print res.getTraceback(detail='verbose') # doctest: +ELLIPSIS
+    *--- Failure #... (pickled) ---
+    .../zc/twist/__init__.py:...: __call__(...)
+     [ Locals ]...
+      args : "('I do not add well with integers',)...
+     ( Globals )...
+      Partial : "<class 'zc.twist.Partial'>...
+    .../twisted/internet/defer.py:...: addCallback(...)
+     [ Locals ]...
+      args : '(<Deferred at ...
+     ( Globals )...
+      Deferred : '<class twisted.internet.defer.Deferred at ...
+    .../twisted/internet/defer.py:...: addCallbacks(...)
+     [ Locals ]...
+     ( Globals )...
+    .../twisted/internet/defer.py:...: _runCallbacks(...)
+     [ Locals ]...
+     ( Globals )...
+    --- <exception caught here> ---
+    .../zc/twist/__init__.py:...: _call(...)
+     [ Locals ]...
+      args : "['I do not add well with integers']...
+     ( Globals )...
+      Partial : "<class 'zc.twist.Partial'>...
+    <doctest README.txt[...]>:...: __call__(...)
+     [ Locals ]...
+      amount : "'I do not add well with integers'...
+     ( Globals )...
+      Partial : "<class 'zc.twist.Partial'>...
+    exceptions.TypeError: unsupported operand type(s) for +=: 'int' and 'str'
+    *--- End of Failure #... ---
+    <BLANKLINE>
+
+    After pickling, the failure only includes information for when the
+    exception was caught and beyond (after the "--- <exception caught
+    here> ---" lines above), does not have globals, and has local reprs
+    truncated to a maximum of 21 characters plus "[...]" to indicate the
+    truncation. This addresses past problems of large pickle size for
+    failures, which can cause performance problems.
+
+    >>> import pickle
+    >>> print pickle.loads(pickle.dumps(res)).getTraceback()
+    ... # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+      File ".../zc/twist/__init__.py", line ..., in _call
+        res = call(*args, **kwargs)
+      File "<doctest README.txt[...]>", line ..., in __call__
+        self.count += amount
+    exceptions.TypeError: unsupported operand type(s) for +=: 'int' and 'str'
+    <BLANKLINE>
+
+    >>> print pickle.loads(pickle.dumps(res)).getTraceback(detail='verbose')
+    ... # doctest: +ELLIPSIS
+    *--- Failure #... (pickled) ---
+    .../zc/twist/__init__.py:...: _call(...)
+     [ Locals ]
+      tm : '<transaction._manager[...]'
+      call : '<zc.twist.README.Demo[...]'
+      d : '<Deferred at ...[...]'
+      kwargs : '{}'
+      self : '<zc.twist.Partial obj[...]'
+      args : "['I do not add well w[...]"
+      conn : '<Connection at ...[...]'
+     ( Globals )
+    <doctest README.txt[...]>:...: __call__(...)
+     [ Locals ]
+      amount : "'I do not add well wi[...]"
+      self : '<zc.twist.README.Demo[...]'
+     ( Globals )
+    exceptions.TypeError: unsupported operand type(s) for +=: 'int' and 'str'
+    *--- End of Failure #... ---
+    <BLANKLINE>
+
+
+
 .. [#relies_on_twisted_reactor] We monkeypatch twisted.internet.reactor
     (and revert it in another footnote below).
 

Modified: zc.twist/trunk/src/zc/twist/__init__.py
===================================================================
--- zc.twist/trunk/src/zc/twist/__init__.py	2008-04-09 15:33:31 UTC (rev 85193)
+++ zc.twist/trunk/src/zc/twist/__init__.py	2008-04-09 15:52:06 UTC (rev 85194)
@@ -1,6 +1,4 @@
-import random
-import types
-import warnings
+import copy, random, types, warnings
 
 import ZODB.interfaces
 import ZODB.POSException
@@ -97,18 +95,15 @@
 
 missing = object()
 
-def get_connection(db, deferred=None, backoff=None, reactor=None):
+def get_connection(db, deferred=None, backoff=0, reactor=None):
     if deferred is None:
         deferred = twisted.internet.defer.Deferred()
-    if backoff is None:
-        backoff = random.random() / 10 # max of 1/10 of a second
-    else:
-        backoff *= 2
+    backoff += random.random() / 20.0 + .05 # .05 to .10 of a second
     # if this is taking too long (i.e., the cumulative backoff is taking
-    # about a second) then we'll just take one.  This might be a bad idea:
-    # we'll have to see in practice.  Otherwise, if the backoff isn't too
-    # long and we don't have a connection within our limit, try again
-    # later.
+    # more than half a second) then we'll just take one.  This might be
+    # a bad idea: we'll have to see in practice.  Otherwise, if the
+    # backoff isn't too long and we don't have a connection within our
+    # limit, try again later.
     if backoff < .5 and not availableConnectionCount(db):
         if reactor is None:
             reactor = twisted.internet.reactor
@@ -119,12 +114,57 @@
         transaction_manager=transaction.TransactionManager()))
     return deferred
 
+def truncate(str):
+    if len(str) > 21: # 64 bit int or so
+        str = str[:21]+"[...]"
+    return str
+
+class Failure(twisted.python.failure.Failure):
+
+    sanitized = False
+
+    def __init__(self, exc_value=None, exc_type=None, exc_tb=None):
+        twisted.python.failure.Failure.__init__(
+            self, exc_value, exc_type, exc_tb)
+        self.__dict__ = twisted.python.failure.Failure.__getstate__(self)
+
+    def cleanFailure(self):
+        pass # already done
+
+    def __getstate__(self):
+        res = self.__dict__
+        if not self.sanitized:
+            res = copy.deepcopy(res)
+            res['stack'] = []
+            res['frames'] = [
+                [
+                    v[0], v[1], v[2],
+                    [(j[0], truncate(j[1])) for j in v[3]],
+                    [] # [(j[0], truncate(j[1])) for j in v[4]]
+                ] for v in self.frames
+            ]
+
+            res['sanitized'] = True
+        return res
+
+    def printTraceback(
+        self, file=None, elideFrameworkCode=0, detail='default'):
+        return twisted.python.failure.Failure.printTraceback(
+            self, file, elideFrameworkCode or self.sanitized, detail)
+            
+class _Dummy: # twisted.python.failure.Failure is an old-style class
+    pass # so we use old-style hacks instead of __new__
+
 def sanitize(failure):
     # failures may have some bad things in the traceback frames.  This
     # converts everything to strings
-    state = failure.__getstate__()
-    failure.__dict__.update(state)
-    return failure
+    if not isinstance(failure, Failure):
+        res = _Dummy()
+        res.__class__ = Failure
+        res.__dict__ = failure.__getstate__()
+    else:
+        res = failure
+    return res
 
 class Partial(object):
 
@@ -196,7 +236,7 @@
             db = conn.db()
             conn.close()
             if self.attempt_count >= 5: # TODO configurable
-                res = sanitize(twisted.python.failure.Failure())
+                res = Failure()
                 d.errback(res)
             else:
                 get_connection(db, reactor=self.getReactor()).addCallback(
@@ -204,13 +244,13 @@
         except EXPLOSIVE_ERRORS:
             tm.abort()
             conn.close()
-            res = sanitize(twisted.python.failure.Failure())
+            res = Failure()
             d.errback(res)
             raise
         except:
             tm.abort()
             conn.close()
-            res = sanitize(twisted.python.failure.Failure())
+            res = Failure()
             d.errback(res)
         else:
             conn.close()



More information about the Checkins mailing list