[Checkins] SVN: zc.fsutils/branches/dev/src/zc/fsutil/references.
added a low-memory collection format
Jim Fulton
jim at zope.com
Tue Sep 25 19:27:16 EDT 2007
Log message for revision 80063:
added a low-memory collection format
Changed:
U zc.fsutils/branches/dev/src/zc/fsutil/references.py
U zc.fsutils/branches/dev/src/zc/fsutil/references.txt
-=-
Modified: zc.fsutils/branches/dev/src/zc/fsutil/references.py
===================================================================
--- zc.fsutils/branches/dev/src/zc/fsutil/references.py 2007-09-25 22:40:23 UTC (rev 80062)
+++ zc.fsutils/branches/dev/src/zc/fsutil/references.py 2007-09-25 23:27:16 UTC (rev 80063)
@@ -20,71 +20,121 @@
import ZODB.utils
-def references(iterator):
+def collect(iterator, output):
"""Create a database of database references.
Return a dictionary mapping oids to dictionaries with keys 'from'
and 'to' with values that are dictionaries mapping oids to lists
of references.
"""
- result = {}
+ pickler = cPickle.Pickler(open(output, 'w'))
+
for trans in iterator:
+ trandata = trans.tid, trans._tpos
for record in trans:
- from_oid = ZODB.utils.oid_repr(record.oid)
- from_data = result.get(from_oid)
- if from_data is None:
- from_data = result[from_oid] = {
- 'from': {}, 'to': {}, 'serials': [],
- }
- from_data['serials'].append(record.tid)
-
refs = []
u = cPickle.Unpickler(cStringIO.StringIO(record.data))
u.persistent_load = refs
u.noload()
u.noload()
- for ref in refs:
- if isinstance(ref, tuple):
- to_oid = ZODB.utils.oid_repr(ref[0])
- elif isinstance(ref, str):
- to_oid = ZODB.utils.oid_repr(ref)
- elif isinstance(ref, list):
- if len(ref) == 1:
- to_oid = ZODB.utils.oid_repr(ref[0])
- else:
- try:
- reference_type, args = ref
- except ValueError:
- print 'wtf', ref
- continue
+ pickler.dump((trandata, record.oid, record.tid, refs))
+
+def collect_script(args=None):
+ if args is None:
+ args = sys.argv[1:]
- if reference_type == 'w':
- to_oid = ZODB.utils.oid_repr(args[0])
- elif reference_type in 'nm':
- to_oid = args[0], ZODB.utils.oid_repr(args[1])
- else:
- print wtf, reference_type, args
- else:
+ [inp, outp] = args
+
+ iterator = sys.modules['ZODB.FileStorage.FileStorage' # :(
+ ].FileIterator(inp)
+ data = collect(iterator, outp)
+
+def load(fname):
+ unpickler = cPickle.Unpickler(open(fname))
+ result = {}
+ while 1:
+ try:
+ data = unpickler.load()
+ except EOFError:
+ return result
+ _update(result, *data)
+
+
+def _update(result, tinfo, oid, serial, refs):
+ """Create a database of database references.
+
+ Return a dictionary mapping oids to dictionaries with keys 'from'
+ and 'to' with values that are dictionaries mapping oids to lists
+ of references.
+ """
+ from_oid = ZODB.utils.oid_repr(oid)
+ from_data = result.get(from_oid)
+ if from_data is None:
+ from_data = result[from_oid] = {
+ 'from': {}, 'to': {}, 'serials': [],
+ }
+ from_data['serials'].append(serial)
+
+ for ref in refs:
+ if isinstance(ref, tuple):
+ to_oid = ZODB.utils.oid_repr(ref[0])
+ elif isinstance(ref, str):
+ to_oid = ZODB.utils.oid_repr(ref)
+ elif isinstance(ref, list):
+ if len(ref) == 1:
+ to_oid = ZODB.utils.oid_repr(ref[0])
+ else:
+ try:
+ reference_type, args = ref
+ except ValueError:
print 'wtf', ref
continue
- ref = dict(ref=ref, tid=trans.tid, tpos=trans._tpos)
-
- from_to = from_data['to'].get(to_oid)
- if from_to is None:
- from_to = from_data['to'][to_oid] = []
- from_to.append(ref)
+ if reference_type == 'w':
+ to_oid = ZODB.utils.oid_repr(args[0])
+ elif reference_type in 'nm':
+ to_oid = args[0], ZODB.utils.oid_repr(args[1])
+ else:
+ print wtf, reference_type, args
+ else:
+ print 'wtf', ref
+ continue
- to_data = result.get(to_oid)
- if to_data is None:
- to_data = result[to_oid] = {
- 'to': {}, 'from': {}, 'serials': [],
- }
- to_from = to_data['from'].get(from_oid)
- if to_from is None:
- to_from = to_data['from'][from_oid] = []
- to_from.append(ref)
-
+ ref = dict(ref=ref, tinfo=tinfo)
+
+ from_to = from_data['to'].get(to_oid)
+ if from_to is None:
+ from_to = from_data['to'][to_oid] = []
+ from_to.append(ref)
+
+ to_data = result.get(to_oid)
+ if to_data is None:
+ to_data = result[to_oid] = {
+ 'to': {}, 'from': {}, 'serials': [],
+ }
+ to_from = to_data['from'].get(from_oid)
+ if to_from is None:
+ to_from = to_data['from'][from_oid] = []
+ to_from.append(ref)
+
+def references(iterator):
+ """Create a database of database references.
+
+ Return a dictionary mapping oids to dictionaries with keys 'from'
+ and 'to' with values that are dictionaries mapping oids to lists
+ of references.
+ """
+ result = {}
+ for trans in iterator:
+ trandata = trans.tid, trans._tpos
+ for record in trans:
+ refs = []
+ u = cPickle.Unpickler(cStringIO.StringIO(record.data))
+ u.persistent_load = refs
+ u.noload()
+ u.noload()
+ _update(result, trandata, record.oid, record.tid, refs)
+
return result
def references_script(args=None):
@@ -97,7 +147,3 @@
].FileIterator(inp)
data = references(iterator)
cPickle.Pickler(open(outp, 'w'), 1).dump(data)
-
-
-
-
Modified: zc.fsutils/branches/dev/src/zc/fsutil/references.txt
===================================================================
--- zc.fsutils/branches/dev/src/zc/fsutil/references.txt 2007-09-25 22:40:23 UTC (rev 80062)
+++ zc.fsutils/branches/dev/src/zc/fsutil/references.txt 2007-09-25 23:27:16 UTC (rev 80063)
@@ -97,44 +97,44 @@
'\x03p\x98[\xe0C\xd74'],
'to': {'0x01': [{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x01',
None),
- 'tid': '\x03p\x98[\xe0C\xd74',
- 'tpos': 168L}]}},
+ 'tinfo': ('\x03p\x98[\xe0C\xd74',
+ 168L)}]}},
'0x01': {'from': {'0x00': [{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x01',
None),
- 'tid': '\x03p\x98[\xe0C\xd74',
- 'tpos': 168L}]},
+ 'tinfo': ('\x03p\x98[\xe0C\xd74',
+ 168L)}]},
'serials': ['\x03p\x98[\xe0C\xd74',
'\x03p\x98[\xe0C\xd75'],
'to': {'0x02': [{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x02',
None),
- 'tid': '\x03p\x98[\xe0C\xd74',
- 'tpos': 168L},
+ 'tinfo': ('\x03p\x98[\xe0C\xd74',
+ 168L)},
{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x02',
None),
- 'tid': '\x03p\x98[\xe0C\xd75',
- 'tpos': 535L}],
+ 'tinfo': ('\x03p\x98[\xe0C\xd75',
+ 535L)}],
('db2', '0x02'): [{'ref': ['m',
('db2',
'\x00\x00\x00\x00\x00\x00\x00\x02',
None)],
- 'tid': '\x03p\x98[\xe0C\xd75',
- 'tpos': 535L}]}},
+ 'tinfo': ('\x03p\x98[\xe0C\xd75',
+ 535L)}]}},
'0x02': {'from': {'0x01': [{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x02',
None),
- 'tid': '\x03p\x98[\xe0C\xd74',
- 'tpos': 168L},
+ 'tinfo': ('\x03p\x98[\xe0C\xd74',
+ 168L)},
{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x02',
None),
- 'tid': '\x03p\x98[\xe0C\xd75',
- 'tpos': 535L}]},
+ 'tinfo': ('\x03p\x98[\xe0C\xd75',
+ 535L)}]},
'serials': ['\x03p\x98[\xe0C\xd74'],
'to': {}},
('db2', '0x02'): {'from': {'0x01': [{'ref': ['m',
('db2',
'\x00\x00\x00\x00\x00\x00\x00\x02',
None)],
- 'tid': '\x03p\x98[\xe0C\xd75',
- 'tpos': 535L}]},
+ 'tinfo': ('\x03p\x98[\xe0C\xd75',
+ 535L)}]},
'serials': [],
'to': {}}}
@@ -177,30 +177,30 @@
'serials': ['\x03p\x98[\xe0C\xd75'],
'to': {'0x01': [{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x01',
None),
- 'tid': '\x03p\x98[\xe0C\xd75',
- 'tpos': 4L}],
+ 'tinfo': ('\x03p\x98[\xe0C\xd75',
+ 4L)}],
'0x02': [{'ref': ['w',
('\x00\x00\x00\x00\x00\x00\x00\x02',)],
- 'tid': '\x03p\x98[\xe0C\xd75',
- 'tpos': 4L}]}},
+ 'tinfo': ('\x03p\x98[\xe0C\xd75',
+ 4L)}]}},
'0x01': {'from': {'0x00': [{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x01',
None),
- 'tid': '\x03p\x98[\xe0C\xd75',
- 'tpos': 4L}]},
+ 'tinfo': ('\x03p\x98[\xe0C\xd75',
+ 4L)}]},
'serials': ['\x03p\x98[\xe0C\xd76',
'\x03p\x98[\xe0C\xd77'],
'to': {'0x02': [{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x02',
None),
- 'tid': '\x03p\x98[\xe0C\xd77',
- 'tpos': 324L}]}},
+ 'tinfo': ('\x03p\x98[\xe0C\xd77',
+ 324L)}]}},
'0x02': {'from': {'0x00': [{'ref': ['w',
('\x00\x00\x00\x00\x00\x00\x00\x02',)],
- 'tid': '\x03p\x98[\xe0C\xd75',
- 'tpos': 4L}],
+ 'tinfo': ('\x03p\x98[\xe0C\xd75',
+ 4L)}],
'0x01': [{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x02',
None),
- 'tid': '\x03p\x98[\xe0C\xd77',
- 'tpos': 324L}]},
+ 'tinfo': ('\x03p\x98[\xe0C\xd77',
+ 324L)}]},
'serials': [],
'to': {}}}
@@ -216,12 +216,12 @@
[('0x02',
{'from': {'0x00': [{'ref': ['w',
('\x00\x00\x00\x00\x00\x00\x00\x02',)],
- 'tid': '\x03p\x98[\xe0C\xd75',
- 'tpos': 4L}],
+ 'tinfo': ('\x03p\x98[\xe0C\xd75',
+ 4L)}],
'0x01': [{'ref': ('\x00\x00\x00\x00\x00\x00\x00\x02',
None),
- 'tid': '\x03p\x98[\xe0C\xd77',
- 'tpos': 324L}]},
+ 'tinfo': ('\x03p\x98[\xe0C\xd77',
+ 324L)}]},
'serials': [],
'to': {}})]
@@ -231,3 +231,11 @@
Our data structure can't tell us about broken cross-database references
directly.
+For large databases, we may need to avoid consuming lots of memory on
+the database machine. We can use a collection script that doesn't
+accumulate data in memory and then use a load function to load the
+data:
+
+ >>> zc.fsutil.references.collect_script(['fs2', 'fs2.dat'])
+ >>> zc.fsutil.references.load('fs2.dat') == refs2
+ True
More information about the Checkins
mailing list