[ZODB-Dev] Tool for exploring raw ZODB data

Christian Reis kiko@async.com.br
Fri, 14 Feb 2003 23:22:19 -0200


--6c2NcOVqGQ03X4Wi
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline


I did a bit of hacking on it to make it easier to follow (for myself),
and added a pprint so debugging is a bit easier when it hits something
it doesn't know. I'm sending it back to you so you can check it out and
tell me what I did wrong.

Take care,
--
Christian Reis, Senior Engineer, Async Open Source, Brazil.
http://async.com.br/~kiko/ | [+55 16] 261 2331 | NMFL

--6c2NcOVqGQ03X4Wi
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=ZODB_direct_load

#!/usr/bin/env python

##  ZODB_direct_load.py
## Copyright 2003 Jeff Sasmor
## 

__doc__ = """
Load an object's data directly from ZODB
given the OPath. Does not actually create objects.

OPath is object path thru containers to actual obj,
with the elements separated with + signs (or in a list).

For example: if there's a container in the root
persistent mapping (the base element of ZODB) and its
name is RootFolder, and there's another container 
in RootFolder named Folder, and within Folder there's
an object named zot.py then the OPath would be:

"RootFolder+Folder+zot.py"

or alternatively

["RootFolder","Folder","zot.py"]


Due to the need to import from the ZODB package,
this module must be placed somewhere where
it is able to do the import. It doesn't use
any other part of the ZODB pkg besides the
FileStorage module.

The storage is opened in read-only mode so theoretically
there should be no possibility of damaging the database.

**********************************************************
License:
This software is free for non-commercial or commercial use, 
subject to the same general terms as the PSF (Python 
Software Foundation), and most specifically the following:
    
1. Jeff Sasmor (JMS) makes this software available on an "AS IS"
basis.  JMS MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, JMS MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THIS SOFTWARE WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.

2. JMS SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THIS
SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING THIS SOFTWARE,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

This is extracted from the Python 2.2 license. 

In other words, use this software on a copy of your valuable database
and I'm not responsible if anything goes wrong when you use it on
your only copy of a valuable database.
************************************************************
"""

import os
import sys
from cPickle import Unpickler
from cStringIO import StringIO
from types import StringType, TupleType, DictType
from pprint import pprint

from ZODB.FileStorage import FileStorage

debug = 0

def _extractObject(storage,oid):
    """storage is a FileStorage instance
       oid is the object ID
       
       Returns a dictionary with the names of the subobjs and attrs of
       the object named 'objName'. Each dict key is a name of a subobj or
       attr. 
       Dict values for subobjs are tuples:
           (oid,(module info))
       Dict values for attrs are the attr values (string or whatever)
           
       returns None if the object isn't found
    """
    try:
        p, serial = storage.load(oid,'')
    except KeyError:
        return None  #indicate nothing found
    
    file = StringIO(p)
    unpickler = Unpickler(file)
    # this is just a tuple with the module and class, we chuck this out
    unpickler.persistent_load = []
    unpickler.load() 
    objDict = unpickler.noload()
    return objDict

def _traverse(OPath,storage,obj_dict,redir_info=None):
    """traverse thru the OPath to find the obj
       
       returns the object dict ret'd by extract_obj
    """

    # get path list into list fmt if necc
    if type(OPath) == StringType: 
        OPath = OPath.split('+')
    
    if redir_info:
        redir_module,redir_class,redir_attr = redir_info
        initial_obj_dict = obj_dict.copy()
    else:
        redir_module = redir_class = redir_attr = None
   
    for objName in OPath:
        if not hasattr(obj_dict, "keys"):
            print "Warning: I can't traverse %s, aborting" % type(obj_dict)
            break
        keys = obj_dict.keys()
        if debug:
            print "objname = %s, keys = %s" % (objName,keys)

        if objName not in keys:  #this means that the current pos in the path list is found
            return None

        oid,klass = obj_dict[objName]
        obj_dict = _extractObject(storage,oid)
        if not obj_dict: 
            return None

        #check for redirection, if there's real class info
        if not redir_info or not isinstance(klass,TupleType): 
            continue
        module,name = klass    #get mod and name
        if debug:
            print module,name,obj_dict
        #test for mod/name match, attr existence, attr <> None.
        if redir_module == module and redir_class == name and \
           obj_dict.has_key(redir_attr) and obj_dict[redir_attr]:
            #if so, then recurse. Note that original obj dict
            #(the 'root') is used, and that redir params not passed
            #again so there's only one redirection.
            if debug:
                print "redirecting"
            return _traverse(obj_dict[redir_attr],storage,initial_obj_dict)

    return obj_dict

def getObjectDirect(dbFile,OPath,redir_info=None):
    """get an object without actually recreating it.

       dbFile is the name of the file, OPath is a + separated
       path to the object you want (see docstring for module)
       Alternatively, it can be a list of the path elements.
       redir_info is an optional (module, class, attr) triple that is
       passed through to _traverse.

       Returns whatever _traverse returns for OPath.
       Returns None if dbFile is not a string, the file does not exist,
       the storage cannot be opened, or the base mapping is not found.

       Once the storage has been opened it is always closed again
       before returning, also when an exception passes through.
     """
    #some sanity checks
    if dbFile is None or not isinstance(dbFile,StringType):
        return None

    if os.access(dbFile,os.F_OK) == 0:  #no such file
        return None

    try:
        #open FileStorage in RO mode
        file_storage = FileStorage(dbFile,read_only = 1)
    except Exception:
        # The constructor raised, so no storage object was ever bound:
        # there is nothing to close here.
        return None

    try:
        if not file_storage:
            return None

        # the base PersistentMapping is OID zero
        base = _extractObject(file_storage,'\0\0\0\0\0\0\0\0')

        if not base:
            # pretty odd if there's no base persistent mapping in the
            # FileStorage!
            return None

        # this is a dict with the objects in the base PersistentMapping.
        # Each key is the name of an obj, each value is a tuple with the
        # class info in it.
        obj_dict = base['_container']

        #get the object, or None if it does not exist
        return _traverse(OPath,file_storage,obj_dict,redir_info)
    finally:
        file_storage.close()
 
def usage():
    """Print the command line synopsis to stdout.

       The three redir_* options are shown space separated because the
       script reads them as three separate command line arguments
       (sys.argv[3], sys.argv[4] and sys.argv[5]).
    """
    print("\nUsage:Python ZODB_direct_load dbFile OPath [redir_module redir_class redir_attr]")
    print("dbFile = path to ZODB database file")
    print("OPath = + sign separated path to target object")
    print("Optional:")
    print("redir_module = module name with class for an object used as a 'link', if any")
    print("redir_class = class name of link object")
    print("redir_attr = name of attr of class that has actual redirection information\n")
    

# a test that makes sense for PyFrame 
if __name__ == '__main__':
    # Command line driver:
    #   dbFile OPath [redir_module redir_class redir_attr]
    # Looks up OPath in dbFile and prints each name in the resulting
    # object dict together with the type and value stored under it.
    # Exits with status 1 on bad arguments or when nothing is found.
    from string import printable

    argLen = len(sys.argv)

    if argLen < 3:
        usage()
        sys.exit(1)

    # the redirect options only make sense as a complete triple
    if argLen > 3 and argLen < 6:
        print("\nError: Need all three redir option\n")
        usage()
        sys.exit(1)

    path = sys.argv[1]

    if os.access(path,os.F_OK) == 0:  #no such file
        print("\nError: invalid database file path\n%s" % path)
        usage()
        sys.exit(1)

    OPath = sys.argv[2]
    if argLen > 3:
        redir_info = (sys.argv[3],sys.argv[4],sys.argv[5])
    else:
        redir_info = None

    print("\nfinding object %s" % OPath)
    if redir_info:
        print("Using redirect module:%s, class:%s, attr:%s" % redir_info)
    print("")

    obj = getObjectDirect(path,OPath,redir_info)
    if not obj:
        print("%s was not found" % OPath)
        # a failed lookup is an error like the other failures above
        sys.exit(1)

    if not hasattr(obj, "items"):
        # not a dict-like result: just dump whatever came back
        pprint(obj)
        raise SystemExit

    for k,v in obj.items():
        # By default the value is shown as it is. Only a string that
        # contains unprintable characters is replaced by an escaped copy.
        output = v
        if isinstance(v,StringType):
            pieces = []
            escaped = 0
            for char in v:
                if char in printable:
                    pieces.append(char)
                else:
                    pieces.append(" \\x%02X " % ord(char))
                    escaped = 1
            if escaped:
                output = ''.join(pieces)

        print("%s : %s :: %s" % (k, type(v), output))

--6c2NcOVqGQ03X4Wi--