[Zope-dev] delete feature to z3c.extfile

Jayarajan Jn jayarajan at musmo.com
Fri Sep 26 09:38:04 EDT 2008


On Wed, Sep 24, 2008 at 8:58 PM, Jürgen kartnaller <
juergen.kartnaller at gmail.com> wrote:

>
>
> On Wed, Sep 24, 2008 at 9:10 AM, Jayarajan Jn <jayarajan at musmo.com> wrote:
>
>> Hi Jürgen,
>>
>> First of all thank you for your reply....
>> hmm you are right too... And i am comfortable with the explicit delete.
>> But in my prototype for i have also added webDAV interface for managing
>> extfile.... Then when a user deletes the extfile object through webDAV
>> interface, the file remains there.... coz i don't know where to put my codes
>> to call delete() in that case... And in my project there wont be more than
>> one extfiles refering to same file! so in my case its ok to go for implicit
>> delete! but its now working!
>>
>
> You can never be sure if a file  is only used once. If two user upload the
> same file only one copy is stored in extfile because both files have the
> same hash.
>

Yes, that's right... as things stand we can never know how many extfile
objects share a file. But we have enough information available to find out.

We can add reference counting functionality to the HashDir class, and then
we can keep track of this... see my code below. I have created a
ReferenceCounter and it works fine. I can now implicitly delete without
worry.


But still, when I use it in my Zope application, '__del__' itself is *not*
being invoked :?

Please tell me what you think...

referencecounter.py:-
------------------------------------------------------------------------------------
import os
from persistent import Persistent

class ReferenceCounter(Persistent):
    """keeps track of the number of references to each file in a HashDir

    Counts live in memory in ``self.counts`` (digest -> int).  ``commit()``
    writes them to a text file named ``refcount`` inside *dirpath*, one
    ``digest,count`` pair per line; ``__init__`` reads that file back.
    """

    def __init__(self, dirpath):
        """load the counts stored in ``<dirpath>/refcount``, if present

        A missing file is not an error: it is created by the first
        ``commit()``.
        """
        self.path = os.path.join(dirpath, 'refcount')
        self.counts = {}
        if not os.path.exists(self.path):
            return
        f = open(self.path)
        try:
            for line in f:
                line = line.strip()
                if not line:
                    # tolerate blank lines instead of failing the unpack
                    continue
                digest, count = line.split(',')
                self.counts[digest] = int(count)
        finally:
            f.close()

    def getCount(self, digest):
        """return the number of references to digest (0 if untracked)"""
        return self.counts.get(digest, 0)

    def getTotalFiles(self):
        """return the number of digests with at least one reference"""
        return len(self.counts)

    def addReference(self, digest):
        """add one reference to digest and return the new count"""
        newCount = self.getCount(digest) + 1
        self.counts[digest] = newCount
        # counts is a plain dict mutated in place; attribute assignment is
        # what the persistence machinery notices, so flag the change.
        self._p_changed = True
        return newCount

    def removeReference(self, digest):
        """remove one reference to digest and return the new count

        When the count reaches 0 the digest is dropped from the table.  For
        a digest that is not tracked the result is negative and nothing is
        stored.
        """
        newCount = self.getCount(digest) - 1
        if newCount > 0:
            self.counts[digest] = newCount
            self._p_changed = True
        elif newCount == 0:
            del self.counts[digest]
            self._p_changed = True
        return newCount

    def commit(self):
        """write all counts to the refcount file, replacing its content"""
        f = open(self.path, 'w')
        try:
            f.write('\n'.join("%s,%s" % (digest, count)
                for digest, count in self.counts.items()))
        finally:
            f.close()
--------------------------------------------------------------------------------
hashdir.py
--------------------------------------------------------------------------------
import sha
import os
import stat
import tempfile
import shutil
from types import StringTypes, UnicodeType
import interfaces
from zope import interface
from persistent import Persistent
from zope.cachedescriptors.property import Lazy
from referencecounter import ReferenceCounter

class HashDir(Persistent):

    """a directory holding files named after their sha1 hash

    Below ``path`` three sub directories are used: ``tmp`` for files that
    are still being written, ``var`` for committed files (named by their
    digest) and ``etc`` for the reference count file.
    """

    interface.implements(interfaces.IHashDir)
    _path = None
    # Created by _setPath, because the ``etc`` directory is only known once
    # a real path has been assigned.
    refCounter = None

    def __init__(self, path=None):
        """path may be None; assign ``path`` later in that case"""
        self.path = path

    def _setPath(self, path):
        """set the base directory, create missing sub directories and
        load the reference counts stored there; None is ignored"""
        if path is None:
            return
        self._path = os.path.abspath(path)
        self.tmp = os.path.join(self.path, 'tmp')
        self.var = os.path.join(self.path, 'var')
        self.etc = os.path.join(self.path, 'etc')
        self._initPaths()
        self.refCounter = ReferenceCounter(self.etc)

    def _getPath(self):
        return self._path

    path = property(_getPath,_setPath)

    def _initPaths(self):
        """create the base directory and its sub directories if missing"""
        for path in [self.path, self.var, self.tmp, self.etc]:
            if not os.path.exists(path):
                os.mkdir(path)

    def new(self):
        """returns a new filehandle"""
        handle, path = tempfile.mkstemp(prefix='dirty.',
                                        dir=self.tmp)
        return WriteFile(self, handle, path)

    def commit(self, f):
        """commit a file, this is called by the file

        Moves the temporary file into ``var`` under its digest (or drops it
        if that content is already stored), adds one reference for the
        digest and returns the digest.
        """
        digest = f.sha.hexdigest()
        target = os.path.join(self.var, digest)
        if os.path.exists(target):
            # we have that content so just delete the tmp file
            os.remove(f.path)
        else:
            shutil.move(f.path, target)
            # read-only for owner and group (octal 0440)
            os.chmod(target, stat.S_IRUSR | stat.S_IRGRP)
        self.refCounter.addReference(digest)
        self.refCounter.commit()
        return digest

    def digests(self):
        """returns all digests stored"""
        return os.listdir(self.var)

    def getPath(self, digest):
        """return the path of the stored file for digest

        Raises ValueError if digest is not a 40 character string and
        KeyError if no file is stored under it.
        """
        if not isinstance(digest, StringTypes) or len(digest) != 40:
            raise ValueError(repr(digest))
        if isinstance(self.var, UnicodeType):
            digest = unicode(digest)
        path = os.path.join(self.var, digest)
        if not os.path.isfile(path):
            raise KeyError(digest)
        return path

    def getSize(self, digest):
        """return the size in bytes of the file stored for digest"""
        return os.path.getsize(self.getPath(digest))

    def open(self, digest):
        """return a ReadFile for the file stored for digest"""
        return ReadFile(self.getPath(digest))

    def delete(self, digest):
        """release one reference to digest

        The file itself is removed only when the last reference is
        released.  Raises KeyError (from getPath) if no file is stored for
        digest.
        """
        # getPath already guarantees that the file exists
        path = self.getPath(digest)
        # compare by value; identity of small ints is an implementation
        # detail
        if self.refCounter.removeReference(digest) == 0:
            os.remove(path)
        self.refCounter.commit()


class ReadFile(object):

    """A read-only file wrapper which opens the real file on demand"""

    interface.implements(interfaces.IReadFile)

    def __init__(self, name, bufsize=-1):
        self.name = name
        self.bufsize = bufsize
        # the last path component is kept as the digest
        self.digest = str(os.path.basename(self.name))
        self._v_file = None
        self._v_len = None

    @property
    def _file(self):
        """the underlying file object, (re)opened if it is closed"""
        if self.closed:
            self._v_file = open(self.name, 'rb', self.bufsize)
        return self._v_file

    @Lazy
    def ctime(self):
        info = os.stat(self.name)
        return int(info[stat.ST_CTIME])

    @Lazy
    def atime(self):
        info = os.stat(self.name)
        return int(info[stat.ST_ATIME])

    def __len__(self):
        """size of the file in bytes, looked up once and then cached"""
        if self._v_len is not None:
            return self._v_len
        info = os.stat(self.name)
        self._v_len = int(info[stat.ST_SIZE])
        return self._v_len

    def __repr__(self):
        return "<ReadFile named %r>" % (self.digest,)

    @property
    def closed(self):
        """like file closed, but true as well if never opened"""
        handle = self._v_file
        if handle is None:
            return True
        return handle.closed

    def seek(self, offset, whence=0):
        """see file.seek"""
        # Seeking to the very start of a closed file needs no open: the
        # next read opens the file and begins at position 0 anyway.
        if self.closed and offset == 0 and whence == 0:
            return
        return self._file.seek(offset, whence)

    def tell(self):
        """see file.tell"""
        if not self.closed:
            return self._file.tell()
        return 0

    def read(self, size=-1):
        """see file.read"""
        source = self._file
        return source.read(size)

    def close(self):
        """see file.close"""
        handle = self._v_file
        if handle is not None and not handle.closed:
            handle.close()
        self._v_file = None

    def fileno(self):
        """see file.fileno"""
        source = self._file
        return source.fileno()

    def __iter__(self):
        return iter(self._file)


class WriteFile(object):

    """a write-only file which computes the sha digest of what is written

    hd is the object whose ``commit`` is called by ``commit()``, handle is
    an OS level file descriptor and path is the file behind that
    descriptor.
    """

    interface.implements(interfaces.IWriteFile)

    def __init__(self, hd, handle, path):
        self.hd = hd
        self.handle = handle
        self.path = path
        self.sha = sha.new()
        self._pos = 0

    def write(self, s):
        """write s to the file and feed it to the digest"""
        self.sha.update(s)
        # os.write may write fewer bytes than requested; keep going until
        # everything is on disk so the content always matches the digest.
        remaining = s
        while remaining:
            written = os.write(self.handle, remaining)
            remaining = remaining[written:]
        self._pos += len(s)

    def commit(self):
        """returns the sha digest and saves the file"""
        os.close(self.handle)
        return self.hd.commit(self)

    def tell(self):
        """see file.tell"""
        return self._pos

    def abort(self):
        """abort the write and delete file"""
        os.close(self.handle)
        os.unlink(self.path)
--------------------------------------------------------------------------------------
property.py
-------------------------------------------------------------------------------------
from zope import component
import interfaces
from cStringIO import StringIO

from datamanager import getFile, _storage

_marker = object()

BLOCK_SIZE = 1024*128

class ExtBytesProperty(object):

    """a property whose values are stored as external files

    Only the digest of the file is kept on the instance, in
    ``inst.__dict__`` under the name given to the constructor.  The file
    itself is handled by the IHashDir utility.
    """

    def __init__(self, name):
        self.__name = name

    def _release(self, inst):
        """forget the digest stored on inst, if any, and release its file"""
        digest = inst.__dict__.get(self.__name, _marker)
        if digest is _marker:
            return
        self.hd.delete(digest)
        # forget the digest only after the release succeeded, so that a
        # second delete cannot release the same reference twice
        del inst.__dict__[self.__name]

    def __delete__(self, inst):
        """release the stored file; does nothing if no value is stored"""
        self._release(inst)

    @property
    def hd(self):
        return component.getUtility(interfaces.IHashDir)

    def __get__(self, inst, klass):
        """return the file for the stored digest, or None if nothing is
        stored; accessed on the class, return the property itself"""
        if inst is None:
            return self
        digest = inst.__dict__.get(self.__name, _marker)
        if digest is _marker:
            return None
        return getFile(digest)

    def __set__(self, inst, value):
        """store value as an external file and remember its digest

        value may be None (the stored value is dropped), a unicode string
        (encoded as UTF-8), a byte string, or an object providing ``seek``
        and ``read``.
        """
        if value is None:
            # dropping the value must release the file reference as well
            self._release(inst)
            return
        # Handle case when value is a string
        if isinstance(value, unicode):
            value = value.encode('UTF-8')
        if isinstance(value, str):
            value = StringIO(value)
        value.seek(0)
        f = self.hd.new()
        try:
            chunk = value.read(BLOCK_SIZE)
            while chunk:
                f.write(chunk)
                chunk = value.read(BLOCK_SIZE)
        except:
            # cleanup only: remove the temporary file, then let the
            # original error propagate unchanged
            f.abort()
            raise
        newDigest = f.commit()
        oldDigest = inst.__dict__.get(self.__name, _marker)
        if newDigest == oldDigest:
            # we have no change, so we have to seek to zero
            # because this is normal behaviour when setting a
            # new value
            if hasattr(_storage, 'dataManager'):
                files = _storage.dataManager.files
                if newDigest in files:
                    files[newDigest].seek(0)
        else:
            inst.__dict__[self.__name] = newDigest
        # f.commit() above took one reference for newDigest, so the
        # reference previously held by inst is released in both cases:
        #   same digest      -> balances the extra reference just taken
        #   different digest -> the old file is no longer used by inst
        if oldDigest is not _marker:
            self.hd.delete(oldDigest)

--------------------------------------------------------------------------------------
file/file.py
--------------------------------------------------------------------------------------
from persistent import  Persistent
from z3c.extfile.property import ExtBytesProperty
from interfaces import IExtFile
from zope import interface

class ExtFile(Persistent):

    """A zope file implementation based on z3c.extfile

    The content is stored through the ``data`` property; call ``delete()``
    explicitly to release the external file before removing the object.
    """

    interface.implements(IExtFile)
    data = ExtBytesProperty('data')

    def __init__(self, data='', contentType=''):
        self.data = data
        self.contentType = contentType

    # NOTE: there is deliberately no __del__ here.  Python calls __del__
    # when the in-memory instance is destroyed, which is unrelated to the
    # persistent object being removed from its container, so releasing the
    # file there could drop content that is still referenced.

    def delete(self):
        """release the external file referenced by this object"""
        del self.data
        print("deleted data via delete()")

    def getSize(self):
        """return the size of the data in bytes, 0 if no data is stored"""
        data = self.data
        if data is None:
            return 0
        return len(data)
--------------------------------------------------------------------------------------

rgds,

jayaraj



>>
>> But form a general perspective you are correct.... may be HashDir will
>> have to evolve to add reference counting feture just like python do!
>>
>> On Wed, Sep 24, 2008 at 11:34 AM, Jürgen kartnaller <
>> juergen.kartnaller at gmail.com> wrote:
>>
>>> Hi jayaraj,
>>> it is definitely not a good idea to implicitily delete files.
>>>
>>> What if two ExtFile objects reference the same file ?
>>>
>>> Jürgen
>>>
>>> On Wed, Sep 24, 2008 at 4:37 AM, Jayarajan Jn <jayarajan at musmo.com>wrote:
>>>
>>>> Hi,
>>>> i am now doing some prototypes for my projects which will be dealing
>>>> with tones of files. After a little scouting i decided to try z3c.extfile.
>>>> Every thing works fine. But i found it strange that there is no delete
>>>> feature in z3c.extfile. ie, even if i can delete a ExtFile object, the file
>>>> in the hash directory is not getting deleted! and it keeps on
>>>> accumulating...
>>>>
>>>> So i  thought i 'll add a delete feature... but  my  __del__() approach
>>>> doesn't  work for me. but i added an additional delete() function too which
>>>> can be invoked explicitly to delete the file before trying to delete ExtFile
>>>> object.
>>>>
>>>> i made following  changes to the source...
>>>> inside  z3c.extfile.file.file.ExtFile,
>>>>
>>>>
>>>> ----------------------------------------------------------------------------------------------
>>>> class ExtFile(Persistent):
>>>>
>>>>     """A zope file implementation based on z3c.extfile"""
>>>>
>>>>     interface.implements(IExtFile)
>>>>     data = ExtBytesProperty('data')
>>>>
>>>>     def __init__(self, data='', contentType=''):
>>>>         self.data = data
>>>>         self.contentType = contentType
>>>>
>>>>     # added the following lines#
>>>>
>>>> *    def __del__(self):   # <- this is not being invoked when i try to
>>>> delete an extfile object
>>>>         del self.data
>>>>         #print "deleted data via destructor"
>>>>
>>>>     def delete(self):     # <- added this to be able to manually able to
>>>> delete files
>>>>         del self.data
>>>>         #print "deleted data via delete()"
>>>>
>>>>     # # # # # # #  # # # # # # #  #
>>>> *
>>>>     def getSize(self):
>>>>         return len(self.data)
>>>>
>>>>
>>>> ----------------------------------------------------------------------------------------------
>>>>
>>>> and the 'data' is a 'property'  (ExtBytesProperty)
>>>> so i made following changes to z3c.extfile.property.ExtBytesProperty
>>>>
>>>>
>>>> ----------------------------------------------------------------------------------------------
>>>> class ExtBytesProperty(object):
>>>>
>>>>     """a property which's values are stored as external files"""
>>>>
>>>>     def __init__(self, name):
>>>>         self.__name = name
>>>>
>>>>
>>>>
>>>>     # added the following lines#
>>>>
>>>> *    def __delete__(self,inst):
>>>>         digest = inst.__dict__[self.__name]
>>>>         self.hd.delete(digest)
>>>>
>>>> *    *# # # # # # #  # # # # # # #  #
>>>>
>>>>
>>>>
>>>> *    @property
>>>>     def hd(self):
>>>>         return component.getUtility(interfaces.IHashDir)
>>>>
>>>>     def __get__(self, inst, klass):
>>>>
>>>>         if inst is None:
>>>>             return self
>>>>         digest = inst.__dict__.get(self.__name, _marker)
>>>>         if digest is _marker:
>>>>             return None
>>>>         return getFile(digest)
>>>>
>>>>     def __set__(self, inst, value):
>>>>         # ignore if value is None
>>>>         if value is None:
>>>>             if inst.__dict__.has_key(self.__name):
>>>>                 del inst.__dict__[self.__name]
>>>>             return
>>>>         # Handle case when value is a string
>>>>         if isinstance(value, unicode):
>>>>             value = value.encode('UTF-8')
>>>>         if isinstance(value, str):
>>>>             value = StringIO(value)
>>>>         value.seek(0)
>>>>         f = self.hd.new()
>>>>         while True:
>>>>             chunk = value.read(BLOCK_SIZE)
>>>>             if not chunk:
>>>>                 newDigest = f.commit()
>>>>                 oldDigest = inst.__dict__.get(self.__name, _marker)
>>>>                 if newDigest == oldDigest:
>>>>                     # we have no change, so we have to seek to zero
>>>>                     # because this is normal behaviour when setting a
>>>>                     # new value
>>>>                     if hasattr(_storage, 'dataManager'):
>>>>                         if newDigest in _storage.dataManager.files:
>>>>                             f = _storage.dataManager.files[newDigest]
>>>>                             f.seek(0)
>>>>                 else:
>>>>                     inst.__dict__[self.__name] = newDigest
>>>>                 break
>>>>             f.write(chunk)
>>>>
>>>> ----------------------------------------------------------------------------------------------------
>>>>
>>>> and at last added the real code which delete the file in hash directory
>>>> too
>>>>
>>>> i added following codes inside z3c.extfile.hashdir.HashDir class
>>>> ---------------------------------------------------
>>>> def delete(self,digest):
>>>>         """delete the file"""
>>>>         path=self.getPath(digest)
>>>>         if os.path.exists(path):
>>>>             os.remove(path)
>>>>         return
>>>> ----------------------------------------------------
>>>>
>>>> Now, everything works fine when i try to delete an ExtFile object in
>>>> ZODB, __del__() is not being invoked!!!!!
>>>>
>>>> can anyone tell me how can i fix this???
>>>>
>>>> thanks in advance
>>>>
>>>> jayaraj
>>>>
>>>> _______________________________________________
>>>> Zope-Dev maillist  -  Zope-Dev at zope.org
>>>> http://mail.zope.org/mailman/listinfo/zope-dev
>>>> **  No cross posts or HTML encoding!  **
>>>> (Related lists -
>>>>  http://mail.zope.org/mailman/listinfo/zope-announce
>>>>  http://mail.zope.org/mailman/listinfo/zope )
>>>>
>>>>
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.zope.org/pipermail/zope-dev/attachments/20080926/c1e8a949/attachment-0001.html 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: z3c.extfile-diffs.tar.gz
Type: application/x-gzip
Size: 1303 bytes
Desc: not available
Url : http://mail.zope.org/pipermail/zope-dev/attachments/20080926/c1e8a949/attachment-0002.gz 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: z3c.extfile-modified.tar.gz
Type: application/x-gzip
Size: 21492 bytes
Desc: not available
Url : http://mail.zope.org/pipermail/zope-dev/attachments/20080926/c1e8a949/attachment-0003.gz 


More information about the Zope-Dev mailing list