[Zope-dev] delete feature to z3c.extfile

Jayarajan Jn jayarajan at musmo.com
Fri Sep 26 09:50:18 EDT 2008

For the convenience of all, I am attaching the diffs separately.


On Fri, Sep 26, 2008 at 7:08 PM, Jayarajan Jn <jayarajan at musmo.com> wrote:

> On Wed, Sep 24, 2008 at 8:58 PM, Jürgen kartnaller <
> juergen.kartnaller at gmail.com> wrote:
>> On Wed, Sep 24, 2008 at 9:10 AM, Jayarajan Jn <jayarajan at musmo.com>wrote:
>>> Hi Jürgen,
>>> First of all thank you for your reply....
>>> Hmm, you are right too... And I am comfortable with the explicit delete.
>>> But in my prototype I have also added a WebDAV interface for managing
>>> extfile.... Then when a user deletes the extfile object through the WebDAV
>>> interface, the file remains there.... because I don't know where to put my code
>>> to call delete() in that case... And in my project there won't be more than
>>> one extfile referring to the same file! So in my case it's OK to go for implicit
>>> delete! but its now working!
>> You can never be sure if a file is only used once. If two users upload the
>> same file, only one copy is stored in extfile because both files have the
>> same hash.
> Yes, that's right... we will never know how many extfile objects share the
> file. But there are enough resources to be able to find out.
> We can add reference-counting functionality to the HashDir class, and then
> we can keep track of this.... See my code below.... I have created a
> ReferenceCounter and it works fine. I can now implicitly delete without
> worry.
> But still, when I use it in my Zope application, '__del__' itself is
> *not* being invoked :?
> Please tell me what you think....
> referencecounter.py:-
> ------------------------------------------------------------------------------------
> import os
> from persistent import Persistent
> class ReferenceCounter(Persistent):
>     """ keeps track number of ExtBytesProperties
>         refering to the a file in HashDir        """
>     def __init__(self, dirpath):
>         self.path = os.path.join(dirpath,'refcount')
>         self.counts={}
>         if os.path.exists(self.path): # else case? we will deal with it in
> commit!
>             f= open(self.path)
>             for line in f:
>                 digest, count=line.split(',')
>                 self.counts[digest]=int(count)
>             f.close()
>     def getCount(self,digest):
>         return self.counts.get(digest,0)
>     def getTotalFiles(self):
>         return len(self.counts)
>     def addReference(self,digest):
>         newCount=self.getCount(digest)+1
>         self.counts[digest]=newCount
>         return newCount
>     def removeReference(self,digest):
>         newCount=self.getCount(digest)-1
>         if newCount > 0:
>             self.counts[digest]=newCount
>         elif newCount == 0:
>             del self.counts[digest]
>         return newCount
>     def commit(self):
>         f=open(self.path,'w')
>         f.write('\n'.join("%s,%s" % (digest,count)
>             for digest,count in self.counts.items()))
>         f.close()
> --------------------------------------------------------------------------------
> hashdir.py
> --------------------------------------------------------------------------------
> import sha
> import os
> import stat
> import tempfile
> import shutil
> from types import StringTypes, UnicodeType
> import interfaces
> from zope import interface
> from persistent import Persistent
> from zope.cachedescriptors.property import Lazy
> from referencecounter import ReferenceCounter
> class HashDir(Persistent):
>     """a directory holding files named after their sha1 hash"""
>     interface.implements(interfaces.IHashDir)
>     _path = None
>     def __init__(self, path=None):
>         self.path = path
>         self.refCounter = ReferenceCounter(self.etc)
>     def _setPath(self, path):
>         if path is None:
>             return
>         self._path = os.path.abspath(path)
>         self.tmp = os.path.join(self.path, 'tmp')
>         self.var = os.path.join(self.path, 'var')
>         self.etc = os.path.join(self.path, 'etc')
>         self._initPaths()
>     def _getPath(self):
>         return self._path
>     path = property(_getPath,_setPath)
>     def _initPaths(self):
>         for path in [self.path,self.var,self.tmp,self.etc]:
>             if not os.path.exists(path):
>                 os.mkdir(path)
>     def new(self):
>         """returns a new filehandle"""
>         handle, path = tempfile.mkstemp(prefix='dirty.',
>                                         dir=self.tmp)
>         return WriteFile(self, handle, path)
>     def commit(self, f):
>         """commit a file, this is called by the file"""
>         digest = f.sha.hexdigest()
>         target = os.path.join(self.var, digest)
>         if os.path.exists(target):
>             # we have that content so just delete the tmp file
>             os.remove(f.path)
>         else:
>             shutil.move(f.path, target)
>             os.chmod(target, 0440)
>         self.refCounter.addReference(digest)
>         self.refCounter.commit()
>         return digest
>     def digests(self):
>         """returns all digests stored"""
>         return os.listdir(self.var)
>     def getPath(self, digest):
>         if type(digest) not in StringTypes or len(digest) != 40:
>             raise ValueError, repr(digest)
>         if type(self.var) is UnicodeType:
>             digest = unicode(digest)
>         path = os.path.join(self.var, digest)
>         if not os.path.isfile(path):
>             raise KeyError, digest
>         return path
>     def getSize(self, digest):
>         return os.path.getsize(self.getPath(digest))
>     def open(self, digest):
>         return ReadFile(self.getPath(digest))
>     def delete(self,digest):
>         """delete the file"""
>         path=self.getPath(digest)
>         if os.path.exists(path):
>             if self.refCounter.removeReference(digest) is 0:
>                 os.remove(path)
>             self.refCounter.commit()
>         return
> class ReadFile(object):
>     """A lazy read file implementation"""
>     interface.implements(interfaces.IReadFile)
>     def __init__(self, name, bufsize=-1):
>         self.name = name
>         self.digest = str(os.path.split(self.name)[1])
>         self.bufsize=bufsize
>         self._v_len = None
>         self._v_file = None
>     @property
>     def _file(self):
>         if not self.closed:
>             return self._v_file
>         self._v_file = file(self.name, 'rb', self.bufsize)
>         return self._v_file
>     @Lazy
>     def ctime(self):
>         return int(os.stat(self.name)[stat.ST_CTIME])
>     @Lazy
>     def atime(self):
>         return int(os.stat(self.name)[stat.ST_ATIME])
>     def __len__(self):
>         if self._v_len is None:
>             self._v_len = int(os.stat(self.name)[stat.ST_SIZE])
>         return self._v_len
>     def __repr__(self):
>         return "<ReadFile named %s>" % repr(self.digest)
>     @property
>     def closed(self):
>         """like file closed, but lazy"""
>         return self._v_file is None or self._v_file.closed
>     def seek(self, offset, whence=0):
>         """see file.seek"""
>         # we optimize when we have 0, 0 then we do not need to open
>         # the file if it is closed, because on the next read we are at
>         # 0
>         if offset==0 and whence==0 and self.closed:
>             return
>         return self._file.seek(offset, whence)
>     def tell(self):
>         """see file.tell"""
>         if self.closed:
>             return 0
>         return self._file.tell()
>     def read(self, size=-1):
>         """see file.read"""
>         return self._file.read(size)
>     def close(self):
>         """see file.close"""
>         if not self.closed:
>             self._v_file.close()
>         self._v_file = None
>     def fileno(self):
>         return self._file.fileno()
>     def __iter__(self):
>         return self._file.__iter__()
> class WriteFile(object):
>     interface.implements(interfaces.IWriteFile)
>     def __init__(self, hd, handle, path):
>         self.hd = hd
>         self.handle = handle
>         self.path = path
>         self.sha = sha.new()
>         self._pos = 0
>     def write(self, s):
>         self.sha.update(s)
>         os.write(self.handle, s)
>         self._pos += len(s)
>     def commit(self):
>         """returns the sha digest and saves the file"""
>         os.close(self.handle)
>         return self.hd.commit(self)
>     def tell(self):
>         """see file.tell"""
>         return self._pos
>     def abort(self):
>         """abort the write and delete file"""
>         os.close(self.handle)
>         os.unlink(self.path)
> --------------------------------------------------------------------------------------
> property.py
> -------------------------------------------------------------------------------------
> from zope import component
> import interfaces
> from cStringIO import StringIO
> from datamanager import getFile, _storage
> _marker = object()
> BLOCK_SIZE = 1024*128
> class ExtBytesProperty(object):
>     """a property which's values are stored as external files"""
>     def __init__(self, name):
>         self.__name = name
>     def __delete__(self,inst):
>         digest = inst.__dict__[self.__name]
>         self.hd.delete(digest)
>     @property
>     def hd(self):
>         return component.getUtility(interfaces.IHashDir)
>     def __get__(self, inst, klass):
>         if inst is None:
>             return self
>         digest = inst.__dict__.get(self.__name, _marker)
>         if digest is _marker:
>             return None
>         return getFile(digest)
>     def __set__(self, inst, value):
>         # ignore if value is None
>         if value is None:
>             if inst.__dict__.has_key(self.__name):
>                 del inst.__dict__[self.__name]
>             return
>         # Handle case when value is a string
>         if isinstance(value, unicode):
>             value = value.encode('UTF-8')
>         if isinstance(value, str):
>             value = StringIO(value)
>         value.seek(0)
>         f = self.hd.new()
>         while True:
>             chunk = value.read(BLOCK_SIZE)
>             if not chunk:
>                 newDigest = f.commit()
>                 oldDigest = inst.__dict__.get(self.__name, _marker)
>                 if newDigest == oldDigest:
>                     # we have no change, so we have to seek to zero
>                     # because this is normal behaviour when setting a
>                     # new value
>                     if hasattr(_storage, 'dataManager'):
>                         if newDigest in _storage.dataManager.files:
>                             f = _storage.dataManager.files[newDigest]
>                             f.seek(0)
>                 else:
>                     inst.__dict__[self.__name] = newDigest
>                 """
>                     case 1: oldDigest=newDigets though the object is still
> refering
>                             to same file f.comit would have incremented the
> reference by one.
>                             So to balance it oldDigest must be deleted.
>                     case 2: object referencing a new file. So old file must
> be deleted
>                 """
>                 if oldDigest is not _marker:
>                     self.hd.delete(oldDigest)
>                 break
>             f.write(chunk)
> --------------------------------------------------------------------------------------
> file/file.py
> --------------------------------------------------------------------------------------
> from persistent import  Persistent
> from z3c.extfile.property import ExtBytesProperty
> from interfaces import IExtFile
> from zope import interface
> class ExtFile(Persistent):
>     """A zope file implementation based on z3c.extfile"""
>     interface.implements(IExtFile)
>     data = ExtBytesProperty('data')
>     def __init__(self, data='', contentType=''):
>         self.data = data
>         self.contentType = contentType
>     def __del__(self):
>         del self.data
>         print "deleted data"
>     def delete(self):
>         del self.data
>         print "deleted data via delete()"
>     def getSize(self):
>         return len(self.data)
> --------------------------------------------------------------------------------------
> rgds,
> jayaraj
>>> But from a general perspective you are correct.... Maybe HashDir will
>>> have to evolve to add a reference-counting feature, just like Python does!
>>> On Wed, Sep 24, 2008 at 11:34 AM, Jürgen kartnaller <
>>> juergen.kartnaller at gmail.com> wrote:
>>>> Hi jayaraj,
>>>> It is definitely not a good idea to implicitly delete files.
>>>> What if two ExtFile objects reference the same file?
>>>> Jürgen
>>>> On Wed, Sep 24, 2008 at 4:37 AM, Jayarajan Jn <jayarajan at musmo.com>wrote:
>>>>> Hi,
>>>>> I am now doing some prototypes for my projects, which will be dealing
>>>>> with tons of files. After a little scouting I decided to try z3c.extfile.
>>>>> Everything works fine. But I found it strange that there is no delete
>>>>> feature in z3c.extfile, i.e., even though I can delete an ExtFile object, the file
>>>>> in the hash directory is not getting deleted, and the files keep on
>>>>> accumulating...
>>>>> So I thought I'd add a delete feature... but my __del__() approach
>>>>> doesn't work for me. I also added an additional delete() function, which
>>>>> can be invoked explicitly to delete the file before trying to delete the ExtFile
>>>>> object.
>>>>> I made the following changes to the source...
>>>>> inside  z3c.extfile.file.file.ExtFile,
>>>>> ----------------------------------------------------------------------------------------------
>>>>> class ExtFile(Persistent):
>>>>>     """A zope file implementation based on z3c.extfile"""
>>>>>     interface.implements(IExtFile)
>>>>>     data = ExtBytesProperty('data')
>>>>>     def __init__(self, data='', contentType=''):
>>>>>         self.data = data
>>>>>         self.contentType = contentType
>>>>>     # added the following lines#
>>>>> *    def __del__(self):   # <- this is not being invoked when i try to
>>>>> delete an extfile object
>>>>>         del self.data
>>>>>         #print "deleted data via destructor"
>>>>>     def delete(self):     # <- added this to be able to manually able
>>>>> to delete files
>>>>>         del self.data
>>>>>         #print "deleted data via delete()"
>>>>>     # # # # # # #  # # # # # # #  #
>>>>> *
>>>>>     def getSize(self):
>>>>>         return len(self.data)
>>>>> ----------------------------------------------------------------------------------------------
>>>>> and the 'data' is a 'property'  (ExtBytesProperty)
>>>>> so i made following changes to z3c.extfile.property.ExtBytesProperty
>>>>> ----------------------------------------------------------------------------------------------
>>>>> class ExtBytesProperty(object):
>>>>>     """a property which's values are stored as external files"""
>>>>>     def __init__(self, name):
>>>>>         self.__name = name
>>>>>     # added the following lines#
>>>>> *    def __delete__(self,inst):
>>>>>         digest = inst.__dict__[self.__name]
>>>>>         self.hd.delete(digest)
>>>>> *    *# # # # # # #  # # # # # # #  #
>>>>> *    @property
>>>>>     def hd(self):
>>>>>         return component.getUtility(interfaces.IHashDir)
>>>>>     def __get__(self, inst, klass):
>>>>>         if inst is None:
>>>>>             return self
>>>>>         digest = inst.__dict__.get(self.__name, _marker)
>>>>>         if digest is _marker:
>>>>>             return None
>>>>>         return getFile(digest)
>>>>>     def __set__(self, inst, value):
>>>>>         # ignore if value is None
>>>>>         if value is None:
>>>>>             if inst.__dict__.has_key(self.__name):
>>>>>                 del inst.__dict__[self.__name]
>>>>>             return
>>>>>         # Handle case when value is a string
>>>>>         if isinstance(value, unicode):
>>>>>             value = value.encode('UTF-8')
>>>>>         if isinstance(value, str):
>>>>>             value = StringIO(value)
>>>>>         value.seek(0)
>>>>>         f = self.hd.new()
>>>>>         while True:
>>>>>             chunk = value.read(BLOCK_SIZE)
>>>>>             if not chunk:
>>>>>                 newDigest = f.commit()
>>>>>                 oldDigest = inst.__dict__.get(self.__name, _marker)
>>>>>                 if newDigest == oldDigest:
>>>>>                     # we have no change, so we have to seek to zero
>>>>>                     # because this is normal behaviour when setting a
>>>>>                     # new value
>>>>>                     if hasattr(_storage, 'dataManager'):
>>>>>                         if newDigest in _storage.dataManager.files:
>>>>>                             f = _storage.dataManager.files[newDigest]
>>>>>                             f.seek(0)
>>>>>                 else:
>>>>>                     inst.__dict__[self.__name] = newDigest
>>>>>                 break
>>>>>             f.write(chunk)
>>>>> ----------------------------------------------------------------------------------------------------
>>>>> And finally I added the actual code that deletes the file in the hash
>>>>> directory, too.
>>>>> I added the following code inside the z3c.extfile.hashdir.HashDir class:
>>>>> ---------------------------------------------------
>>>>> def delete(self,digest):
>>>>>         """delete the file"""
>>>>>         path=self.getPath(digest)
>>>>>         if os.path.exists(path):
>>>>>             os.remove(path)
>>>>>         return
>>>>> ----------------------------------------------------
>>>>> Now everything works fine, except that when I try to delete an ExtFile
>>>>> object in the ZODB, __del__() is not being invoked!
>>>>> Can anyone tell me how I can fix this?
>>>>> thanks in advance
>>>>> jayaraj
