[Checkins] SVN: z3c.filetype/ initial import of z3c.filetype

Bernd Dorn bernd.dorn at fhv.at
Fri Aug 11 09:56:25 EDT 2006


Log message for revision 69401:
  initial import of z3c.filetype
  

Changed:
  A   z3c.filetype/
  A   z3c.filetype/trunk/
  A   z3c.filetype/trunk/LICENSES.txt
  A   z3c.filetype/trunk/src/
  A   z3c.filetype/trunk/src/z3c/
  A   z3c.filetype/trunk/src/z3c/__init__.py
  A   z3c.filetype/trunk/src/z3c/filetype/
  A   z3c.filetype/trunk/src/z3c/filetype/README.txt
  A   z3c.filetype/trunk/src/z3c/filetype/__init__.py
  A   z3c.filetype/trunk/src/z3c/filetype/api.py
  A   z3c.filetype/trunk/src/z3c/filetype/guess.py
  A   z3c.filetype/trunk/src/z3c/filetype/interfaces/
  A   z3c.filetype/trunk/src/z3c/filetype/interfaces/__init__.py
  A   z3c.filetype/trunk/src/z3c/filetype/interfaces/filetypes.py
  A   z3c.filetype/trunk/src/z3c/filetype/magic.mime
  A   z3c.filetype/trunk/src/z3c/filetype/magic.py
  A   z3c.filetype/trunk/src/z3c/filetype/magic.txt
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/DS_Store
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/jumps.mov
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/logo.gif
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/test.flv
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/test.gnutar
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/test.html
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/test.png
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/test.tar
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/test.tgz
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/test.txt.gz
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/test2.html
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/test2.thml
  A   z3c.filetype/trunk/src/z3c/filetype/testdata/thumbnailImage_small.jpeg
  A   z3c.filetype/trunk/src/z3c/filetype/tests.py

-=-
Added: z3c.filetype/trunk/LICENSES.txt
===================================================================
--- z3c.filetype/trunk/LICENSES.txt	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/LICENSES.txt	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,18 @@
+========
+LICENCES
+========
+
+z3c.filetype.magic
+==================
+
+Unknown author without a license as stated in the source:
+
+# Found on a russian zope mailing list, and modified to fix bugs in parsing
+# the magic file and string making
+# -- Daniel Berlin <dberlin at dberlin.org>
+
+magic.mime file
+===============
+
+See attributions in file.
+


Property changes on: z3c.filetype/trunk/LICENSES.txt
___________________________________________________________________
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/__init__.py
===================================================================
--- z3c.filetype/trunk/src/z3c/__init__.py	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/__init__.py	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,23 @@
+##############################################################################
+#
+# Copyright (c) 2004 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+#
+# This file is necessary to make this directory a package.
+
+try:
+    # Declare this a namespace package if pkg_resources is available.
+    import pkg_resources
+    pkg_resources.declare_namespace('z3c')
+except ImportError:
+    pass
+


Property changes on: z3c.filetype/trunk/src/z3c/__init__.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/README.txt
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/README.txt	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/README.txt	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,76 @@
+================
+Filetype Package
+================
+
+This package provides a way to determine the interfaces a file
+provides, based on its content, filename or mime-type.
+
+  >>> from z3c.filetype import api
+
+We take some files for demonstration from the testdata directory.
+
+  >>> import os
+  >>> testData = os.path.join(os.path.dirname(api.__file__),'testdata')
+
+  >>> fileNames = sorted(os.listdir(testData))
+
+  >>> for name in fileNames:
+  ...     path = os.path.join(testData, name)
+  ...     i =  api.getInterfacesFor(file(path))
+  ...     print name
+  ...     print i
+  DS_Store
+  set([])
+  jumps.mov
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IVideoFile>])
+  logo.gif
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IGIFFile>])
+  test.flv
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IFLVFile>])
+  test.gnutar
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.ITARFile>])
+  test.html
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IHTMLFile>])
+  test.png
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IPNGFile>])
+  test.tar
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.ITARFile>])
+  test.tgz
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IGZIPFile>])
+  test.txt.gz
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IGZIPFile>])
+  test2.html
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IHTMLFile>])
+  test2.thml
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IHTMLFile>])
+  thumbnailImage_small.jpeg
+  set([<InterfaceClass z3c.filetype.interfaces.filetypes.IJPGFile>])
+
+The filename is only used if no interface is found, because we should
+not trust the filename in most cases.
+
+  >>> f = open(os.path.join(testData, 'test.tar'))
+  >>> sorted(api.getInterfacesFor(f))
+  [<InterfaceClass z3c.filetype.interfaces.filetypes.ITARFile>]
+
+  >>> sorted(api.getInterfacesFor(filename="x.png"))
+  [<InterfaceClass z3c.filetype.interfaces.filetypes.IPNGFile>]
+
+  >>> sorted(api.getInterfacesFor(f, filename="x.png"))
+  [<InterfaceClass z3c.filetype.interfaces.filetypes.ITARFile>]
+
+
+If a mimeType is given, then the interfaces derived from it are added
+to the result, regardless of whether the content of the file suggests
+something different.
+
+  >>> sorted(api.getInterfacesFor(f, mimeType="text/plain"))
+  [<InterfaceClass z3c.filetype.interfaces.filetypes.ITARFile>,
+   <InterfaceClass z3c.filetype.interfaces.filetypes.ITextFile>]
+
+You can also provide a path instead of a stream.
+
+  >>> f.name
+  '/.../z3c/filetype/testdata/test.tar'
+  >>> sorted(api.getInterfacesFor(f.name))
+  [<InterfaceClass z3c.filetype.interfaces.filetypes.ITARFile>]


Property changes on: z3c.filetype/trunk/src/z3c/filetype/README.txt
___________________________________________________________________
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/__init__.py
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/__init__.py	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/__init__.py	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1 @@
+#package


Property changes on: z3c.filetype/trunk/src/z3c/filetype/__init__.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/api.py
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/api.py	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/api.py	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,86 @@
+from z3c.filetype import magic
+from interfaces import filetypes
+from zope.contenttype import guess_content_type
+from zope import interface
+
+magicFile = magic.MagicFile()
+
+def byMimeType(t):
+
+    """returns interfaces implemented by mimeType"""
+    
+    ifaces = [iface for name, iface in vars(filetypes).items() \
+              if name.startswith("I")]
+    res = InterfaceSet()
+    for iface in ifaces:
+        mtm = iface.queryTaggedValue(filetypes.MTM)
+        if mtm is not None:
+            if mtm.match(t):
+               res.add(iface)
+    return res
+
+def getInterfacesFor(file=None, filename=None, mimeType=None):
+
+    """returns a sequence of interfaces that are provided by file like
+    objects (file argument) with an optional filename as name or
+    mimeType as mime-type
+    """
+    
+    ifaces = set()
+    if file is not None:
+        t = magicFile.detect(file)
+        if t is not None:
+            ifaces.update(byMimeType(t))
+    if mimeType is not None:
+        ifaces.update(byMimeType(mimeType))
+    if filename is not None and not ifaces:
+        t = guess_content_type(filename)[0]
+        # don't trust this here because zope does not recognize some
+        # binary files.
+        if t and not t == 'text/x-unknown-content-type':
+            ifaces.update(byMimeType(t))
+    return ifaces
+
+
+class InterfaceSet(object):
+
+    """a set that only holds most specific interfaces
+
+    >>> s = InterfaceSet(filetypes.IBinaryFile, filetypes.IImageFile)
+    >>> sorted(s)
+    [<InterfaceClass z3c.filetype.interfaces.filetypes.IBinaryFile>,
+     <InterfaceClass z3c.filetype.interfaces.filetypes.IImageFile>]
+
+    Now we add a jpeg file which is a subclass of all ifaces in the set
+    >>> s.add(filetypes.IJPGFile)
+    >>> sorted(s)
+    [<InterfaceClass z3c.filetype.interfaces.filetypes.IJPGFile>]
+
+    If we add a new interface which is not a subclass, it is kept as well
+    >>> s.add(filetypes.ITextFile)
+    >>> sorted(s)
+    [<InterfaceClass z3c.filetype.interfaces.filetypes.IJPGFile>,
+     <InterfaceClass z3c.filetype.interfaces.filetypes.ITextFile>]
+
+    """
+
+    def __init__(self, *ifaces):
+        self._data = set()
+        for iface in ifaces:
+            self.add(iface)
+            
+    def add(self, iface):
+        assert(issubclass(iface, interface.Interface))
+        toDelete = set()
+        for i in self._data:
+            if issubclass(i,iface):
+                return
+            if issubclass(iface, i):
+                toDelete.add(i)
+        self._data.add(iface)
+        self._data.difference_update(toDelete)
+        
+    def __iter__(self):
+
+        return iter(self._data)
+


Property changes on: z3c.filetype/trunk/src/z3c/filetype/api.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/guess.py
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/guess.py	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/guess.py	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,9 @@
+
+def getInterfacesFor(data=None, filename=None, mimeType=None):
+
+    """returns a sequence of interfaces that are provided by files
+    having data as content, filename as name and mimeType as
+    mime-type
+    """
+    raise NotImplementedError
+


Property changes on: z3c.filetype/trunk/src/z3c/filetype/guess.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/interfaces/__init__.py
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/interfaces/__init__.py	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/interfaces/__init__.py	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1 @@
+# interfaces package


Property changes on: z3c.filetype/trunk/src/z3c/filetype/interfaces/__init__.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/interfaces/filetypes.py
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/interfaces/filetypes.py	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/interfaces/filetypes.py	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,75 @@
+from zope import interface
+import re
+
+# mimeTypesMatch holds the regular expression for matching mime-types
+MTM = 'mimeTypesMatch'
+# mimeType holds the preferred mime type to be returned for this
+# interface
+MT = "mimeType"
+
+class ITypedFile(interface.Interface):
+    """A Type of a file"""
+
+class IBinaryFile(ITypedFile):
+    """Binary file"""
+IBinaryFile.setTaggedValue(MTM,re.compile('application/octet-stream'))
+IBinaryFile.setTaggedValue(MT,'application/octet-stream')
+
+class ITARFile(ITypedFile):
+    """TAR archive file"""
+ITARFile.setTaggedValue(MTM,re.compile('application/x-tar'))
+ITARFile.setTaggedValue(MT,'application/x-tar')
+
+class IGZIPFile(ITypedFile):
+    """GZIP compressed file"""
+IGZIPFile.setTaggedValue(MTM,re.compile('application/x-gzip'))
+IGZIPFile.setTaggedValue(MT,'application/x-gzip')
+
+class ITextFile(ITypedFile):
+    """text files"""
+ITextFile.setTaggedValue(MTM,re.compile('^text/.+$'))
+ITextFile.setTaggedValue(MT,'text/plain')
+
+class IImageFile(ITypedFile):
+    """image files"""
+IImageFile.setTaggedValue(MTM,re.compile('^image/.+$'))
+
+class IJPGFile(IImageFile, IBinaryFile):
+    """jpeg file"""
+IJPGFile.setTaggedValue(MTM,re.compile('image/jpe?g'))
+IJPGFile.setTaggedValue(MT,'image/jpeg')
+
+class IPNGFile(IImageFile, IBinaryFile):
+    """png file"""
+IPNGFile.setTaggedValue(MTM,re.compile('image/png'))
+IPNGFile.setTaggedValue(MT,'image/png')
+
+class IGIFFile(IImageFile, IBinaryFile):
+    """gif file"""
+IGIFFile.setTaggedValue(MTM,re.compile('image/gif'))
+IGIFFile.setTaggedValue(MT,'image/gif')
+
+class IVideoFile(ITypedFile):
+    """video file"""
+IVideoFile.setTaggedValue(MTM,re.compile('^video/.+$'))
+
+class IFLVFile(IVideoFile, IBinaryFile):
+    """Macromedia Flash FLV Video File Format"""
+IFLVFile.setTaggedValue(MTM,re.compile('video/x-flv'))
+IFLVFile.setTaggedValue(MT,'video/x-flv')
+
+class IAudioFile(ITypedFile):
+    """audio file"""
+IAudioFile.setTaggedValue(MTM,re.compile('^audio/.+$'))
+
+class IHTMLFile(ITextFile):
+    """HTML file"""
+IHTMLFile.setTaggedValue(MTM,re.compile('text/html'))
+IHTMLFile.setTaggedValue(MT,'text/html')
+
+class IXMLFile(ITextFile):
+    """XML File"""
+IXMLFile.setTaggedValue(MTM,re.compile('text/xml'))
+IXMLFile.setTaggedValue(MT,'text/xml')
+
+


Property changes on: z3c.filetype/trunk/src/z3c/filetype/interfaces/filetypes.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/magic.mime
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/magic.mime	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/magic.mime	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,916 @@
+# Magic data for KMimeMagic (originally for file(1) command)
+#
+# The format is 4-5 columns:
+#    Column #1: byte number to begin checking from, ">" indicates continuation
+#    Column #2: type of data to match
+#    Column #3: contents of data to match
+#    Column #4: MIME type of result
+#    Column #5: MIME encoding of result (optional)
+
+#------------------------------------------------------------------------------
+# Localstuff:  file(1) magic for locally observed files
+# Add any locally observed files here.
+
+# Real Audio (Magic .ra\0375)
+0	belong		0x2e7261fd	audio/x-pn-realaudio
+0	string		.RMF		application/vnd.rn-realmedia
+
+#video/x-pn-realvideo
+#video/vnd.rn-realvideo
+#application/vnd.rn-realmedia
+#	sigh, there are many mimes for that but the above are the most common.
+
+# Taken from magic, converted to magic.mime
+# mime types according to http://www.geocities.com/nevilo/mod.htm:
+#	audio/it	.it
+#	audio/x-zipped-it	.itz
+#	audio/xm	fasttracker modules
+#	audio/x-s3m	screamtracker modules
+#	audio/s3m	screamtracker modules
+#	audio/x-zipped-mod	mdz
+#	audio/mod	mod
+#	audio/x-mod	All modules (mod, s3m, 669, mtm, med, xm, it, mdz, stm, itz, xmz, s3z)
+
+# Taken from loader code from mikmod version 2.14
+# by Steve McIntyre (stevem at chiark.greenend.org.uk)
+# <doj at cubic.org> added title printing on 2003-06-24
+0	string	MAS_UTrack_V00
+>14	string	>/0		audio/x-mod
+#audio/x-tracker-module
+
+#0	string	UN05		MikMod UNI format module sound data
+
+0	string	Extended\ Module: audio/x-mod
+#audio/x-tracker-module
+##>17	string	>\0		Title: "%s"
+
+21	string/c	!SCREAM!	audio/x-mod
+#audio/x-screamtracker-module
+21	string	BMOD2STM	audio/x-mod
+#audio/x-screamtracker-module
+1080	string	M.K.		audio/x-mod
+#audio/x-protracker-module
+#>0	string	>\0		Title: "%s"
+1080	string	M!K!		audio/x-mod
+#audio/x-protracker-module
+#>0	string	>\0		Title: "%s"
+1080	string	FLT4		audio/x-mod
+#audio/x-startracker-module
+#>0	string	>\0		Title: "%s"
+1080	string	FLT8		audio/x-mod
+#audio/x-startracker-module
+#>0	string	>\0		Title: "%s"
+1080	string	4CHN		audio/x-mod
+#audio/x-fasttracker-module
+#>0	string	>\0		Title: "%s"
+1080	string	6CHN		audio/x-mod
+#audio/x-fasttracker-module
+#>0	string	>\0		Title: "%s"
+1080	string	8CHN		audio/x-mod
+#audio/x-fasttracker-module
+#>0	string	>\0		Title: "%s"
+1080	string	CD81		audio/x-mod
+#audio/x-oktalyzer-tracker-module
+#>0	string	>\0		Title: "%s"
+1080	string	OKTA		audio/x-mod
+#audio/x-oktalyzer-tracker-module
+#>0	string	>\0		Title: "%s"
+# Not good enough.
+#1082	string	CH
+#>1080	string	>/0		%.2s-channel Fasttracker "oktalyzer" module sound data
+1080	string	16CN		audio/x-mod
+#audio/x-taketracker-module
+#>0	string	>\0		Title: "%s"
+1080	string	32CN		audio/x-mod
+#audio/x-taketracker-module
+#>0	string	>\0		Title: "%s"
+
+# Impulse tracker module (it)
+0	string		IMPM		audio/x-mod
+#>4	string		>\0		"%s"
+#>40	leshort		!0		compatible w/ITv%x
+#>42	leshort		!0		created w/ITv%x
+
+#------------------------------------------------------------------------------
+# end local stuff
+#------------------------------------------------------------------------------
+
+# xml based formats!
+
+# svg
+
+0	string		\<?xml
+#			text/xml
+>38	string		\<\!DOCTYPE\040svg	image/svg+xml
+
+
+# xml
+0	string		\<?xml			text/xml
+
+
+#------------------------------------------------------------------------------
+# Java
+
+0	short		0xcafe
+>2	short		0xbabe		application/java
+
+#------------------------------------------------------------------------------
+# audio:  file(1) magic for sound formats
+#
+# from Jan Nicolai Langfeldt <janl at ifi.uio.no>,
+#
+
+# Sun/NeXT audio data
+0	string		.snd
+>12	belong		1		audio/basic
+>12	belong		2		audio/basic
+>12	belong		3		audio/basic
+>12	belong		4		audio/basic
+>12	belong		5		audio/basic
+>12	belong		6		audio/basic
+>12	belong		7		audio/basic
+
+>12	belong		23		audio/x-adpcm
+
+# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
+# that uses little-endian encoding and has a different magic number
+# (0x0064732E in little-endian encoding).
+0	lelong		0x0064732E	
+>12	lelong		1		audio/x-dec-basic
+>12	lelong		2		audio/x-dec-basic
+>12	lelong		3		audio/x-dec-basic
+>12	lelong		4		audio/x-dec-basic
+>12	lelong		5		audio/x-dec-basic
+>12	lelong		6		audio/x-dec-basic
+>12	lelong		7		audio/x-dec-basic
+#                                       compressed (G.721 ADPCM)
+>12	lelong		23		audio/x-dec-adpcm
+
+# Bytes 0-3 of AIFF, AIFF-C, & 8SVX audio files are "FORM"
+#					AIFF audio data
+8	string		AIFF		audio/x-aiff	
+#					AIFF-C audio data
+8	string		AIFC		audio/x-aiff	
+#					IFF/8SVX audio data
+8	string		8SVX		audio/x-aiff	
+
+
+
+# Creative Labs AUDIO stuff
+#					Standard MIDI data
+0	string	MThd			audio/unknown	
+#>9 	byte	>0			(format %d)
+#>11	byte	>1			using %d channels
+#					Creative Music (CMF) data
+0	string	CTMF			audio/unknown	
+#					SoundBlaster instrument data
+0	string	SBI			audio/unknown	
+#					Creative Labs voice data
+0	string	Creative\ Voice\ File	audio/unknown	
+## is this next line right?  it came this way...
+#>19	byte	0x1A
+#>23	byte	>0			- version %d
+#>22	byte	>0			\b.%d
+
+# [GRR 950115:  is this also Creative Labs?  Guessing that first line
+#  should be string instead of unknown-endian long...]
+#0	long		0x4e54524b	MultiTrack sound data
+#0	string		NTRK		MultiTrack sound data
+#>4	long		x		- version %ld
+
+# Microsoft WAVE format (*.wav)
+# [GRR 950115:  probably all of the shorts and longs should be leshort/lelong]
+#					Microsoft RIFF
+0	string		RIFF
+#					- WAVE format
+>8	string		WAVE		audio/x-wav
+>8	string		AVI		video/x-msvideo
+#
+0	belong		0x2e7261fd	application/x-realaudio
+>8 	string		CDRA		image/x-coreldraw
+
+
+# MPEG Layer 3 sound files
+#0       beshort		&0xffe0		audio/mpeg
+#MP3 with ID3 tag
+0	string		ID3		audio/mpeg
+# Ogg/Vorbis
+0	string		OggS		application/ogg
+
+#------------------------------------------------------------------------------
+# c-lang:  file(1) magic for C programs or various scripts
+#
+
+# XPM icons (Greg Roelofs, newt at uchicago.edu)
+# ideally should go into "images", but entries below would tag XPM as C source
+0	string		/*\ XPM		image/x-xpm	7bit
+
+# 3DS (3d Studio files)
+16	beshort		0x3d3d		image/x-3ds
+
+# this first will upset you if you're a PL/1 shop... (are there any left?)
+# in which case rm it; ascmagic will catch real C programs
+#					C or REXX program text
+#0	string		/*		text/x-c
+#					C++ program text
+#0	string		//		text/x-c++
+
+#------------------------------------------------------------------------------
+# commands:  file(1) magic for various shells and interpreters
+#
+#0       string          :\ shell archive or commands for antique kernel text
+0       string          #!/bin/sh               application/x-shellscript
+0       string          #!\ /bin/sh             application/x-shellscript
+0       string          #!/bin/csh              application/x-shellscript
+0       string          #!\ /bin/csh            application/x-shellscript
+# korn shell magic, sent by George Wu, gwu at clyde.att.com
+0       string          #!/bin/ksh              application/x-shellscript
+0       string          #!\ /bin/ksh            application/x-shellscript
+0       string          #!/bin/tcsh             application/x-shellscript
+0       string          #!\ /bin/tcsh           application/x-shellscript
+0       string          #!/usr/local/tcsh       application/x-shellscript
+0       string          #!\ /usr/local/tcsh     application/x-shellscript
+0       string          #!/usr/local/bin/tcsh   application/x-shellscript
+0       string          #!\ /usr/local/bin/tcsh application/x-shellscript
+# bash shell magic, from Peter Tobias (tobias at server.et-inf.fho-emden.de)
+0       string          #!/bin/bash     		application/x-shellscript
+0       string          #!\ /bin/bash           application/x-shellscript
+0       string          #!/usr/local/bin/bash   application/x-shellscript
+0       string          #!\ /usr/local/bin/bash application/x-shellscript
+
+#
+# zsh/ash/ae/nawk/gawk magic from cameron at cs.unsw.oz.au (Cameron Simpson)
+0       string          #!/bin/zsh	        application/x-shellscript
+0       string          #!/usr/bin/zsh	        application/x-shellscript
+0       string          #!/usr/local/bin/zsh    application/x-shellscript
+0       string          #!\ /usr/local/bin/zsh  application/x-shellscript
+0       string          #!/usr/local/bin/ash    application/x-shellscript
+0       string          #!\ /usr/local/bin/ash  application/x-shellscript
+#0       string          #!/usr/local/bin/ae     Neil Brown's ae
+#0       string          #!\ /usr/local/bin/ae   Neil Brown's ae
+0       string          #!/bin/nawk             application/x-nawk
+0       string          #!\ /bin/nawk           application/x-nawk
+0       string          #!/usr/bin/nawk         application/x-nawk
+0       string          #!\ /usr/bin/nawk       application/x-nawk
+0       string          #!/usr/local/bin/nawk   application/x-nawk
+0       string          #!\ /usr/local/bin/nawk application/x-nawk
+0       string          #!/bin/gawk             application/x-gawk
+0       string          #!\ /bin/gawk           application/x-gawk
+0       string          #!/usr/bin/gawk         application/x-gawk
+0       string          #!\ /usr/bin/gawk       application/x-gawk
+0       string          #!/usr/local/bin/gawk   application/x-gawk
+0       string          #!\ /usr/local/bin/gawk application/x-gawk
+#
+0       string          #!/bin/awk              application/x-awk
+0       string          #!\ /bin/awk            application/x-awk
+0       string          #!/usr/bin/awk          application/x-awk
+0       string          #!\ /usr/bin/awk        application/x-awk
+0       string          BEGIN                   application/x-awk
+
+# For Larry Wall's perl language.  The ``eval'' line recognizes an
+# outrageously clever hack for USG systems.
+#                               Keith Waclena <keith at cerberus.uchicago.edu>
+0       string          #!/bin/perl                     application/x-perl
+0       string          #!\ /bin/perl                   application/x-perl
+0       string          eval\ "exec\ /bin/perl          application/x-perl
+0       string          #!/usr/bin/perl                 application/x-perl
+0       string          #!\ /usr/bin/perl               application/x-perl
+0       string          eval\ "exec\ /usr/bin/perl      application/x-perl
+0       string          #!/usr/local/bin/perl           application/x-perl
+0       string          #!\ /usr/local/bin/perl         application/x-perl
+0       string          eval\ "exec\ /usr/local/bin/perl application/x-perl
+
+#------------------------------------------------------------------------------
+# compress:  file(1) magic for pure-compression formats (no archives)
+#
+# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc.
+#
+# Formats for various forms of compressed data
+# Formats for "compress" proper have been moved into "compress.c",
+# because it tries to uncompress it to figure out what's inside.
+
+# standard unix compress
+0	string		\037\235	application/x-compress
+
+# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver)
+0       string          \037\213        application/x-gzip
+
+0		string			PK\003\004		application/x-zip
+
+# RAR archiver (Greg Roelofs, newt at uchicago.edu)
+0	string		Rar!		application/x-rar
+
+# According to gzip.h, this is the correct byte order for packed data.
+0	string		\037\036	application/octet-stream
+#
+# This magic number is byte-order-independent.
+#
+0	short		017437		application/octet-stream
+
+# XXX - why *two* entries for "compacted data", one of which is
+# byte-order independent, and one of which is byte-order dependent?
+#
+# compacted data
+0	short		0x1fff		application/octet-stream
+0	string		\377\037	application/octet-stream
+# huf output
+0	short		0145405		application/octet-stream
+
+# Squeeze and Crunch...
+# These numbers were gleaned from the Unix versions of the programs to
+# handle these formats.  Note that I can only uncrunch, not crunch, and
+# I didn't have a crunched file handy, so the crunch number is untested.
+#				Keith Waclena <keith at cerberus.uchicago.edu>
+#0	leshort		0x76FF		squeezed data (CP/M, DOS)
+#0	leshort		0x76FE		crunched data (CP/M, DOS)
+
+# Freeze
+#0	string		\037\237	Frozen file 2.1
+#0	string		\037\236	Frozen file 1.0 (or gzip 0.5)
+
+# lzh?
+#0	string		\037\240	LZH compressed data
+
+257	string		ustar		application/x-tar	posix
+257	string		ustar\040\040   application/x-tar	gnu
+
+0	short		070707		application/x-cpio
+0	short		0143561		application/x-cpio	swapped
+
+0	string		=<ar>		application/x-archive
+0	string		!<arch>ch>		application/x-archive
+>8	string		debian		application/x-debian-package
+
+#------------------------------------------------------------------------------
+#
+# RPM: file(1) magic for Red Hat Packages   Erik Troan (ewt at redhat.com)
+#
+0       beshort         0xedab
+>2      beshort         0xeedb          application/x-rpm
+
+0	lelong&0x8080ffff	0x0000081a	application/x-arc	lzw
+0	lelong&0x8080ffff	0x0000091a	application/x-arc	squashed
+0	lelong&0x8080ffff	0x0000021a	application/x-arc	uncompressed
+0	lelong&0x8080ffff	0x0000031a	application/x-arc	packed
+0	lelong&0x8080ffff	0x0000041a	application/x-arc	squeezed
+0	lelong&0x8080ffff	0x0000061a	application/x-arc	crunched
+
+0	leshort	0xea60	application/x-arj
+
+# LHARC/LHA archiver (Greg Roelofs, newt at uchicago.edu)
+2	string	-lh0-	application/x-lharc	lh0
+2	string	-lh1-	application/x-lharc	lh1
+2	string	-lz4-	application/x-lharc	lz4
+2	string	-lz5-	application/x-lharc	lz5
+#	[never seen any but the last; -lh4- reported in comp.compression:]
+2	string	-lzs-	application/x-lha	lzs
+2	string	-lh\ -	application/x-lha	lh
+2	string	-lhd-	application/x-lha	lhd
+2	string	-lh2-	application/x-lha	lh2
+2	string	-lh3-	application/x-lha	lh3
+2	string	-lh4-	application/x-lha	lh4
+2	string	-lh5-	application/x-lha	lh5
+2	string	-lh6-	application/x-lha	lh6
+2	string	-lh7-	application/x-lha	lh7
+# Shell archives
+10	string	#\ This\ is\ a\ shell\ archive	application/octet-stream	x-shell
+
+#------------------------------------------------------------------------------
+# frame:  file(1) magic for FrameMaker files
+#
+# This stuff came on a FrameMaker demo tape, most of which is
+# copyright, but this file is "published" as witness the following:
+#
+0	string		\<MakerFile	application/x-frame
+0	string		\<MIFFile	application/x-frame
+0	string		\<MakerDictionary	application/x-frame
+0	string		\<MakerScreenFon	application/x-frame
+0	string		\<MML		application/x-frame
+0	string		\<Book		application/x-frame
+0	string		\<Maker		application/x-frame
+
+#------------------------------------------------------------------------------
+# html:  file(1) magic for HTML (HyperText Markup Language) docs
+#
+# from Daniel Quinlan <quinlan at yggdrasil.com>
+#
+0	string		\<HEAD	text/html
+0	string		\<head	text/html
+0	string		\<TITLE	text/html
+0	string		\<title	text/html
+0   string      \<html	text/html
+0   string      \<HTML	text/html
+0	string		\<!--	text/html
+0	string		\<h1	text/html
+0	string		\<H1	text/html
+0	string		\<!doctype\ HTML	text/html
+0	string		\<!DOCTYPE\ HTML	text/html
+0	string		\<!doctype\ html	text/html
+0	string		\<!doctype\ HTML	text/html
+
+#------------------------------------------------------------------------------
+# images:  file(1) magic for image formats (see also "c-lang" for XPM bitmaps)
+#
+# originally from jef at helios.ee.lbl.gov (Jef Poskanzer),
+# additions by janl at ifi.uio.no as well as others. Jan also suggested
+# merging several one- and two-line files into here.
+#
+# XXX - byte order for GIF and TIFF fields?
+# [GRR:  TIFF allows both byte orders; GIF is probably little-endian]
+#
+
+# [GRR:  what the hell is this doing in here?]
+#0	string		xbtoa		btoa'd file
+
+# PBMPLUS
+#					PBM file
+0	string		P1		image/x-portable-bitmap	7bit
+#					PGM file
+0	string		P2		image/x-portable-greymap	7bit
+#					PPM file
+0	string		P3		image/x-portable-pixmap	7bit
+#					PBM "rawbits" file
+0	string		P4		image/x-portable-bitmap
+#					PGM "rawbits" file
+0	string		P5		image/x-portable-greymap
+#					PPM "rawbits" file
+0	string		P6		image/x-portable-pixmap
+
+# NIFF (Navy Interchange File Format, a modification of TIFF)
+# [GRR:  this *must* go before TIFF]
+0	string		IIN1		image/x-niff
+
+# TIFF and friends
+#					TIFF file, big-endian
+0	string		MM		image/tiff
+#					TIFF file, little-endian
+0	string		II		image/tiff
+
+# possible GIF replacements; none yet released!
+# (Greg Roelofs, newt at uchicago.edu)
+#
+# GRR 950115:  this was mine ("Zip GIF"):
+#					ZIF image (GIF+deflate alpha)
+0	string		GIF94z		image/unknown
+#
+# GRR 950115:  this is Jeremy Wohl's Free Graphics Format (better):
+#					FGF image (GIF+deflate beta)
+0	string		FGF95a		image/unknown
+#
+# GRR 950115:  this is Thomas Boutell's Portable Bitmap Format proposal
+# (best; not yet implemented):
+#					PBF image (deflate compression)
+0	string		PBF		image/unknown
+
+# GIF
+0	string		GIF		image/gif
+
+# JPEG images
+0	beshort		0xffd8		image/jpeg
+
+# PC bitmaps (OS/2, Windoze BMP files)  (Greg Roelofs, newt at uchicago.edu)
+0	string		BM		image/bmp
+#>14	byte		12		(OS/2 1.x format)
+#>14	byte		64		(OS/2 2.x format)
+#>14	byte		40		(Windows 3.x format)
+#0	string		IC		icon
+#0	string		PI		pointer
+#0	string		CI		color icon
+#0	string		CP		color pointer
+#0	string		BA		bitmap array
+
+# CDROM Filesystems
+32769    string    CD001     application/x-iso9660
+
+# StuffIt X
+0	string		StuffIt!		application/x-stuffitx
+
+# Newer StuffIt archives (grant at netbsd.org)
+0	string		StuffIt			application/x-stuffit
+#>162	string		>0			: %s
+
+# BinHex is the Macintosh ASCII-encoded file format (see also "apple")
+# Daniel Quinlan, quinlan at yggdrasil.com
+11	string	must\ be\ converted\ with\ BinHex\ 4	application/mac-binhex40
+##>41	string	x					\b, version %.3s
+
+
+#------------------------------------------------------------------------------
+# lisp:  file(1) magic for lisp programs
+#
+# various lisp types, from Daniel Quinlan (quinlan at yggdrasil.com)
+0	string	;;			text/plain	8bit
+# Emacs 18 - this is always correct, but not very magical.
+0	string	\012(			application/x-elc
+# Emacs 19
+0	string	;ELC\023\000\000\000	application/x-elc
+
+#------------------------------------------------------------------------------
+# mail.news:  file(1) magic for mail and news
+#
+# There are tests to ascmagic.c to cope with mail and news.
+0	string		Relay-Version: 	message/rfc822	7bit
+0	string		#!\ rnews	message/rfc822	7bit
+0	string		N#!\ rnews	message/rfc822	7bit
+0	string		Forward\ to 	message/rfc822	7bit
+0	string		Pipe\ to 	message/rfc822	7bit
+0	string		Return-Path:	message/rfc822	7bit
+0	string		Received:	message/rfc822
+0	string		Path:		message/news	8bit
+0	string		Xref:		message/news	8bit
+0	string		From:		message/rfc822	7bit
+0	string		Article 	message/news	8bit
+#------------------------------------------------------------------------------
+# msword: file(1) magic for MS Word files
+#
+# Contributor claims:
+# Reverse-engineered MS Word magic numbers
+#
+
+0	string		\376\067\0\043			application/msword
+0	string		\320\317\021\340\241\261	application/msword
+0	string		\333\245-\0\0\0			application/msword
+
+
+
+#------------------------------------------------------------------------------
+# printer:  file(1) magic for printer-formatted files
+#
+
+# PostScript
+0	string		%!		application/postscript
+0	string		\004%!		application/postscript
+
+# Acrobat
+# (due to clamen at cs.cmu.edu)
+0	string		%PDF-		application/pdf
+
+#------------------------------------------------------------------------------
+# sc:  file(1) magic for "sc" spreadsheet
+#
+38	string		Spreadsheet	application/x-sc
+
+#------------------------------------------------------------------------------
+# tex:  file(1) magic for TeX files
+#
+# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
+#
+# From <conklin at talisman.kaleida.com>
+
+# Although we may know the offset of certain text fields in TeX DVI
+# and font files, we can't use them reliably because they are not
+# zero terminated. [but we do anyway, christos]
+0	string		\367\002	application/x-dvi
+#0	string		\367\203	TeX generic font data
+#0	string		\367\131	TeX packed font data
+#0	string		\367\312	TeX virtual font data
+#0	string		This\ is\ TeX,	TeX transcript text	
+#0	string		This\ is\ METAFONT,	METAFONT transcript text
+
+# There is no way to detect TeX Font Metric (*.tfm) files without
+# breaking them apart and reading the data.  The following patterns
+# match most *.tfm files generated by METAFONT or afm2tfm.
+2	string		\000\021	application/x-tex-tfm
+2	string		\000\022	application/x-tex-tfm
+#>34	string		>\0		(%s)
+
+# Texinfo and GNU Info, from Daniel Quinlan (quinlan at yggdrasil.com)
+0	string		\\input\ texinfo		text/x-texinfo
+0	string		This\ is\ Info\ file	text/x-info
+
+# correct TeX magic for Linux (and maybe more)
+# from Peter Tobias (tobias at server.et-inf.fho-emden.de)
+#
+0	leshort		0x02f7		application/x-dvi
+
+# RTF - Rich Text Format
+0	string		{\\rtf		text/rtf
+
+#------------------------------------------------------------------------------
+# animation:  file(1) magic for animation/movie formats
+#
+# animation formats, originally from vax at ccwf.cc.utexas.edu (VaX#n8)
+#						MPEG file
+0	belong		0x000001b3			video/mpeg
+0	belong		0x000001ba			video/mpeg
+# FLI animation format
+0	leshort		0xAF11				video/fli
+# FLC animation format
+0	leshort		0xAF12				video/flc
+# AVI
+>8	string		AVI\ 				video/avi
+#
+# SGI and Apple formats
+#
+0	string		MOVI				video/sgi
+4	string		moov				video/quicktime	moov
+4	string		mdat				video/quicktime	mdat
+# The contributor claims:
+#   I couldn't find a real magic number for these, however, this
+#   -appears- to work.  Note that it might catch other files, too,
+#   so BE CAREFUL!
+#
+# Note that title and author appear in the two 20-byte chunks
+# at decimal offsets 2 and 22, respectively, but they are XOR'ed with
+# 255 (hex FF)! DL format SUCKS BIG ROCKS.
+#
+#						DL file version 1 , medium format (160x100, 4 images/screen)
+0	byte		1			video/unknown
+0	byte		2			video/unknown
+#
+# Databases
+#
+# GDBM magic numbers
+#  Will be maintained as part of the GDBM distribution in the future.
+#  <downsj at teeny.org>
+0       belong  0x13579ace      application/x-gdbm
+0       lelong  0x13579ace      application/x-gdbm
+0       string  GDBM            application/x-gdbm
+#
+0       belong  0x061561        application/x-dbm
+#
+# Executables
+#
+0       string          \177ELF 
+>4      byte            0
+>4      byte            1
+>4      byte            2
+>5      byte            0
+>5      byte            1
+>>16    leshort         0
+>>16    leshort         1               application/x-object
+>>16    leshort         2               application/x-executable
+>>16    leshort         3               application/x-sharedlib
+>>16    leshort         4               application/x-coredump
+#
+# DOS
+0		string			MZ				application/x-dosexec
+#
+# KDE
+0		string	[KDE\ Desktop\ Entry]	application/x-kdelnk
+0		string	\#\ KDE\ Config\ File	application/x-kdelnk
+# xmcd database file for kscd
+0		string	\#\ xmcd                text/xmcd
+
+#------------------------------------------------------------------------------
+# pkgadd:  file(1) magic for SysV R4 PKG Datastreams
+#
+0       string          #\ PaCkAgE\ DaTaStReAm  application/x-svr4-package
+
+#PNG Image Format
+0	string		\x89PNG			image/png
+
+# MNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
+0	string		\x8aMNG			video/x-mng
+0	string		\x8aJNG			video/x-jng
+
+#------------------------------------------------------------------------------
+# Hierarchical Data Format, used to facilitate scientific data exchange
+# specifications at http://hdf.ncsa.uiuc.edu/
+0	belong		0x0e031301	Hierarchical Data Format (version 4) data
+0	string		\211HDF\r\n\032	Hierarchical Data Format (version 5) data
+
+# Adobe Photoshop
+0	string		8BPS			image/x-photoshop
+
+# Felix von Leitner <felix-file at fefe.de>
+0	string		d8:announce		application/x-bittorrent
+
+
+# lotus 1-2-3 document
+0	belong	0x00001a00	application/x-123
+0	belong	0x00000200 	application/x-123
+
+# MS Access database
+4	string	Standard\ Jet\ DB	application/msaccess
+
+## magic for XBase files
+#0      byte       0x02	
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0x03	
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0x04	
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0x05	
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0x30
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0x43
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0x7b
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0x83	
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0x8b
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0x8e	
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0xb3
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0      byte       0xf5
+#>8     leshort	  >0
+#>>12   leshort    0	application/x-dbf
+#
+#0	leshort		0x0006		application/x-dbt
+
+# Debian has entries for the old PGP formats:
+# pgp:  file(1) magic for Pretty Good Privacy
+# see http://lists.gnupg.org/pipermail/gnupg-devel/1999-September/016052.html
+0       beshort         0x9900                  text/PGP key public ring
+0       beshort         0x9501                  text/PGP key security ring
+0       beshort         0x9500                  text/PGP key security ring
+0       beshort         0xa600                  text/PGP encrypted data
+0       string          -----BEGIN\040PGP       text/PGP armored data 
+>15     string          PUBLIC\040KEY\040BLOCK- public key block
+>15     string          MESSAGE-                message
+>15     string          SIGNED\040MESSAGE-      signed message
+>15     string          PGP\040SIGNATURE-       signature
+0       beshort         0x8501                  data
+#
+# GnuPG Magic:
+# 
+0       beshort         0x9901                  text/GnuPG key public ring
+0       beshort         0x8501                  text/OpenPGP data
+
+# flash:        file(1) magic for Macromedia Flash file format
+#
+# See
+#
+#       http://www.macromedia.com/software/flash/open/
+#
+0	string		FWS             
+>3	byte		x			application/x-shockwave-flash
+
+# The following parameters are created for Namazu.
+# <http://www.namazu.org/>
+#
+# 1999/08/13
+#0	string		\<!--\ MHonArc		text/html; x-type=mhonarc
+0	string		BZh			application/x-bzip2
+
+# 1999/09/09
+# VRML (suggested by Masao Takaku)
+0	string		#VRML\ V1.0\ ascii	model/vrml
+0	string		#VRML\ V2.0\ utf8	model/vrml
+
+#------------------------------------------------------------------------------
+# ichitaro456: file(1) magic for Just System Word Processor Ichitaro
+#
+# Contributor kenzo-:
+# Reverse-engineered JS Ichitaro magic numbers
+#
+
+0	string		DOC
+>43	byte		0x14		application/ichitaro4
+>144	string	JDASH		application/ichitaro4
+
+0	string		DOC
+>43	byte		0x15		application/ichitaro5
+
+0	string		DOC
+>43	byte		0x16		application/ichitaro6
+
+#------------------------------------------------------------------------------
+# office97: file(1) magic for MicroSoft Office files
+#
+# Contributor kenzo-:
+# Reverse-engineered MS Office magic numbers
+#
+
+#0       string          \320\317\021\340\241\261\032\341
+#>48     byte            0x1B            application/excel
+
+2080	string	Microsoft\ Excel\ 5.0\ Worksheet	application/excel
+2114	string	Biff5					application/excel
+
+0       string	\224\246\056		application/msword
+
+0	belong	0x31be0000		application/msword
+
+0	string	PO^Q`			application/msword
+
+0	string	\320\317\021\340\241\261\032\341
+>546	string	bjbj			application/msword
+>546	string	jbjb			application/msword
+
+512	string	R\0o\0o\0t\0\ \0E\0n\0t\0r\0y	application/msword
+
+2080	string	Microsoft\ Word\ 6.0\ Document	application/msword
+2080	string	Documento\ Microsoft\ Word\ 6	application/msword
+2112	string	MSWordDoc			application/msword
+
+#0	string	\320\317\021\340\241\261\032\341	application/powerpoint
+0	string	\320\317\021\340\241\261\032\341	application/msword
+
+0       string  #\ PaCkAgE\ DaTaStReAm  application/x-svr4-package
+
+
+# WinNT/WinCE PE files (Warner Losh, imp at village.org)
+#
+128		string	PE\000\000	application/octet-stream
+0		string	PE\000\000	application/octet-stream
+
+# miscellaneous formats
+0		string	LZ		application/octet-stream
+
+
+# .EXE formats (Greg Roelofs, newt at uchicago.edu)
+#
+0		string	MZ
+>24		string	@		application/octet-stream
+
+0		string	MZ
+>30		string	Copyright\ 1989-1990\ PKWARE\ Inc.	application/x-zip
+
+0		string	MZ
+>30		string	PKLITE\ Copr.	application/x-zip
+
+0		string	MZ
+>36		string	LHa's\ SFX	application/x-lha
+
+0		string	MZ		application/octet-stream
+
+# LHA archiver
+2		string	-lh
+>6		string	-		application/x-lha
+
+
+# Zoo archiver
+20		lelong	0xfdc4a7dc	application/x-zoo
+
+# ARC archiver
+0       	lelong&0x8080ffff	0x0000081a	application/x-arc
+0		lelong&0x8080ffff	0x0000091a	application/x-arc
+0		lelong&0x8080ffff	0x0000021a	application/x-arc
+0		lelong&0x8080ffff	0x0000031a	application/x-arc
+0		lelong&0x8080ffff	0x0000041a	application/x-arc
+0		lelong&0x8080ffff	0x0000061a	application/x-arc
+
+# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF)
+0		lelong	0x223e9f78	application/ms-tnef
+
+
+#
+# QuickTime format
+# Contributor: Peter Breton
+#
+
+0	string		MOVI		video/quicktime
+4	string		moov		video/quicktime
+4	string		mdat		video/quicktime
+
+# Video formats
+0       string          RIFF
+# AVI == Audio Video Interleave
+>8      string          AVI\            
+#>0xbc   string         >\0             video/x-%s
+
+>>0xbc  string          DIV3            video/x-DivX-3
+>>0xbc  string          div3            video/x-divX-3
+>>0xbc  string          DIV4            video/x-DivX-4
+>>0x70  string          DX50            video/x-DIVX-5
+>>0x70  string          MP4             video/x-MPEG-4
+>>0xbc  string          DIVX            video/x-DivX
+>8      string          WAVE            audio/x-wav
+
+# From: stephane.loeuillet at tiscali.f
+# http://www.djvuzone.org/
+0	string		AT&TFORM	image/x.djvu
+
+# Danny Milosavljevic <danny.milo at gmx.net>
+# these are adrift (adventure game standard) game files, extension .taf
+# depending on version magic continues with 0x93453E6139FA (V 4.0)
+# 0x9445376139FA (V 3.90)
+# 0x9445366139FA (V 3.80)
+# this is from source (http://www.adrift.org.uk/) and I have some taf
+# files, and checked them.
+#0       belong          0x3C423FC9
+#>4      belong          0x6A87C2CF application/x-adrift
+
+
+# local additions
+
+# Macromedia Flash FLV Video File Format
+0	string		FLV			video/x-flv

Added: z3c.filetype/trunk/src/z3c/filetype/magic.py
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/magic.py	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/magic.py	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,714 @@
+# Found on a Russian Zope mailing list, and modified to fix bugs in parsing
+# the magic file and string making
+# -- Daniel Berlin <dberlin at dberlin.org>
+import sys, struct, time, re, exceptions, pprint, stat, os, pwd, grp
+
+_mew = 0
+
+_magic = os.path.join(os.path.dirname(__file__),'magic.mime')
+
+mime = 1
+
+_ldate_adjust = lambda x: time.mktime( time.gmtime(x) )
+
+BUFFER_SIZE = 1024 * 128 # 128K should be enough...
+
+class MagicError(exceptions.Exception): pass
+
+def _handle(fmt='@x',adj=None): return fmt, struct.calcsize(fmt), adj
+
+KnownTypes = {
+    'byte':_handle('@b'),
+    'byte':_handle('@B'),
+    'ubyte':_handle('@B'),
+    'string':('s',0,None),
+    'pstring':_handle('p'),
+    'short':_handle('@h'),
+    'beshort':_handle('>h'),
+    'leshort':_handle('<h'),
+    'short':_handle('@H'),
+    'beshort':_handle('>H'),
+    'leshort':_handle('<H'),
+    'ushort':_handle('@H'),
+    'ubeshort':_handle('>H'),
+    'uleshort':_handle('<H'),
+    
+    'long':_handle('@l'),
+    'belong':_handle('>l'),
+    'lelong':_handle('<l'),
+    'ulong':_handle('@L'),
+    'ubelong':_handle('>L'),
+    'ulelong':_handle('<L'),
+    
+    'date':_handle('=l'),
+    'bedate':_handle('>l'),
+    'ledate':_handle('<l'),
+    'ldate':_handle('=l',_ldate_adjust),
+    'beldate':_handle('>l',_ldate_adjust),
+    'leldate':_handle('<l',_ldate_adjust),
+}
+
+_mew_cnt = 0
+def mew(x):
+    global _mew_cnt
+    if _mew :
+        if x=='.' :
+            _mew_cnt += 1
+            if _mew_cnt % 64 == 0 : sys.stderr.write( '\n' )
+            sys.stderr.write( '.' )
+        else:
+            sys.stderr.write( '\b'+x )
+
+def has_format(s):
+    n = 0
+    l = None
+    for c in s :
+        if c == '%' :
+            if l == '%' : n -= 1
+            else        : n += 1
+        l = c
+    return n
+
+def read_asciiz(file,size=None,pos=None):
+    s = []
+    if pos :
+        mew('s')
+        file.seek( pos, 0 )
+    mew('z')
+    if size is not None :
+        s = [file.read( size ).split('\0')[0]]
+    else:
+        while 1 :
+            c = file.read(1)
+            if (not c) or (ord(c)==0) or (c=='\n') : break
+            s.append (c)
+    mew('Z')
+    return ''.join(s)
+
+def a2i(v,base=0):
+    if v[-1:] in 'lL' : v = v[:-1]
+    return int( v, base )
+
+_cmap = {
+        '\\' : '\\',
+        '0' : '\0',
+}
+for c in range(ord('a'),ord('z')+1) :
+    try               : e = eval('"\\%c"' % chr(c))
+    except ValueError : pass
+    else              : _cmap[chr(c)] = e
+else:
+    del c
+    del e
+
+def make_string(s):
+    # hack, is this the right way?
+    s = s.replace('\\<','<')
+    s = s.replace('\\ ',' ')
+    return eval( '"'+s.replace('"','\\"')+'"')
+
+class MagicTestError(MagicError): pass
+
+class MagicTest:
+    def __init__(self,offset,mtype,test,message,line=None,level=None):
+        self.line, self.level = line, level
+        self.mtype = mtype
+        self.mtest = test
+        self.subtests = []
+        self.mask = None
+        self.smod = None
+        self.nmod = None
+        self.offset, self.type, self.test, self.message = \
+                        offset,mtype,test,message
+        if self.mtype == 'true' : return # XXX hack to enable level skips
+        if test[-1:]=='\\' and test[-2:]!='\\\\' :
+            self.test += 'n' # looks like someone wanted EOL to match?
+        if mtype[:6]=='string' :
+            if '/' in mtype : # for strings
+                self.type, self.smod = \
+                                        mtype[:mtype.find('/')], mtype[mtype.find('/')+1:]
+        else:
+            for nm in '&+-' :
+                if nm in mtype : # for integer-based
+                    self.nmod, self.type, self.mask = (
+                            nm,
+                            mtype[:mtype.find(nm)],
+                            # convert mask to int, autodetect base
+                            int( mtype[mtype.find(nm)+1:], 0 )
+                    )
+                    break
+        self.struct, self.size, self.cast = KnownTypes[ self.type ]
+    def __str__(self):
+        return '%s %s %s %s' % (
+                self.offset, self.mtype, self.mtest, self.message
+        )
+    def __repr__(self):
+        return 'MagicTest(%s,%s,%s,%s,line=%s,level=%s,subtests=\n%s%s)' % (
+                `self.offset`, `self.mtype`, `self.mtest`, `self.message`,
+                `self.line`, `self.level`,
+                '\t'*self.level, pprint.pformat(self.subtests)
+        )
+    def run(self,file):
+        result = ''
+        do_close = 0
+        try:
+            if type(file) == type('x') :
+                file = open( file, 'r', BUFFER_SIZE )
+                do_close = 1
+#                       else:
+#                               saved_pos = file.tell()
+            if self.mtype != 'true' :
+                data = self.read(file)
+                last = file.tell()
+            else:
+                data = last = None
+            if self.check( data ) :
+                result = self.message+' '
+                if has_format( result ) : result %= data
+                for test in self.subtests :
+                    m = test.run(file)
+                    if m is not None : result += m
+                return make_string( result )
+        finally:
+            if do_close :
+                file.close()
+#                       else:
+#                               file.seek( saved_pos, 0 )
+    def get_mod_and_value(self):
+        if self.type[-6:] == 'string' :
+#             if 'ustar' in self.test:
+#                 import pdb;pdb.set_trace()
+            # "something like\tthis\n"
+            if self.test[0] in '=<>' :
+                mod, value = self.test[0], make_string( self.test[1:] )
+            else:
+                mod, value = '=', make_string( self.test )
+        else:
+            if self.test[0] in '=<>&^' :
+                mod, value = self.test[0], a2i(self.test[1:])
+            elif self.test[0] == 'x':
+                mod = self.test[0]
+                value = 0
+            else:
+                mod, value = '=', a2i(self.test)
+        return mod, value
+    def read(self,file):
+        mew( 's' )
+        file.seek( self.offset(file), 0 ) # SEEK_SET
+        mew( 'r' )
+        try:
+            data = rdata = None
+            # XXX self.size might be 0 here...
+            if self.size == 0 :
+                # this is an ASCIIZ string...
+                size = None
+                if self.test != '>\\0' : # magic's hack for string read...
+                    value = self.get_mod_and_value()[1]
+                    size = (value=='\0') and None or len(value)
+                rdata = data = read_asciiz( file, size=size )
+            else:
+                rdata = file.read( self.size )
+                if not rdata or (len(rdata)!=self.size) : return None
+                data = struct.unpack( self.struct, rdata )[0] # XXX hack??
+        except:
+            print >>sys.stderr, self
+            print >>sys.stderr, '@%s struct=%s size=%d rdata=%s' % (
+                    self.offset, `self.struct`, self.size,`rdata`)
+            raise
+        mew( 'R' )
+        if self.cast : data = self.cast( data )
+        if self.mask :
+            try:
+                if   self.nmod == '&' : data &= self.mask
+                elif self.nmod == '+' : data += self.mask
+                elif self.nmod == '-' : data -= self.mask
+                else: raise MagicTestError(self.nmod)
+            except:
+                print >>sys.stderr,'data=%s nmod=%s mask=%s' % (
+                        `data`, `self.nmod`, `self.mask`
+                )
+                raise
+        return data
+    def check(self,data):
+        mew('.')
+        if self.mtype == 'true' :
+            return '' # not None !
+        mod, value = self.get_mod_and_value()
+        if self.type[-6:] == 'string' :
+            # "something like\tthis\n"
+            if self.smod:
+                #import pdb;pdb.set_trace()
+                xdata = data
+                if 'b' in self.smod : # all blanks are optional
+                    xdata = ''.join( data.split() )
+                    value = ''.join( value.split() )
+                if 'c' in self.smod : # case-insensitive match
+                    xdata = xdata.upper()
+                    value = value.upper()
+                if 'B' in self.smod : # compact blanks
+                    data = ' '.join( data.split() )
+                    if ' ' not in data : return None
+            else:
+                xdata = data
+        try:
+            if   mod == '=' : result = data == value
+            elif mod == '<' : result = data < value
+            elif mod == '>' : result = data > value
+            elif mod == '&' : result = data & value
+            elif mod == '^' : result = (data & (~value)) == 0
+            elif mod == 'x' : result = 1
+            else            : raise MagicTestError(self.test)
+            if result :
+                zdata, zval = `data`, `value`
+                if self.mtype[-6:]!='string' :
+                    try: zdata, zval = hex(data), hex(value)
+                    except: zdata, zval = `data`, `value`
+                if 0 : print >>sys.stderr, '%s @%s %s:%s %s %s => %s (%s)' % (
+                        '>'*self.level, self.offset,
+                        zdata, self.mtype, `mod`, zval, `result`,
+                        self.message
+                )
+            return result
+        except:
+            print >>sys.stderr,'mtype=%s data=%s mod=%s value=%s' % (
+                    `self.mtype`, `data`, `mod`, `value`
+            )
+            raise
+    def add(self,mt):
+        if not isinstance(mt,MagicTest) :
+            raise MagicTestError((mt,'incorrect subtest type %s'%(type(mt),)))
+        if mt.level == self.level+1 :
+            self.subtests.append( mt )
+        elif self.subtests :
+            self.subtests[-1].add( mt )
+        elif mt.level > self.level+1 :
+            # it's possible to get level 3 just after level 1 !!! :-(
+            level = self.level + 1
+            while level < mt.level :
+                xmt = MagicTest(None,'true','x','',line=self.line,level=level)
+                self.add( xmt )
+                level += 1
+            else:
+                self.add( mt ) # retry...
+        else:
+            raise MagicTestError((mt,'incorrect subtest level %s'%(`mt.level`,)))
+    def last_test(self):
+        return self.subtests[-1]
+#end class MagicTest
+
+class OffsetError(MagicError): pass
+
+class Offset:
+    pos_format = {'b':'<B','B':'>B','s':'<H','S':'>H','l':'<I','L':'>I',}
+    pattern0 = re.compile(r'''    # mere offset
+                ^
+                &?                                          # possible ampersand
+                (       0                                       # just zero
+                |       [1-9]{1,1}[0-9]*        # decimal
+                |       0[0-7]+                         # octal
+                |       0x[0-9a-f]+                     # hex
+                )
+                $
+                ''', re.X|re.I
+    )
+    pattern1 = re.compile(r'''    # indirect offset
+                ^\(
+                (?P<base>&?0                  # just zero
+                        |&?[1-9]{1,1}[0-9]* # decimal
+                        |&?0[0-7]*          # octal
+                        |&?0x[0-9A-F]+      # hex
+                )
+                (?P<type>
+                        \.         # this dot might be alone
+                        [BSL]? # one of this chars in either case
+                )?
+                (?P<sign>
+                        [-+]{0,1}
+                )?
+                (?P<off>0              # just zero
+                        |[1-9]{1,1}[0-9]*  # decimal
+                        |0[0-7]*           # octal
+                        |0x[0-9a-f]+       # hex
+                )?
+                \)$''', re.X|re.I
+    )
+    def __init__(self,s):
+        self.source = s
+        self.value  = None
+        self.relative = 0
+        self.base = self.type = self.sign = self.offs = None
+        m = Offset.pattern0.match( s )
+        if m : # just a number
+            if s[0] == '&' :
+                self.relative, self.value = 1, int( s[1:], 0 )
+            else:
+                self.value = int( s, 0 )
+            return
+        m = Offset.pattern1.match( s )
+        if m : # real indirect offset
+            try:
+                self.base = m.group('base')
+                if self.base[0] == '&' :
+                    self.relative, self.base = 1, int( self.base[1:], 0 )
+                else:
+                    self.base = int( self.base, 0 )
+                if m.group('type') : self.type = m.group('type')[1:]
+                self.sign = m.group('sign')
+                if m.group('off') : self.offs = int( m.group('off'), 0 )
+                if self.sign == '-' : self.offs = 0 - self.offs
+            except:
+                print >>sys.stderr, '$$', m.groupdict()
+                raise
+            return
+        raise OffsetError(`s`)
+    def __call__(self,file=None):
+        if self.value is not None : return self.value
+        pos = file.tell()
+        try:
+            if not self.relative : file.seek( self.offset, 0 )
+            frmt = Offset.pos_format.get( self.type, 'I' )
+            size = struct.calcsize( frmt )
+            data = struct.unpack( frmt, file.read( size ) )
+            if self.offs : data += self.offs
+            return data
+        finally:
+            file.seek( pos, 0 )
+    def __str__(self): return self.source
+    def __repr__(self): return 'Offset(%s)' % `self.source`
+#end class Offset
+
+class MagicFileError(MagicError): pass
+
+class MagicFile:
+    def __init__(self,filename=_magic):
+        self.file = None
+        self.tests = []
+        self.total_tests = 0
+        self.load( filename )
+        self.ack_tests = None
+        self.nak_tests = None
+    def __del__(self):
+        self.close()
+    def load(self,filename=None):
+        self.open( filename )
+        self.parse()
+        self.close()
+    def open(self,filename=None):
+        self.close()
+        if filename is not None :
+            self.filename = filename
+        self.file = open( self.filename, 'r', BUFFER_SIZE )
+    def close(self):
+        if self.file :
+            self.file.close()
+            self.file = None
+    def parse(self):
+        line_no = 0
+        for line in self.file.xreadlines() :
+            line_no += 1
+            if not line or line[0]=='#' : continue
+            line = line.lstrip().rstrip('\r\n')
+            if not line or line[0]=='#' : continue
+            try:
+                x = self.parse_line( line )
+                if x is None :
+                    print >>sys.stderr, '#[%04d]#'%line_no, line
+                    continue
+            except:
+                print >>sys.stderr, '###[%04d]###'%line_no, line
+                raise
+            self.total_tests += 1
+            level, offset, mtype, test, message = x
+            new_test = MagicTest(offset,mtype,test,message,
+                    line=line_no,level=level)
+            try:
+                if level == 0 :
+                    self.tests.append( new_test )
+                else:
+                    self.tests[-1].add( new_test )
+            except:
+                if 1 :
+                    print >>sys.stderr, 'total tests=%s' % (
+                            `self.total_tests`,
+                    )
+                    print >>sys.stderr, 'level=%s' % (
+                            `level`,
+                    )
+                    print >>sys.stderr, 'tests=%s' % (
+                            pprint.pformat(self.tests),
+                    )
+                raise
+        else:
+            while self.tests[-1].level > 0 :
+                self.tests.pop()
+    def parse_line(self,line):
+        # print >>sys.stderr, 'line=[%s]' % line
+        if (not line) or line[0]=='#' : return None
+        level = 0
+        offset = mtype = test = message = ''
+        mask = None
+        # get optional level (count leading '>')
+        while line and line[0]=='>' :
+            line, level = line[1:], level+1
+        # get offset
+        while line and not line[0].isspace() :
+            offset, line = offset+line[0], line[1:]
+        try:
+            offset = Offset(offset)
+        except:
+            print >>sys.stderr, 'line=[%s]' % line
+            raise
+        # skip spaces
+        line = line.lstrip()
+        # get type
+        c = None
+        while line :
+            last_c, c, line = c, line[0], line[1:]
+            if last_c!='\\' and c.isspace() :
+                break # unescaped space - end of field
+            else:
+                mtype += c
+                if last_c == '\\' :
+                    c = None # don't fuck my brain with sequential backslashes
+        # skip spaces
+        line = line.lstrip()
+        # get test
+        c = None
+        while line :
+            last_c, c, line = c, line[0], line[1:]
+            if last_c!='\\' and c.isspace() :
+                break # unescaped space - end of field
+            else:
+                test += c
+                if last_c == '\\' :
+                    c = None # don't fuck my brain with sequential backslashes
+        # skip spaces
+        line = line.lstrip()
+        # get message
+        message = line
+        if mime and line.find("\t") != -1:
+            message=line[0:line.find("\t")]
+        #
+        # print '>>', level, offset, mtype, test, message
+        return level, offset, mtype, test, message
+    def detect(self,file):
+        self.ack_tests = 0
+        self.nak_tests = 0
+        answers = []
+        for test in self.tests :
+            message = test.run( file )
+            if message :
+                self.ack_tests += 1
+                answers.append( message )
+            else:
+                self.nak_tests += 1
+        if answers :
+            return '; '.join( answers )
+#end class MagicFile
+
+def username(uid):
+    try:
+        return pwd.getpwuid( uid )[0]
+    except:
+        return '#%s'%uid
+
+def groupname(gid):
+    try:
+        return grp.getgrgid( gid )[0]
+    except:
+        return '#%s'%gid
+
+def get_file_type(fname,follow):
+    t = None
+    if not follow :
+        try:
+            st = os.lstat( fname ) # stat that entry, don't follow links!
+        except os.error, why :
+            pass
+        else:
+            if stat.S_ISLNK(st[stat.ST_MODE]) :
+                t = 'symbolic link'
+                try:
+                    lnk = os.readlink( fname )
+                except:
+                    t += ' (unreadable)'
+                else:
+                    t += ' to '+lnk
+    if t is None :
+        try:
+            st = os.stat( fname )
+        except os.error, why :
+            return "can't stat `%s' (%s)." % (why.filename,why.strerror)
+
+    dmaj, dmin = (st.st_rdev>>8)&0x0FF, st.st_rdev&0x0FF
+
+    if 0 : pass
+    elif stat.S_ISSOCK(st.st_mode) : t = 'socket'
+    elif stat.S_ISLNK (st.st_mode) : t = follow and 'symbolic link' or t
+    elif stat.S_ISREG (st.st_mode) : t = 'file'
+    elif stat.S_ISBLK (st.st_mode) : t = 'block special (%d/%d)'%(dmaj,dmin)
+    elif stat.S_ISDIR (st.st_mode) : t = 'directory'
+    elif stat.S_ISCHR (st.st_mode) : t = 'character special (%d/%d)'%(dmaj,dmin)
+    elif stat.S_ISFIFO(st.st_mode) : t = 'pipe'
+    else: t = '<unknown>'
+
+    if st.st_mode & stat.S_ISUID :
+        t = 'setuid(%d=%s) %s'%(st.st_uid,username(st.st_uid),t)
+    if st.st_mode & stat.S_ISGID :
+        t = 'setgid(%d=%s) %s'%(st.st_gid,groupname(st.st_gid),t)
+    if st.st_mode & stat.S_ISVTX :
+        t = 'sticky '+t
+
+    return t
+
+HELP = '''%s [options] [files...]
+
+Options:
+
+        -?, --help -- this help
+        -m, --magic=<file> -- use this magic <file> instead of %s
+        -f, --files=<namefile> -- read filenames for <namefile>
+*       -C, --compile -- write "compiled" magic file
+        -b, --brief -- don't prepend filenames to output lines
++       -c, --check -- check the magic file
+        -i, --mime -- output MIME types
+*       -k, --keep-going -- don't stop st the first match
+        -n, --flush -- flush stdout after each line
+        -v, --verson -- print version and exit
+*       -z, --compressed -- try to look inside compressed files
+        -L, --follow -- follow symlinks
+        -s, --special -- don't skip special files
+
+*       -- not implemented so far ;-)
++       -- implemented, but in another way...
+'''
+
+def main():
+    import getopt
+    global _magic
+    try:
+        brief = 0
+        flush = 0
+        follow= 0
+        mime  = 0
+        check = 0
+        special=0
+        try:
+            opts, args = getopt.getopt(
+                    sys.argv[1:],
+                    '?m:f:CbciknvzLs',
+                    (       'help',
+                            'magic=',
+                            'names=',
+                            'compile',
+                            'brief',
+                            'check',
+                            'mime',
+                            'keep-going',
+                            'flush',
+                            'version',
+                            'compressed',
+                            'follow',
+                            'special',
+                    )
+            )
+        except getopt.error, why:
+            print >>sys.stderr, sys.argv[0], why
+            return 1
+        else:
+            files = None
+            for o,v in opts :
+                if o in ('-?','--help'):
+                    print HELP % (
+                            sys.argv[0],
+                            _magic,
+                    )
+                    return 0
+                elif o in ('-f','--files='):
+                    files = v
+                elif o in ('-m','--magic='):
+                    _magic = v[:]
+                elif o in ('-C','--compile'):
+                    pass
+                elif o in ('-b','--brief'):
+                    brief = 1
+                elif o in ('-c','--check'):
+                    check = 1
+                elif o in ('-i','--mime'):
+                    mime = 1
+                    if os.path.exists( _magic+'.mime' ) :
+                        _magic += '.mime'
+                        print >>sys.stderr,sys.argv[0]+':',\
+                                                        "Using regular magic file `%s'" % _magic
+                elif o in ('-k','--keep-going'):
+                    pass
+                elif o in ('-n','--flush'):
+                    flush = 1
+                elif o in ('-v','--version'):
+                    print 'VERSION'
+                    return 0
+                elif o in ('-z','--compressed'):
+                    pass
+                elif o in ('-L','--follow'):
+                    follow = 1
+                elif o in ('-s','--special'):
+                    special = 1
+            else:
+                if files :
+                    files = map(lambda x: x.strip(), v.split(','))
+                    if '-' in files and '-' in args :
+                        error( 1, 'cannot use STDIN simultaneously for file list and data' )
+                    for file in files :
+                        for name in (
+                                        (file=='-')
+                                                and sys.stdin
+                                                or open(file,'r',BUFFER_SIZE)
+                        ).xreadlines():
+                            name = name.strip()
+                            if name not in args :
+                                args.append( name )
+        try:
+            if check : print >>sys.stderr, 'Loading magic database...'
+            t0 = time.time()
+            m = MagicFile(_magic)
+            t1 = time.time()
+            if check :
+                print >>sys.stderr, \
+                                        m.total_tests, 'tests loaded', \
+                                        'for', '%.2f' % (t1-t0), 'seconds'
+                print >>sys.stderr, len(m.tests), 'tests at top level'
+                return 0 # XXX "shortened" form ;-)
+
+            mlen = max( map(len, args) )+1
+            for arg in args :
+                if not brief : print (arg + ':').ljust(mlen),
+                ftype = get_file_type( arg, follow )
+                if (special and ftype.find('special')>=0) \
+                                or ftype[-4:] == 'file' :
+                    t0 = time.time()
+                    try:
+                        t = m.detect( arg )
+                    except (IOError,os.error), why:
+                        t = "can't read `%s' (%s)" % (why.filename,why.strerror)
+                    if ftype[-4:] == 'file' : t = ftype[:-4] + t
+                    t1 = time.time()
+                    print t and t or 'data'
+                    if 0 : print \
+                                                        '#\t%d tests ok, %d tests failed for %.2f seconds'%\
+                                                        (m.ack_tests, m.nak_tests, t1-t0)
+                else:
+                    print mime and 'application/x-not-regular-file' or ftype
+                if flush : sys.stdout.flush()
+        # print >>sys.stderr, 'DONE'
+        except:
+            if check : return 1
+            raise
+        else:
+            return 0
+    finally:
+        pass
+
+# Script entry point: main() returns the process exit status.
+if __name__ == '__main__' :
+    sys.exit( main() )
+# vim:ai
+# EOF #


Property changes on: z3c.filetype/trunk/src/z3c/filetype/magic.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/magic.txt
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/magic.txt	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/magic.txt	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,27 @@
+=======
+ Magic
+=======
+
+Extract filetype from file content
+
+  >>> from z3c.filetype import magic
+  >>> import os
+  >>> m = magic.MagicFile()
+  >>> testData = os.path.join(os.path.dirname(magic.__file__),'testdata')
+  >>> fileNames = sorted(os.listdir(testData))
+  >>> for name in fileNames:
+  ...     path = os.path.join(testData, name)
+  ...     print name, m.detect(file(path))
+  DS_Store None
+  jumps.mov video/quicktime ; video/quicktime 
+  logo.gif image/gif
+  test.flv video/x-flv 
+  test.gnutar application/x-tar ; application/x-tar 
+  test.html text/html 
+  test.png image/png 
+  test.tar application/x-tar ; application/x-tar 
+  test.tgz application/x-gzip 
+  test.txt.gz application/x-gzip 
+  test2.html text/html 
+  test2.thml text/html 
+  thumbnailImage_small.jpeg image/jpeg 


Property changes on: z3c.filetype/trunk/src/z3c/filetype/magic.txt
___________________________________________________________________
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/DS_Store
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/DS_Store
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/jumps.mov
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/jumps.mov
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/logo.gif
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/logo.gif
___________________________________________________________________
Name: svn:mime-type
   + image/gif

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/test.flv
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/test.flv
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/test.gnutar
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/test.gnutar
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/test.html
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/testdata/test.html	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/testdata/test.html	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,8 @@
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<title>Title</title>
+
+<body>
+ body
+</body>
+</html>


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/test.html
___________________________________________________________________
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/test.png
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/test.png
___________________________________________________________________
Name: svn:mime-type
   + image/png

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/test.tar
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/test.tar
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/test.tgz
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/test.tgz
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/test.txt.gz
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/test.txt.gz
___________________________________________________________________
Name: svn:mime-type
   + application/gzip

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/test2.html
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/testdata/test2.html	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/testdata/test2.html	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,9 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<title>Title</title>
+
+<body>
+ body
+</body>
+</html>


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/test2.html
___________________________________________________________________
Name: svn:eol-style
   + native

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/test2.thml
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/testdata/test2.thml	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/testdata/test2.thml	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,8 @@
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<title>Title</title>
+
+<body>
+ body
+</body>
+</html>

Added: z3c.filetype/trunk/src/z3c/filetype/testdata/thumbnailImage_small.jpeg
===================================================================
(Binary files differ)


Property changes on: z3c.filetype/trunk/src/z3c/filetype/testdata/thumbnailImage_small.jpeg
___________________________________________________________________
Name: svn:mime-type
   + image/jpeg

Added: z3c.filetype/trunk/src/z3c/filetype/tests.py
===================================================================
--- z3c.filetype/trunk/src/z3c/filetype/tests.py	2006-08-11 12:55:34 UTC (rev 69400)
+++ z3c.filetype/trunk/src/z3c/filetype/tests.py	2006-08-11 13:56:21 UTC (rev 69401)
@@ -0,0 +1,22 @@
+import doctest
+import unittest
+from zope.testing.doctestunit import DocFileSuite, DocTestSuite
+
+def test_suite():
+    
+    return unittest.TestSuite(
+        (
+        DocFileSuite('README.txt',
+                     optionflags=doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS,
+                     ),
+        DocFileSuite('magic.txt',
+                     optionflags=doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS,
+                     ),
+        DocTestSuite('z3c.filetype.api',
+                     optionflags=doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS,
+                     ),
+        ))
+
+if __name__ == '__main__':  # executed as a script: run test_suite()
+    unittest.main(defaultTest='test_suite')
+


Property changes on: z3c.filetype/trunk/src/z3c/filetype/tests.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native



More information about the Checkins mailing list