[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters - __init__.py:1.1.2.6 doc.py:1.1.2.4 html.py:1.1.2.3 null.py:1.1.2.4 pdf.py:1.1.2.4 ppt.py:1.1.2.3 ps.py:1.1.2.3

Andreas Jung andreas@digicool.com
Wed, 20 Mar 2002 18:15:56 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters
In directory cvs.zope.org:/tmp/cvs-serv14765/converters

Modified Files:
      Tag: ajung-textindexng-branch
	__init__.py doc.py html.py null.py pdf.py ppt.py ps.py 
Log Message:
Moved more logic into the Converter base class to make the converters smarter

=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/__init__.py 1.1.2.5 => 1.1.2.6 ===
     cv = cv[:-3]
     mod = __import__(cv, globals(), globals(), __path__)
-    converter = mod.Converter()
-    for t in converter.getType():
-        _converters[t] = converter
+
+    try:
+        converter = mod.Converter()
+        for t in converter.getType():
+            _converters[t] = converter
+    except:
+        continue
 
 del converters


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/doc.py 1.1.2.3 => 1.1.2.4 ===
 # $Id$
 
-import tempfile, os
+import os
 from Globals import package_home
 from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
 
@@ -17,10 +17,8 @@
     def convert(doc):
         """Convert WinWord document to raw text"""
         
-        tmp_name = tempfile.mktemp()
-        open(tmp_name,'w').write(doc)
+        tmp_name = self.saveFile(doc)
         text = self.execute('wvWare -x %s %s 2> /dev/null' % (wvConf_file, tmp_name))
-        os.remove(tmp_name)
         
         return text
 


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/html.py 1.1.2.2 => 1.1.2.3 ===
 # $Id$
 
-from sgmllib import SGMLParser
 from types import StringType
 from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
-
-
-# Code taken from Dieter Maurer's CatalogSupport Module
-# http://www.handshake.de/~dieter/pyprojects/zope
-# Thank you!
-
-class _StripTagParser(SGMLParser):
-  '''SGML Parser removing any tags and translating HTML entities.'''
-
-  from htmlentitydefs import entitydefs
-
-  data= None
-
-  def handle_data(self,data):
-    if self.data is None: self.data=[]
-    self.data.append(data)
-
-  def __str__(self):
-    if self.data is None: return ''
-    return ''.join(self.data)
-
+from StripTagParser import StripTagParser
 
 class Converter(BaseConverter):
 
@@ -35,7 +14,7 @@
     def convert(self, s):
         """Convert html data to raw text"""
 
-        p = _StripTagParser()
+        p = StripTagParser()
         p.feed(s)
         p.close()
 


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/null.py 1.1.2.3 => 1.1.2.4 ===
     def convert(self, s):
         return s
-
-
-
-


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/pdf.py 1.1.2.3 => 1.1.2.4 ===
 # $Id$
 
-import tempfile
-from os import popen, remove
 from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
 
 class Converter(BaseConverter):
@@ -15,10 +13,8 @@
     def convert(doc):
         """Convert pdf data to raw text"""
         
-        tmp_name = tempfile.mktemp()
-        open(tmp_name,'w').write(doc)
+        tmp_name = self.saveFile(doc)
         text = self.execute('pdftotext %s -' % tmp_name)
-        remove(tmp_name)
         
         return text
 


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/ppt.py 1.1.2.2 => 1.1.2.3 ===
 # $Id$
 
-import tempfile, os
-from sgmllib import SGMLParser
+import os
 from Globals import package_home
 from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
+from StripTagParser import StripTagParser
 
 wvConf_file = os.path.join(package_home(globals()), 'wvText.xml')
 
-class _StripTagParser(SGMLParser):
-  '''SGML Parser removing any tags and translating HTML entities.'''
- 
-  from htmlentitydefs import entitydefs
- 
-  data= None
- 
-  def handle_data(self,data):
-    if self.data is None: self.data=[]
-    self.data.append(data)
- 
-  def __str__(self):
-    if self.data is None: return ''
-    return ''.join(self.data)
-
 
 class Converter(BaseConverter):
 
@@ -35,12 +20,10 @@
     def convert(doc):
         """Convert PowerPoint document to raw text"""
         
-        tmp_name = tempfile.mktemp()
-        open(tmp_name,'w').write(doc)
+        tmp_name = self.saveFile(doc)
         text = self.execute('pptHtml %s 2> /dev/null' % tmp_name)
-        os.remove(tmp_name)
 
-        p = _StripTagParser()
+        p = StripTagParser()
      
         p.feed(text)
         p.close()


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/ps.py 1.1.2.2 => 1.1.2.3 ===
 # $Id$
 
-import tempfile
-from os import popen, remove
 from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
 
 class Converter(BaseConverter):
@@ -15,10 +13,8 @@
     def convert(doc):
         """Convert postscript data to raw text"""
         
-        tmp_name = tempfile.mktemp()
-        open(tmp_name,'w').write(doc)
+        tmp_name = self.saveFile(doc)
         text = self.execute('ps2ascii %s -' % tmp_name)
-        remove(tmp_name)
         
         return text