Commit e093ecda authored by Romain Courteaud

Add new transformation from png to text using ocropus.

This requires Ocropus (http://sites.google.com/site/ocropus/) to be installed;
it is not yet an ERP5 dependency.


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@24286 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 9209e9f7
from Products.PortalTransforms.interfaces import itransform
from StringIO import StringIO
import PIL.Image
from Products.PortalTransforms.libtransforms.commandtransform \
import popentransform
import os
import sys
import tempfile
class png_to_text(popentransform):
    """Transform a PNG image into plain text by running the ``ocrocmd`` binary.

    The image data is written to a temporary file whose path is passed to
    ``ocrocmd`` on the command line; whatever the command prints on its
    combined stdout/stderr is stored as the transform result.

    NOTE(review): ``self.binary`` and ``self.getData`` are not defined in
    this class; they are presumably provided by ``popentransform`` -- confirm
    against the base class.
    """
    __implements__ = itransform

    __name__ = "png_to_text"
    inputs = ('image/png',)
    output = 'text/plain'
    output_encoding = 'utf-8'

    __version__ = '2008-10-07.01'

    # Command template: the %(infile)s placeholder is replaced by the path
    # of the temporary file holding the image data.
    binaryName = "ocrocmd"
    binaryArgs = "%(infile)s "
    useStdin = False

    def convert(self, data, cache, **kwargs):
        """Run ``ocrocmd`` on ``data`` and store its output in ``cache``.

        Parameters:
          data   -- raw image content; written as-is to a temporary file
                    (or sent to the command's stdin when ``useStdin`` is set).
          cache  -- object exposing ``setData``; receives the command output.
          kwargs -- accepted for interface compatibility, unused here.

        Returns ``cache`` after ``setData`` has been called on it.

        The temporary file and the pipes are released even when a step
        raises, so a failing conversion does not leave files behind.
        """
        # XXX Overrides the implementation from commandtransform, because
        # ocrocmd does not accept parameters, only environment variables.
        # Overriding avoids having to put the variables in zope.conf.
        command = "%s %s" % (self.binary, self.binaryArgs)
        tmpname = None
        try:
            if not self.useStdin:
                # Create the temporary file and fill it through the raw
                # file descriptor returned by mkstemp.
                tmpfile, tmpname = tempfile.mkstemp(text=False)
                try:
                    os.write(tmpfile, data)
                finally:
                    # Close it so the other process can read it.
                    os.close(tmpfile)
                # Apply the temporary file name to the command.
                command = command % {'infile': tmpname}

            # The settings are passed as shell environment assignments
            # prefixed to the command line.
            cin, couterr = os.popen4('quiet=1 hocr=0 %s' % command, 'b')
            try:
                try:
                    if self.useStdin:
                        cin.write(str(data))
                finally:
                    # Always close stdin so the child sees EOF.
                    cin.close()
                out = self.getData(couterr)
            finally:
                couterr.close()
        finally:
            if tmpname is not None:
                # Remove the temporary file, even on failure.
                os.unlink(tmpname)

        cache.setData(out)
        return cache
def register():
    """Build and return a new ``png_to_text`` transform instance."""
    transform = png_to_text()
    return transform
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment