Commit b70be343 authored by Emmy Vouriot, committed by Jérome Perrin

Convert bytes to str when reading HTML or converting to text (WIP)

parent a2346a3c
...@@ -194,7 +194,7 @@ class PDFDocument(Image): ...@@ -194,7 +194,7 @@ class PDFDocument(Image):
context=self, filename=filename, context=self, filename=filename,
mimetype=self.getContentType()) mimetype=self.getContentType())
if result: if result:
return result return bytes2str(result)
else: else:
# Try to use OCR from ghostscript, but tolerate that the command might # Try to use OCR from ghostscript, but tolerate that the command might
# not be available. # not be available.
...@@ -282,7 +282,7 @@ class PDFDocument(Image): ...@@ -282,7 +282,7 @@ class PDFDocument(Image):
command = ['pdftohtml', '-enc', 'UTF-8', '-stdout', command = ['pdftohtml', '-enc', 'UTF-8', '-stdout',
'-noframes', '-i', tmp.name] '-noframes', '-i', tmp.name]
try: try:
command_result = Popen(command, stdout=PIPE).communicate()[0] command_result = bytes2str(Popen(command, stdout=PIPE).communicate()[0])
except OSError as e: except OSError as e:
if e.errno == errno.ENOENT: if e.errno == errno.ENOENT:
raise ConversionError('pdftohtml was not found') raise ConversionError('pdftohtml was not found')
...@@ -291,10 +291,10 @@ class PDFDocument(Image): ...@@ -291,10 +291,10 @@ class PDFDocument(Image):
finally: finally:
tmp.close() tmp.close()
# Quick hack to remove bg color - XXX # Quick hack to remove bg color - XXX
h = command_result.replace(b'<BODY bgcolor="#A0A0A0"', b'<BODY ') h = command_result.replace('<BODY bgcolor="#A0A0A0"', '<BODY ')
# Make links relative # Make links relative
h = h.replace(str2bytes('href="%s.html' % tmp.name.split(os.sep)[-1]), h = h.replace('href="%s.html' % tmp.name.split(os.sep)[-1],
b'href="asEntireHTML') 'href="asEntireHTML')
return h return h
security.declarePrivate('_convertToDJVU') security.declarePrivate('_convertToDJVU')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment