Commit 57cf5cdf authored by Sebastien Robin

do not raise error if PDF is not valid when searching for metadata

parent 57402c11
...@@ -36,6 +36,7 @@ from Products.ERP5.Document.Image import Image ...@@ -36,6 +36,7 @@ from Products.ERP5.Document.Image import Image
from Products.ERP5.Document.Document import ConversionError,\ from Products.ERP5.Document.Document import ConversionError,\
VALID_TEXT_FORMAT_LIST VALID_TEXT_FORMAT_LIST
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
from zLOG import LOG
import errno import errno
class PDFDocument(Image): class PDFDocument(Image):
...@@ -269,16 +270,22 @@ class PDFDocument(Image): ...@@ -269,16 +270,22 @@ class PDFDocument(Image):
# Then we use pyPdf to get extra metadata # Then we use pyPdf to get extra metadata
try: try:
from pyPdf import PdfFileReader from pyPdf import PdfFileReader
from pyPdf.utils import PdfReadError
except ImportError: except ImportError:
# if pyPdf not found, pass # if pyPdf not found, pass
pass pass
else: else:
pdf_file = PdfFileReader(tmp) try:
for info_key, info_value in pdf_file.getDocumentInfo().iteritems(): pdf_file = PdfFileReader(tmp)
info_key = info_key.lstrip("/") for info_key, info_value in pdf_file.getDocumentInfo().iteritems():
if isinstance(info_value, unicode): info_key = info_key.lstrip("/")
info_value = info_value.encode("utf-8") if isinstance(info_value, unicode):
result.setdefault(info_key, info_value) info_value = info_value.encode("utf-8")
result.setdefault(info_key, info_value)
except PdfReadError:
LOG("PDFDocument.getContentInformation", 0,
"pyPdf is Unable to read PDF, probably corrupted PDF here : %s" % \
(self.getRelativeUrl(),))
finally: finally:
tmp.close() tmp.close()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment