Commit 2e109f94 authored by Aurel, committed by Kazuhiko Shiozaki

use new pdf lib

parent a561f0f1
...@@ -81,33 +81,37 @@ class PDFDocument(Image): ...@@ -81,33 +81,37 @@ class PDFDocument(Image):
* Watermark is applied at all pages starting watermark_start_page (this * Watermark is applied at all pages starting watermark_start_page (this
index is 0 based) index is 0 based)
""" """
from pyPdf import PdfFileWriter, PdfFileReader try:
if not watermark_data: from PyPDF2 import PdfFileWriter, PdfFileReader
raise ValueError("watermark_data cannot not be empty") except ImportError:
if not self.hasData(): pass
raise ValueError("Cannot watermark an empty document") else:
self_reader = PdfFileReader(StringIO(self.getData())) if not watermark_data:
watermark_reader = PdfFileReader(StringIO(watermark_data)) raise ValueError("watermark_data cannot not be empty")
watermark_page_count = watermark_reader.getNumPages() if not self.hasData():
raise ValueError("Cannot watermark an empty document")
self_reader = PdfFileReader(StringIO(self.getData()))
watermark_reader = PdfFileReader(StringIO(watermark_data))
watermark_page_count = watermark_reader.getNumPages()
output = PdfFileWriter() output = PdfFileWriter()
for page_number in range(self_reader.getNumPages()): for page_number in range(self_reader.getNumPages()):
self_page = self_reader.getPage(page_number) self_page = self_reader.getPage(page_number)
watermark_page = None watermark_page = None
if page_number >= watermark_start_page: if page_number >= watermark_start_page:
if repeat_watermark: if repeat_watermark:
watermark_page = watermark_reader.getPage( watermark_page = watermark_reader.getPage(
(page_number - watermark_start_page) % watermark_page_count) (page_number - watermark_start_page) % watermark_page_count)
elif page_number < (watermark_page_count + watermark_start_page): elif page_number < (watermark_page_count + watermark_start_page):
watermark_page = watermark_reader.getPage(page_number - watermark_start_page) watermark_page = watermark_reader.getPage(page_number - watermark_start_page)
if watermark_page is not None: if watermark_page is not None:
self_page.mergePage(watermark_page) self_page.mergePage(watermark_page)
output.addPage(self_page) output.addPage(self_page)
outputStream = StringIO() outputStream = StringIO()
output.write(outputStream) output.write(outputStream)
return outputStream.getvalue() return outputStream.getvalue()
# Conversion API # Conversion API
def _convert(self, format, **kw): def _convert(self, format, **kw):
...@@ -301,12 +305,12 @@ class PDFDocument(Image): ...@@ -301,12 +305,12 @@ class PDFDocument(Image):
value = ':'.join(item_list[1:]).strip() value = ':'.join(item_list[1:]).strip()
result[key] = value result[key] = value
# Then we use pyPdf to get extra metadata # Then we use PyPDF2 to get extra metadata
try: try:
from pyPdf import PdfFileReader from PyPDF2 import PdfFileReader
from pyPdf.utils import PdfReadError from PyPDF2.utils import PdfReadError
except ImportError: except ImportError:
# if pyPdf not found, pass # if PyPDF2 not found, pass
pass pass
else: else:
try: try:
...@@ -327,7 +331,7 @@ class PDFDocument(Image): ...@@ -327,7 +331,7 @@ class PDFDocument(Image):
result.setdefault(info_key, info_value) result.setdefault(info_key, info_value)
except PdfReadError: except PdfReadError:
LOG("PDFDocument.getContentInformation", PROBLEM, LOG("PDFDocument.getContentInformation", PROBLEM,
"pyPdf is Unable to read PDF, probably corrupted PDF here : %s" % \ "PyPDF2 is Unable to read PDF, probably corrupted PDF here : %s" % \
(self.getRelativeUrl(),)) (self.getRelativeUrl(),))
finally: finally:
tmp.close() tmp.close()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment