utils.py 3.93 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
##############################################################################
#
# Copyright (c) 2007 Nexedi SA and Contributors. All Rights Reserved.
#          Jerome Perrin <jerome@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################

"""This module provides utilities for testing ODF files.

Validator: a class defining a `validate` method that takes the content of an
ODF file as its first argument and returns a list of errors.

"""
from __future__ import print_function

import io
import os
import subprocess
import sys
import tempfile
import zipfile

from six.moves import urllib
from six.moves import cStringIO as StringIO

try:
  import lxml
except ImportError:
  lxml = None
try:
  import odfpy
except ImportError:
  odfpy = None

if lxml:
  # `import lxml` alone does not load the `etree` submodule; import it
  # explicitly so that `lxml.etree` does not depend on another module having
  # imported it first.
  import lxml.etree

  class LXMLValidator:
    """Validate ODF document using RelaxNG and lxml"""
    schema_url = \
      'http://docs.oasis-open.org/office/v1.1/OS/OpenDocument-schema-v1.1.rng'

    def __init__(self, schema_url=schema_url):
      """Load the RelaxNG schema.

      schema_url -- URL of the schema. Nothing is downloaded: only the base
        name of the URL is used, and the schema is read from a file of that
        name located in the same directory as this module.
      """
      self.schema_url = schema_url
      self.schema_path = os.path.join(
        os.path.dirname(__file__), os.path.basename(schema_url))
      self.relaxng = lxml.etree.RelaxNG(lxml.etree.parse(self.schema_path))

    def validate(self, odf_file_content):
      """Check the XML members of an ODF document and return a list of errors.

      odf_file_content -- the raw bytes of the ODF (zip) file.

      Exceptions raised while opening the archive or parsing a member are
      not caught here and propagate to the caller.
      """
      error_list = []
      # An ODF document is a zip archive, hence binary: it needs a bytes
      # buffer (a text StringIO rejects bytes on Python 3).
      odf_file = io.BytesIO(odf_file_content)
      for f in ('content.xml', 'meta.xml', 'styles.xml', 'settings.xml'):
        error_list.extend(self._validateXML(odf_file, f))
      return error_list

    def _validateXML(self, odf_file, content_file_name):
      """Parse one XML member of the archive and return a list of errors.

      odf_file -- seekable binary file object containing the ODF archive.
      content_file_name -- name of the archive member to check.

      The returned list is currently always empty, see the comment below.
      `ZipFile.read` raises KeyError if the member is missing.
      """
      # Closing the ZipFile does not close `odf_file`, which was passed in as
      # a file object and is reused for the next member.
      with zipfile.ZipFile(odf_file) as zfd:
        xml_content = zfd.read(content_file_name)
      # Parsing is kept: lxml parse errors propagate to the caller.
      lxml.etree.parse(io.BytesIO(xml_content))
      # The past implementation validated the parsed document with the
      # RelaxNG schema (set doc.docinfo.URL to the member name, call
      # self.relaxng.validate(doc) and return the lines of
      # self.relaxng.error_log). But recent LibreOffice uses extended odf
      # format by default, that does not pass the RelaxNG validation, so no
      # error is reported.
      return []

  Validator = LXMLValidator

elif odfpy:

  class OdflintValidator:
    """Validates ODF files using odflint, available on pypi
    http://opendocumentfellowship.org/development/projects/odfpy
    """

    def validate(self, odf_file_content):
      """Run `odflint` on the document and return the errors it reports.

      odf_file_content -- the raw bytes of the ODF file. They are written to
        a temporary file, because odflint is given a file name.

      Returns the list of output lines (stdout and stderr merged) that start
      with 'Error: ', or an empty list when there is none.
      """
      fd, file_name = tempfile.mkstemp()
      try:
        # A file object writes the whole content, where a single os.write
        # call may write only part of it.
        with os.fdopen(fd, 'wb') as odf_file:
          odf_file.write(odf_file_content)
        process = subprocess.Popen(
          ['odflint', file_name],
          stdout=subprocess.PIPE,
          stderr=subprocess.STDOUT,
        )
        stdout, _ = process.communicate()
      finally:
        # Remove the temporary file even if odflint could not be started.
        os.unlink(file_name)
      # On Python 3 the pipe yields bytes; decode before comparing with text.
      if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8', 'replace')
      # Iterate over lines: iterating over the output itself would yield
      # single characters, which never start with 'Error: '.
      return [
        line for line in stdout.splitlines(True)
        if line.startswith('Error: ')
      ]

  Validator = OdflintValidator

else:

  class NoValidator:
    """Does not actually validate, but keeps the interface.

    Selected when neither lxml nor odfpy could be imported.
    """

    def validate(self, odf_file_content):
      """Warn on stderr that nothing is checked and report no error.

      odf_file_content -- ignored.

      Returns an empty list, so that callers can handle the result as a list
      of errors, as documented at the top of this module.
      """
      print('No validator available', file=sys.stderr)
      return []

  Validator = NoValidator