Commit b8678233 authored by Nicolas Delaby

Work around a bug in lxml where the include_meta_content_type parameter is not honoured.

This patch will be followed by a ticket addressed to the lxml maintainers.

As soon as a clean fix is released, this commit must be reverted.



git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@45422 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent ea9b5682
......@@ -2,6 +2,7 @@
from Products.PortalTransforms.interfaces import itransform
from zope.interface import implements
from oood_commandtransform import OOOdCommandTransform, OOoDocumentDataStream
from oood_commandtransform import includeMetaContentType
from zLOG import LOG
from lxml import etree, html
from lxml.etree import Element, SubElement
......@@ -32,6 +33,7 @@ class HTMLToOdt:
def convert(self, orig, data, cache=None, filename=None, context=None, **kwargs):
# Try to recover broken HTML documents, specially regarding encoding used
html_node = etree.XML(orig, parser=html_parser)
includeMetaContentType(html_node)
orig = html.tostring(html_node, encoding='utf-8', method='xml',
include_meta_content_type=True)
......
......@@ -14,6 +14,7 @@ import re
from lxml import etree
from lxml import html
from lxml.etree import ParseError, Element
from lxml.etree import SubElement
from urllib import unquote
from urlparse import urlparse
......@@ -29,6 +30,20 @@ from Products.ERP5OOo.Document.OOoDocument import OOoServerProxy
from Products.ERP5OOo.Document.OOoDocument import enc
from Products.ERP5OOo.Document.OOoDocument import dec
def includeMetaContentType(html_node):
    """Force a single UTF-8 Content-Type <meta> declaration into <head>.

    XXX Temporary workaround, to be removed once the lxml issue where the
    include_meta_content_type parameter is not honoured has been fixed.

    Every <meta> child of <head> whose http-equiv attribute equals
    "content-type" (compared case-insensitively) is removed, then a new
    one declaring 'application/xhtml+xml; charset=utf-8' is appended.
    The tree is modified in place; nothing is returned.

    html_node -- root element of the document. Its direct 'head' child is
                 used, and created when it does not exist.
    """
    head = html_node.find('head')
    if head is None:
        # <head> must come before <body>: insert it as the first child
        # instead of appending it at the end of the document.
        head = html_node.makeelement('head')
        html_node.insert(0, head)
    # XPath 1.0 has no lower-case(); translate() folds the only letters
    # that can occur in "content-type" so the comparison ignores case.
    stale_meta_node_list = head.xpath(
        'meta[translate('
        'attribute::http-equiv, "CONTEYP", "conteyp") = "content-type"]')
    for stale_meta_node in stale_meta_node_list:
        head.remove(stale_meta_node)
    # 'http-equiv' is not a valid Python identifier, hence the ** expansion.
    SubElement(head, 'meta', **{'http-equiv': 'Content-Type',
                                'content': 'application/xhtml+xml; charset=utf-8'})
CLEAN_RELATIVE_PATH = re.compile('^../')
......@@ -194,8 +209,11 @@ class OOOdCommandTransform(commandtransform):
parent_node.append(style_node)
style_node.attrib.update({'type': 'text/css'})
parent_node.remove(css_link_tag)
includeMetaContentType(xml_doc)
xml_output = html.tostring(xml_doc, encoding='utf-8', method='xml',
include_meta_content_type=True)
xml_output = xml_output.replace('<title/>', '<title></title>')
return xml_output
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment