Commit e37accfa authored by Kazuhiko Shiozaki

Since an XML document may declare its own encoding, changing only the encoding is not enough, so we try...

Since an XML document may declare its own encoding, changing only the encoding of the data is not enough. Instead, we try to parse the document and re-output it as UTF-8 XML with a valid encoding declaration.


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@41898 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 461e716a
......@@ -47,6 +47,7 @@ except ImportError:
from Products.ERP5Type.Utils import guessEncodingFromText
from lxml import html as etree_html
from lxml import etree
class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin,
TextContent, File):
......@@ -306,7 +307,15 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin,
content_type = self.getContentType() or DEFAULT_CONTENT_TYPE
text_content = self.getData()
if content_type == 'text/html':
if content_type.endswith('xml'):
try:
tree = etree.fromstring(text_content)
text_content = etree.tostring(tree, encoding='utf-8', xml_declaration=True)
content_type = 'application/xml'
message = 'Conversion to base format succeeds'
except etree.XMLSyntaxError:
message = 'Conversion to base format without codec fails'
elif content_type == 'text/html':
re_match = self.charset_parser.search(text_content)
message = 'Conversion to base format succeeds'
if re_match is not None:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment