Fix handling of unicode in XML in Python 2

Fixes #996

Signed-off-by: Michal Čihař <michal@cihar.com>

Fix handling of unicode in XML in Python 2

Fixes #996

Signed-off-by: Michal Čihař <michal@cihar.com>
d9da8a43 · Michal Čihař · 05d3a102 · d9da8a43 · d9da8a43
Commit d9da8a43 authored Feb 17, 2016 by Michal Čihař
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 14 additions and 2 deletions

weblate/trans/checks/markup.py +10 -2

weblate/trans/tests/test_markup_checks.py +4 -0

No files found.
--- a/weblate/trans/checks/markup.py
+++ b/weblate/trans/checks/markup.py
@@ -25,6 +25,8 @@ import re

 from django.utils.translation import ugettext_lazy as _

+import six
+
 from weblate.trans.checks.base import TargetCheck

 BBCODE_MATCH = re.compile(
@@ -95,8 +97,14 @@ class XMLTagsCheck(TargetCheck):
        '''
        Wrapper for parsing XML.
        '''
-        text = strip_entities(text)
-        return cElementTree.fromstring('<weblate>%s</weblate>' % text)
+        text = ''.join(
+            ('<weblate>', strip_entities(text), '</weblate>')
+        )
+
+        if six.PY2:
+            text = text.encode('utf-8')
+
+        return cElementTree.fromstring(text)

    def check_single(self, source, target, unit):
        # Quick check if source looks like XML

--- a/weblate/trans/tests/test_markup_checks.py
+++ b/weblate/trans/tests/test_markup_checks.py
@@ -22,6 +22,7 @@
 Tests for quality checks.
 """

+from __future__ import unicode_literals
 from weblate.trans.checks.markup import (
    BBCodeCheck,
    XMLTagsCheck,
@@ -63,3 +64,6 @@ class XMLTagsCheckTest(CheckTestCase):
                (24, 28, '</b>'),
            ]
        )
+
+    def test_unicode(self):
+        self.do_test(False, ('<a>zkouška</a>', '<a>zkouška</a>', ''))