# -*- coding: utf-8 -*-
#
# Copyright © 2012 - 2016 Michal Čihař <michal@cihar.com>
#
# This file is part of Weblate <https://weblate.org/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from xml.etree import cElementTree
import re

from django.utils.translation import ugettext_lazy as _

from weblate.trans.checks.base import TargetCheck

# Matches one BBCode element: an opening ``[tag]`` (optionally carrying an
# ``@...`` suffix), its content, and the closing ``[/tag]`` which has to
# repeat the same tag name (enforced by the ``(?P=tag)`` backreference).
# NOTE(review): re.MULTILINE only changes the meaning of ``^`` and ``$``,
# neither of which occurs in this pattern, and ``.`` does not match a
# newline, so BBCode content spanning several lines is not matched.
BBCODE_MATCH = re.compile(
    r'(?P<start>\[(?P<tag>[^]]+)(@[^]]*)?\])(.*?)(?P<end>\[\/(?P=tag)\])',
    re.MULTILINE
)

# Anything that looks like an XML tag: ``<`` ... ``>``.
XML_MATCH = re.compile(r'<[^>]+>')
# Named (``&amp;``) and numeric (``&#160;``) entity references.
XML_ENTITY_MATCH = re.compile(r'&#?\w+;')


def strip_entities(text):
    '''
    Strips all HTML entities (we don't care about them).

    Returns the text with every XML_ENTITY_MATCH occurrence removed.
    '''
    return XML_ENTITY_MATCH.sub('', text)


class BBCodeCheck(TargetCheck):
    '''
    Check for matching bbcode tags.
    '''
    check_id = 'bbcode'
    name = _('Mismatched BBcode')
    description = _('BBcode in translation does not match source')
    severity = 'warning'

    def check_single(self, source, target, unit):
        '''
        Compares BBCode tags used in the source and in the target.

        Returns False when the source contains no BBCode at all, True when
        the number of BBCode elements differs or when the sets of tag names
        differ, False otherwise.
        '''
        # Parse source
        src_match = BBCODE_MATCH.findall(source)
        # Any BBCode in source?
        if not src_match:
            return False
        # Parse target
        tgt_match = BBCODE_MATCH.findall(target)
        if len(src_match) != len(tgt_match):
            return True

        # findall() yields one tuple of groups per match; index 1 is the
        # ``tag`` group.
        src_tags = {groups[1] for groups in src_match}
        tgt_tags = {groups[1] for groups in tgt_match}

        return src_tags != tgt_tags

    def check_highlight(self, source, unit):
        '''
        Returns (offset, text) pairs for every opening and closing BBCode
        tag found in the source, or an empty list when the check is
        skipped for the unit.
        '''
        if self.should_skip(unit):
            return []
        ret = []
        for match in BBCODE_MATCH.finditer(source):
            ret.append((match.start('start'), match.group('start')))
            ret.append((match.start('end'), match.group('end')))
        return ret


class XMLTagsCheck(TargetCheck):
    '''
    Check whether XML in target matches source.
    '''
    check_id = 'xml-tags'
    name = _('XML tags mismatch')
    description = _('XML tags in translation do not match source')
    severity = 'warning'

    def parse_xml(self, text):
        '''
        Wrapper for parsing XML.

        Entities are stripped and the text is wrapped in a ``<weblate>``
        root element, so that fragments with several top level nodes can
        be parsed. Returns the root element; the parser raises an
        exception derived from SyntaxError on malformed input.
        '''
        # Strip entities and build the document while it is still text,
        # and encode only once right before parsing. Encoding first (as
        # this method used to do) mixes bytes with a text regex and a text
        # template, which raises TypeError on Python 3.
        document = '<weblate>%s</weblate>' % strip_entities(text)
        return cElementTree.fromstring(document.encode('utf-8'))

    def check_single(self, source, target, unit):
        '''
        Compares XML tags used in the source and in the target.

        Returns False when the source does not look like XML or is not
        well formed, True when the target is not well formed or when its
        sequence of top level tags differs from the source one.
        '''
        # Quick check if source looks like XML
        if '<' not in source or XML_MATCH.search(source) is None:
            return False

        # Check if source is XML; iterating an element yields only its
        # direct children, so nested tags are not compared.
        try:
            source_tags = [child.tag for child in self.parse_xml(source)]
        except SyntaxError:
            # Source is not valid XML, we give up
            return False

        # Check target
        try:
            target_tags = [child.tag for child in self.parse_xml(target)]
        except SyntaxError:
            # Target is not valid XML
            return True

        # Compare tags
        return source_tags != target_tags

    def check_highlight(self, source, unit):
        '''
        Returns (offset, text) pairs for every XML tag followed by every
        entity found in the source, or an empty list when the check is
        skipped for the unit.
        '''
        # Same guard as BBCodeCheck.check_highlight: nothing to highlight
        # for units on which this check is skipped.
        if self.should_skip(unit):
            return []
        ret = []
        for match in XML_MATCH.finditer(source):
            ret.append((match.start(), match.group()))
        for match in XML_ENTITY_MATCH.finditer(source):
            ret.append((match.start(), match.group()))
        return ret