# -*- coding: utf-8 -*-
#
# Copyright © 2012 - 2016 Michal Čihař <michal@cihar.com>
#
# This file is part of Weblate <https://weblate.org/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

from xml.etree import cElementTree
import re

from django.utils.translation import ugettext_lazy as _

from weblate.trans.checks.base import TargetCheck

# Matches one BBCode pair: an opening "[tag]", the shortest possible run of
# content, and a closing "[/tag]" that repeats the captured tag text (the
# (?P=tag) backreference). The opening tag may carry an optional "@..."
# suffix which the closing tag does not repeat.
# Named groups: "start" is the whole opening tag, "tag" the tag text inside
# it and "end" the whole closing tag. With findall() the tag text is the
# second item of every returned tuple.
# NOTE(review): re.MULTILINE only changes how ^ and $ behave and the pattern
# uses neither, so "." still does not match a newline and a pair whose
# content spans several lines is not matched - confirm whether re.DOTALL
# was intended.
BBCODE_MATCH = re.compile(
    r'(?P<start>\[(?P<tag>[^]]+)(@[^]]*)?\])(.*?)(?P<end>\[\/(?P=tag)\])',
    re.MULTILINE
)

# Matches anything shaped like an XML tag: "<", one or more characters
# other than ">", then ">".
XML_MATCH = re.compile(r'<[^>]+>')
# Matches named and numeric entity references such as "&amp;" or "&#160;".
XML_ENTITY_MATCH = re.compile(r'&#?\w+;')


def strip_entities(text):
    '''
    Removes every entity reference (anything XML_ENTITY_MATCH matches)
    from the text and returns what is left.

    Entities are dropped rather than decoded, the checks do not care
    about them.
    '''
    entity_free = XML_ENTITY_MATCH.sub('', text)
    return entity_free


class BBCodeCheck(TargetCheck):
    '''
    Check for matching bbcode tags.
    '''
    check_id = 'bbcode'
    name = _('Mismatched BBcode')
    description = _('BBcode in translation does not match source')
    severity = 'warning'

    def check_single(self, source, target, unit):
        '''
        Compares BBCode tag pairs of a single source and target string.

        Returns True when the target does not contain the same tags the
        same number of times as the source, False when they agree or when
        the source has no BBCode pair at all. The unit is not used here.
        '''
        # The tag text is the second group of every BBCODE_MATCH hit
        src_tags = sorted(x[1] for x in BBCODE_MATCH.findall(source))
        # Any BBCode in source?
        if not src_tags:
            return False
        tgt_tags = sorted(x[1] for x in BBCODE_MATCH.findall(target))

        # Sorted lists are compared instead of sets so that the number of
        # occurrences of each tag counts as well: [b][b][i] must not pass
        # as a match for [b][i][i]. A different number of pairs makes the
        # lists differ too, so no separate length check is needed.
        return src_tags != tgt_tags

    def check_highlight(self, source, unit):
        '''
        Returns list of (offset, text) tuples, one for the opening and one
        for the closing tag of every BBCode pair found in source.

        Returns empty list when should_skip() is true for the unit.
        '''
        if self.should_skip(unit):
            return []
        ret = []
        for match in BBCODE_MATCH.finditer(source):
            ret.extend((
                (match.start('start'), match.group('start')),
                (match.start('end'), match.group('end')),
            ))
        return ret


class XMLTagsCheck(TargetCheck):
    '''
    Check whether XML in target matches source.
    '''
    check_id = 'xml-tags'
    name = _('XML tags mismatch')
    description = _('XML tags in translation do not match source')
    severity = 'warning'

    def parse_xml(self, text):
        '''
        Wrapper for parsing XML.

        Entities are stripped and the text is wrapped in a <weblate> root
        element before parsing, so a fragment with several top level nodes
        is accepted. Returns the root element.

        Malformed markup raises the parser's ParseError, which is
        a SyntaxError subclass.
        '''
        # Strip entities and wrap while the value is still text and encode
        # only once at the end. Encoding first breaks on Python 3 (text
        # pattern applied to bytes) and gives the very same bytes on
        # Python 2.
        document = '<weblate>%s</weblate>' % strip_entities(text)
        return cElementTree.fromstring(document.encode('utf-8'))

    def check_single(self, source, target, unit):
        '''
        Compares XML tags of a single source and target string.

        Returns False when the source does not look like XML or can not be
        parsed, True when the target can not be parsed or when its tags
        differ from the source ones. The unit is not used here.
        '''
        # Quick check if source looks like XML
        if XML_MATCH.search(source) is None:
            return False

        # Check if source is XML
        try:
            source_tree = self.parse_xml(source)
        except SyntaxError:
            # Source is not valid XML, we give up
            return False

        # Check target
        try:
            target_tree = self.parse_xml(target)
        except SyntaxError:
            # Target is not valid XML
            return True

        # Compare tags, iter() walks whole tree in document order so that
        # nested tags are compared as well, not only the top level ones
        source_tags = [x.tag for x in source_tree.iter()]
        target_tags = [x.tag for x in target_tree.iter()]
        return source_tags != target_tags

    def check_highlight(self, source, unit):
        '''
        Returns list of (offset, text) tuples for everything in source
        matching XML_MATCH, followed by everything matching
        XML_ENTITY_MATCH.

        Returns empty list when should_skip() is true for the unit.
        '''
        # Same guard as BBCodeCheck.check_highlight uses; presumably it
        # tells the check is turned off for the unit - verify in base class
        if self.should_skip(unit):
            return []
        ret = [
            (match.start(), match.group())
            for match in XML_MATCH.finditer(source)
        ]
        ret.extend(
            (match.start(), match.group())
            for match in XML_ENTITY_MATCH.finditer(source)
        )
        return ret