Merge remote-tracking branch 'origin/master'

8795e17b · Michal Čihař · f552641c · b29bd8d4 · 8795e17b · 8795e17b
Commit 8795e17b authored Apr 02, 2012 by Michal Čihař
6 changed files
--- a/docs/admin.rst
+++ b/docs/admin.rst
@@ -142,3 +142,70 @@ The changes are in this mode committed once one of following conditions happen:
 * merge from upstream occurs
 * import of translation happens
 * translation for a language is completed
+
+.. _custom-checks:
+
+Customizing checks
+------------------
+
+Weblate comes with a wide range of consistency checks (see :ref:`checks`), though
+they might not cover everything you want to check. The list of performed checks
+can be adjusted using :envvar:`CHECK_LIST`, and you can also add custom checks.
+All you need to do is subclass :class:`trans.checks.Check`, set a few
+attributes and implement either the ``check`` or the ``check_single`` method (the
+first one if you want to deal with plurals in your code; the latter one handles
+them for you). Some examples follow below.
+
+Checking translation text does not contain "foo"
++++++++++++++++++++++++++++++++++++++++++++++++
+
+This is a pretty simple check which just verifies that the translation does not
+contain the string "foo".
+
+.. code-block:: python
+
+    from trans.checks import Check
+    from django.utils.translation import ugettext_lazy as _
+
+    class FooCheck(Check):
+
+        # Used as identifier for check, should be unique
+        check_id = 'foo'
+
+        # Short name used to display failing check
+        name = _('Foo check')
+
+        # Description for failing check
+        description = _('Your translation is foo')
+
+        # Real check code
+        def check_single(self, source, target, flags, language, unit):
+            return 'foo' in target
+
+Checking Czech translation text plurals differ
++++++++++++++++++++++++++++++++++++++++++++++
+
+Check using language information to verify that two plural forms in the Czech
+language are not the same.
+
+.. code-block:: python
+
+    from trans.checks import Check
+    from django.utils.translation import ugettext_lazy as _
+
+    class PluralCzechCheck(Check):
+
+        # Used as identifier for check, should be unique
+        check_id = 'plural_czech'
+
+        # Short name used to display failing check
+        name = _('Czech plurals')
+
+        # Description for failing check
+        description = _('Two plural forms of your Czech translation are the same')
+
+        # Real check code
+        def check(self, sources, targets, flags, language, unit):
+            if self.is_language(language, ['cs']):
+                return targets[1] == targets[2]
+            return False
--- a/docs/config.rst
+++ b/docs/config.rst
@@ -3,6 +3,12 @@ Configuration

 All settings are stored in :file:`settings.py` (as usual for Django).

+.. envvar:: CHECK_LIST
+
+    List of consistency checks to perform on a translation.
+
+    .. seealso:: :ref:`checks`, :ref:`custom-checks`
+
 .. envvar:: COMMIT_MESSAGE

    Message used on each commit Weblate does.

--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -59,6 +59,13 @@ preferences, translation to these languages will be shown.
 Below the translation, suggestions from other users can also be shown, which you
 can accept or delete.

+Dictionary
+----------
+
+Each project can have a dictionary assigned for any language. This can be used
+for storing terminology for the given project, so that translations are consistent.
+You can display terms from the currently translated string in the bottom tabs.
+
 Suggestions
 -----------

@@ -101,6 +108,8 @@ Machine translation service provided by Microsoft.

    http://www.microsofttranslator.com/

+.. _checks:
+
 Checks
 ------


--- a/settings.py
+++ b/settings.py
@@ -263,3 +263,21 @@ LAZY_COMMITS = True

 # Where to put Whoosh index
 WHOOSH_INDEX = os.path.join(WEB_ROOT, 'whoosh-index')
+
+# List of consistency checks
+CHECK_LIST = (
+    'trans.checks.SameCheck',
+    'trans.checks.BeginNewlineCheck',
+    'trans.checks.EndNewlineCheck',
+    'trans.checks.EndSpaceCheck',
+    'trans.checks.EndStopCheck',
+    'trans.checks.EndColonCheck',
+    'trans.checks.EndQuestionCheck',
+    'trans.checks.EndExclamationCheck',
+    'trans.checks.PythonFormatCheck',
+    'trans.checks.PHPFormatCheck',
+    'trans.checks.CFormatCheck',
+    'trans.checks.PluralsCheck',
+    'trans.checks.ConsistencyCheck',
+)
+
--- a/trans/checks.py
+++ b/trans/checks.py
 # -*- coding: UTF-8 -*-
 from django.utils.translation import ugettext_lazy as _
+from django.core.exceptions import ImproperlyConfigured
+from django.conf import settings
 import re

 PYTHON_PRINTF_MATCH = re.compile('''
@@ -85,224 +87,333 @@ SAME_BLACKLIST = frozenset((
    'zip',
    ))

-CHECKS = {}
-
-def plural_check(f):
+DEFAULT_CHECK_LIST = (
+    'trans.checks.SameCheck',
+    'trans.checks.BeginNewlineCheck',
+    'trans.checks.EndNewlineCheck',
+    'trans.checks.EndSpaceCheck',
+    'trans.checks.EndStopCheck',
+    'trans.checks.EndColonCheck',
+    'trans.checks.EndQuestionCheck',
+    'trans.checks.EndExclamationCheck',
+    'trans.checks.PythonFormatCheck',
+    'trans.checks.PHPFormatCheck',
+    'trans.checks.CFormatCheck',
+    'trans.checks.PluralsCheck',
+    'trans.checks.ConsistencyCheck',
+)
+
+class Check(object):
    '''
-    Generic decorator for working with plural translations.
+    Basic class for checks.
    '''
-    def _plural_check(sources, targets, flags, language, unit):
-        if f(sources[0], targets[0], flags, language, unit):
+    check_id = ''
+    name = ''
+    description = ''
+
+    def check(self, sources, targets, flags, language, unit):
+        '''
+        Checks single unit, handling plurals.
+        '''
+        # Check singular
+        if self.check_single(sources[0], targets[0], flags, language, unit):
            return True
+        # Do we have more to check?
        if len(sources) == 1:
            return False
-        for t in targets[1:]:
-            if f(sources[1], t, flags, language, unit):
+        # Check plurals against plural from source
+        for target in targets[1:]:
+            if self.check_single(sources[1], target, flags, language, unit):
                return True
+        # Check did not fire
        return False

-    return _plural_check
-
-# Check for not translated entries
-
-@plural_check
-def check_same(source, target, flags, language, unit):
-    # One letter things are usually labels or decimal/thousand separators
-    if len(source) == 1 and len(target) == 1:
+    def check_single(self, source, target, flags, language, unit):
+        '''
+        Check for single phrase, not dealing with plurals.
+        '''
        return False

-    # English variants will have most things not translated
-    if language.code.split('_')[0] == 'en':
-        return False
+    def check_chars(self, source, target, pos, chars):
+        '''
+        Generic checker for chars presence.
+        '''
+        if len(target) == 0:
+            return False
+        s = source[pos]
+        t = target[pos]
+        return (s in chars and t not in chars) or (s not in chars and t in chars)
+
+    def check_format_strings(self, source, target, regex):
+        '''
+        Generic checker for format strings.
+        '''
+        if len(target) == 0:
+            return False
+        src_matches = set([x[0] for x in regex.findall(source)])
+        tgt_matches = set([x[0] for x in regex.findall(target)])
+        # We ignore %% as this is really not relevant. However it needs
+        # to be matched to prevent handling %%s as %s.
+        if '%' in src_matches:
+            src_matches.remove('%')
+        if '%' in tgt_matches:
+            tgt_matches.remove('%')
+
+        if src_matches != tgt_matches:
+            return True

-    # Probably shortcut
-    if source.isupper() and target.isupper():
        return False

-    # Ignore words which are often same in foreigh language
-    if source.lower() in SAME_BLACKLIST or source.lower().rstrip(': ') in SAME_BLACKLIST:
-        return False
+    def is_language(self, language, vals):
+        '''
+        Detects whether language is in given list, ignores language
+        variants.
+        '''
+        return language.code.split('_')[0] in vals

-    return (source == target)

-CHECKS['same'] = (_('Not translated'), check_same, _('Source and translated strings are same'))

-def check_chars(source, target, pos, chars):
+class SameCheck(Check):
    '''
-    Generic checker for chars presence.
+    Check for not translated entries.
    '''
-    if len(target) == 0:
-        return False
-    s = source[pos]
-    t = target[pos]
-    return (s in chars and t not in chars) or (s not in chars and t in chars)
+    check_id = 'same'
+    name = _('Not translated')
+    description = _('Source and translated strings are same')

-# Checks for newlines at beginning/end
+    def check_single(self, source, target, flags, language, unit):
+        # One letter things are usually labels or decimal/thousand separators
+        if len(source) == 1 and len(target) == 1:
+            return False

-@plural_check
-def check_begin_newline(source, target, flags, language, unit):
-    return check_chars(source, target, 0, ['\n'])
+        # English variants will have most things not translated
+        if self.is_language(language, ['en']):
+            return False

-CHECKS['begin_newline'] = (_('Starting newline'), check_begin_newline, _('Source and translated do not both start with a newline'))
+        # Probably shortcut
+        if source.isupper() and target.isupper():
+            return False

-@plural_check
-def check_end_newline(source, target, flags, language, unit):
-    return check_chars(source, target, -1, ['\n'])
+        # Ignore words which are often the same in a foreign language
+        if source.lower() in SAME_BLACKLIST or source.lower().rstrip(': ') in SAME_BLACKLIST:
+            return False

-CHECKS['end_newline'] = (_('Trailing newline'), check_end_newline, _('Source and translated do not both end with a newline'))
+        return (source == target)

-# Whitespace check
+class BeginNewlineCheck(Check):
+    '''
+    Checks for newlines at beginning.
+    '''
+    check_id = 'begin_newline'
+    name = _('Starting newline')
+    description = _('Source and translated do not both start with a newline')

-@plural_check
-def check_end_space(source, target, flags, language, unit):
-    # One letter things are usually decimal/thousand separators
-    if len(source) == 1 and len(target) <= 1:
-        return False
-    if language.code.split('_')[0] in ['fr', 'br']:
-        if len(target) == 0:
-            return False
-        if source[-1] in [':', '!', '?'] and target[-1] == ' ':
-            return False
-    return check_chars(source, target, -1, [' '])
+    def check_single(self, source, target, flags, language, unit):
+        return self.check_chars(source, target, 0, ['\n'])

-CHECKS['end_space'] = (_('Trailing space'), check_end_space, _('Source and translated do not both end with a space'))
+class EndNewlineCheck(Check):
+    '''
+    Checks for newlines at end.
+    '''
+    check_id = 'end_newline'
+    name = _('Trailing newline')
+    description = _('Source and translated do not both end with a newline')

-# Check for punctation
+    def check_single(self, source, target, flags, language, unit):
+        return self.check_chars(source, target, -1, ['\n'])

-@plural_check
-def check_end_stop(source, target, flags, language, unit):
-    if len(source) == 1 and len(target) == 1:
-        return False
-    return check_chars(source, target, -1, [u'.', u'。', u'।', u'۔'])
+class EndSpaceCheck(Check):
+    '''
+    Whitespace check
+    '''
+    check_id = 'end_space'
+    name = _('Trailing space')
+    description = _('Source and translated do not both end with a space')

-CHECKS['end_stop'] = (_('Trailing stop'), check_end_stop, _('Source and translated do not both end with a full stop'))
+    def check_single(self, source, target, flags, language, unit):
+        # One letter things are usually decimal/thousand separators
+        if len(source) == 1 and len(target) <= 1:
+            return False
+        if self.is_language(language, ['fr', 'br']):
+            if len(target) == 0:
+                return False
+            if source[-1] in [':', '!', '?'] and target[-1] == ' ':
+                return False
+        return self.check_chars(source, target, -1, [' '])
+
+class EndStopCheck(Check):
+    '''
+    Check for final stop
+    '''
+    check_id = 'end_stop'
+    name = _('Trailing stop')
+    description = _('Source and translated do not both end with a full stop')

-@plural_check
-def check_end_colon(source, target, flags, language, unit):
-    if language.code.split('_')[0] in ['fr', 'br']:
-        if len(target) == 0:
+    def check_single(self, source, target, flags, language, unit):
+        if len(source) == 1 and len(target) == 1:
            return False
-        if source[-1] == ':':
-            if target[-3:] not in [' : ', '&nbsp;: ', u' : ']:
-                return True
-        return False
-    if language.code.split('_')[0] in ['ja']:
-        # Japanese sentence might need to end with full stop
-        # in case it's used before list.
-        if source[-1] == ':':
-            return check_chars(source, target, -1, [u':', u'：', u'.', u'。'])
-        return False
-    return check_chars(source, target, -1, [u':', u'：'])
+        return self.check_chars(source, target, -1, [u'.', u'。', u'।', u'۔'])

-CHECKS['end_colon'] = (_('Trailing colon'), check_end_colon, _('Source and translated do not both end with a colon or colon is not correctly spaced'))

-@plural_check
-def check_end_question(source, target, flags, language, unit):
-    if language.code.split('_')[0] in ['fr', 'br']:
-        if len(target) == 0:
+class EndColonCheck(Check):
+    '''
+    Check for final colon
+    '''
+    check_id = 'end_colon'
+    name = _('Trailing colon')
+    description = _('Source and translated do not both end with a colon or colon is not correctly spaced')
+
+    def check_single(self, source, target, flags, language, unit):
+        if self.is_language(language, ['fr', 'br']):
+            if len(target) == 0:
+                return False
+            if source[-1] == ':':
+                if target[-3:] not in [' : ', '&nbsp;: ', u' : ']:
+                    return True
            return False
-        if source[-1] == '?':
-            if target[-2:] not in [' ?', '&nbsp;?', u' ?']:
-                return True
-        return False
-    return check_chars(source, target, -1, [u'?', u'՞', u'؟', u'⸮', u'？', u'፧', u'꘏', u'⳺'])
+        if self.is_language(language, ['ja']):
+            # Japanese sentence might need to end with full stop
+            # in case it's used before list.
+            if source[-1] == ':':
+                return self.check_chars(source, target, -1, [u':', u'：', u'.', u'。'])
+            return False
+        return self.check_chars(source, target, -1, [u':', u'：'])

-CHECKS['end_question'] = (_('Trailing question'), check_end_question, _('Source and translated do not both end with a question mark or it is not correctly spaced'))

-@plural_check
-def check_end_exclamation(source, target, flags, language, unit):
-    if language.code.split('_')[0] in ['fr', 'br']:
-        if len(target) == 0:
+class EndQuestionCheck(Check):
+    '''
+    Check for final question mark
+    '''
+    check_id = 'end_question'
+    name = _('Trailing question')
+    description = _('Source and translated do not both end with a question mark or it is not correctly spaced')
+
+    def check_single(self, source, target, flags, language, unit):
+        if self.is_language(language, ['fr', 'br']):
+            if len(target) == 0:
+                return False
+            if source[-1] == '?':
+                if target[-2:] not in [' ?', '&nbsp;?', u' ?']:
+                    return True
            return False
-        if source[-1] == '!':
-            if target[-2:] not in [' !', '&nbsp;!', u' !']:
-                return True
-        return False
-    return check_chars(source, target, -1, [u'!', u'！', u'՜', u'᥄', u'႟', u'߹'])
+        return self.check_chars(source, target, -1, [u'?', u'՞', u'؟', u'⸮', u'？', u'፧', u'꘏', u'⳺'])

-CHECKS['end_exclamation'] = (_('Trailing exclamation'), check_end_exclamation, _('Source and translated do not both end with an exclamation mark or it is not correctly spaced'))
+class EndExclamationCheck(Check):
+    '''
+    Check for final exclamation mark
+    '''
+    check_id = 'end_exclamation'
+    name = _('Trailing exclamation')
+    description = _('Source and translated do not both end with an exclamation mark or it is not correctly spaced')
+
+    def check_single(self, source, target, flags, language, unit):
+        if self.is_language(language, ['fr', 'br']):
+            if len(target) == 0:
+                return False
+            if source[-1] == '!':
+                if target[-2:] not in [' !', '&nbsp;!', u' !']:
+                    return True
+            return False
+        return self.check_chars(source, target, -1, [u'!', u'！', u'՜', u'᥄', u'႟', u'߹'])

 # For now all format string checks use the generic implementation, but
 # they should be switched to language-specific ones
-def check_format_strings(source, target, regex):
+
+class PythonFormatCheck(Check):
    '''
-    Generic checker for format strings.
+    Check for Python format string
    '''
-    if len(target) == 0:
-        return False
-    src_matches = set([x[0] for x in regex.findall(source)])
-    tgt_matches = set([x[0] for x in regex.findall(target)])
-    # We ignore %% as this is really not relevant. However it needs
-    # to be matched to prevent handling %%s as %s.
-    if '%' in src_matches:
-        src_matches.remove('%')
-    if '%' in tgt_matches:
-        tgt_matches.remove('%')
+    check_id = 'python_format'
+    name = _('Python format')
+    description = _('Format string does not match source')

-    if src_matches != tgt_matches:
-        return True
+    def check_single(self, source, target, flags, language, unit):
+        if not 'python-format' in flags:
+            return False
+        return self.check_format_strings(source, target, PYTHON_PRINTF_MATCH)

-    return False
+class PHPFormatCheck(Check):
+    '''
+    Check for PHP format string
+    '''
+    check_id = 'php_format'
+    name = _('PHP format')
+    description = _('Format string does not match source')

-# Check for Python format string
+    def check_single(self, source, target, flags, language, unit):
+        if not 'php-format' in flags:
+            return False
+        return self.check_format_strings(source, target, PHP_PRINTF_MATCH)

-@plural_check
-def check_python_format(source, target, flags, language, unit):
-    if not 'python-format' in flags:
-        return False
-    return check_format_strings(source, target, PYTHON_PRINTF_MATCH)
+class CFormatCheck(Check):
+    '''
+    Check for C format string
+    '''
+    check_id = 'c_format'
+    name = _('C format')
+    description = _('Format string does not match source')

-CHECKS['python_format'] = (_('Python format'), check_python_format, _('Format string does not match source'))
+    def check_single(self, source, target, flags, language, unit):
+        if not 'c-format' in flags:
+            return False
+        return self.check_format_strings(source, target, C_PRINTF_MATCH)

-# Check for PHP format string

-@plural_check
-def check_php_format(source, target, flags, language, unit):
-    if not 'php-format' in flags:
-        return False
-    return check_format_strings(source, target, PHP_PRINTF_MATCH)
+class PluralsCheck(Check):
+    '''
+    Check for incomplete plural forms
+    '''
+    check_id = 'plurals'
+    name = _('Missing plurals')
+    description = _('Some plural forms are not translated')

-CHECKS['php_format'] = (_('PHP format'), check_php_format, _('Format string does not match source'))
+    def check(self, sources, targets, flags, language, unit):
+        # Is this plural?
+        if len(sources) == 1:
+            return False
+        # Is at least something translated?
+        if targets == len(targets) * ['']:
+            return False
+        # Check for empty translation
+        return ('' in targets)

-# Check for C format string
+class ConsistencyCheck(Check):
+    '''
+    Check for inconsistent translations
+    '''
+    check_id = 'inconsistent'
+    name = _('Inconsistent')
+    description = _('This message has more than one translation in this project')
+
+    def check(self, sources, targets, flags, language, unit):
+        from trans.models import Unit
+        related = Unit.objects.filter(
+            translation__language = language,
+            translation__subproject__project = unit.translation.subproject.project,
+            checksum = unit.checksum
+            ).exclude(
+            id = unit.id
+            )
+        for unit2 in related.iterator():
+            if unit2.target != unit.target:
+                return True

-@plural_check
-def check_c_format(source, target, flags, language, unit):
-    if not 'c-format' in flags:
        return False
-    return check_format_strings(source, target, C_PRINTF_MATCH)
-
-CHECKS['c_format'] = (_('C format'), check_c_format, _('Format string does not match source'))

-# Check for incomplete plural forms

-def check_plurals(sources, targets, flags, language, unit):
-    # Is this plural?
-    if len(sources) == 1:
-        return False
-    # Is at least something translated?
-    if targets == len(targets) * ['']:
-        return False
-    # Check for empty translation
-    return ('' in targets)
-
-CHECKS['plurals'] = (_('Missing plurals'), check_plurals, _('Some plural forms are not translated'))
-
-# Check for inconsistent translations
-
-def check_consistency(sources, targets, flags, language, unit):
-    from trans.models import Unit
-    related = Unit.objects.filter(
-        translation__language = language,
-        translation__subproject__project = unit.translation.subproject.project,
-        checksum = unit.checksum
-        ).exclude(
-        id = unit.id
-        )
-    for unit2 in related.iterator():
-        if unit2.target != unit.target:
-            return True
+# Initialize checks list
+CHECKS = {}
+for path in getattr(settings, 'CHECK_LIST', DEFAULT_CHECK_LIST):
+    i = path.rfind('.')
+    module, attr = path[:i], path[i+1:]
+    try:
+        mod = __import__(module, {}, {}, [attr])
+    except ImportError, e:
+        raise ImproperlyConfigured('Error importing translation check module %s: "%s"' % (module, e))
+    try:
+        cls = getattr(mod, attr)
+    except AttributeError:
+        raise ImproperlyConfigured('Module "%s" does not define a "%s" callable check' % (module, attr))
+    CHECKS[cls.check_id] = cls()

-    return False

-CHECKS['inconsistent'] = (_('Inconsistent'), check_consistency, _('This message has more than one translation in this project'))
--- a/trans/models.py
+++ b/trans/models.py
@@ -616,7 +616,7 @@ class Translation(models.Model):
        for check in trans.checks.CHECKS:
            cnt = self.unit_set.filter_type(check).count()
            if cnt > 0:
-                desc =  trans.checks.CHECKS[check][2] + (' (%d)' % cnt)
+                desc =  trans.checks.CHECKS[check].description + (' (%d)' % cnt)
                result.append((check, desc))
        return result

@@ -850,7 +850,7 @@ class Unit(models.Model):
        tgt = self.get_target_plurals()
        failing = []
        for check in trans.checks.CHECKS:
-            if trans.checks.CHECKS[check][1](src, tgt, self.flags, self.translation.language, self):
+            if trans.checks.CHECKS[check].check(src, tgt, self.flags, self.translation.language, self):
                failing.append(check)

        for check in self.checks():
@@ -901,7 +901,7 @@ class Suggestion(models.Model):
            unit.fuzzy = False
            unit.save_backend(request, False)

-CHECK_CHOICES = [(x, trans.checks.CHECKS[x][0]) for x in trans.checks.CHECKS]
+CHECK_CHOICES = [(x, trans.checks.CHECKS[x].name) for x in trans.checks.CHECKS]

 class Check(models.Model):
    checksum = models.CharField(max_length = 40, default = '', blank = True, db_index = True)
@@ -923,7 +923,7 @@ class Check(models.Model):
        )

    def get_description(self):
-        return trans.checks.CHECKS[self.check][2]
+        return trans.checks.CHECKS[self.check].description

    def get_doc_url(self):
        return 'http://weblate.readthedocs.org/en/weblate-%s/usage.html#check-%s' % (