Commit 52dea971 authored by Rémy Coutable

Merge branch 'bvl-i18n-validation' into 'master'

Updates to `.po` & `.pot` linting

Closes #46096 and #39292

See merge request gitlab-org/gitlab-ce!18779
parents f67eb5a1 17fc178c
......@@ -38,14 +38,17 @@ export default {
return this.modifiedFilesLength ? 'multi-file-modified' : '';
},
additionsTooltip() {
return sprintf(n__('1 %{type} addition', '%d %{type} additions', this.addedFilesLength), {
return sprintf(n__('1 %{type} addition', '%{count} %{type} additions', this.addedFilesLength), {
type: this.title.toLowerCase(),
count: this.addedFilesLength,
});
},
modifiedTooltip() {
return sprintf(
n__('1 %{type} modification', '%d %{type} modifications', this.modifiedFilesLength),
{ type: this.title.toLowerCase() },
n__('1 %{type} modification', '%{count} %{type} modifications', this.modifiedFilesLength), {
type: this.title.toLowerCase(),
count: this.modifiedFilesLength,
},
);
},
titleTooltip() {
......
......@@ -21,7 +21,8 @@ module Gitlab
'nl_NL' => 'Nederlands',
'tr_TR' => 'Türkçe',
'id_ID' => 'Bahasa Indonesia',
'fil_PH' => 'Filipino'
'fil_PH' => 'Filipino',
'pl_PL' => 'Polski'
}.freeze
def available_locales
......
......@@ -3,16 +3,25 @@ module Gitlab
class MetadataEntry
attr_reader :entry_data
# Avoid testing too many plurals if `nplurals` was incorrectly set.
# Based on info on https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
# which mentions special cases for numbers ending in 2 digits
MAX_FORMS_TO_TEST = 101
# Stores the given entry data so it can be read back through the
# `entry_data` reader.
#
# entry_data - the raw data for this entry; kept as-is, no copy is made.
def initialize(entry_data)
@entry_data = entry_data
end
def expected_plurals
def expected_forms
return nil unless plural_information
plural_information['nplurals'].to_i
end
# Number of plural forms worth exercising: the smaller of `expected_forms`
# and `MAX_FORMS_TO_TEST`. When `expected_forms` is nil only the cap remains.
# The value is computed once and then reused.
def forms_to_test
  return @forms_to_test if @forms_to_test

  candidates = [expected_forms, MAX_FORMS_TO_TEST].compact
  @forms_to_test = candidates.min
end
private
def plural_information
......
module Gitlab
module I18n
class PoLinter
include Gitlab::Utils::StrongMemoize
attr_reader :po_path, :translation_entries, :metadata_entry, :locale
VARIABLE_REGEX = /%{\w*}|%[a-z]/.freeze
......@@ -34,7 +36,7 @@ module Gitlab
end
@translation_entries = entries.map do |entry_data|
Gitlab::I18n::TranslationEntry.new(entry_data, metadata_entry.expected_plurals)
Gitlab::I18n::TranslationEntry.new(entry_data, metadata_entry.expected_forms)
end
nil
......@@ -48,7 +50,7 @@ module Gitlab
translation_entries.each do |entry|
errors_for_entry = validate_entry(entry)
errors[join_message(entry.msgid)] = errors_for_entry if errors_for_entry.any?
errors[entry.msgid] = errors_for_entry if errors_for_entry.any?
end
errors
......@@ -62,6 +64,7 @@ module Gitlab
validate_newlines(errors, entry)
validate_number_of_plurals(errors, entry)
validate_unescaped_chars(errors, entry)
validate_translation(errors, entry)
errors
end
......@@ -81,35 +84,39 @@ module Gitlab
end
def validate_number_of_plurals(errors, entry)
return unless metadata_entry&.expected_plurals
return unless metadata_entry&.expected_forms
return unless entry.translated?
if entry.has_plural? && entry.all_translations.size != metadata_entry.expected_plurals
errors << "should have #{metadata_entry.expected_plurals} "\
"#{'translations'.pluralize(metadata_entry.expected_plurals)}"
if entry.has_plural? && entry.all_translations.size != metadata_entry.expected_forms
errors << "should have #{metadata_entry.expected_forms} "\
"#{'translations'.pluralize(metadata_entry.expected_forms)}"
end
end
def validate_newlines(errors, entry)
if entry.msgid_contains_newlines?
if entry.msgid_has_multiple_lines?
errors << 'is defined over multiple lines, this breaks some tooling.'
end
if entry.plural_id_contains_newlines?
if entry.plural_id_has_multiple_lines?
errors << 'plural is defined over multiple lines, this breaks some tooling.'
end
if entry.translations_contain_newlines?
if entry.translations_have_multiple_lines?
errors << 'has translations defined over multiple lines, this breaks some tooling.'
end
end
def validate_variables(errors, entry)
if entry.has_singular_translation?
validate_variables_in_message(errors, entry.msgid, entry.msgid)
validate_variables_in_message(errors, entry.msgid, entry.singular_translation)
end
if entry.has_plural?
validate_variables_in_message(errors, entry.plural_id, entry.plural_id)
entry.plural_translations.each do |translation|
validate_variables_in_message(errors, entry.plural_id, translation)
end
......@@ -117,26 +124,19 @@ module Gitlab
end
def validate_variables_in_message(errors, message_id, message_translation)
message_id = join_message(message_id)
required_variables = message_id.scan(VARIABLE_REGEX)
validate_unnamed_variables(errors, required_variables)
validate_translation(errors, message_id, required_variables)
validate_variable_usage(errors, message_translation, required_variables)
end
def validate_translation(errors, message_id, used_variables)
variables = fill_in_variables(used_variables)
begin
def validate_translation(errors, entry)
Gitlab::I18n.with_locale(locale) do
translated = if message_id.include?('|')
FastGettext::Translation.s_(message_id)
if entry.has_plural?
translate_plural(entry)
else
FastGettext::Translation._(message_id)
translate_singular(entry)
end
translated % variables
end
# `sprintf` could raise an `ArgumentError` when invalid passing something
......@@ -151,7 +151,71 @@ module Gitlab
# `FastGettext::Translation` could raise subclasses of `ArgumentError`:
# `InvalidEncoding`, `IllegalSequence` & `InvalidCharacter`
rescue ArgumentError, TypeError, RuntimeError => e
errors << "Failure translating to #{locale} with #{variables}: #{e.message}"
errors << "Failure translating to #{locale}: #{e.message}"
end
# Looks up the translation for a non-plural entry and, when the msgid
# contains variables, interpolates placeholder values into the result.
#
# A msgid containing `|` is looked up through `s_`, any other msgid through `_`.
#
# Returns the interpolated translation, or nil when the msgid has no variables.
def translate_singular(entry)
  message_id = entry.msgid
  placeholders = message_id.scan(VARIABLE_REGEX)
  substitutions = fill_in_variables(placeholders)

  translated =
    if message_id.include?('|')
      FastGettext::Translation.s_(message_id)
    else
      FastGettext::Translation._(message_id)
    end

  return if placeholders.empty?

  translated % substitutions
end
# Translates a pluralized entry once for every number returned by
# `numbers_covering_all_plurals`, interpolating placeholder values for the
# variables found in the plural msgid.
#
# Returns one element per number: the interpolated translation, or nil when
# the plural msgid contains no variables.
def translate_plural(entry)
  placeholders = entry.plural_id.scan(VARIABLE_REGEX)
  substitutions = fill_in_variables(placeholders)
  interpolate = !placeholders.empty?

  numbers_covering_all_plurals.map do |count|
    text = FastGettext::Translation.n_(entry.msgid, entry.plural_id, count)
    interpolate ? text % substitutions : nil
  end
end
# Memoized result of `calculate_numbers_covering_all_plurals`; the
# calculation runs only on the first call.
def numbers_covering_all_plurals
  return @numbers_covering_all_plurals if @numbers_covering_all_plurals

  @numbers_covering_all_plurals = calculate_numbers_covering_all_plurals
end
# Walks the numbers 0, 1, 2, ... and keeps the first number that maps to each
# distinct plural-form index (as reported by `index_for_pluralization`).
#
# Stops as soon as `metadata_entry.forms_to_test` distinct indexes were found,
# or after `MAX_FORMS_TO_TEST` numbers were tried, whichever comes first.
#
# Returns the kept numbers in ascending order.
def calculate_numbers_covering_all_plurals
  seen_indexes = []
  upper_bound = Gitlab::I18n::MetadataEntry::MAX_FORMS_TO_TEST

  (0...upper_bound).each_with_object([]) do |number, numbers|
    # Enough distinct forms were discovered, no need to look further.
    break numbers if seen_indexes.size >= metadata_entry.forms_to_test

    form_index = index_for_pluralization(number)
    next if seen_indexes.include?(form_index)

    seen_indexes << form_index
    numbers << number
  end
end
# Returns the plural-form index used for `counter` in the linter's locale.
#
# The pluralization rule is evaluated with the locale switched to `locale`.
# It can answer with a boolean (`false` stands for form 0, `true` for form 1)
# or directly with the integer index of the form; anything that is not a
# boolean is returned untouched.
def index_for_pluralization(counter)
  rule_result = Gitlab::I18n.with_locale(locale) { FastGettext.pluralisation_rule.call(counter) }

  { false => 0, true => 1 }.fetch(rule_result) { rule_result }
end
......@@ -172,14 +236,18 @@ module Gitlab
end
def validate_unnamed_variables(errors, variables)
if variables.size > 1 && variables.any? { |variable_name| unnamed_variable?(variable_name) }
unnamed_variables, named_variables = variables.partition { |name| unnamed_variable?(name) }
if unnamed_variables.any? && named_variables.any?
errors << 'is combining named variables with unnamed variables'
end
if unnamed_variables.size > 1
errors << 'is combining multiple unnamed variables'
end
end
def validate_variable_usage(errors, translation, required_variables)
translation = join_message(translation)
# We don't need to validate when the message is empty.
# In this case we fall back to the default, which has all the
# required variables.
......@@ -205,10 +273,6 @@ module Gitlab
# Appends an error naming the entry's flag when the entry carries one.
#
# errors - collection the message is appended to (mutated in place)
# entry  - object responding to `flag`
def validate_flags(errors, entry)
  flag = entry.flag
  return unless flag

  errors << "is marked #{flag}"
end
# Concatenates a message that may be given either as a single value or as a
# list of parts into one string.
def join_message(message)
  parts = Array(message)
  parts.join
end
end
end
end
......@@ -11,11 +11,11 @@ module Gitlab
end
def msgid
entry_data[:msgid]
@msgid ||= Array(entry_data[:msgid]).join
end
def plural_id
entry_data[:msgid_plural]
@plural_id ||= Array(entry_data[:msgid_plural]).join
end
def has_plural?
......@@ -23,12 +23,11 @@ module Gitlab
end
def singular_translation
all_translations.first if has_singular_translation?
all_translations.first.to_s if has_singular_translation?
end
def all_translations
@all_translations ||= entry_data.fetch_values(*translation_keys)
.reject(&:empty?)
@all_translations ||= translation_entries.map { |translation| Array(translation).join }
end
def translated?
......@@ -54,16 +53,16 @@ module Gitlab
nplurals > 1 || !has_plural?
end
def msgid_contains_newlines?
msgid.is_a?(Array)
def msgid_has_multiple_lines?
entry_data[:msgid].is_a?(Array)
end
def plural_id_contains_newlines?
plural_id.is_a?(Array)
def plural_id_has_multiple_lines?
entry_data[:msgid_plural].is_a?(Array)
end
def translations_contain_newlines?
all_translations.any? { |translation| translation.is_a?(Array) }
def translations_have_multiple_lines?
translation_entries.any? { |translation| translation.is_a?(Array) }
end
def msgid_contains_unescaped_chars?
......@@ -84,6 +83,11 @@ module Gitlab
private
# Raw values stored under the translation keys of `entry_data`, with empty
# values dropped. Computed once and cached.
def translation_entries
  return @translation_entries if @translation_entries

  raw_values = entry_data.fetch_values(*translation_keys)
  @translation_entries = raw_values.reject { |value| value.empty? }
end
# Keys of `entry_data` that hold translations: `msgstr` itself or an indexed
# `msgstr[N]`. Computed once and cached.
def translation_keys
  return @translation_keys if @translation_keys

  msgstr_pattern = /\Amsgstr(\[\d+\])?\z/
  @translation_keys = entry_data.keys.select { |key| msgstr_pattern.match(key.to_s) }
end
......
......@@ -50,6 +50,32 @@ namespace :gettext do
end
end
# Regenerates the translation template through `gettext:find` and raises when
# `locale/gitlab.pot` shows up as changed in `git diff`, meaning the committed
# template is out of date.
task :updated_check do
# Removing all pre-translated files speeds up `gettext:find` as the
# files don't need to be merged.
`rm locale/*/gitlab.po`
# `gettext:find` reports the temp files it touches on `stderr`, which would
# cause `static-analysis` to report failures. We can ignore these.
silence_stream(STDERR) { Rake::Task['gettext:find'].invoke }
# Paths git reports as modified after regenerating, one per output line.
changed_files = `git diff --name-only`.lines.map(&:strip)
# Reset the locale folder for potential next tasks; this also restores the
# `.po` files removed above.
`git checkout -- locale`
# The list was captured before the reset, so it still reflects the
# regenerated state.
if changed_files.include?('locale/gitlab.pot')
raise <<~MSG
Newly translated strings found, please add them to `gitlab.pot` by running:
bundle exec rake gettext:find; git checkout -- locale/*/gitlab.po;
Then commit and push the resulting changes to `locale/gitlab.pot`.
MSG
end
end
def report_errors_for_file(file, errors_for_file)
puts "Errors in `#{file}`:"
......
......@@ -27,6 +27,7 @@ unless Rails.env.production?
scss_lint
flay
gettext:lint
gettext:updated_check
lint:static_verification
].each do |task|
pid = Process.fork do
......
......@@ -8,8 +8,8 @@ msgid ""
msgstr ""
"Project-Id-Version: gitlab 1.0.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2018-06-12 18:57+1000\n"
"PO-Revision-Date: 2018-06-12 18:57+1000\n"
"POT-Creation-Date: 2018-06-13 14:05+0200\n"
"PO-Revision-Date: 2018-06-13 14:05+0200\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"Language: \n"
......@@ -133,12 +133,12 @@ msgid "- show less"
msgstr ""
msgid "1 %{type} addition"
msgid_plural "%d %{type} additions"
msgid_plural "%{count} %{type} additions"
msgstr[0] ""
msgstr[1] ""
msgid "1 %{type} modification"
msgid_plural "%d %{type} modifications"
msgid_plural "%{count} %{type} modifications"
msgstr[0] ""
msgstr[1] ""
......@@ -2252,10 +2252,10 @@ msgstr ""
msgid "Gitaly"
msgstr ""
msgid "Gitaly|Address"
msgid "Gitaly Servers"
msgstr ""
msgid "Gitaly Servers"
msgid "Gitaly|Address"
msgstr ""
msgid "Go Back"
......@@ -2419,6 +2419,15 @@ msgstr ""
msgid "If your HTTP repository is not publicly accessible, add authentication information to the URL: <code>https://username:password@gitlab.company.com/group/project.git</code>."
msgstr ""
msgid "ImageDiffViewer|2-up"
msgstr ""
msgid "ImageDiffViewer|Onion skin"
msgstr ""
msgid "ImageDiffViewer|Swipe"
msgstr ""
msgid "Import"
msgstr ""
......
require 'spec_helper'
describe Gitlab::I18n::MetadataEntry do
describe '#expected_plurals' do
describe '#expected_forms' do
it 'returns the number of plurals' do
data = {
msgid: "",
......@@ -22,7 +22,7 @@ describe Gitlab::I18n::MetadataEntry do
}
entry = described_class.new(data)
expect(entry.expected_plurals).to eq(2)
expect(entry.expected_forms).to eq(2)
end
it 'returns 0 for the POT-metadata' do
......@@ -45,7 +45,7 @@ describe Gitlab::I18n::MetadataEntry do
}
entry = described_class.new(data)
expect(entry.expected_plurals).to eq(0)
expect(entry.expected_forms).to eq(0)
end
end
end
require 'spec_helper'
require 'simple_po_parser'
# Disabling this cop to allow for multi-language examples in comments
# rubocop:disable Style/AsciiComments
describe Gitlab::I18n::PoLinter do
let(:linter) { described_class.new(po_path) }
let(:po_path) { 'spec/fixtures/valid.po' }
# Builds a `Gitlab::I18n::TranslationEntry` for the given message and stubs
# `FastGettext::Translation` so lookups of that message return the supplied
# translation(s).
#
# msgid       - the message id
# translation - the singular translation (also used as plural form 0)
# plural_id   - the plural message id; when given, the entry is pluralized
# plurals     - translations for plural forms 1..n
#
# For a pluralized entry, `n_` is stubbed once per form, using the form's
# index as the count argument.
def fake_translation(msgid:, translation:, plural_id: nil, plurals: [])
  entry_data = { msgid: msgid, msgid_plural: plural_id }

  if plural_id
    all_forms = [translation, *plurals]

    all_forms.each_with_index do |form, form_index|
      allow(FastGettext::Translation).to receive(:n_).with(msgid, plural_id, form_index).and_return(form)
      entry_data["msgstr[#{form_index}]"] = form
    end
  else
    allow(FastGettext::Translation).to receive(:_).with(msgid).and_return(translation)
    entry_data[:msgstr] = translation
  end

  Gitlab::I18n::TranslationEntry.new(entry_data, plurals.size + 1)
end
describe '#errors' do
it 'only calls validation once' do
expect(linter).to receive(:validate_po).once.and_call_original
......@@ -155,9 +176,8 @@ describe Gitlab::I18n::PoLinter do
describe '#validate_entries' do
it 'keeps track of errors for entries' do
fake_invalid_entry = Gitlab::I18n::TranslationEntry.new(
{ msgid: "Hello %{world}", msgstr: "Bonjour %{monde}" }, 2
)
fake_invalid_entry = fake_translation(msgid: "Hello %{world}",
translation: "Bonjour %{monde}")
allow(linter).to receive(:translation_entries) { [fake_invalid_entry] }
expect(linter).to receive(:validate_entry)
......@@ -177,6 +197,7 @@ describe Gitlab::I18n::PoLinter do
expect(linter).to receive(:validate_newlines).with([], fake_entry)
expect(linter).to receive(:validate_number_of_plurals).with([], fake_entry)
expect(linter).to receive(:validate_unescaped_chars).with([], fake_entry)
expect(linter).to receive(:validate_translation).with([], fake_entry)
linter.validate_entry(fake_entry)
end
......@@ -185,7 +206,7 @@ describe Gitlab::I18n::PoLinter do
describe '#validate_number_of_plurals' do
it 'validates when there are an incorrect number of translations' do
fake_metadata = double
allow(fake_metadata).to receive(:expected_plurals).and_return(2)
allow(fake_metadata).to receive(:expected_forms).and_return(2)
allow(linter).to receive(:metadata_entry).and_return(fake_metadata)
fake_entry = Gitlab::I18n::TranslationEntry.new(
......@@ -201,13 +222,16 @@ describe Gitlab::I18n::PoLinter do
end
describe '#validate_variables' do
it 'validates both signular and plural in a pluralized string when the entry has a singular' do
pluralized_entry = Gitlab::I18n::TranslationEntry.new(
{ msgid: 'Hello %{world}',
msgid_plural: 'Hello all %{world}',
'msgstr[0]' => 'Bonjour %{world}',
'msgstr[1]' => 'Bonjour tous %{world}' },
2
before do
allow(linter).to receive(:validate_variables_in_message).and_call_original
end
it 'validates both singular and plural in a pluralized string when the entry has a singular' do
pluralized_entry = fake_translation(
msgid: 'Hello %{world}',
translation: 'Bonjour %{world}',
plural_id: 'Hello all %{world}',
plurals: ['Bonjour tous %{world}']
)
expect(linter).to receive(:validate_variables_in_message)
......@@ -221,11 +245,10 @@ describe Gitlab::I18n::PoLinter do
end
it 'only validates plural when there is no separate singular' do
pluralized_entry = Gitlab::I18n::TranslationEntry.new(
{ msgid: 'Hello %{world}',
msgid_plural: 'Hello all %{world}',
'msgstr[0]' => 'Bonjour %{world}' },
1
pluralized_entry = fake_translation(
msgid: 'Hello %{world}',
translation: 'Bonjour %{world}',
plural_id: 'Hello all %{world}'
)
expect(linter).to receive(:validate_variables_in_message)
......@@ -235,37 +258,65 @@ describe Gitlab::I18n::PoLinter do
end
it 'validates the message variables' do
entry = Gitlab::I18n::TranslationEntry.new(
{ msgid: 'Hello', msgstr: 'Bonjour' },
2
)
entry = fake_translation(msgid: 'Hello', translation: 'Bonjour')
expect(linter).to receive(:validate_variables_in_message)
.with([], 'Hello', 'Bonjour')
linter.validate_variables([], entry)
end
it 'validates variable usage in message ids' do
entry = fake_translation(
msgid: 'Hello %{world}',
translation: 'Bonjour %{world}',
plural_id: 'Hello all %{world}',
plurals: ['Bonjour tous %{world}']
)
expect(linter).to receive(:validate_variables_in_message)
.with([], 'Hello %{world}', 'Hello %{world}')
.and_call_original
expect(linter).to receive(:validate_variables_in_message)
.with([], 'Hello all %{world}', 'Hello all %{world}')
.and_call_original
linter.validate_variables([], entry)
end
end
describe '#validate_variables_in_message' do
it 'detects when a variables are used incorrectly' do
errors = []
expected_errors = ['<hello %{world} %d> is missing: [%{hello}]',
'<hello %{world} %d> is using unknown variables: [%{world}]',
'is combining multiple unnamed variables']
expected_errors = ['<%d hello %{world} %s> is missing: [%{hello}]',
'<%d hello %{world} %s> is using unknown variables: [%{world}]',
'is combining multiple unnamed variables',
'is combining named variables with unnamed variables']
linter.validate_variables_in_message(errors, '%{hello} world %d', 'hello %{world} %d')
linter.validate_variables_in_message(errors, '%d %{hello} world %s', '%d hello %{world} %s')
expect(errors).to include(*expected_errors)
end
it 'does not allow combining 1 `%d` unnamed variable with named variables' do
errors = []
linter.validate_variables_in_message(errors,
'%{type} detected %d vulnerability',
'%{type} detecteerde %d kwetsbaarheid')
expect(errors).not_to be_empty
end
end
describe '#validate_translation' do
let(:entry) { fake_translation(msgid: 'Hello %{world}', translation: 'Bonjour %{world}') }
it 'succeeds with valid variables' do
errors = []
linter.validate_translation(errors, 'Hello %{world}', ['%{world}'])
linter.validate_translation(errors, entry)
expect(errors).to be_empty
end
......@@ -275,43 +326,80 @@ describe Gitlab::I18n::PoLinter do
expect(FastGettext::Translation).to receive(:_) { raise 'broken' }
linter.validate_translation(errors, 'Hello', [])
linter.validate_translation(errors, entry)
expect(errors).to include('Failure translating to en with []: broken')
expect(errors).to include('Failure translating to en: broken')
end
it 'adds an error message when translating fails when translating with context' do
entry = fake_translation(msgid: 'Tests|Hello', translation: 'broken')
errors = []
expect(FastGettext::Translation).to receive(:s_) { raise 'broken' }
linter.validate_translation(errors, 'Tests|Hello', [])
linter.validate_translation(errors, entry)
expect(errors).to include('Failure translating to en with []: broken')
expect(errors).to include('Failure translating to en: broken')
end
it "adds an error when trying to translate with incorrect variables when using unnamed variables" do
entry = fake_translation(msgid: 'Hello %s', translation: 'Hello %d')
errors = []
linter.validate_translation(errors, 'Hello %d', ['%s'])
linter.validate_translation(errors, entry)
expect(errors.first).to start_with("Failure translating to en with")
expect(errors.first).to start_with("Failure translating to en")
end
it "adds an error when trying to translate with named variables when unnamed variables are expected" do
entry = fake_translation(msgid: 'Hello %s', translation: 'Hello %{thing}')
errors = []
linter.validate_translation(errors, 'Hello %d', ['%{world}'])
linter.validate_translation(errors, entry)
expect(errors.first).to start_with("Failure translating to en with")
expect(errors.first).to start_with("Failure translating to en")
end
it 'adds an error when translated with incorrect variables using named variables' do
errors = []
it 'tests translation for all given forms' do
# Fake a language that has 3 forms to translate
fake_metadata = double
allow(fake_metadata).to receive(:forms_to_test).and_return(3)
allow(linter).to receive(:metadata_entry).and_return(fake_metadata)
entry = fake_translation(
msgid: '%d exception',
translation: '%d uitzondering',
plural_id: '%d exceptions',
plurals: ['%d uitzonderingen', '%d uitzonderingetjes']
)
# Make each count use a different index
allow(linter).to receive(:index_for_pluralization).with(0).and_return(0)
allow(linter).to receive(:index_for_pluralization).with(1).and_return(1)
allow(linter).to receive(:index_for_pluralization).with(2).and_return(2)
expect(FastGettext::Translation).to receive(:n_).with('%d exception', '%d exceptions', 0).and_call_original
expect(FastGettext::Translation).to receive(:n_).with('%d exception', '%d exceptions', 1).and_call_original
expect(FastGettext::Translation).to receive(:n_).with('%d exception', '%d exceptions', 2).and_call_original
linter.validate_translation([], entry)
end
end
describe '#numbers_covering_all_plurals' do
it 'can correctly find all required numbers to translate to Polish' do
# Polish used as an example with 3 different forms:
# 0, all plurals except the ones ending in 2,3,4: Kotów
# 1: Kot
# 2-3-4: Koty
# So translating with [0, 1, 2] will give us all different possibilities
fake_metadata = double
allow(fake_metadata).to receive(:forms_to_test).and_return(4)
allow(linter).to receive(:metadata_entry).and_return(fake_metadata)
allow(linter).to receive(:locale).and_return('pl_PL')
linter.validate_translation(errors, 'Hello %{thing}', ['%d'])
numbers = linter.numbers_covering_all_plurals
expect(errors.first).to start_with("Failure translating to en with")
expect(numbers).to contain_exactly(0, 1, 2)
end
end
......@@ -336,3 +424,4 @@ describe Gitlab::I18n::PoLinter do
end
end
end
# rubocop:enable Style/AsciiComments
......@@ -109,7 +109,7 @@ describe Gitlab::I18n::TranslationEntry do
data = { msgid: %w(hello world) }
entry = described_class.new(data, 2)
expect(entry.msgid_contains_newlines?).to be_truthy
expect(entry.msgid_has_multiple_lines?).to be_truthy
end
end
......@@ -118,7 +118,7 @@ describe Gitlab::I18n::TranslationEntry do
data = { msgid_plural: %w(hello world) }
entry = described_class.new(data, 2)
expect(entry.plural_id_contains_newlines?).to be_truthy
expect(entry.plural_id_has_multiple_lines?).to be_truthy
end
end
......@@ -127,7 +127,7 @@ describe Gitlab::I18n::TranslationEntry do
data = { msgstr: %w(hello world) }
entry = described_class.new(data, 2)
expect(entry.translations_contain_newlines?).to be_truthy
expect(entry.translations_have_multiple_lines?).to be_truthy
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment