Commit 1728dc14 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'jh-lint_html_translations' into 'master'

Disallow HTML in translatable strings with linter

See merge request gitlab-org/gitlab!37432
parents 52d28ffc 421ac63d
......@@ -296,6 +296,74 @@ Namespaces should be PascalCase.
Note: The namespace should be removed from the translation. See the [translation
guidelines for more details](translation.md#namespaced-strings).
### HTML
We no longer include HTML directly in the strings that are submitted for translation. This is for a couple of reasons:
1. It introduces a chance for the translated string to accidentally include invalid HTML.
1. It introduces a security risk where translated strings become an attack vector for XSS, as noted by the
[Open Web Application Security Project (OWASP)](https://owasp.org/www-community/attacks/xss/).
To include formatting in the translated string, we can do the following:
- In Ruby/HAML:
```ruby
html_escape(_('Some %{strongOpen}bold%{strongClose} text.')) % { strongOpen: '<strong>'.html_safe, strongClose: '</strong>'.html_safe }
# => 'Some <strong>bold</strong> text.'
```
- In JavaScript:
```javascript
sprintf(__('Some %{strongOpen}bold%{strongClose} text.'), { strongOpen: '<strong>', strongClose: '</strong>'}, false);
// => 'Some <strong>bold</strong> text.'
```
- In Vue
See the section on [interpolation](#interpolation).
When [this translation helper issue](https://gitlab.com/gitlab-org/gitlab/-/issues/217935) is complete, we'll update the
process of including formatting in translated strings.
#### Including Angle Brackets
If a string contains angles brackets (`<`/`>`) that are not used for HTML, it will still be flagged by the
`rake gettext:lint` linter.
To avoid this error, use the applicable HTML entity code (`&lt;` or `&gt;`) instead:
- In Ruby/HAML:
```ruby
html_escape_once(_('In &lt; 1 hour')).html_safe
# => 'In < 1 hour'
```
- In JavaScript:
```javascript
import sanitize from 'sanitize-html';
const i18n = { LESS_THAN_ONE_HOUR: sanitize(__('In &lt; 1 hours'), { allowedTags: [] }) };
// ... using the string
element.innerHTML = i18n.LESS_THAN_ONE_HOUR;
// => 'In < 1 hour'
```
- In Vue:
```vue
<gl-sprintf :message="s__('In &lt; 1 hours')"/>
// => 'In < 1 hour'
```
### Dates / times
- In JavaScript:
......@@ -555,6 +623,7 @@ The linter will take the following into account:
- There should be no variables used in a translation that aren't in the
message ID
- Errors during translation.
- Presence of angle brackets (`<` or `>`)
The errors are grouped per file, and per message ID:
......
......@@ -25,14 +25,15 @@ suggesting to automate this process. Disapproving will exclude the
invalid translation, the merge request will be updated within a few
minutes.
If the translation has failed validation due to angle brackets `<` or `>`
it should be disapproved on CrowdIn as our strings should be
using [variables](externalization.md#html) for HTML instead.
It might be handy to pause the integration on the CrowdIn side for a
little while so translations don't keep coming. This can be done by
clicking `Pause sync` on the [CrowdIn integration settings
page](https://translate.gitlab.com/project/gitlab-ee/settings#integration).
When all failures are resolved, the translations need to be double
checked once more as discussed in [confidential issue](../../user/project/issues/confidential_issues.md) `https://gitlab.com/gitlab-org/gitlab/-/issues/19485`.
## Merging translations
When all translations are found good and pipelines pass the
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -5,13 +5,14 @@ module Gitlab
class PoLinter
include Gitlab::Utils::StrongMemoize
attr_reader :po_path, :translation_entries, :metadata_entry, :locale
attr_reader :po_path, :translation_entries, :metadata_entry, :locale, :html_todolist
VARIABLE_REGEX = /%{\w*}|%[a-z]/.freeze
def initialize(po_path, locale = I18n.locale.to_s)
def initialize(po_path:, html_todolist:, locale: I18n.locale.to_s)
@po_path = po_path
@locale = locale
@html_todolist = html_todolist
end
def errors
......@@ -19,7 +20,7 @@ module Gitlab
end
def validate_po
if parse_error = parse_po
if (parse_error = parse_po)
return 'PO-syntax errors' => [parse_error]
end
......@@ -38,7 +39,11 @@ module Gitlab
end
@translation_entries = entries.map do |entry_data|
Gitlab::I18n::TranslationEntry.new(entry_data, metadata_entry.expected_forms)
Gitlab::I18n::TranslationEntry.new(
entry_data: entry_data,
nplurals: metadata_entry.expected_forms,
html_allowed: html_todolist.fetch(entry_data[:msgid], false)
)
end
nil
......@@ -66,6 +71,7 @@ module Gitlab
validate_newlines(errors, entry)
validate_number_of_plurals(errors, entry)
validate_unescaped_chars(errors, entry)
validate_html(errors, entry)
validate_translation(errors, entry)
errors
......@@ -85,6 +91,23 @@ module Gitlab
end
end
def validate_html(errors, entry)
common_message = 'contains < or >. Use variables to include HTML in the string, or the &lt; and &gt; codes ' \
'for the symbols. For more info see: https://docs.gitlab.com/ee/development/i18n/externalization.html#html'
if entry.msgid_contains_potential_html? && !entry.msgid_html_allowed?
errors << common_message
end
if entry.plural_id_contains_potential_html? && !entry.plural_id_html_allowed?
errors << 'plural id ' + common_message
end
if entry.translations_contain_potential_html? && !entry.translations_html_allowed?
errors << 'translation ' + common_message
end
end
def validate_number_of_plurals(errors, entry)
return unless metadata_entry&.expected_forms
return unless entry.translated?
......
......@@ -4,12 +4,14 @@ module Gitlab
module I18n
class TranslationEntry
PERCENT_REGEX = /(?:^|[^%])%(?!{\w*}|[a-z%])/.freeze
ANGLE_BRACKET_REGEX = /[<>]/.freeze
attr_reader :nplurals, :entry_data
attr_reader :nplurals, :entry_data, :html_allowed
def initialize(entry_data, nplurals)
def initialize(entry_data:, nplurals:, html_allowed:)
@entry_data = entry_data
@nplurals = nplurals
@html_allowed = html_allowed
end
def msgid
......@@ -83,8 +85,38 @@ module Gitlab
string =~ PERCENT_REGEX
end
def msgid_contains_potential_html?
contains_angle_brackets?(msgid)
end
def plural_id_contains_potential_html?
contains_angle_brackets?(plural_id)
end
def translations_contain_potential_html?
all_translations.any? { |translation| contains_angle_brackets?(translation) }
end
def msgid_html_allowed?
html_allowed.present?
end
def plural_id_html_allowed?
html_allowed.present? && html_allowed['plural_id'] == plural_id
end
def translations_html_allowed?
html_allowed.present? && all_translations.all? do |translation|
html_allowed['translations'].include?(translation)
end
end
private
def contains_angle_brackets?(string)
string =~ ANGLE_BRACKET_REGEX
end
def translation_entries
@translation_entries ||= entry_data.fetch_values(*translation_keys)
.reject(&:empty?)
......
......@@ -12,6 +12,14 @@ namespace :gettext do
)
end
# Disallow HTML from translatable strings
# See: https://docs.gitlab.com/ee/development/i18n/externalization.html#html
def html_todolist
return @html_todolist if defined?(@html_todolist)
@html_todolist = YAML.load_file(Rails.root.join('lib/gitlab/i18n/html_todo.yml'))
end
task :compile do
# See: https://gitlab.com/gitlab-org/gitlab-foss/issues/33014#note_31218998
FileUtils.touch(File.join(Rails.root, 'locale/gitlab.pot'))
......@@ -54,11 +62,11 @@ namespace :gettext do
linters = files.map do |file|
locale = File.basename(File.dirname(file))
Gitlab::I18n::PoLinter.new(file, locale)
Gitlab::I18n::PoLinter.new(po_path: file, html_todolist: html_todolist, locale: locale)
end
pot_file = Rails.root.join('locale/gitlab.pot')
linters.unshift(Gitlab::I18n::PoLinter.new(pot_file))
linters.unshift(Gitlab::I18n::PoLinter.new(po_path: pot_file, html_todolist: html_todolist))
failed_linters = linters.select { |linter| linter.errors.any? }
......
# Spanish translations for gitlab package.
# Copyright (C) 2017 THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the gitlab package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2017.
#
msgid ""
msgstr ""
"Project-Id-Version: gitlab 1.0.0\n"
"Report-Msgid-Bugs-To: \n"
"PO-Revision-Date: 2017-07-13 12:10-0500\n"
"Language-Team: Spanish\n"
"Language: es\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"Last-Translator: Translator <test@example.com>\n"
"X-Generator: Poedit 2.0.2\n"
msgid "String with some <strong>emphasis</strong>"
msgid_plural "String with lots of <strong>emphasis</strong>"
msgstr[0] "Translated string with some <strong>emphasis</strong>"
msgstr[1] "Translated string with lots of <strong>emphasis</strong>"
msgid "String with a legitimate < use"
msgid_plural "String with lots of < > uses"
msgstr[0] "Translated string with a legitimate < use"
msgstr[1] "Translated string with lots of < > uses"
......@@ -73,9 +73,6 @@ msgid_plural "Branches"
msgstr[0] "Rama"
msgstr[1] "Ramas"
msgid "Branch <strong>%{branch_name}</strong> was created. To set up auto deploy, choose a GitLab CI Yaml template and commit your changes. %{link_to_autodeploy_doc}"
msgstr "La rama <strong>%{branch_name}</strong> fue creada. Para configurar el auto despliegue, escoge una plantilla Yaml para GitLab CI y envía tus cambios. %{link_to_autodeploy_doc}"
msgid "BranchSwitcherPlaceholder|Search branches"
msgstr "Buscar ramas"
......
......@@ -6,7 +6,7 @@ require 'simple_po_parser'
# Disabling this cop to allow for multi-language examples in comments
# rubocop:disable Style/AsciiComments
RSpec.describe Gitlab::I18n::PoLinter do
let(:linter) { described_class.new(po_path) }
let(:linter) { described_class.new(po_path: po_path, html_todolist: {}) }
let(:po_path) { 'spec/fixtures/valid.po' }
def fake_translation(msgid:, translation:, plural_id: nil, plurals: [])
......@@ -23,8 +23,9 @@ RSpec.describe Gitlab::I18n::PoLinter do
end
Gitlab::I18n::TranslationEntry.new(
data,
plurals.size + 1
entry_data: data,
nplurals: plurals.size + 1,
html_allowed: nil
)
end
......@@ -145,6 +146,67 @@ RSpec.describe Gitlab::I18n::PoLinter do
expect(errors[message_id]).to include(expected_error)
end
end
context 'when an entry contains html' do
let(:po_path) { 'spec/fixtures/potential_html.po' }
it 'presents an error for each component containing angle brackets' do
message_id = 'String with some <strong>emphasis</strong>'
expect(errors[message_id]).to match_array [
a_string_starting_with('contains < or >.'),
a_string_starting_with('plural id contains < or >.'),
a_string_starting_with('translation contains < or >.')
]
end
end
context 'when an entry contains html on the todolist' do
subject(:linter) { described_class.new(po_path: po_path, html_todolist: todolist) }
let(:po_path) { 'spec/fixtures/potential_html.po' }
let(:todolist) do
{
'String with a legitimate < use' => {
'plural_id' => 'String with lots of < > uses',
'translations' => [
'Translated string with a legitimate < use',
'Translated string with lots of < > uses'
]
}
}
end
it 'does not present an error' do
message_id = 'String with a legitimate < use'
expect(errors[message_id]).to be_nil
end
end
context 'when an entry on the html todolist has changed' do
subject(:linter) { described_class.new(po_path: po_path, html_todolist: todolist) }
let(:po_path) { 'spec/fixtures/potential_html.po' }
let(:todolist) do
{
'String with a legitimate < use' => {
'plural_id' => 'String with lots of < > uses',
'translations' => [
'Translated string with a different legitimate < use',
'Translated string with lots of < > uses'
]
}
}
end
it 'presents an error for the changed component' do
message_id = 'String with a legitimate < use'
expect(errors[message_id])
.to include a_string_starting_with('translation contains < or >.')
end
end
end
describe '#parse_po' do
......@@ -200,6 +262,7 @@ RSpec.describe Gitlab::I18n::PoLinter do
expect(linter).to receive(:validate_number_of_plurals).with([], fake_entry)
expect(linter).to receive(:validate_unescaped_chars).with([], fake_entry)
expect(linter).to receive(:validate_translation).with([], fake_entry)
expect(linter).to receive(:validate_html).with([], fake_entry)
linter.validate_entry(fake_entry)
end
......@@ -212,8 +275,9 @@ RSpec.describe Gitlab::I18n::PoLinter do
allow(linter).to receive(:metadata_entry).and_return(fake_metadata)
fake_entry = Gitlab::I18n::TranslationEntry.new(
{ msgid: 'the singular', msgid_plural: 'the plural', 'msgstr[0]' => 'the singular' },
2
entry_data: { msgid: 'the singular', msgid_plural: 'the plural', 'msgstr[0]' => 'the singular' },
nplurals: 2,
html_allowed: nil
)
errors = []
......
......@@ -6,7 +6,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
describe '#singular_translation' do
it 'returns the normal `msgstr` for translations without plural' do
data = { msgid: 'Hello world', msgstr: 'Bonjour monde' }
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry.singular_translation).to eq('Bonjour monde')
end
......@@ -18,7 +18,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
'msgstr[0]' => 'Bonjour monde',
'msgstr[1]' => 'Bonjour mondes'
}
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry.singular_translation).to eq('Bonjour monde')
end
......@@ -27,7 +27,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
describe '#all_translations' do
it 'returns all translations for singular translations' do
data = { msgid: 'Hello world', msgstr: 'Bonjour monde' }
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry.all_translations).to eq(['Bonjour monde'])
end
......@@ -39,7 +39,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
'msgstr[0]' => 'Bonjour monde',
'msgstr[1]' => 'Bonjour mondes'
}
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry.all_translations).to eq(['Bonjour monde', 'Bonjour mondes'])
end
......@@ -52,7 +52,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
msgid_plural: 'Hello worlds',
'msgstr[0]' => 'Bonjour monde'
}
entry = described_class.new(data, 1)
entry = described_class.new(entry_data: data, nplurals: 1, html_allowed: nil)
expect(entry.plural_translations).to eq(['Bonjour monde'])
end
......@@ -65,7 +65,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
'msgstr[1]' => 'Bonjour mondes',
'msgstr[2]' => 'Bonjour tous les mondes'
}
entry = described_class.new(data, 3)
entry = described_class.new(entry_data: data, nplurals: 3, html_allowed: nil)
expect(entry.plural_translations).to eq(['Bonjour mondes', 'Bonjour tous les mondes'])
end
......@@ -77,7 +77,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
msgid: 'hello world',
msgstr: 'hello'
}
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry).to have_singular_translation
end
......@@ -89,7 +89,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
"msgstr[0]" => 'hello world',
"msgstr[1]" => 'hello worlds'
}
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry).to have_singular_translation
end
......@@ -100,7 +100,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
msgid_plural: 'hello worlds',
"msgstr[0]" => 'hello worlds'
}
entry = described_class.new(data, 1)
entry = described_class.new(entry_data: data, nplurals: 1, html_allowed: nil)
expect(entry).not_to have_singular_translation
end
......@@ -109,7 +109,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
describe '#msgid_contains_newlines' do
it 'is true when the msgid is an array' do
data = { msgid: %w(hello world) }
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry.msgid_has_multiple_lines?).to be_truthy
end
......@@ -118,7 +118,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
describe '#plural_id_contains_newlines' do
it 'is true when the msgid is an array' do
data = { msgid_plural: %w(hello world) }
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry.plural_id_has_multiple_lines?).to be_truthy
end
......@@ -127,7 +127,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
describe '#translations_contain_newlines' do
it 'is true when the msgid is an array' do
data = { msgstr: %w(hello world) }
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry.translations_have_multiple_lines?).to be_truthy
end
......@@ -135,7 +135,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
describe '#contains_unescaped_chars' do
let(:data) { { msgid: '' } }
let(:entry) { described_class.new(data, 2) }
let(:entry) { described_class.new(entry_data: data, nplurals: 2, html_allowed: nil) }
it 'is true when the msgid is an array' do
string = '「100%確定」'
......@@ -177,7 +177,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
describe '#msgid_contains_unescaped_chars' do
it 'is true when the msgid contains a `%`' do
data = { msgid: '「100%確定」' }
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry).to receive(:contains_unescaped_chars?).and_call_original
expect(entry.msgid_contains_unescaped_chars?).to be_truthy
......@@ -187,7 +187,7 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
describe '#plural_id_contains_unescaped_chars' do
it 'is true when the plural msgid contains a `%`' do
data = { msgid_plural: '「100%確定」' }
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry).to receive(:contains_unescaped_chars?).and_call_original
expect(entry.plural_id_contains_unescaped_chars?).to be_truthy
......@@ -197,10 +197,136 @@ RSpec.describe Gitlab::I18n::TranslationEntry do
describe '#translations_contain_unescaped_chars' do
it 'is true when the translation contains a `%`' do
data = { msgstr: '「100%確定」' }
entry = described_class.new(data, 2)
entry = described_class.new(entry_data: data, nplurals: 2, html_allowed: nil)
expect(entry).to receive(:contains_unescaped_chars?).and_call_original
expect(entry.translations_contain_unescaped_chars?).to be_truthy
end
end
describe '#msgid_contains_potential_html?' do
subject(:entry) { described_class.new(entry_data: data, nplurals: 2, html_allowed: nil) }
context 'when there are no angle brackets in the msgid' do
let(:data) { { msgid: 'String with no brackets' } }
it 'returns false' do
expect(entry.msgid_contains_potential_html?).to be_falsey
end
end
context 'when there are angle brackets in the msgid' do
let(:data) { { msgid: 'String with <strong> tag' } }
it 'returns true' do
expect(entry.msgid_contains_potential_html?).to be_truthy
end
end
end
describe '#plural_id_contains_potential_html?' do
subject(:entry) { described_class.new(entry_data: data, nplurals: 2, html_allowed: nil) }
context 'when there are no angle brackets in the plural_id' do
let(:data) { { msgid_plural: 'String with no brackets' } }
it 'returns false' do
expect(entry.plural_id_contains_potential_html?).to be_falsey
end
end
context 'when there are angle brackets in the plural_id' do
let(:data) { { msgid_plural: 'This string has a <strong>' } }
it 'returns true' do
expect(entry.plural_id_contains_potential_html?).to be_truthy
end
end
end
describe '#translations_contain_potential_html?' do
subject(:entry) { described_class.new(entry_data: data, nplurals: 2, html_allowed: nil) }
context 'when there are no angle brackets in the translations' do
let(:data) { { msgstr: 'This string has no angle brackets' } }
it 'returns false' do
expect(entry.translations_contain_potential_html?).to be_falsey
end
end
context 'when there are angle brackets in the translations' do
let(:data) { { msgstr: 'This string has a <strong>' } }
it 'returns true' do
expect(entry.translations_contain_potential_html?).to be_truthy
end
end
end
describe '#msgid_html_allowed?' do
subject(:entry) do
described_class.new(entry_data: { msgid: 'String with a <strong>' }, nplurals: 2, html_allowed: html_todo)
end
context 'when the html in the string is in the todolist' do
let(:html_todo) { { 'plural_id' => nil, 'translations' => [] } }
it 'returns true' do
expect(entry.msgid_html_allowed?).to be true
end
end
context 'when the html in the string is not in the todolist' do
let(:html_todo) { nil }
it 'returns false' do
expect(entry.msgid_html_allowed?).to be false
end
end
end
describe '#plural_id_html_allowed?' do
subject(:entry) do
described_class.new(entry_data: { msgid_plural: 'String with many <strong>' }, nplurals: 2, html_allowed: html_todo)
end
context 'when the html in the string is in the todolist' do
let(:html_todo) { { 'plural_id' => 'String with many <strong>', 'translations' => [] } }
it 'returns true' do
expect(entry.plural_id_html_allowed?).to be true
end
end
context 'when the html in the string is not in the todolist' do
let(:html_todo) { { 'plural_id' => 'String with some <strong>', 'translations' => [] } }
it 'returns false' do
expect(entry.plural_id_html_allowed?).to be false
end
end
end
describe '#translations_html_allowed?' do
subject(:entry) do
described_class.new(entry_data: { msgstr: 'String with a <strong>' }, nplurals: 2, html_allowed: html_todo)
end
context 'when the html in the string is in the todolist' do
let(:html_todo) { { 'plural_id' => nil, 'translations' => ['String with a <strong>'] } }
it 'returns true' do
expect(entry.translations_html_allowed?).to be true
end
end
context 'when the html in the string is not in the todolist' do
let(:html_todo) { { 'plural_id' => nil, 'translations' => ['String with a different <strong>'] } }
it 'returns false' do
expect(entry.translations_html_allowed?).to be false
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment