Commit d76b4c3b authored by Dylan Griffith's avatar Dylan Griffith

Application limit for ES indexed field length

Using the plan_limit table we can store a limit for the maximum
size of any fields being indexed in Elasticsearch. This is for
https://gitlab.com/gitlab-org/gitlab/issues/201826 . Any strings above
the length limit will be truncated down to the limit.

We default to 0 which means unlimited so this has no effect for
self-managed customers. On GitLab.com we will likely want to set this to
around 20k as a sensible upper limit on what we should store.
parent 2e33d3e6
# frozen_string_literal: true
# Adds the `elasticsearch_indexed_field_length_limit` integer column to
# `application_settings`. The column is NOT NULL and defaults to 0.
# When `Gitlab.com?` is true, existing rows are additionally set to 20000.
class AddElasticsearchIndexedFieldLengthLimitToApplicationSettings < ActiveRecord::Migration[6.0]
  DOWNTIME = false

  # Creates the column, then applies the GitLab.com-specific value.
  def up
    add_column(
      :application_settings,
      :elasticsearch_indexed_field_length_limit,
      :integer,
      null: false,
      default: 0
    )

    # Every other installation keeps the column default.
    return unless Gitlab.com?

    execute 'UPDATE application_settings SET elasticsearch_indexed_field_length_limit = 20000'
  end

  # Drops the column again.
  def down
    remove_column :application_settings, :elasticsearch_indexed_field_length_limit
  end
end
......@@ -343,6 +343,7 @@ ActiveRecord::Schema.define(version: 2020_02_06_111847) do
t.boolean "force_pages_access_control", default: false, null: false
t.boolean "updating_name_disabled_for_users", default: false, null: false
t.integer "instance_administrators_group_id"
t.integer "elasticsearch_indexed_field_length_limit", default: 0, null: false
t.index ["custom_project_templates_group_id"], name: "index_application_settings_on_custom_project_templates_group_id"
t.index ["file_template_project_id"], name: "index_application_settings_on_file_template_project_id"
t.index ["instance_administration_project_id"], name: "index_applicationsettings_on_instance_administration_project_id"
......
......@@ -86,3 +86,22 @@ Plan.default.limits.update!(ci_active_jobs: 500)
```
NOTE: **Note:** Set the limit to `0` to disable it.
## Advanced Global Search limits
### Maximum field length
> [Introduced](https://gitlab.com/gitlab-org/gitlab/issues/201826) in GitLab 12.8.
You can set a limit on the content of text fields indexed for Global Search.
Setting a maximum helps to reduce the load of the indexing processes. If any
text field exceeds this limit then the text will be truncated to this number of
characters and the rest will not be indexed and hence will not be searchable.
- On GitLab.com this is limited to 20000 characters
- For self-hosted installations it is unlimited by default
This limit can be configured for self-hosted installations when [enabling
Elasticsearch](../integration/elasticsearch.md#enabling-elasticsearch).
NOTE: **Note:** Set the limit to `0` to disable it.
......@@ -150,6 +150,7 @@ The following Elasticsearch settings are available:
| `AWS Region` | The AWS region your Elasticsearch service is located in. |
| `AWS Access Key` | The AWS access key. |
| `AWS Secret Access Key` | The AWS secret access key. |
| `Maximum field length` | See [the explanation in instance limits](../administration/instance_limits.md#maximum-field-length). |
### Limiting namespaces and projects
......
......@@ -27,6 +27,7 @@ module EE
:elasticsearch_aws_secret_access_key,
:elasticsearch_indexing,
:elasticsearch_replicas,
:elasticsearch_indexed_field_length_limit,
:elasticsearch_search,
:elasticsearch_shards,
:elasticsearch_url,
......
......@@ -56,6 +56,10 @@ module EE
presence: { message: "can't be blank when using aws hosted elasticsearch" },
if: ->(setting) { setting.elasticsearch_indexing? && setting.elasticsearch_aws? }
# The indexed field length limit has to be present and a whole number >= 0.
validates(
  :elasticsearch_indexed_field_length_limit,
  presence: true,
  numericality: {
    only_integer: true,
    greater_than_or_equal_to: 0
  }
)
validates :email_additional_text,
allow_blank: true,
length: { maximum: EMAIL_ADDITIONAL_TEXT_CHARACTER_LIMIT }
......@@ -85,6 +89,7 @@ module EE
elasticsearch_aws_region: ENV['ELASTIC_REGION'] || 'us-east-1',
elasticsearch_replicas: 1,
elasticsearch_shards: 5,
elasticsearch_indexed_field_length_limit: 0,
elasticsearch_url: ENV['ELASTIC_URL'] || 'http://localhost:9200',
email_additional_text: nil,
lock_memberships_to_ldap: false,
......
......@@ -57,6 +57,12 @@
= _('How many replicas each Elasticsearch shard has.')
= recreate_index_text
.form-group
= f.label :elasticsearch_indexed_field_length_limit, _('Maximum field length'), class: 'label-bold'
= f.number_field :elasticsearch_indexed_field_length_limit, value: @application_setting.elasticsearch_indexed_field_length_limit, class: 'form-control'
.form-text.text-muted
= _('If any indexed field exceeds this limit it will be truncated to this number of characters and the rest will not be indexed or searchable. This does not apply to repository and wiki indexing. Setting this to 0 means it is unlimited.')
.sub-section
%h4= _('Elasticsearch indexing restrictions')
.form-group
......
---
title: Add application limit for ES indexed field length
merge_request: 24345
author:
type: added
......@@ -41,10 +41,21 @@ module Elastic
# them to raise exceptions. When this happens, we still want the remainder
# of the object to be saved, so silently swallow the errors
#
# Reads +attr_name+ from the target a single time and passes the value
# through +apply_field_limit+ before returning it.
#
# @param attr_name [Symbol, String] name of the reader to invoke on the target
# @return [Object, nil] the value returned by +apply_field_limit+, or nil when
#   reading the attribute raised
def safely_read_attribute_for_elasticsearch(attr_name)
  # Invoke the reader exactly once: attribute readers can be expensive
  # methods, so the value must not be computed and then thrown away.
  result = target.send(attr_name) # rubocop:disable GitlabSecurity/PublicSend

  apply_field_limit(result)
rescue => err
  # Best effort by design: log the failure and index the field as nil.
  target.logger.warn("Elasticsearch failed to read #{attr_name} for #{target.class} #{target.id}: #{err}")
  nil
end
# Truncates String values to the configured Elasticsearch indexed field
# length limit.
#
# Non-String values are returned untouched without consulting the setting.
# A limit that is not greater than 0 leaves the string as it is.
#
# @param result [Object] value read from the indexed target
# @return [Object] the first `limit` characters of a String, otherwise
#   +result+ itself
def apply_field_limit(result)
  if result.is_a?(String)
    max_length = Gitlab::CurrentSettings.elasticsearch_indexed_field_length_limit

    return result[0, max_length] if max_length > 0
  end

  result
end
end
end
......@@ -68,6 +68,7 @@ describe 'Admin updates EE-only settings' do
check 'Search with Elasticsearch enabled'
fill_in 'Number of Elasticsearch shards', with: '120'
fill_in 'Number of Elasticsearch replicas', with: '2'
fill_in 'Maximum field length', with: '100000'
click_button 'Save changes'
end
......@@ -77,6 +78,7 @@ describe 'Admin updates EE-only settings' do
expect(current_settings.elasticsearch_search).to be_truthy
expect(current_settings.elasticsearch_shards).to eq(120)
expect(current_settings.elasticsearch_replicas).to eq(2)
expect(current_settings.elasticsearch_indexed_field_length_limit).to eq(100000)
expect(page).to have_content 'Application settings saved successfully'
end
end
......
......@@ -41,6 +41,12 @@ describe ApplicationSetting do
it { is_expected.not_to allow_value(1.1).for(:elasticsearch_replicas) }
it { is_expected.not_to allow_value(-1).for(:elasticsearch_replicas) }
it { is_expected.to allow_value(10).for(:elasticsearch_indexed_field_length_limit) }
it { is_expected.to allow_value(0).for(:elasticsearch_indexed_field_length_limit) }
it { is_expected.not_to allow_value(nil).for(:elasticsearch_indexed_field_length_limit) }
it { is_expected.not_to allow_value(1.1).for(:elasticsearch_indexed_field_length_limit) }
it { is_expected.not_to allow_value(-1).for(:elasticsearch_indexed_field_length_limit) }
it { is_expected.to allow_value(nil).for(:required_instance_ci_template) }
it { is_expected.not_to allow_value("").for(:required_instance_ci_template) }
it { is_expected.not_to allow_value(" ").for(:required_instance_ci_template) }
......
......@@ -125,6 +125,32 @@ describe Issue, :elastic do
expect(issue.__elasticsearch__.as_indexed_json).to eq(expected_hash)
end
# Covers truncation of indexed string fields according to the
# elasticsearch_indexed_field_length_limit application setting.
context 'field length limits' do
  context 'when there is an elasticsearch_indexed_field_length limit' do
    # The limit under test is stubbed as an application setting, so the
    # description names that rather than a plan limit.
    it 'truncates to the application setting limit' do
      stub_ee_application_setting(elasticsearch_indexed_field_length_limit: 10)

      issue = create(:issue, description: 'The description is too long')

      indexed_json = issue.__elasticsearch__.as_indexed_json

      expect(indexed_json['description']).to eq('The descri')
    end
  end

  context 'when the elasticsearch_indexed_field_length limit is 0' do
    it 'does not truncate the fields' do
      stub_ee_application_setting(elasticsearch_indexed_field_length_limit: 0)

      issue = create(:issue, description: 'The description is too long')

      indexed_json = issue.__elasticsearch__.as_indexed_json

      expect(indexed_json['description']).to eq('The description is too long')
    end
  end
end
it_behaves_like 'no results when the user cannot read cross project' do
let(:record1) { create(:issue, project: project, title: 'test-issue') }
let(:record2) { create(:issue, project: project2, title: 'test-issue') }
......
......@@ -10106,6 +10106,9 @@ msgstr ""
msgid "Identities"
msgstr ""
msgid "If any indexed field exceeds this limit it will be truncated to this number of characters and the rest will not be indexed or searchable. This does not apply to repository and wiki indexing. Setting this to 0 means it is unlimited."
msgstr ""
msgid "If any job surpasses this timeout threshold, it will be marked as failed. Human readable time input language is accepted like \"1 hour\". Values without specification represent seconds."
msgstr ""
......@@ -11713,6 +11716,9 @@ msgstr ""
msgid "Maximum duration of a session."
msgstr ""
msgid "Maximum field length"
msgstr ""
msgid "Maximum job timeout"
msgstr ""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment