Commit d8b01969 authored by Dmitry Gruzd's avatar Dmitry Gruzd

Merge branch '5468-migration-to-copy-merge-requests-to-new-index-and-query-new-index' into 'master'

Migration to move merge requests to separate Elasticsearch index

See merge request gitlab-org/gitlab!60586
parents 5c5eefb6 b4296d0d
......@@ -102,6 +102,11 @@ module EE
grouping_columns << ::MergeRequest::Metrics.review_time_field if sort.to_s == 'review_time_desc'
grouping_columns
end
# override
def use_separate_indices?
Elastic::DataMigrationService.migration_has_finished?(:migrate_merge_requests_to_separate_index)
end
end
override :mergeable?
......
......@@ -30,6 +30,10 @@ class ElasticDeleteProjectWorker
index_names << helper.standalone_indices_proxies(target_classes: [Note]).map(&:index_name)
end
if Elastic::DataMigrationService.migration_has_finished?(:migrate_merge_requests_to_separate_index)
index_names << helper.standalone_indices_proxies(target_classes: [MergeRequest]).map(&:index_name)
end
index_names
end
......
---
title: Migration to move merge requests to separate Elasticsearch index
merge_request: 60586
author:
type: performance
# frozen_string_literal: true
class MigrateMergeRequestsToSeparateIndex < Elastic::Migration
include Elastic::MigrationHelper
pause_indexing!
batched!
space_requirements!
throttle_delay 1.minute
MAX_ATTEMPTS_PER_SLICE = 30
def migrate
# On initial batch we only create index
if migration_state[:slice].blank?
reindexing_cleanup! # support retries
log "Create standalone #{document_type_plural} index under #{new_index_name}"
helper.create_standalone_indices(target_classes: [MergeRequest])
options = {
slice: 0,
retry_attempt: 0,
max_slices: get_number_of_shards
}
set_migration_state(options)
return
end
retry_attempt = migration_state[:retry_attempt].to_i
slice = migration_state[:slice]
task_id = migration_state[:task_id]
max_slices = migration_state[:max_slices]
if retry_attempt >= MAX_ATTEMPTS_PER_SLICE
fail_migration_halt_error!(retry_attempt: retry_attempt)
return
end
return unless slice < max_slices
if task_id
log "Checking reindexing status for slice:#{slice} | task_id:#{task_id}"
if reindexing_completed?(task_id: task_id)
log "Reindexing is completed for slice:#{slice} | task_id:#{task_id}"
set_migration_state(
slice: slice + 1,
task_id: nil,
retry_attempt: 0, # We reset retry_attempt for a next slice
max_slices: max_slices
)
else
log "Reindexing is still in progress for slice:#{slice} | task_id:#{task_id}"
end
return
end
log "Launching reindexing for slice:#{slice} | max_slices:#{max_slices}"
task_id = reindex(slice: slice, max_slices: max_slices)
log "Reindexing for slice:#{slice} | max_slices:#{max_slices} is started with task_id:#{task_id}"
set_migration_state(
slice: slice,
task_id: task_id,
max_slices: max_slices
)
rescue StandardError => e
log "migrate failed, increasing migration_state for slice:#{slice} retry_attempt:#{retry_attempt} error:#{e.message}"
set_migration_state(
slice: slice,
task_id: nil,
retry_attempt: retry_attempt + 1,
max_slices: max_slices
)
raise e
end
def completed?
helper.refresh_index(index_name: new_index_name)
original_count = original_documents_count
new_count = new_documents_count
log "Checking to see if migration is completed based on index counts: original_count:#{original_count}, new_count:#{new_count}"
original_count == new_count
end
def space_required_bytes
# merge_requests index on GitLab.com takes at most 0.2% of the main index storage
# this migration will require 5 times that value to give a buffer
(helper.index_size_bytes * 0.010).ceil
end
private
def document_type
'merge_request'
end
def document_type_fields
%w(
type
id
iid
target_branch
source_branch
title
description
created_at
updated_at
state
merge_status
source_project_id
target_project_id
project_id
author_id
visibility_level
merge_requests_access_level
)
end
end
# frozen_string_literal: true
module Elastic
module Latest
module MergeRequestConfig
# To obtain settings and mappings methods
extend Elasticsearch::Model::Indexing::ClassMethods
extend Elasticsearch::Model::Naming::ClassMethods
self.document_type = 'doc'
self.index_name = [Rails.application.class.module_parent_name.downcase, Rails.env, 'merge_requests'].join('-')
settings Elastic::Latest::Config.settings.to_hash.deep_merge(
index: {
number_of_shards: Elastic::AsJSON.new { Elastic::IndexSetting[self.index_name].number_of_shards },
number_of_replicas: Elastic::AsJSON.new { Elastic::IndexSetting[self.index_name].number_of_replicas }
}
)
mappings dynamic: 'strict' do
indexes :type, type: :keyword
indexes :id, type: :integer
indexes :iid, type: :integer
indexes :title, type: :text,
index_options: 'positions'
indexes :description, type: :text,
index_options: 'positions'
indexes :state, type: :keyword
indexes :project_id, type: :integer
indexes :author_id, type: :integer
indexes :target_branch, type: :keyword
indexes :source_branch, type: :keyword
indexes :merge_status, type: :keyword
indexes :source_project_id, type: :integer
indexes :target_project_id, type: :integer
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :visibility_level, type: :integer
indexes :merge_requests_access_level, type: :integer
end
end
end
end
......@@ -32,6 +32,14 @@ module Elastic
data.merge(generic_attributes)
end
def generic_attributes
if Elastic::DataMigrationService.migration_has_finished?(:migrate_merge_requests_to_separate_index)
super.except('join_field')
else
super
end
end
end
end
end
......@@ -15,7 +15,8 @@ module Gitlab
ES_SEPARATE_CLASSES = [
Issue,
Note
Note,
MergeRequest
].freeze
attr_reader :version, :client
......
......@@ -10,6 +10,7 @@ RSpec.describe AddNewDataToMergeRequestsDocuments, :elastic, :sidekiq_inline do
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
# ensure merge_requests are indexed
merge_requests
......
# frozen_string_literal: true
require 'spec_helper'
require File.expand_path('ee/elastic/migrate/20210429154500_migrate_merge_requests_to_separate_index.rb')
RSpec.describe MigrateMergeRequestsToSeparateIndex do
let(:version) { 20210429154500 }
let(:migration) { described_class.new(version) }
let(:index_name) { "#{es_helper.target_name}-merge_requests" }
let(:helper) { Gitlab::Elastic::Helper.new }
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
allow(migration).to receive(:helper).and_return(helper)
end
describe 'migration_options' do
it 'has migration options set', :aggregate_failures do
expect(migration.batched?).to be_truthy
expect(migration.throttle_delay).to eq(1.minute)
expect(migration.pause_indexing?).to be_truthy
expect(migration.space_requirements?).to be_truthy
end
end
describe '.migrate', :elastic, :clean_gitlab_redis_shared_state do
before do
set_elasticsearch_migration_to :migrate_merge_requests_to_separate_index, including: false
end
context 'initial launch' do
before do
es_helper.delete_index(index_name: es_helper.target_index_name(target: index_name))
end
it 'creates an index and sets migration_state' do
expect { migration.migrate }.to change { es_helper.alias_exists?(name: index_name) }.from(false).to(true)
expect(migration.migration_state).to include(slice: 0, max_slices: 5)
end
end
context 'batch run' do
it 'sets migration_state task_id' do
allow(migration).to receive(:reindex).and_return('task_id')
migration.set_migration_state(slice: 0, max_slices: 5)
migration.migrate
expect(migration.migration_state).to include(slice: 0, max_slices: 5, task_id: 'task_id')
end
it 'sets next slice after task check' do
allow(migration).to receive(:reindexing_completed?).and_return(true)
migration.set_migration_state(slice: 0, max_slices: 5, task_id: 'task_id')
migration.migrate
expect(migration.migration_state).to include(slice: 1, max_slices: 5, task_id: nil)
end
it 'resets retry_attempt for a next slice' do
allow(migration).to receive(:reindexing_completed?).and_return(true)
migration.set_migration_state(slice: 0, max_slices: 5, retry_attempt: 5, task_id: 'task_id')
migration.migrate
expect(migration.migration_state).to match(slice: 1, max_slices: 5, retry_attempt: 0, task_id: nil)
end
context 'reindexing is still in progress' do
before do
allow(migration).to receive(:reindexing_completed?).and_return(false)
end
it 'does nothing' do
migration.set_migration_state(slice: 0, max_slices: 5, task_id: 'task_id')
migration.migrate
expect(migration).not_to receive(:reindex)
end
end
context 'with merge_requests in elastic' do
# Create merge_requests on different projects to ensure they are spread across
# all shards. If they all end up in 1 ES shard then they'll be migrated
# in a single slice.
let!(:merge_requests) { create_list(:merge_request, 10, :unique_branches) }
before do
ensure_elasticsearch_index!
end
it 'migrates all merge_requests' do
slices = 2
migration.set_migration_state(slice: 0, max_slices: slices)
migration.migrate
50.times do |i| # Max 0.5s waiting
break if migration.completed?
sleep 0.01
migration.migrate
end
expect(migration.completed?).to be_truthy
expect(es_helper.documents_count(index_name: "#{es_helper.target_name}-merge_requests")).to eq(merge_requests.count)
end
end
end
context 'failed run' do
context 'exception is raised' do
before do
allow(migration).to receive(:reindex).and_raise(StandardError)
end
it 'increases retry_attempt' do
migration.set_migration_state(slice: 0, max_slices: 2, retry_attempt: 1)
expect { migration.migrate }.to raise_error(StandardError)
expect(migration.migration_state).to match(slice: 0, max_slices: 2, retry_attempt: 2, task_id: nil)
end
it 'fails the migration after too many attempts' do
migration.set_migration_state(slice: 0, max_slices: 2, retry_attempt: 30)
migration.migrate
expect(migration.migration_state).to match(slice: 0, max_slices: 2, retry_attempt: 30, halted: true, halted_indexing_unpaused: false)
expect(migration).not_to receive(:reindex)
end
end
context 'elasticsearch failures' do
context 'total is not equal' do
before do
allow(helper).to receive(:task_status).and_return({ "completed" => true, "response" => { "total" => 60, "updated" => 0, "created" => 45, "deleted" => 0, "failures" => [] } })
end
it 'raises an error' do
migration.set_migration_state(slice: 0, max_slices: 2, task_id: 'task_id')
expect { migration.migrate }.to raise_error(/total is not equal/)
expect(migration.migration_state[:task_id]).to be_nil
end
end
context 'reindexing failues' do
before do
allow(helper).to receive(:task_status).with(task_id: 'task_id').and_return({ "completed" => true, "response" => { "total" => 60, "updated" => 0, "created" => 0, "deleted" => 0, "failures" => [{ "type": "es_rejected_execution_exception" }] } })
end
it 'raises an error' do
migration.set_migration_state(slice: 0, max_slices: 2, task_id: 'task_id')
expect { migration.migrate }.to raise_error(/failed with/)
expect(migration.migration_state[:task_id]).to be_nil
end
end
end
end
end
describe '.completed?' do
subject { migration.completed? }
let(:original_count) { 5 }
before do
allow(helper).to receive(:refresh_index).and_return(true)
allow(migration).to receive(:original_documents_count).and_return(original_count)
allow(migration).to receive(:new_documents_count).and_return(new_count)
end
context 'counts are equal' do
let(:new_count) { original_count }
it 'returns true' do
is_expected.to be_truthy
end
end
context 'counts are not equal' do
let(:new_count) { original_count - 1 }
it 'returns true' do
is_expected.to be_falsey
end
end
end
describe 'space_required_bytes' do
subject { migration.space_required_bytes }
before do
allow(helper).to receive(:index_size_bytes).and_return(300)
end
it { is_expected.to eq(3) }
end
end
......@@ -71,6 +71,8 @@ RSpec.describe 'Admin updates EE-only settings' do
fill_in 'application_setting_elasticsearch_replicas[gitlab-test-issues]', with: '3'
fill_in 'application_setting_elasticsearch_shards[gitlab-test-notes]', with: '20'
fill_in 'application_setting_elasticsearch_replicas[gitlab-test-notes]', with: '4'
fill_in 'application_setting_elasticsearch_shards[gitlab-test-merge_requests]', with: '15'
fill_in 'application_setting_elasticsearch_replicas[gitlab-test-merge_requests]', with: '5'
fill_in 'Maximum file size indexed (KiB)', with: '5000'
fill_in 'Maximum field length', with: '100000'
......@@ -93,6 +95,8 @@ RSpec.describe 'Admin updates EE-only settings' do
expect(Elastic::IndexSetting['gitlab-test-issues'].number_of_replicas).to eq(3)
expect(Elastic::IndexSetting['gitlab-test-notes'].number_of_shards).to eq(20)
expect(Elastic::IndexSetting['gitlab-test-notes'].number_of_replicas).to eq(4)
expect(Elastic::IndexSetting['gitlab-test-merge_requests'].number_of_shards).to eq(15)
expect(Elastic::IndexSetting['gitlab-test-merge_requests'].number_of_replicas).to eq(5)
expect(current_settings.elasticsearch_indexed_file_size_limit_kb).to eq(5000)
expect(current_settings.elasticsearch_indexed_field_length_limit).to eq(100000)
......
# frozen_string_literal: true
require 'fast_spec_helper'
require_relative './config_shared_examples'
RSpec.describe Elastic::Latest::MergeRequestConfig do
describe '.document_type' do
it 'returns config' do
expect(described_class.document_type).to eq('doc')
end
end
describe '.settings' do
it_behaves_like 'config settings return correct values'
end
describe '.mappings' do
it 'returns config' do
expect(described_class.mapping).to be_a(Elasticsearch::Model::Indexing::Mappings)
end
end
end
......@@ -36,7 +36,8 @@ RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransport, :elastic, :req
allow(::Gitlab::PerformanceBar).to receive(:enabled_for_request?).and_return(true)
end
it 'parses and tracks the call details' do
it 'parses and tracks the call details', :use_clean_rails_memory_store_caching do
create(:merge_request, title: "cache warming MR") # Warm cache for checking migrations are finished
ensure_elasticsearch_index!
::Gitlab::SafeRequestStore.clear!
......
......@@ -90,10 +90,6 @@ RSpec.describe MergeRequest, :elastic do
).merge({
'state' => merge_request.state,
'type' => merge_request.es_type,
'join_field' => {
'name' => merge_request.es_type,
'parent' => merge_request.es_parent
},
'merge_requests_access_level' => ProjectFeature::ENABLED,
'visibility_level' => Gitlab::VisibilityLevel::INTERNAL,
'project_id' => merge_request.target_project.id
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment