Commit 6cdba731 authored by Sean McGivern's avatar Sean McGivern

Merge branch '338004-next-batch-not-null-strategy' into 'master'

Makes BackfillWorkItemTypeIdOnIssues migration more efficient

See merge request gitlab-org/gitlab!84213
parents 9c2e405f 10ba7bc3
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
class BackfillWorkItemTypeIdOnIssues < Gitlab::Database::Migration[1.0] class BackfillWorkItemTypeIdOnIssues < Gitlab::Database::Migration[1.0]
MIGRATION = 'BackfillWorkItemTypeIdForIssues' MIGRATION = 'BackfillWorkItemTypeIdForIssues'
BATCH_CLASS_NAME = 'BackfillIssueWorkItemTypeBatchingStrategy'
BATCH_SIZE = 10_000 BATCH_SIZE = 10_000
MAX_BATCH_SIZE = 30_000 MAX_BATCH_SIZE = 30_000
SUB_BATCH_SIZE = 100 SUB_BATCH_SIZE = 100
...@@ -27,7 +28,8 @@ class BackfillWorkItemTypeIdOnIssues < Gitlab::Database::Migration[1.0] ...@@ -27,7 +28,8 @@ class BackfillWorkItemTypeIdOnIssues < Gitlab::Database::Migration[1.0]
job_interval: INTERVAL, job_interval: INTERVAL,
batch_size: BATCH_SIZE, batch_size: BATCH_SIZE,
max_batch_size: MAX_BATCH_SIZE, max_batch_size: MAX_BATCH_SIZE,
sub_batch_size: SUB_BATCH_SIZE sub_batch_size: SUB_BATCH_SIZE,
batch_class_name: BATCH_CLASS_NAME
) )
end end
end end
......
# frozen_string_literal: true
class ReplaceWorkItemTypeBackfillNextBatchStrategy < Gitlab::Database::Migration[1.0]
JOB_CLASS_NAME = 'BackfillWorkItemTypeIdForIssues'
NEW_STRATEGY_CLASS = 'BackfillIssueWorkItemTypeBatchingStrategy'
OLD_STRATEGY_CLASS = 'PrimaryKeyBatchingStrategy'
class InlineBatchedMigration < ApplicationRecord
self.table_name = :batched_background_migrations
end
def up
InlineBatchedMigration.where(job_class_name: JOB_CLASS_NAME)
.update_all(batch_class_name: NEW_STRATEGY_CLASS)
end
def down
InlineBatchedMigration.where(job_class_name: JOB_CLASS_NAME)
.update_all(batch_class_name: OLD_STRATEGY_CLASS)
end
end
7897da66c2a941a6a09db6f62fa9069caef235603663077e5dd342a72ac5c7c3
\ No newline at end of file
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
module BatchingStrategies
# Batching class to use for back-filling issue's work_item_type_id for a single issue type.
# Batches will be scoped to records where the foreign key is NULL and only of a given issue type
#
# If no more batches exist in the table, returns nil.
class BackfillIssueWorkItemTypeBatchingStrategy < PrimaryKeyBatchingStrategy
def apply_additional_filters(relation, job_arguments:)
issue_type = job_arguments.first
relation.where(issue_type: issue_type)
end
end
end
end
end
...@@ -23,7 +23,7 @@ module Gitlab ...@@ -23,7 +23,7 @@ module Gitlab
quoted_column_name = model_class.connection.quote_column_name(column_name) quoted_column_name = model_class.connection.quote_column_name(column_name)
relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value) relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value)
relation = apply_additional_filters(relation) relation = apply_additional_filters(relation, job_arguments: job_arguments)
next_batch_bounds = nil next_batch_bounds = nil
relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
...@@ -40,12 +40,14 @@ module Gitlab ...@@ -40,12 +40,14 @@ module Gitlab
# #
# Example: # Example:
# #
# class TypeIsNotNull < PrimaryKeyBatchingStrategy # class MatchingType < PrimaryKeyBatchingStrategy
# def apply_additional_filters(relation) # def apply_additional_filters(relation, job_arguments:)
# relation.where.not(type: nil) # type = job_arguments.first
#
# relation.where(type: type)
# end # end
# end # end
def apply_additional_filters(relation) def apply_additional_filters(relation, job_arguments: [])
relation relation
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::BackfillIssueWorkItemTypeBatchingStrategy,
'#next_batch' do
# let! can't be used in migration specs because all tables but `work_item_types` are deleted after each spec
let!(:issue_type_enum) { { issue: 0, incident: 1, test_case: 2, requirement: 3, task: 4 } }
let!(:namespace) { table(:namespaces).create!(name: 'namespace', path: 'namespace') }
let!(:project) { table(:projects).create!(namespace_id: namespace.id) }
let!(:issues_table) { table(:issues) }
let!(:task_type) { table(:work_item_types).find_by!(namespace_id: nil, base_type: issue_type_enum[:task]) }
let!(:issue1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:issue]) }
let!(:task1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:task]) }
let!(:issue2) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:issue]) }
let!(:issue3) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:issue]) }
let!(:task2) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:task]) }
let!(:incident1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:incident]) }
# test_case is EE only, but enum values exist on the FOSS model
let!(:test_case1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:test_case]) }
let!(:task3) do
issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:task], work_item_type_id: task_type.id)
end
let!(:task4) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:task]) }
let!(:batching_strategy) { described_class.new(connection: ActiveRecord::Base.connection) }
context 'when issue_type is issue' do
let(:job_arguments) { [issue_type_enum[:issue], 'irrelevant_work_item_id'] }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(issue1.id, 2)
expect(batch_bounds).to match_array([issue1.id, issue2.id])
end
end
context 'when additional batches remain' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(issue2.id, 2)
expect(batch_bounds).to match_array([issue2.id, issue3.id])
end
end
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(issue3.id, 2)
expect(batch_bounds).to match_array([issue3.id, issue3.id])
end
end
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = next_batch(issue3.id + 1, 1)
expect(batch_bounds).to be_nil
end
end
end
context 'when issue_type is incident' do
let(:job_arguments) { [issue_type_enum[:incident], 'irrelevant_work_item_id'] }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch with only one element' do
batch_bounds = next_batch(incident1.id, 2)
expect(batch_bounds).to match_array([incident1.id, incident1.id])
end
end
end
context 'when issue_type is requirement and there are no matching records' do
let(:job_arguments) { [issue_type_enum[:requirement], 'irrelevant_work_item_id'] }
context 'when starting on the first batch' do
it 'returns nil' do
batch_bounds = next_batch(1, 2)
expect(batch_bounds).to be_nil
end
end
end
context 'when issue_type is task' do
let(:job_arguments) { [issue_type_enum[:task], 'irrelevant_work_item_id'] }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(task1.id, 2)
expect(batch_bounds).to match_array([task1.id, task2.id])
end
end
context 'when additional batches remain' do
it 'returns the bounds of the next batch, does not skip records where FK is already set' do
batch_bounds = next_batch(task2.id, 2)
expect(batch_bounds).to match_array([task2.id, task3.id])
end
end
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(task4.id, 2)
expect(batch_bounds).to match_array([task4.id, task4.id])
end
end
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = next_batch(task4.id + 1, 1)
expect(batch_bounds).to be_nil
end
end
end
def next_batch(min_value, batch_size)
batching_strategy.next_batch(
:issues,
:id,
batch_min_value: min_value,
batch_size: batch_size,
job_arguments: job_arguments
)
end
end
...@@ -15,7 +15,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi ...@@ -15,7 +15,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when starting on the first batch' do context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace1.id, batch_size: 3, job_arguments: nil) batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace1.id, batch_size: 3, job_arguments: [])
expect(batch_bounds).to eq([namespace1.id, namespace3.id]) expect(batch_bounds).to eq([namespace1.id, namespace3.id])
end end
...@@ -23,7 +23,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi ...@@ -23,7 +23,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when additional batches remain' do context 'when additional batches remain' do
it 'returns the bounds of the next batch' do it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace2.id, batch_size: 3, job_arguments: nil) batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace2.id, batch_size: 3, job_arguments: [])
expect(batch_bounds).to eq([namespace2.id, namespace4.id]) expect(batch_bounds).to eq([namespace2.id, namespace4.id])
end end
...@@ -31,7 +31,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi ...@@ -31,7 +31,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when on the final batch' do context 'when on the final batch' do
it 'returns the bounds of the next batch' do it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3, job_arguments: nil) batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3, job_arguments: [])
expect(batch_bounds).to eq([namespace4.id, namespace4.id]) expect(batch_bounds).to eq([namespace4.id, namespace4.id])
end end
...@@ -39,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi ...@@ -39,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when no additional batches remain' do context 'when no additional batches remain' do
it 'returns nil' do it 'returns nil' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id + 1, batch_size: 1, job_arguments: nil) batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id + 1, batch_size: 1, job_arguments: [])
expect(batch_bounds).to be_nil expect(batch_bounds).to be_nil
end end
...@@ -48,8 +48,10 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi ...@@ -48,8 +48,10 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'additional filters' do context 'additional filters' do
let(:strategy_with_filters) do let(:strategy_with_filters) do
Class.new(described_class) do Class.new(described_class) do
def apply_additional_filters(relation) def apply_additional_filters(relation, job_arguments:)
relation.where.not(type: 'Project') min_id = job_arguments.first
relation.where.not(type: 'Project').where('id >= ?', min_id)
end end
end end
end end
...@@ -58,7 +60,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi ...@@ -58,7 +60,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
let!(:namespace5) { namespaces.create!(name: 'batchtest5', path: 'batch-test5', type: 'Project') } let!(:namespace5) { namespaces.create!(name: 'batchtest5', path: 'batch-test5', type: 'Project') }
it 'applies additional filters' do it 'applies additional filters' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3, job_arguments: nil) batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3, job_arguments: [1])
expect(batch_bounds).to eq([namespace4.id, namespace4.id]) expect(batch_bounds).to eq([namespace4.id, namespace4.id])
end end
......
...@@ -32,7 +32,8 @@ RSpec.describe BackfillWorkItemTypeIdOnIssues, :migration do ...@@ -32,7 +32,8 @@ RSpec.describe BackfillWorkItemTypeIdOnIssues, :migration do
interval: interval, interval: interval,
batch_size: described_class::BATCH_SIZE, batch_size: described_class::BATCH_SIZE,
max_batch_size: described_class::MAX_BATCH_SIZE, max_batch_size: described_class::MAX_BATCH_SIZE,
sub_batch_size: described_class::SUB_BATCH_SIZE sub_batch_size: described_class::SUB_BATCH_SIZE,
batch_class_name: described_class::BATCH_CLASS_NAME
) )
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe ReplaceWorkItemTypeBackfillNextBatchStrategy, :migration do
describe '#up' do
it 'sets the new strategy for existing migrations' do
migrations = create_migrations(described_class::OLD_STRATEGY_CLASS, 2)
expect do
migrate!
migrations.each(&:reload)
end.to change { migrations.pluck(:batch_class_name).uniq }.from([described_class::OLD_STRATEGY_CLASS])
.to([described_class::NEW_STRATEGY_CLASS])
end
end
describe '#down' do
it 'sets the old strategy for existing migrations' do
migrations = create_migrations(described_class::NEW_STRATEGY_CLASS, 2)
expect do
migrate!
schema_migrate_down!
migrations.each(&:reload)
end.to change { migrations.pluck(:batch_class_name).uniq }.from([described_class::NEW_STRATEGY_CLASS])
.to([described_class::OLD_STRATEGY_CLASS])
end
end
def create_migrations(batch_class_name, count)
Array.new(2) { |index| create_background_migration(batch_class_name, [index]) }
end
def create_background_migration(batch_class_name, job_arguments)
migrations_table = table(:batched_background_migrations)
migrations_table.create!(
batch_class_name: batch_class_name,
job_class_name: described_class::JOB_CLASS_NAME,
max_value: 10,
batch_size: 5,
sub_batch_size: 1,
interval: 2.minutes,
table_name: :issues,
column_name: :id,
total_tuple_count: 10_000,
pause_ms: 100,
job_arguments: job_arguments
)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment