Commit 1e078444 authored by Mario Celi

Backfill work_item_type_id for all Issues

DB Migration
Backfills work_item_type_id for issues that still
don't have the foreign key set

Use queue_batched_background_migration to schedule the background migration

Changelog: other
parent ea68d14f
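
(Context, not part of the commit.) Each batched job scheduled by this change boils down to an update of the following shape; `base_type`, `type_id`, `first_id`, and `last_id` are stand-ins for the job arguments and batch bounds supplied by the batched migration framework:

# Hedged sketch of one sub-batch of the backfill:
Issue.where(work_item_type_id: nil, issue_type: base_type)
     .where(id: first_id..last_id)
     .update_all(work_item_type_id: type_id)
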
# frozen_string_literal: true

# Drops the index on issues.work_item_type_id while the backfill runs
# (the column is ignored via IGNORED_INDEXES_ON_FKS in the schema spec below);
# `down` restores it.
class RemoveIssuesWorkItemTypeIdIndex < Gitlab::Database::Migration[1.0]
  disable_ddl_transaction!

  INDEX_NAME = 'index_issues_on_work_item_type_id'

  def up
    remove_concurrent_index_by_name :issues, name: INDEX_NAME
  end

  def down
    add_concurrent_index :issues, :work_item_type_id, name: INDEX_NAME
  end
end
# frozen_string_literal: true

# Temporary index supporting the backfill's batching: filter on issue_type,
# iterate by id (see BackfillWorkItemTypeIdForIssues below)
class AddTemporaryIssueTypeIndexForWorkItemTypes < Gitlab::Database::Migration[1.0]
  disable_ddl_transaction!

  INDEX_NAME = 'tmp_index_issues_on_issue_type_and_id'

  def up
    add_concurrent_index :issues, [:issue_type, :id], name: INDEX_NAME
  end

  def down
    remove_concurrent_index_by_name :issues, INDEX_NAME
  end
end
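
(Context, not part of the commit.) The temporary (issue_type, id) index matches the relation the backfill builds per sub-batch, sketched here with illustrative bounds; see relation_scoped_to_range and base_query in BackfillWorkItemTypeIdForIssues further down:

# The backfill filters on issue_type and ranges over id, which the
# composite index above can serve:
MigrationIssue.where(work_item_type_id: nil, issue_type: 0).where(id: 1..10_000)
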
# frozen_string_literal: true

class BackfillWorkItemTypeIdOnIssues < Gitlab::Database::Migration[1.0]
  MIGRATION = 'BackfillWorkItemTypeIdForIssues'
  BATCH_SIZE = 10_000
  MAX_BATCH_SIZE = 30_000
  SUB_BATCH_SIZE = 100
  INTERVAL = 2.minutes

  # Minimal model for the work_item_types table, decoupled from the application code
  class MigrationWorkItemType < ApplicationRecord
    self.table_name = 'work_item_types'

    def self.id_by_type
      where(namespace_id: nil).order(:base_type).pluck(:base_type, :id).to_h
    end
  end

  def up
    # We expect no more than 5 types. Only 3 of them are expected to have
    # associated issues at the moment.
    MigrationWorkItemType.id_by_type.each do |base_type, type_id|
      queue_batched_background_migration(
        MIGRATION,
        :issues,
        :id,
        base_type,
        type_id,
        job_interval: INTERVAL,
        batch_size: BATCH_SIZE,
        max_batch_size: MAX_BATCH_SIZE,
        sub_batch_size: SUB_BATCH_SIZE
      )
    end
  end

  def down
    Gitlab::Database::BackgroundMigration::BatchedMigration.where(job_class_name: MIGRATION).delete_all
  end
end
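
(Context, not part of the commit.) A hypothetical Rails console check after this migration runs; it lists the batched migrations, one per base type, that #up enqueues:

# job_arguments holds the [base_type, work_item_type_id] pairs passed above:
Gitlab::Database::BackgroundMigration::BatchedMigration
  .where(job_class_name: 'BackfillWorkItemTypeIdForIssues')
  .pluck(:job_arguments)
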
1276e86db3e2922cd1535e98f17768d1ab0dd653317f918fb8e6819760893a47
\ No newline at end of file
8ab13f76037450704a29220ca6af0174b373ce743620952e3bc197c952d00c1d
\ No newline at end of file
7dc3d314e76f26a539c0aa009bd45ea6ccc9c6108f798728909fd4b18e3c31b9
\ No newline at end of file
@@ -27961,8 +27961,6 @@ CREATE INDEX index_issues_on_updated_at ON issues USING btree (updated_at);
 CREATE INDEX index_issues_on_updated_by_id ON issues USING btree (updated_by_id) WHERE (updated_by_id IS NOT NULL);
-CREATE INDEX index_issues_on_work_item_type_id ON issues USING btree (work_item_type_id);
 CREATE INDEX index_iterations_cadences_on_group_id ON iterations_cadences USING btree (group_id);
 CREATE UNIQUE INDEX index_jira_connect_installations_on_client_key ON jira_connect_installations USING btree (client_key);
@@ -29615,6 +29613,8 @@ CREATE INDEX tmp_index_for_namespace_id_migration_on_group_members ON members US
 CREATE INDEX tmp_index_for_namespace_id_migration_on_routes ON routes USING btree (id) WHERE ((namespace_id IS NULL) AND ((source_type)::text = 'Namespace'::text));
+CREATE INDEX tmp_index_issues_on_issue_type_and_id ON issues USING btree (issue_type, id);
 CREATE INDEX tmp_index_members_on_state ON members USING btree (state) WHERE (state = 2);
 CREATE INDEX tmp_index_namespaces_empty_traversal_ids_with_child_namespaces ON namespaces USING btree (id) WHERE ((parent_id IS NOT NULL) AND (traversal_ids = '{}'::integer[]));
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Backfills the `issues.work_item_type_id` column, replacing any
    # instances of `NULL` with the appropriate `work_item_types.id` based on `issues.issue_type`
    class BackfillWorkItemTypeIdForIssues
      # Basic AR model for the issues table
      class MigrationIssue < ApplicationRecord
        include ::EachBatch

        self.table_name = 'issues'

        scope :base_query, ->(base_type) { where(work_item_type_id: nil, issue_type: base_type) }
      end

      MAX_UPDATE_RETRIES = 3

      def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, pause_ms, base_type, base_type_id)
        parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id, base_type)

        parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch|
          first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first

          # The query needs to be reconstructed because .each_batch modifies the default scope
          # See: https://gitlab.com/gitlab-org/gitlab/-/issues/330510
          reconstructed_sub_batch = MigrationIssue.unscoped.base_query(base_type).where(id: first..last)

          batch_metrics.time_operation(:update_all) do
            update_with_retry(reconstructed_sub_batch, base_type_id)
          end

          pause_ms = 0 if pause_ms < 0
          sleep(pause_ms * 0.001)
        end
      end

      def batch_metrics
        @batch_metrics ||= Gitlab::Database::BackgroundMigration::BatchMetrics.new
      end

      private

      # Retry mechanism required as update statements on the issues table will randomly take longer than
      # expected due to gin indexes https://gitlab.com/gitlab-org/gitlab/-/merge_requests/71869#note_775796352
      def update_with_retry(sub_batch, base_type_id)
        update_attempt = 1

        begin
          update_batch(sub_batch, base_type_id)
        rescue ActiveRecord::StatementTimeout => e
          update_attempt += 1

          if update_attempt <= MAX_UPDATE_RETRIES
            # Sleeping 30 seconds as it might take a long time to clean the gin index pending list
            sleep(30)
            retry
          end

          raise e
        end
      end

      def update_batch(sub_batch, base_type_id)
        sub_batch.update_all(work_item_type_id: base_type_id)
      end

      def relation_scoped_to_range(source_table, source_key_column, start_id, end_id, base_type)
        MigrationIssue.where(source_key_column => start_id..end_id).base_query(base_type)
      end
    end
  end
end
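
(Context, not part of the commit.) A hedged sketch of how the batched background migration framework invokes one batch of this job; the literal values are illustrative and the argument order matches #perform above:

Gitlab::BackgroundMigration::BackfillWorkItemTypeIdForIssues.new.perform(
  1, 10_000,      # start_id, end_id: batch bounds chosen by the framework
  'issues', 'id', # batch_table, batch_column
  100,            # sub_batch_size (SUB_BATCH_SIZE in the scheduling migration)
  100,            # pause_ms between sub-batches (illustrative)
  0,              # base_type: enum value for 'issue'
  1               # base_type_id: matching work_item_types.id (illustrative)
)
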
@@ -10,6 +10,10 @@ RSpec.describe 'Database schema' do
   let(:tables) { connection.tables }
   let(:columns_name_with_jsonb) { retrieve_columns_name_with_jsonb }
 
+  IGNORED_INDEXES_ON_FKS = {
+    issues: %w[work_item_type_id]
+  }.with_indifferent_access.freeze
+
   # List of columns historically missing a FK, don't add more columns
   # See: https://docs.gitlab.com/ee/development/foreign_keys.html#naming-foreign-keys
   IGNORED_FK_COLUMNS = {
@@ -115,6 +119,7 @@ RSpec.describe 'Database schema' do
         columns.first.chomp
       end
       foreign_keys_columns = all_foreign_keys.map(&:column)
+      required_indexed_columns = foreign_keys_columns - ignored_index_columns(table)
 
       # Add the primary key column to the list of indexed columns because
       # postgres and mysql both automatically create an index on the primary
@@ -122,7 +127,7 @@ RSpec.describe 'Database schema' do
       # automatically generated indexes (like the primary key index).
       first_indexed_column.push(primary_key_column)
 
-      expect(first_indexed_column.uniq).to include(*foreign_keys_columns)
+      expect(first_indexed_column.uniq).to include(*required_indexed_columns)
     end
   end
@@ -305,8 +310,12 @@ RSpec.describe 'Database schema' do
     @models_by_table_name ||= ApplicationRecord.descendants.reject(&:abstract_class).group_by(&:table_name)
   end
 
-  def ignored_fk_columns(column)
-    IGNORED_FK_COLUMNS.fetch(column, [])
+  def ignored_fk_columns(table)
+    IGNORED_FK_COLUMNS.fetch(table, [])
   end
 
+  def ignored_index_columns(table)
+    IGNORED_INDEXES_ON_FKS.fetch(table, [])
+  end
+
   def ignored_limit_enums(model)
......
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Gitlab::BackgroundMigration::BackfillWorkItemTypeIdForIssues do
  subject(:migrate) { migration.perform(start_id, end_id, batch_table, batch_column, sub_batch_size, pause_ms, issue_type_enum[:issue], issue_type.id) }

  let(:migration) { described_class.new }
  let(:batch_table) { 'issues' }
  let(:batch_column) { 'id' }
  let(:sub_batch_size) { 2 }
  let(:pause_ms) { 0 }

  # let_it_be can't be used in migration specs because all tables but `work_item_types` are deleted after each spec
  let(:issue_type_enum) { { issue: 0, incident: 1, test_case: 2, requirement: 3, task: 4 } }
  let(:namespace) { table(:namespaces).create!(name: 'namespace', path: 'namespace') }
  let(:project) { table(:projects).create!(namespace_id: namespace.id) }
  let(:issues_table) { table(:issues) }
  let(:issue_type) { table(:work_item_types).find_by!(namespace_id: nil, base_type: issue_type_enum[:issue]) }

  let(:issue1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:issue]) }
  let(:issue2) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:issue]) }
  let(:issue3) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:issue]) }
  let(:incident1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:incident]) }
  # test_case and requirement are EE only, but enum values exist on the FOSS model
  let(:test_case1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:test_case]) }
  let(:requirement1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:requirement]) }

  let(:start_id) { issue1.id }
  let(:end_id) { requirement1.id }
  let(:all_issues) { [issue1, issue2, issue3, incident1, test_case1, requirement1] }

  it 'sets work_item_type_id only for the given type' do
    expect(all_issues).to all(have_attributes(work_item_type_id: nil))

    # 3 rows with issue_type 'issue' and sub_batch_size 2 => 2 sub-batches => 2 UPDATE statements
    expect { migrate }.to make_queries_matching(/UPDATE \"issues\" SET "work_item_type_id"/, 2)
    all_issues.each(&:reload)

    expect([issue1, issue2, issue3]).to all(have_attributes(work_item_type_id: issue_type.id))
    expect(all_issues - [issue1, issue2, issue3]).to all(have_attributes(work_item_type_id: nil))
  end

  it 'tracks timings of queries' do
    expect(migration.batch_metrics.timings).to be_empty

    expect { migrate }.to change { migration.batch_metrics.timings }
  end

  it 'retries on ActiveRecord::StatementTimeout' do
    expect(migration).to receive(:update_batch).exactly(3).times.and_raise(ActiveRecord::StatementTimeout)
    expect(migration).to receive(:sleep).with(30).twice

    expect do
      migrate
    end.to raise_error(ActiveRecord::StatementTimeout)
  end
end
# frozen_string_literal: true

require 'spec_helper'
require_migration!

RSpec.describe BackfillWorkItemTypeIdOnIssues, :migration do
  let_it_be(:migration) { described_class::MIGRATION }
  let_it_be(:interval) { 2.minutes }
  let_it_be(:issue_type_enum) { { issue: 0, incident: 1, test_case: 2, requirement: 3, task: 4 } }
  let_it_be(:base_work_item_type_ids) do
    table(:work_item_types).where(namespace_id: nil).order(:base_type).each_with_object({}) do |type, hash|
      hash[type.base_type] = type.id
    end
  end

  describe '#up' do
    it 'correctly schedules background migrations' do
      Sidekiq::Testing.fake! do
        freeze_time do
          migrate!

          scheduled_migrations = Gitlab::Database::BackgroundMigration::BatchedMigration.where(job_class_name: migration)
          work_item_types = table(:work_item_types).where(namespace_id: nil)

          expect(scheduled_migrations.count).to eq(work_item_types.count)

          [:issue, :incident, :test_case, :requirement, :task].each do |issue_type|
            expect(migration).to have_scheduled_batched_migration(
              table_name: :issues,
              column_name: :id,
              job_arguments: [issue_type_enum[issue_type], base_work_item_type_ids[issue_type_enum[issue_type]]],
              interval: interval,
              batch_size: described_class::BATCH_SIZE,
              max_batch_size: described_class::MAX_BATCH_SIZE,
              sub_batch_size: described_class::SUB_BATCH_SIZE
            )
          end
        end
      end
    end
  end

  describe '#down' do
    it 'deletes all batched migration records' do
      migrate!
      schema_migrate_down!

      expect(migration).not_to have_scheduled_batched_migration
    end
  end
end