Commit 650a6dee authored by Yorick Peterse's avatar Yorick Peterse

Remove transaction when migrating diff commits

The background migration MigrateMergeRequestDiffCommitUsers was wrapping
its updates in a transaction. This was a left-over from an early draft
of the merge request. As the bulk updates can take some time to
complete, this can lead to long transaction timings. On production this
ultimately lead to the incident described in issue
https://gitlab.com/gitlab-com/gl-infra/production/-/issues/5119, though
fortunately this didn't seem to negatively affect users.

This commit removes the transaction, and adds a migration to reschedule
all the background jobs (which have been removed from the queue).
Rescheduling all jobs is the simplest solution to ensuring all data is
migrated. Rows that have already been migrated will be updated again,
but that's not a problem. Any existing data in
merge_request_diff_commit_users will just be reused.

See the following for more details:

- https://gitlab.com/gitlab-org/gitlab/-/merge_requests/63669#note_621297454
- https://gitlab.com/gitlab-com/gl-infra/production/-/issues/5119
- https://gitlab.com/gitlab-org/gitlab/-/issues/331823

Changelog: fixed
parent 1dc8a831
# frozen_string_literal: true
class ScheduleMergeRequestDiffUsersBackgroundMigration < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
# The number of rows to process in a single migration job.
#
# The minimum interval for background migrations is two minutes. On staging we
# observed we can process roughly 20 000 rows in a minute. Based on the total
# number of rows on staging, this translates to a total processing time of
# roughly 14 days.
#
# By using a batch size of 40 000, we maintain a rate of roughly 20 000 rows
# per minute, hopefully keeping the total migration time under two weeks;
# instead of four weeks.
BATCH_SIZE = 40_000
MIGRATION_NAME = 'MigrateMergeRequestDiffCommitUsers'
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
def up
start = MergeRequestDiff.minimum(:id).to_i
max = MergeRequestDiff.maximum(:id).to_i
delay = BackgroundMigrationWorker.minimum_interval
# The table merge_request_diff_commits contains _a lot_ of rows (roughly 400
# 000 000 on staging). Iterating a table that large to determine job ranges
# would take a while.
#
# To avoid that overhead, we simply schedule fixed ranges according to the
# minimum and maximum IDs. The background migration in turn only processes
# rows that actually exist.
while start < max
stop = start + BATCH_SIZE
migrate_in(delay, MIGRATION_NAME, [start, stop])
Gitlab::Database::BackgroundMigrationJob
.create!(class_name: MIGRATION_NAME, arguments: [start, stop])
delay += BackgroundMigrationWorker.minimum_interval
start += BATCH_SIZE
end
# no-op
end
def down
......
# frozen_string_literal: true
class RescheduleMergeRequestDiffUsersBackgroundMigration < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
# The number of rows to process in a single migration job.
#
# The minimum interval for background migrations is two minutes. On staging we
# observed we can process roughly 20 000 rows in a minute. Based on the total
# number of rows on staging, this translates to a total processing time of
# roughly 14 days.
#
# By using a batch size of 40 000, we maintain a rate of roughly 20 000 rows
# per minute, hopefully keeping the total migration time under two weeks;
# instead of four weeks.
BATCH_SIZE = 40_000
MIGRATION_NAME = 'MigrateMergeRequestDiffCommitUsers'
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
def up
start = MergeRequestDiff.minimum(:id).to_i
max = MergeRequestDiff.maximum(:id).to_i
delay = BackgroundMigrationWorker.minimum_interval
Gitlab::Database::BackgroundMigrationJob
.where(class_name: MIGRATION_NAME)
.delete_all
# The table merge_request_diff_commits contains _a lot_ of rows (roughly 400
# 000 000 on staging). Iterating a table that large to determine job ranges
# would take a while.
#
# To avoid that overhead, we simply schedule fixed ranges according to the
# minimum and maximum IDs. The background migration in turn only processes
# rows that actually exist.
while start < max
stop = start + BATCH_SIZE
migrate_in(delay, MIGRATION_NAME, [start, stop])
Gitlab::Database::BackgroundMigrationJob
.create!(class_name: MIGRATION_NAME, arguments: [start, stop])
delay += BackgroundMigrationWorker.minimum_interval
start += BATCH_SIZE
end
end
def down
# no-op
end
end
8545d6575c9dacec6796882677c4403cf3559430518e8709bf390f20717413d7
\ No newline at end of file
......@@ -172,19 +172,17 @@ module Gitlab
# Updates rows in merge_request_diff_commits with their new
# commit_author_id and committer_id values.
def update_commit_rows(to_update, user_mapping)
MergeRequestDiffCommitUser.transaction do
to_update.each_slice(UPDATES_PER_QUERY) do |slice|
updates = {}
to_update.each_slice(UPDATES_PER_QUERY) do |slice|
updates = {}
slice.each do |(diff_id, order), (author, committer)|
author_id = user_mapping[author]&.id
committer_id = user_mapping[committer]&.id
slice.each do |(diff_id, order), (author, committer)|
author_id = user_mapping[author]&.id
committer_id = user_mapping[committer]&.id
updates[[diff_id, order]] = [author_id, committer_id]
end
bulk_update_commit_rows(updates)
updates[[diff_id, order]] = [author_id, committer_id]
end
bulk_update_commit_rows(updates)
end
end
......
# frozen_string_literal: true
require 'spec_helper'
require_migration! 'schedule_merge_request_diff_users_background_migration'
require_migration! 'reschedule_merge_request_diff_users_background_migration'
RSpec.describe ScheduleMergeRequestDiffUsersBackgroundMigration, :migration do
RSpec.describe RescheduleMergeRequestDiffUsersBackgroundMigration, :migration do
let(:migration) { described_class.new }
describe '#up' do
......@@ -19,6 +19,21 @@ RSpec.describe ScheduleMergeRequestDiffUsersBackgroundMigration, :migration do
.and_return(85_123)
end
it 'deletes existing background migration job records' do
args = [150_000, 300_000]
Gitlab::Database::BackgroundMigrationJob
.create!(class_name: described_class::MIGRATION_NAME, arguments: args)
migration.up
found = Gitlab::Database::BackgroundMigrationJob
.where(class_name: described_class::MIGRATION_NAME, arguments: args)
.count
expect(found).to eq(0)
end
it 'schedules the migrations in batches' do
expect(migration)
.to receive(:migrate_in)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment