Commit 4adacdd0 authored by Adam Hegyi's avatar Adam Hegyi

Merge branch 'fix-merge-request-diff-author-transactions' into 'master'

Remove transaction when migrating diff commits

See merge request gitlab-org/gitlab!65746
parents 1b116094 650a6dee
# frozen_string_literal: true
class ScheduleMergeRequestDiffUsersBackgroundMigration < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
# The number of rows to process in a single migration job.
#
# The minimum interval for background migrations is two minutes. On staging we
# observed we can process roughly 20 000 rows in a minute. Based on the total
# number of rows on staging, this translates to a total processing time of
# roughly 14 days.
#
# By using a batch size of 40 000, we maintain a rate of roughly 20 000 rows
# per minute, hopefully keeping the total migration time under two weeks;
# instead of four weeks.
BATCH_SIZE = 40_000
MIGRATION_NAME = 'MigrateMergeRequestDiffCommitUsers'
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
def up
start = MergeRequestDiff.minimum(:id).to_i
max = MergeRequestDiff.maximum(:id).to_i
delay = BackgroundMigrationWorker.minimum_interval
# The table merge_request_diff_commits contains _a lot_ of rows (roughly 400
# 000 000 on staging). Iterating a table that large to determine job ranges
# would take a while.
#
# To avoid that overhead, we simply schedule fixed ranges according to the
# minimum and maximum IDs. The background migration in turn only processes
# rows that actually exist.
while start < max
stop = start + BATCH_SIZE
migrate_in(delay, MIGRATION_NAME, [start, stop])
Gitlab::Database::BackgroundMigrationJob
.create!(class_name: MIGRATION_NAME, arguments: [start, stop])
delay += BackgroundMigrationWorker.minimum_interval
start += BATCH_SIZE
end
# no-op
end
def down
......
# frozen_string_literal: true
class RescheduleMergeRequestDiffUsersBackgroundMigration < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
# The number of rows to process in a single migration job.
#
# The minimum interval for background migrations is two minutes. On staging we
# observed we can process roughly 20 000 rows in a minute. Based on the total
# number of rows on staging, this translates to a total processing time of
# roughly 14 days.
#
# By using a batch size of 40 000, we maintain a rate of roughly 20 000 rows
# per minute, hopefully keeping the total migration time under two weeks;
# instead of four weeks.
BATCH_SIZE = 40_000
MIGRATION_NAME = 'MigrateMergeRequestDiffCommitUsers'
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
def up
start = MergeRequestDiff.minimum(:id).to_i
max = MergeRequestDiff.maximum(:id).to_i
delay = BackgroundMigrationWorker.minimum_interval
Gitlab::Database::BackgroundMigrationJob
.where(class_name: MIGRATION_NAME)
.delete_all
# The table merge_request_diff_commits contains _a lot_ of rows (roughly 400
# 000 000 on staging). Iterating a table that large to determine job ranges
# would take a while.
#
# To avoid that overhead, we simply schedule fixed ranges according to the
# minimum and maximum IDs. The background migration in turn only processes
# rows that actually exist.
while start < max
stop = start + BATCH_SIZE
migrate_in(delay, MIGRATION_NAME, [start, stop])
Gitlab::Database::BackgroundMigrationJob
.create!(class_name: MIGRATION_NAME, arguments: [start, stop])
delay += BackgroundMigrationWorker.minimum_interval
start += BATCH_SIZE
end
end
def down
# no-op
end
end
8545d6575c9dacec6796882677c4403cf3559430518e8709bf390f20717413d7
\ No newline at end of file
......@@ -172,19 +172,17 @@ module Gitlab
# Updates rows in merge_request_diff_commits with their new
# commit_author_id and committer_id values.
def update_commit_rows(to_update, user_mapping)
MergeRequestDiffCommitUser.transaction do
to_update.each_slice(UPDATES_PER_QUERY) do |slice|
updates = {}
to_update.each_slice(UPDATES_PER_QUERY) do |slice|
updates = {}
slice.each do |(diff_id, order), (author, committer)|
author_id = user_mapping[author]&.id
committer_id = user_mapping[committer]&.id
slice.each do |(diff_id, order), (author, committer)|
author_id = user_mapping[author]&.id
committer_id = user_mapping[committer]&.id
updates[[diff_id, order]] = [author_id, committer_id]
end
bulk_update_commit_rows(updates)
updates[[diff_id, order]] = [author_id, committer_id]
end
bulk_update_commit_rows(updates)
end
end
......
# frozen_string_literal: true
require 'spec_helper'
require_migration! 'schedule_merge_request_diff_users_background_migration'
require_migration! 'reschedule_merge_request_diff_users_background_migration'
RSpec.describe ScheduleMergeRequestDiffUsersBackgroundMigration, :migration do
RSpec.describe RescheduleMergeRequestDiffUsersBackgroundMigration, :migration do
let(:migration) { described_class.new }
describe '#up' do
......@@ -19,6 +19,21 @@ RSpec.describe ScheduleMergeRequestDiffUsersBackgroundMigration, :migration do
.and_return(85_123)
end
it 'deletes existing background migration job records' do
args = [150_000, 300_000]
Gitlab::Database::BackgroundMigrationJob
.create!(class_name: described_class::MIGRATION_NAME, arguments: args)
migration.up
found = Gitlab::Database::BackgroundMigrationJob
.where(class_name: described_class::MIGRATION_NAME, arguments: args)
.count
expect(found).to eq(0)
end
it 'schedules the migrations in batches' do
expect(migration)
.to receive(:migrate_in)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment