Commit 0687ed31 authored by Yorick Peterse's avatar Yorick Peterse

Use keyset pagination when fixing diff commits

The background migration FixMergeRequestDiffCommitUsers, originally
added in https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73307,
didn't use keyset pagination for iterating over diff commits. In some
cases this would lead to a SQL timeout, as a single query would try to
fetch too many diff commit rows at once. This commit changes the code to
use keyset pagination, allowing us to fetch the necessary data without
any SQL timeouts.

Changelog: performance
parent 9e34f971
......@@ -7,6 +7,7 @@ class MergeRequestDiffCommit < ApplicationRecord
include ShaAttribute
include CachedCommit
include IgnorableColumns
include FromUnion
ignore_column %i[author_name author_email committer_name committer_email],
remove_with: '14.6',
......
......@@ -10,7 +10,10 @@ module Gitlab
# this process needs Git/Gitaly access, and duplicating all that code is far
# too much, this migration relies on global models such as Project,
# MergeRequest, etc.
# rubocop: disable Metrics/ClassLength
class FixMergeRequestDiffCommitUsers
BATCH_SIZE = 100
def initialize
@commits = {}
@users = {}
......@@ -33,24 +36,47 @@ module Gitlab
# Loading everything using one big query may result in timeouts (e.g.
# for projects the size of gitlab-org/gitlab). So instead we query
# data on a per merge request basis.
project.merge_requests.each_batch do |mrs|
::MergeRequestDiffCommit
.select([
:merge_request_diff_id,
:relative_order,
:sha,
:committer_id,
:commit_author_id
])
.joins(merge_request_diff: :merge_request)
.where(merge_requests: { id: mrs.select(:id) })
.where('commit_author_id IS NULL OR committer_id IS NULL')
.each do |commit|
project.merge_requests.each_batch(column: :iid) do |mrs|
mrs.ids.each do |mr_id|
each_row_to_check(mr_id) do |commit|
update_commit(project, commit)
end
end
end
end
def each_row_to_check(merge_request_id, &block)
columns = %w[merge_request_diff_id relative_order].map do |col|
Pagination::Keyset::ColumnOrderDefinition.new(
attribute_name: col,
order_expression: MergeRequestDiffCommit.arel_table[col.to_sym].asc,
nullable: :not_nullable,
distinct: false
)
end
order = Pagination::Keyset::Order.build(columns)
scope = MergeRequestDiffCommit
.joins(:merge_request_diff)
.where(merge_request_diffs: { merge_request_id: merge_request_id })
.where('commit_author_id IS NULL OR committer_id IS NULL')
.order(order)
Pagination::Keyset::Iterator
.new(scope: scope, use_union_optimization: true)
.each_batch(of: BATCH_SIZE) do |rows|
rows
.select([
:merge_request_diff_id,
:relative_order,
:sha,
:committer_id,
:commit_author_id
])
.each(&block)
end
end
# rubocop: disable Metrics/AbcSize
def update_commit(project, row)
commit = find_commit(project, row.sha)
......@@ -125,5 +151,6 @@ module Gitlab
MergeRequestDiffCommit.arel_table
end
end
# rubocop: enable Metrics/ClassLength
end
end
......@@ -49,6 +49,36 @@ RSpec.describe Gitlab::BackgroundMigration::FixMergeRequestDiffCommitUsers do
end
end
describe '#process' do
it 'processes the merge requests of the project' do
project = create(:project, :repository)
commit = project.commit
mr = create(
:merge_request_with_diffs,
source_project: project,
target_project: project
)
diff = mr.merge_request_diffs.first
create(
:merge_request_diff_commit,
merge_request_diff: diff,
sha: commit.sha,
relative_order: 9000
)
migration.process(project)
updated = diff
.merge_request_diff_commits
.find_by(sha: commit.sha, relative_order: 9000)
expect(updated.commit_author_id).not_to be_nil
expect(updated.committer_id).not_to be_nil
end
end
describe '#update_commit' do
let(:project) { create(:project, :repository) }
let(:mr) do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment