Commit 0687ed31 authored by Yorick Peterse's avatar Yorick Peterse

Use keyset pagination when fixing diff commits

The background migration FixMergeRequestDiffCommitUsers, originally
added in https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73307,
didn't use keyset pagination for iterating over diff commits. In some
cases this would lead to a SQL timeout, as a single query would try to
fetch too many diff commit rows at once. This commit changes the code to
use keyset pagination, allowing us to fetch the necessary data without
any SQL timeouts.

Changelog: performance
parent 9e34f971
...@@ -7,6 +7,7 @@ class MergeRequestDiffCommit < ApplicationRecord ...@@ -7,6 +7,7 @@ class MergeRequestDiffCommit < ApplicationRecord
include ShaAttribute include ShaAttribute
include CachedCommit include CachedCommit
include IgnorableColumns include IgnorableColumns
include FromUnion
ignore_column %i[author_name author_email committer_name committer_email], ignore_column %i[author_name author_email committer_name committer_email],
remove_with: '14.6', remove_with: '14.6',
......
...@@ -10,7 +10,10 @@ module Gitlab ...@@ -10,7 +10,10 @@ module Gitlab
# this process needs Git/Gitaly access, and duplicating all that code is far # this process needs Git/Gitaly access, and duplicating all that code is far
# too much, this migration relies on global models such as Project, # too much, this migration relies on global models such as Project,
# MergeRequest, etc. # MergeRequest, etc.
# rubocop: disable Metrics/ClassLength
class FixMergeRequestDiffCommitUsers class FixMergeRequestDiffCommitUsers
BATCH_SIZE = 100
def initialize def initialize
@commits = {} @commits = {}
@users = {} @users = {}
...@@ -33,24 +36,47 @@ module Gitlab ...@@ -33,24 +36,47 @@ module Gitlab
# Loading everything using one big query may result in timeouts (e.g. # Loading everything using one big query may result in timeouts (e.g.
# for projects the size of gitlab-org/gitlab). So instead we query # for projects the size of gitlab-org/gitlab). So instead we query
# data on a per merge request basis. # data on a per merge request basis.
project.merge_requests.each_batch do |mrs| project.merge_requests.each_batch(column: :iid) do |mrs|
::MergeRequestDiffCommit mrs.ids.each do |mr_id|
.select([ each_row_to_check(mr_id) do |commit|
:merge_request_diff_id,
:relative_order,
:sha,
:committer_id,
:commit_author_id
])
.joins(merge_request_diff: :merge_request)
.where(merge_requests: { id: mrs.select(:id) })
.where('commit_author_id IS NULL OR committer_id IS NULL')
.each do |commit|
update_commit(project, commit) update_commit(project, commit)
end end
end
end end
end end
def each_row_to_check(merge_request_id, &block)
columns = %w[merge_request_diff_id relative_order].map do |col|
Pagination::Keyset::ColumnOrderDefinition.new(
attribute_name: col,
order_expression: MergeRequestDiffCommit.arel_table[col.to_sym].asc,
nullable: :not_nullable,
distinct: false
)
end
order = Pagination::Keyset::Order.build(columns)
scope = MergeRequestDiffCommit
.joins(:merge_request_diff)
.where(merge_request_diffs: { merge_request_id: merge_request_id })
.where('commit_author_id IS NULL OR committer_id IS NULL')
.order(order)
Pagination::Keyset::Iterator
.new(scope: scope, use_union_optimization: true)
.each_batch(of: BATCH_SIZE) do |rows|
rows
.select([
:merge_request_diff_id,
:relative_order,
:sha,
:committer_id,
:commit_author_id
])
.each(&block)
end
end
# rubocop: disable Metrics/AbcSize # rubocop: disable Metrics/AbcSize
def update_commit(project, row) def update_commit(project, row)
commit = find_commit(project, row.sha) commit = find_commit(project, row.sha)
...@@ -125,5 +151,6 @@ module Gitlab ...@@ -125,5 +151,6 @@ module Gitlab
MergeRequestDiffCommit.arel_table MergeRequestDiffCommit.arel_table
end end
end end
# rubocop: enable Metrics/ClassLength
end end
end end
...@@ -49,6 +49,36 @@ RSpec.describe Gitlab::BackgroundMigration::FixMergeRequestDiffCommitUsers do ...@@ -49,6 +49,36 @@ RSpec.describe Gitlab::BackgroundMigration::FixMergeRequestDiffCommitUsers do
end end
end end
describe '#process' do
it 'processes the merge requests of the project' do
project = create(:project, :repository)
commit = project.commit
mr = create(
:merge_request_with_diffs,
source_project: project,
target_project: project
)
diff = mr.merge_request_diffs.first
create(
:merge_request_diff_commit,
merge_request_diff: diff,
sha: commit.sha,
relative_order: 9000
)
migration.process(project)
updated = diff
.merge_request_diff_commits
.find_by(sha: commit.sha, relative_order: 9000)
expect(updated.commit_author_id).not_to be_nil
expect(updated.committer_id).not_to be_nil
end
end
describe '#update_commit' do describe '#update_commit' do
let(:project) { create(:project, :repository) } let(:project) { create(:project, :repository) }
let(:mr) do let(:mr) do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment