Commit 91719415 authored by Sean McGivern

Insert at most 1,000 rows at once in MR diff background migration

We were hitting the statement timeout for very large MR diffs. Now we insert at
most 1,000 rows to `merge_request_diff_commits` in a single statement, or 100
rows to `merge_request_diff_files`.
parent ccfe6860
...@@ -8,6 +8,7 @@ module Gitlab ...@@ -8,6 +8,7 @@ module Gitlab
end end
BUFFER_ROWS = 1000 BUFFER_ROWS = 1000
DIFF_FILE_BUFFER_ROWS = 100
def perform(start_id, stop_id) def perform(start_id, stop_id)
merge_request_diffs = MergeRequestDiff merge_request_diffs = MergeRequestDiff
...@@ -26,7 +27,7 @@ module Gitlab ...@@ -26,7 +27,7 @@ module Gitlab
if diff_ids.length > BUFFER_ROWS || if diff_ids.length > BUFFER_ROWS ||
commit_rows.length > BUFFER_ROWS || commit_rows.length > BUFFER_ROWS ||
file_rows.length > BUFFER_ROWS file_rows.length > DIFF_FILE_BUFFER_ROWS
flush_buffers! flush_buffers!
end end
...@@ -46,8 +47,13 @@ module Gitlab ...@@ -46,8 +47,13 @@ module Gitlab
def flush_buffers! def flush_buffers!
if diff_ids.any? if diff_ids.any?
MergeRequestDiff.transaction do MergeRequestDiff.transaction do
Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows) commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows) Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows_slice)
end
file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows_slice)
end
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil) MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
end end
......
...@@ -70,8 +70,8 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do ...@@ -70,8 +70,8 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
before do before do
merge_request.reload_diff(true) merge_request.reload_diff(true)
convert_to_yaml(start_id, merge_request_diff.commits, merge_request_diff.diffs) convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
convert_to_yaml(stop_id, updated_merge_request_diff.commits, updated_merge_request_diff.diffs) convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
MergeRequestDiffCommit.delete_all MergeRequestDiffCommit.delete_all
MergeRequestDiffFile.delete_all MergeRequestDiffFile.delete_all
...@@ -80,6 +80,8 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do ...@@ -80,6 +80,8 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
context 'when BUFFER_ROWS is exceeded' do context 'when BUFFER_ROWS is exceeded' do
before do before do
stub_const("#{described_class}::BUFFER_ROWS", 1) stub_const("#{described_class}::BUFFER_ROWS", 1)
allow(Gitlab::Database).to receive(:bulk_insert).and_call_original
end end
it 'updates and continues' do it 'updates and continues' do
...@@ -87,6 +89,32 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do ...@@ -87,6 +89,32 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
subject.perform(start_id, stop_id) subject.perform(start_id, stop_id)
end end
it 'inserts commit rows in chunks of BUFFER_ROWS' do
# There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9.
stub_const("#{described_class}::BUFFER_ROWS", 20)
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_commits', anything)
.exactly(4)
.times
.and_call_original
subject.perform(start_id, stop_id)
end
it 'inserts diff rows in chunks of DIFF_FILE_BUFFER_ROWS' do
# There are 20 files in each diff, so we should have slices of 20 + 20.
stub_const("#{described_class}::DIFF_FILE_BUFFER_ROWS", 20)
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_files', anything)
.exactly(2)
.times
.and_call_original
subject.perform(start_id, stop_id)
end
end end
context 'when BUFFER_ROWS is not exceeded' do context 'when BUFFER_ROWS is not exceeded' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment