Commit 05e9c9f7 authored by Rémy Coutable's avatar Rémy Coutable

Merge branch...

Merge branch '36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout' into 'master'

Insert at most 1,000 rows at once in MR diff background migration

Closes #36631 et #37505

See merge request gitlab-org/gitlab-ce!13661
parents 66775a80 472be7fe
---
title: Reschedule merge request diff background migrations to catch failures from
9.5 run
merge_request:
author:
type: fixed
class ScheduleMergeRequestDiffMigrationsTakeTwo < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
BATCH_SIZE = 500
MIGRATION = 'DeserializeMergeRequestDiffsAndCommits'
DELAY_INTERVAL = 10.minutes
disable_ddl_transaction!
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
include ::EachBatch
default_scope { where('st_commits IS NOT NULL OR st_diffs IS NOT NULL') }
end
# By this point, we assume ScheduleMergeRequestDiffMigrations - the first
# version of this - has already run. On GitLab.com, we have ~220k un-migrated
# rows, but these rows will, in general, take a long time.
#
# With a gap of 10 minutes per batch, and 500 rows per batch, these migrations
# are scheduled over 220_000 / 500 / 6 ~= 74 hours, which is a little over
# three days.
def up
queue_background_migration_jobs_by_range_at_intervals(MergeRequestDiff, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
end
def down
end
end
...@@ -3,11 +3,18 @@ module Gitlab ...@@ -3,11 +3,18 @@ module Gitlab
class DeserializeMergeRequestDiffsAndCommits class DeserializeMergeRequestDiffsAndCommits
attr_reader :diff_ids, :commit_rows, :file_rows attr_reader :diff_ids, :commit_rows, :file_rows
class Error < StandardError
def backtrace
cause.backtrace
end
end
class MergeRequestDiff < ActiveRecord::Base class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs' self.table_name = 'merge_request_diffs'
end end
BUFFER_ROWS = 1000 BUFFER_ROWS = 1000
DIFF_FILE_BUFFER_ROWS = 100
def perform(start_id, stop_id) def perform(start_id, stop_id)
merge_request_diffs = MergeRequestDiff merge_request_diffs = MergeRequestDiff
...@@ -26,13 +33,17 @@ module Gitlab ...@@ -26,13 +33,17 @@ module Gitlab
if diff_ids.length > BUFFER_ROWS || if diff_ids.length > BUFFER_ROWS ||
commit_rows.length > BUFFER_ROWS || commit_rows.length > BUFFER_ROWS ||
file_rows.length > BUFFER_ROWS file_rows.length > DIFF_FILE_BUFFER_ROWS
flush_buffers! flush_buffers!
end end
end end
flush_buffers! flush_buffers!
rescue => e
Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}")
raise Error.new(e.inspect)
end end
private private
...@@ -45,17 +56,28 @@ module Gitlab ...@@ -45,17 +56,28 @@ module Gitlab
def flush_buffers! def flush_buffers!
if diff_ids.any? if diff_ids.any?
MergeRequestDiff.transaction do commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows) bulk_insert('merge_request_diff_commits', commit_rows_slice)
Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows) end
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil) file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
bulk_insert('merge_request_diff_files', file_rows_slice)
end end
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
end end
reset_buffers! reset_buffers!
end end
def bulk_insert(table, rows)
Gitlab::Database.bulk_insert(table, rows)
rescue ActiveRecord::RecordNotUnique
ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort
Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}")
end
def single_diff_rows(merge_request_diff) def single_diff_rows(merge_request_diff)
sha_attribute = Gitlab::Database::ShaAttribute.new sha_attribute = Gitlab::Database::ShaAttribute.new
commits = YAML.load(merge_request_diff.st_commits) rescue [] commits = YAML.load(merge_request_diff.st_commits) rescue []
......
require 'spec_helper' require 'spec_helper'
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
describe '#perform' do describe '#perform' do
set(:merge_request) { create(:merge_request) } let(:merge_request) { create(:merge_request) }
set(:merge_request_diff) { merge_request.merge_request_diff } let(:merge_request_diff) { merge_request.merge_request_diff }
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) } let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
def diffs_to_hashes(diffs) def diffs_to_hashes(diffs)
...@@ -70,8 +70,8 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do ...@@ -70,8 +70,8 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
before do before do
merge_request.reload_diff(true) merge_request.reload_diff(true)
convert_to_yaml(start_id, merge_request_diff.commits, merge_request_diff.diffs) convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
convert_to_yaml(stop_id, updated_merge_request_diff.commits, updated_merge_request_diff.diffs) convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
MergeRequestDiffCommit.delete_all MergeRequestDiffCommit.delete_all
MergeRequestDiffFile.delete_all MergeRequestDiffFile.delete_all
...@@ -80,10 +80,32 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do ...@@ -80,10 +80,32 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
context 'when BUFFER_ROWS is exceeded' do context 'when BUFFER_ROWS is exceeded' do
before do before do
stub_const("#{described_class}::BUFFER_ROWS", 1) stub_const("#{described_class}::BUFFER_ROWS", 1)
allow(Gitlab::Database).to receive(:bulk_insert).and_call_original
end
it 'inserts commit rows in chunks of BUFFER_ROWS' do
# There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9.
stub_const("#{described_class}::BUFFER_ROWS", 20)
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_commits', anything)
.exactly(4)
.times
.and_call_original
subject.perform(start_id, stop_id)
end end
it 'updates and continues' do it 'inserts diff rows in chunks of DIFF_FILE_BUFFER_ROWS' do
expect(described_class::MergeRequestDiff).to receive(:transaction).twice # There are 20 files in each diff, so we should have slices of 20 + 20.
stub_const("#{described_class}::DIFF_FILE_BUFFER_ROWS", 20)
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_files', anything)
.exactly(2)
.times
.and_call_original
subject.perform(start_id, stop_id) subject.perform(start_id, stop_id)
end end
...@@ -91,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do ...@@ -91,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
context 'when BUFFER_ROWS is not exceeded' do context 'when BUFFER_ROWS is not exceeded' do
it 'only updates once' do it 'only updates once' do
expect(described_class::MergeRequestDiff).to receive(:transaction).once expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_commits', anything)
.once
.and_call_original
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_files', anything)
.once
.and_call_original
subject.perform(start_id, stop_id) subject.perform(start_id, stop_id)
end end
end end
end
context 'when the merge request diff update fails' do context 'when some rows were already inserted due to a previous failure' do
before do before do
allow(described_class::MergeRequestDiff) subject.perform(start_id, stop_id)
.to receive(:update_all).and_raise(ActiveRecord::Rollback)
end
it 'does not add any diff commits' do convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
.not_to change { MergeRequestDiffCommit.count } end
it 'does not raise' do
expect { subject.perform(start_id, stop_id) }.not_to raise_exception
end
it 'logs a message' do
expect(Rails.logger).to receive(:info)
.with(
a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect))
)
.twice
subject.perform(start_id, stop_id)
end
it 'ends up with the correct rows' do
expect(updated_merge_request_diff.commits.count).to eq(29)
expect(updated_merge_request_diff.raw_diffs.count).to eq(20)
end
end end
it 'does not add any diff files' do context 'when the merge request diff update fails' do
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } let(:exception) { ActiveRecord::RecordNotFound }
.not_to change { MergeRequestDiffFile.count }
let(:perform_ignoring_exceptions) do
begin
subject.perform(start_id, stop_id)
rescue described_class::Error
end
end
before do
allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation)
.to receive(:update_all).and_raise(exception)
end
it 'raises an error' do
expect { subject.perform(start_id, stop_id) }
.to raise_exception(described_class::Error)
end
it 'logs the error' do
expect(Rails.logger).to receive(:info).with(
a_string_matching(described_class.name)
.and(matching([start_id, stop_id].inspect))
.and(matching(exception.name))
)
perform_ignoring_exceptions
end
it 'still adds diff commits' do
expect { perform_ignoring_exceptions }
.to change { MergeRequestDiffCommit.count }
end
it 'still adds diff files' do
expect { perform_ignoring_exceptions }
.to change { MergeRequestDiffFile.count }
end
end end
end end
......
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20170926150348_schedule_merge_request_diff_migrations_take_two')
describe ScheduleMergeRequestDiffMigrationsTakeTwo, :migration, :sidekiq do
matcher :be_scheduled_migration do |time, *expected|
match do |migration|
BackgroundMigrationWorker.jobs.any? do |job|
job['args'] == [migration, expected] &&
job['at'].to_i == time.to_i
end
end
failure_message do |migration|
"Migration `#{migration}` with args `#{expected.inspect}` not scheduled!"
end
end
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
let(:projects) { table(:projects) }
before do
stub_const("#{described_class.name}::BATCH_SIZE", 1)
projects.create!(id: 1, name: 'gitlab', path: 'gitlab')
merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master')
merge_request_diffs.create!(id: 1, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: nil)
merge_request_diffs.create!(id: 2, merge_request_id: 1, st_commits: nil, st_diffs: YAML.dump([]))
merge_request_diffs.create!(id: 3, merge_request_id: 1, st_commits: nil, st_diffs: nil)
merge_request_diffs.create!(id: 4, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: YAML.dump([]))
end
it 'correctly schedules background migrations' do
Sidekiq::Testing.fake! do
Timecop.freeze do
migrate!
expect(described_class::MIGRATION).to be_scheduled_migration(10.minutes.from_now, 1, 1)
expect(described_class::MIGRATION).to be_scheduled_migration(20.minutes.from_now, 2, 2)
expect(described_class::MIGRATION).to be_scheduled_migration(30.minutes.from_now, 4, 4)
expect(BackgroundMigrationWorker.jobs.size).to eq 3
end
end
end
it 'migrates the data' do
Sidekiq::Testing.inline! do
non_empty = 'st_commits IS NOT NULL OR st_diffs IS NOT NULL'
expect(merge_request_diffs.where(non_empty).count).to eq 3
migrate!
expect(merge_request_diffs.where(non_empty).count).to eq 0
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment