Commit 294c5c41 authored by Felipe Artur's avatar Felipe Artur

Remove auto vacuum logic, decrease batch size and interval

parent 7bd066a1
......@@ -11,10 +11,12 @@ class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0]
# issues count: 13587305
# merge requests count: 18925274
#
# Using 25000 as batch size should take around 26 hours
# to migrate both issues and merge requests
BATCH_SIZE = 25000
DELAY_INTERVAL = 5.minutes.to_i
# Using 5000 as batch size and 115 seconds interval will give:
# 2718 jobs for issues - taking ~86 hours
# 3786 jobs for merge requests - taking ~120 hours
#
BATCH_SIZE = 5000
DELAY_INTERVAL = 120.seconds.to_i
ISSUES_MIGRATION = 'SyncIssuesStateId'.freeze
MERGE_REQUESTS_MIGRATION = 'SyncMergeRequestsStateId'.freeze
......@@ -34,14 +36,14 @@ class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0]
def up
queue_background_migration_jobs_by_range_at_intervals(
Issue.where(state_id: nil),
Issue.all,
ISSUES_MIGRATION,
DELAY_INTERVAL,
batch_size: BATCH_SIZE
)
queue_background_migration_jobs_by_range_at_intervals(
MergeRequest.where(state_id: nil),
MergeRequest.all,
MERGE_REQUESTS_MIGRATION,
DELAY_INTERVAL,
batch_size: BATCH_SIZE
......
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# == Reschedulable helper
#
# Allows background migrations to be rescheduled if a condition is met,
# the condition should be overridden in classes in #should_reschedule? method.
#
# For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count
# on DB is not acceptable.
#
module Reschedulable
extend ActiveSupport::Concern
# Use this method to perform the background migration and it will be rescheduled
# if #should_reschedule? returns true.
def reschedule_if_needed(*args, &block)
if should_reschedule?
BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args)
else
yield
end
end
# Override this on base class if you need a different reschedule condition
def should_reschedule?
raise NotImplementedError, "#{self.class} does not implement #{__method__}"
end
# Override in subclass if a different dead tuple threshold
def dead_tuples_threshold
@dead_tuples_threshold ||= 50_000
end
# Override in subclass if a different wait time
def wait_time
@wait_time ||= 5.minutes
end
def execute_statement(sql)
ActiveRecord::Base.connection.execute(sql)
end
def wait_for_deadtuple_vacuum?(table_name)
return false unless Gitlab::Database.postgresql?
dead_tuples_count_for(table_name) >= dead_tuples_threshold
end
def dead_tuples_count_for(table_name)
dead_tuple =
execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\
"WHERE relname = '#{table_name}'")[0]
dead_tuple&.fetch('n_dead_tup', 0).to_i
end
end
end
end
......@@ -4,29 +4,19 @@
module Gitlab
module BackgroundMigration
class SyncIssuesStateId
include Reschedulable
def perform(start_id, end_id)
Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}")
reschedule_if_needed(start_id, end_id) do
execute_statement <<~SQL
UPDATE issues
SET state_id =
CASE state
WHEN 'opened' THEN 1
WHEN 'closed' THEN 2
END
WHERE state_id IS NULL
AND id BETWEEN #{start_id} AND #{end_id}
SQL
end
end
private
def should_reschedule?
wait_for_deadtuple_vacuum?('issues')
ActiveRecord::Base.connection.execute <<~SQL
UPDATE issues
SET state_id =
CASE state
WHEN 'opened' THEN 1
WHEN 'closed' THEN 2
END
WHERE state_id IS NULL
AND id BETWEEN #{start_id} AND #{end_id}
SQL
end
end
end
......
......@@ -4,31 +4,21 @@
module Gitlab
module BackgroundMigration
class SyncMergeRequestsStateId
include Reschedulable
def perform(start_id, end_id)
Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}")
reschedule_if_needed(start_id, end_id) do
execute_statement <<~SQL
UPDATE merge_requests
SET state_id =
CASE state
WHEN 'opened' THEN 1
WHEN 'closed' THEN 2
WHEN 'merged' THEN 3
WHEN 'locked' THEN 4
END
WHERE state_id IS NULL
AND id BETWEEN #{start_id} AND #{end_id}
SQL
end
end
private
def should_reschedule?
wait_for_deadtuple_vacuum?('issues')
ActiveRecord::Base.connection.execute <<~SQL
UPDATE merge_requests
SET state_id =
CASE state
WHEN 'opened' THEN 1
WHEN 'closed' THEN 2
WHEN 'merged' THEN 3
WHEN 'locked' THEN 4
END
WHERE state_id IS NULL
AND id BETWEEN #{start_id} AND #{end_id}
SQL
end
end
end
......
......@@ -26,33 +26,14 @@ describe ScheduleSyncIssuablesStateId, :migration do
Timecop.freeze do
migrate!
expect(migration).to be_scheduled_delayed_migration(5.minutes, resource_1.id, resource_2.id)
expect(migration).to be_scheduled_delayed_migration(10.minutes, resource_3.id, resource_4.id)
expect(migration).to be_scheduled_delayed_migration(120.seconds, resource_1.id, resource_2.id)
expect(migration).to be_scheduled_delayed_migration(240.seconds, resource_3.id, resource_4.id)
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
end
end
shared_examples 'rescheduling migrations' do
before do
Sidekiq::Worker.clear_all
end
it 'reschedules migrations when should wait for dead tuple vacuum' do
worker = worker_class.new
Sidekiq::Testing.fake! do
allow(worker).to receive(:wait_for_deadtuple_vacuum?) { true }
worker.perform(resource_1.id, resource_2.id)
expect(worker_class.name.demodulize).to be_scheduled_delayed_migration(5.minutes, resource_1.id, resource_2.id)
expect(BackgroundMigrationWorker.jobs.size).to eq(1)
end
end
end
describe '#up' do
context 'issues' do
it 'migrates state column to integer' do
......@@ -76,12 +57,6 @@ describe ScheduleSyncIssuablesStateId, :migration do
let!(:resource_3) { issues.create!(description: 'third', state: 'closed') }
let!(:resource_4) { issues.create!(description: 'fourth', state: 'closed') }
end
it_behaves_like 'rescheduling migrations' do
let(:worker_class) { Gitlab::BackgroundMigration::SyncIssuesStateId }
let!(:resource_1) { issues.create!(description: 'first', state: 'opened') }
let!(:resource_2) { issues.create!(description: 'second', state: 'closed') }
end
end
context 'merge requests' do
......@@ -108,12 +83,6 @@ describe ScheduleSyncIssuablesStateId, :migration do
let!(:resource_3) { merge_requests.create!(state: 'merged', target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') }
let!(:resource_4) { merge_requests.create!(state: 'locked', target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') }
end
it_behaves_like 'rescheduling migrations' do
let(:worker_class) { Gitlab::BackgroundMigration::SyncMergeRequestsStateId }
let(:resource_1) { merge_requests.create!(state: 'opened', target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') }
let(:resource_2) { merge_requests.create!(state: 'closed', target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') }
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment