Commit ca104d66 authored by Heinrich Lee Yu's avatar Heinrich Lee Yu

Merge branch 'ab/optimize-batched-migrations' into 'master'

Optimize batch size for migrations automatically [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!60133
parents c5a87715 76d4449e
---
title: Add index to support execution time order for batched migration jobs
merge_request: 60133
author:
type: other
---
name: optimize_batched_migrations
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/60133
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/328817
milestone: '13.12'
type: ops
group: group::database
default_enabled: false
# frozen_string_literal: true
class AddExecutionOrderIndexToBatchedBackgroundMigrationJobs < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
INDEX_NAME = 'index_migration_jobs_on_migration_id_and_finished_at'
def up
add_concurrent_index :batched_background_migration_jobs, %i(batched_background_migration_id finished_at), name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :batched_background_migration_jobs, INDEX_NAME
end
end
aa0ae491a7f94d99ea0c42250434245a4f23b0084657b709b0aaad0317dfd6b1
\ No newline at end of file
......@@ -23256,6 +23256,8 @@ CREATE INDEX index_metrics_dashboard_annotations_on_timespan_end ON metrics_dash
CREATE INDEX index_metrics_users_starred_dashboards_on_project_id ON metrics_users_starred_dashboards USING btree (project_id);
CREATE INDEX index_migration_jobs_on_migration_id_and_finished_at ON batched_background_migration_jobs USING btree (batched_background_migration_id, finished_at);
CREATE INDEX index_milestone_releases_on_release_id ON milestone_releases USING btree (release_id);
CREATE INDEX index_milestones_on_description_trigram ON milestones USING gin (description gin_trgm_ops);
# frozen_string_literal: true
module Gitlab
module Database
module BackgroundMigration
# This is an optimizer for throughput of batched migration jobs
#
# The underyling mechanic is based on the concept of time efficiency:
# time efficiency = job duration / interval
# Ideally, this is close but lower than 1 - so we're using time efficiently.
#
# We aim to land in the 90%-98% range, which gives the database a little breathing room
# in between.
#
# The optimizer is based on calculating the exponential moving average of time efficiencies
# for the last N jobs. If we're outside the range, we add 10% to or decrease by 20% of the batch size.
class BatchOptimizer
# Target time efficiency for a job
# Time efficiency is defined as: job duration / interval
TARGET_EFFICIENCY = (0.8..0.98).freeze
# Lower and upper bound for the batch size
ALLOWED_BATCH_SIZE = (1_000..1_000_000).freeze
# Use this batch_size multiplier to increase batch size
INCREASE_MULTIPLIER = 1.1
# Use this batch_size multiplier to decrease batch size
DECREASE_MULTIPLIER = 0.8
attr_reader :migration, :number_of_jobs
def initialize(migration, number_of_jobs: 10)
@migration = migration
@number_of_jobs = number_of_jobs
end
def optimize!
return unless Feature.enabled?(:optimize_batched_migrations, type: :ops)
if multiplier = batch_size_multiplier
migration.batch_size = (migration.batch_size * multiplier).to_i.clamp(ALLOWED_BATCH_SIZE)
migration.save!
end
end
private
def batch_size_multiplier
efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs)
return unless efficiency
if TARGET_EFFICIENCY.include?(efficiency)
# We hit the range - no change
nil
elsif efficiency > TARGET_EFFICIENCY.max
# We're above the range - decrease by 20%
DECREASE_MULTIPLIER
else
# We're below the range - increase by 10%
INCREASE_MULTIPLIER
end
end
end
end
end
end
......@@ -15,10 +15,22 @@ module Gitlab
succeeded: 3
}
scope :successful_in_execution_order, -> { where.not(finished_at: nil).succeeded.order(:finished_at) }
delegate :aborted?, :job_class, :table_name, :column_name, :job_arguments,
to: :batched_migration, prefix: :migration
attribute :pause_ms, :integer, default: 100
def time_efficiency
return unless succeeded?
return unless finished_at && started_at
duration = finished_at - started_at
# TODO: Switch to individual job interval (prereq: https://gitlab.com/gitlab-org/gitlab/-/issues/328801)
duration.to_f / batched_migration.interval
end
end
end
end
......
......@@ -76,6 +76,30 @@ module Gitlab
migration_identifier: "%s/%s.%s" % [job_class_name, table_name, column_name]
}
end
def smoothed_time_efficiency(number_of_jobs: 10, alpha: 0.2)
jobs = batched_jobs.successful_in_execution_order.reverse_order.limit(number_of_jobs)
return if jobs.size < number_of_jobs
efficiencies = jobs.map(&:time_efficiency).reject(&:nil?).each_with_index
dividend = efficiencies.reduce(0) do |total, (job_eff, i)|
total + job_eff * (1 - alpha)**i
end
divisor = efficiencies.reduce(0) do |total, (job_eff, i)|
total + (1 - alpha)**i
end
return if divisor == 0
(dividend / divisor).round(2)
end
def optimize!
BatchOptimizer.new(self).optimize!
end
end
end
end
......
......@@ -21,6 +21,8 @@ module Gitlab
def run_migration_job(active_migration)
if next_batched_job = create_next_batched_job!(active_migration)
migration_wrapper.perform(next_batched_job)
active_migration.optimize!
else
finish_active_migration(active_migration)
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
describe '#optimize' do
subject { described_class.new(migration, number_of_jobs: number_of_jobs).optimize! }
let(:migration) { create(:batched_background_migration, batch_size: batch_size, sub_batch_size: 100, interval: 120) }
let(:batch_size) { 10_000 }
let_it_be(:number_of_jobs) { 5 }
def mock_efficiency(eff)
expect(migration).to receive(:smoothed_time_efficiency).with(number_of_jobs: number_of_jobs).and_return(eff)
end
it 'with unknown time efficiency, it keeps the batch size' do
mock_efficiency(nil)
expect { subject }.not_to change { migration.reload.batch_size }
end
it 'with a time efficiency of 95%, it keeps the batch size' do
mock_efficiency(0.95)
expect { subject }.not_to change { migration.reload.batch_size }
end
it 'with a time efficiency of 90%, it keeps the batch size' do
mock_efficiency(0.9)
expect { subject }.not_to change { migration.reload.batch_size }
end
it 'with a time efficiency of 70%, it increases the batch size by 10%' do
mock_efficiency(0.7)
expect { subject }.to change { migration.reload.batch_size }.from(10_000).to(11_000)
end
it 'with a time efficiency of 110%, it decreases the batch size by 20%' do
mock_efficiency(1.1)
expect { subject }.to change { migration.reload.batch_size }.from(10_000).to(8_000)
end
context 'reaching the upper limit for the batch size' do
let(:batch_size) { 950_000 }
it 'caps the batch size at 10M' do
mock_efficiency(0.7)
expect { subject }.to change { migration.reload.batch_size }.to(1_000_000)
end
end
context 'reaching the lower limit for the batch size' do
let(:batch_size) { 1_050 }
it 'caps the batch size at 1k' do
mock_efficiency(1.1)
expect { subject }.to change { migration.reload.batch_size }.to(1_000)
end
end
end
end
......@@ -47,4 +47,55 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model d
end
end
end
describe '#time_efficiency' do
subject { job.time_efficiency }
let(:migration) { build(:batched_background_migration, interval: 120.seconds) }
let(:job) { build(:batched_background_migration_job, status: :succeeded, batched_migration: migration) }
context 'when job has not yet succeeded' do
let(:job) { build(:batched_background_migration_job, status: :running) }
it 'returns nil' do
expect(subject).to be_nil
end
end
context 'when finished_at is not set' do
it 'returns nil' do
job.started_at = Time.zone.now
expect(subject).to be_nil
end
end
context 'when started_at is not set' do
it 'returns nil' do
job.finished_at = Time.zone.now
expect(subject).to be_nil
end
end
context 'when job has finished' do
it 'returns ratio of duration to interval, here: 0.5' do
freeze_time do
job.started_at = Time.zone.now - migration.interval / 2
job.finished_at = Time.zone.now
expect(subject).to eq(0.5)
end
end
it 'returns ratio of duration to interval, here: 1' do
freeze_time do
job.started_at = Time.zone.now - migration.interval
job.finished_at = Time.zone.now
expect(subject).to eq(1)
end
end
end
end
end
......@@ -50,6 +50,15 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationRunner do
batch_size: migration.batch_size,
sub_batch_size: migration.sub_batch_size)
end
it 'optimizes the migration after executing the job' do
migration.update!(min_value: event1.id, max_value: event2.id)
expect(migration_wrapper).to receive(:perform).ordered
expect(migration).to receive(:optimize!).ordered
runner.run_migration_job(migration)
end
end
context 'when the batch maximum exceeds the migration maximum' do
......
......@@ -232,4 +232,96 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m
expect(batched_migration.prometheus_labels).to eq(labels)
end
end
describe '#smoothed_time_efficiency' do
let(:migration) { create(:batched_background_migration, interval: 120.seconds) }
let(:end_time) { Time.zone.now }
around do |example|
freeze_time do
example.run
end
end
let(:common_attrs) do
{
status: :succeeded,
batched_migration: migration,
finished_at: end_time
}
end
context 'when there are not enough jobs' do
subject { migration.smoothed_time_efficiency(number_of_jobs: 10) }
it 'returns nil' do
create_list(:batched_background_migration_job, 9, **common_attrs)
expect(subject).to be_nil
end
end
context 'when there are enough jobs' do
subject { migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs) }
let!(:jobs) { create_list(:batched_background_migration_job, number_of_jobs, **common_attrs.merge(batched_migration: migration)) }
let(:number_of_jobs) { 10 }
before do
expect(migration).to receive_message_chain(:batched_jobs, :successful_in_execution_order, :reverse_order, :limit).with(no_args).with(no_args).with(number_of_jobs).and_return(jobs)
end
def mock_efficiencies(*effs)
effs.each_with_index do |eff, i|
expect(jobs[i]).to receive(:time_efficiency).and_return(eff)
end
end
context 'example 1: increasing trend, but only recently crossed threshold' do
it 'returns the smoothed time efficiency' do
mock_efficiencies(1.1, 1, 0.95, 0.9, 0.8, 0.95, 0.9, 0.8, 0.9, 0.95)
expect(subject).to be_within(0.05).of(0.95)
end
end
context 'example 2: increasing trend, crossed threshold a while ago' do
it 'returns the smoothed time efficiency' do
mock_efficiencies(1.2, 1.1, 1, 1, 1.1, 1, 0.95, 0.9, 0.95, 0.9)
expect(subject).to be_within(0.05).of(1.1)
end
end
context 'example 3: decreasing trend, but only recently crossed threshold' do
it 'returns the smoothed time efficiency' do
mock_efficiencies(0.9, 0.95, 1, 1.2, 1.1, 1.2, 1.1, 1.0, 1.1, 1.0)
expect(subject).to be_within(0.05).of(1.0)
end
end
context 'example 4: latest run spiked' do
it 'returns the smoothed time efficiency' do
mock_efficiencies(1.2, 0.9, 0.8, 0.9, 0.95, 0.9, 0.92, 0.9, 0.95, 0.9)
expect(subject).to be_within(0.02).of(0.96)
end
end
end
end
describe '#optimize!' do
subject { batched_migration.optimize! }
let(:batched_migration) { create(:batched_background_migration) }
let(:optimizer) { instance_double('Gitlab::Database::BackgroundMigration::BatchOptimizer') }
it 'calls the BatchOptimizer' do
expect(Gitlab::Database::BackgroundMigration::BatchOptimizer).to receive(:new).with(batched_migration).and_return(optimizer)
expect(optimizer).to receive(:optimize!)
subject
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment