Commit 773225fd authored by Matthias Käppler

Merge branch 'ab/bm-prom-metrics' into 'master'

Add prometheus metrics for batched migrations

See merge request gitlab-org/gitlab!58763
parents 5e8f24b7 60e1455d
......@@ -57,6 +57,13 @@ module Gitlab
# Writer for the batch class name attribute.
#
# The value is passed through `demodulize` before being written, so the
# attribute holds the demodulized form of whatever name the caller assigns.
#
# @param class_name [String] class name to store; must respond to `demodulize`
def batch_class_name=(class_name)
  demodulized_name = class_name.demodulize

  write_attribute(:batch_class_name, demodulized_name)
end
# Labels identifying this migration on reported Prometheus metrics.
#
# The hash is built on the first call and cached in @prometheus_labels, so
# subsequent calls return the very same Hash object.
#
# @return [Hash] :migration_id (the record id) and :migration_identifier,
#   formatted as "<job_class_name>/<table_name>.<column_name>"
def prometheus_labels
  return @prometheus_labels if @prometheus_labels

  identifier = format('%s/%s.%s', job_class_name, table_name, column_name)

  @prometheus_labels = {
    migration_id: id,
    migration_identifier: identifier
  }
end
end
end
end
......
......@@ -4,6 +4,8 @@ module Gitlab
module Database
module BackgroundMigration
class BatchedMigrationWrapper
extend Gitlab::Utils::StrongMemoize
# Wraps the execution of a batched_background_migration.
#
# Updates the job's tracking records with the status of the migration
......@@ -23,6 +25,7 @@ module Gitlab
raise e
ensure
finish_tracking_execution(batch_tracking_record)
track_prometheus_metrics(batch_tracking_record)
end
private
......@@ -51,6 +54,65 @@ module Gitlab
tracking_record.finished_at = Time.current
tracking_record.save!
end
# Reports Prometheus metrics for a job from its tracking record.
#
# In order, this sets the batch-size and sub-batch-size gauges, increments
# the updated-tuples counter by the batch size, observes the time-efficiency
# ratio, and - when the record has metrics with a 'timings' entry - observes
# each individual timing labelled with its operation.
#
# @param tracking_record job tracking record; read for batched_migration,
#   batch_size, sub_batch_size, started_at, finished_at and metrics
def track_prometheus_metrics(tracking_record)
  migration = tracking_record.batched_migration
  base_labels = migration.prometheus_labels

  metric_for(:gauge_batch_size).set(base_labels, tracking_record.batch_size)
  metric_for(:gauge_sub_batch_size).set(base_labels, tracking_record.sub_batch_size)
  metric_for(:counter_updated_tuples).increment(base_labels, tracking_record.batch_size)

  # Time efficiency: ratio of job duration to the migration interval
  # (ideal: less than, but close to 1). Duration is truncated to whole seconds.
  elapsed_seconds = (tracking_record.finished_at - tracking_record.started_at).to_i
  time_efficiency = elapsed_seconds / migration.interval.to_f
  metric_for(:histogram_time_efficiency).observe(base_labels, time_efficiency)

  job_metrics = tracking_record.metrics
  return unless job_metrics

  # 'timings' maps an operation name to a list of timings; it may be absent.
  job_metrics['timings']&.each do |operation, durations|
    timings_histogram = metric_for(:histogram_timings)
    operation_labels = base_labels.merge(operation: operation)

    durations.each { |duration| timings_histogram.observe(operation_labels, duration) }
  end
end
# Fetches a metric object from the class-level metrics hash.
#
# @param name [Symbol] key into the hash returned by the class method `metrics`
# @return the metric stored under that key (plain Hash#[] lookup)
def metric_for(name)
  registry = self.class.metrics

  registry[name]
end
# Hash of the Prometheus metric objects used by the wrapper, keyed by a
# short symbolic name.
#
# The construction is wrapped in strong_memoize(:metrics); the metrics are
# created through Gitlab::Metrics in the order listed below.
#
# @return [Hash{Symbol => Object}] keys: :gauge_batch_size,
#   :gauge_sub_batch_size, :counter_updated_tuples, :histogram_timings,
#   :histogram_time_efficiency
def self.metrics
  strong_memoize(:metrics) do
    batch_size = Gitlab::Metrics.gauge(
      :batched_migration_job_batch_size,
      'Batch size for a batched migration job'
    )

    sub_batch_size = Gitlab::Metrics.gauge(
      :batched_migration_job_sub_batch_size,
      'Sub-batch size for a batched migration job'
    )

    updated_tuples = Gitlab::Metrics.counter(
      :batched_migration_job_updated_tuples_total,
      'Number of tuples updated by batched migration job'
    )

    # NOTE(review): the empty hash and frozen array are presumably base labels
    # and bucket boundaries - confirm against Gitlab::Metrics.histogram.
    timings = Gitlab::Metrics.histogram(
      :batched_migration_job_duration_seconds,
      'Timings for a batched migration job',
      {},
      [0.1, 0.25, 0.5, 1, 5].freeze
    )

    time_efficiency = Gitlab::Metrics.histogram(
      :batched_migration_job_time_efficiency,
      'Ratio of job duration to interval',
      {},
      [0.5, 0.9, 1, 1.5, 2].freeze
    )

    {
      gauge_batch_size: batch_size,
      gauge_sub_batch_size: sub_batch_size,
      counter_updated_tuples: updated_tuples,
      histogram_timings: timings,
      histogram_time_efficiency: time_efficiency
    }
  end
end
end
end
end
......
......@@ -195,4 +195,17 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m
# Covers the batch_class_name= writer by including the shared examples for
# attribute writers that demodulize the assigned class name.
describe '#batch_class_name=' do
it_behaves_like 'an attr_writer that demodulizes assigned class names', :batch_class_name
end
describe '#prometheus_labels' do
  # Fixed job class, table and column so the identifier string is predictable.
  let(:batched_migration) do
    create(:batched_background_migration, job_class_name: 'TestMigration', table_name: 'foo', column_name: 'bar')
  end

  it 'returns a hash with labels for the migration' do
    expected_labels = {
      migration_id: batched_migration.id,
      migration_identifier: 'TestMigration/foo.bar'
    }

    expect(batched_migration.prometheus_labels).to eq(expected_labels)
  end
end
end
......@@ -3,7 +3,8 @@
require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '#perform' do
let(:migration_wrapper) { described_class.new }
subject { described_class.new.perform(job_record) }
let(:job_class) { Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob }
let_it_be(:active_migration) { create(:batched_background_migration, :active, job_arguments: [:id, :other_id]) }
......@@ -18,7 +19,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
it 'runs the migration job' do
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, 'id', 'other_id')
migration_wrapper.perform(job_record)
subject
end
it 'updates the tracking record in the database' do
......@@ -30,7 +31,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
expect(job_record).to receive(:update!).with(hash_including(attempts: 1, status: :running)).and_call_original
freeze_time do
migration_wrapper.perform(job_record)
subject
reloaded_job_record = job_record.reload
......@@ -41,12 +42,66 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
end
end
context 'reporting prometheus metrics' do
  # Labels each reported metric is expected to carry for this job's migration.
  let(:labels) { job_record.batched_migration.prometheus_labels }

  before do
    # Stub the job so these examples only exercise the metrics reporting.
    allow(job_instance).to receive(:perform)
  end

  it 'reports batch_size' do
    batch_size_gauge = described_class.metrics[:gauge_batch_size]

    expect(batch_size_gauge).to receive(:set).with(labels, job_record.batch_size)

    subject
  end

  it 'reports sub_batch_size' do
    sub_batch_size_gauge = described_class.metrics[:gauge_sub_batch_size]

    expect(sub_batch_size_gauge).to receive(:set).with(labels, job_record.sub_batch_size)

    subject
  end

  it 'reports updated tuples (currently based on batch_size)' do
    tuples_counter = described_class.metrics[:counter_updated_tuples]

    expect(tuples_counter).to receive(:increment).with(labels, job_record.batch_size)

    subject
  end

  it 'reports summary of query timings' do
    batch_metrics = { 'timings' => { 'update_all' => [1, 2, 3, 4, 5] } }

    expect(job_instance).to receive(:batch_metrics).and_return(batch_metrics)

    # One observation is expected per timing, labelled with its operation.
    batch_metrics['timings'].each do |operation, durations|
      operation_labels = labels.merge(operation: operation)

      durations.each do |duration|
        expect(described_class.metrics[:histogram_timings]).to receive(:observe).with(operation_labels, duration)
      end
    end

    subject
  end

  it 'reports time efficiency' do
    freeze_time do
      # Time.current is expected to be read twice, in this order, 5 seconds apart.
      expect(Time).to receive(:current).and_return(Time.zone.now - 5.seconds).ordered
      expect(Time).to receive(:current).and_return(Time.zone.now).ordered

      expected_ratio = 5 / job_record.batched_migration.interval.to_f
      efficiency_histogram = described_class.metrics[:histogram_time_efficiency]

      expect(efficiency_histogram).to receive(:observe).with(labels, expected_ratio)

      subject
    end
  end
end
context 'when the migration job does not raise an error' do
it 'marks the tracking record as succeeded' do
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, 'id', 'other_id')
freeze_time do
migration_wrapper.perform(job_record)
subject
reloaded_job_record = job_record.reload
......@@ -63,7 +118,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
.and_raise(RuntimeError, 'Something broke!')
freeze_time do
expect { migration_wrapper.perform(job_record) }.to raise_error(RuntimeError, 'Something broke!')
expect { subject }.to raise_error(RuntimeError, 'Something broke!')
reloaded_job_record = job_record.reload
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment