Commit b4120f95 authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'pb-move-bbm-prometheus-metrics' into 'master'

Move BBM prometheus metrics into separate class

See merge request gitlab-org/gitlab!83644
parents ae5b9e31 a6f0994c
...@@ -4,10 +4,9 @@ module Gitlab ...@@ -4,10 +4,9 @@ module Gitlab
module Database module Database
module BackgroundMigration module BackgroundMigration
class BatchedMigrationWrapper class BatchedMigrationWrapper
extend Gitlab::Utils::StrongMemoize def initialize(connection: ApplicationRecord.connection, metrics: PrometheusMetrics.new)
def initialize(connection: ApplicationRecord.connection)
@connection = connection @connection = connection
@metrics = metrics
end end
# Wraps the execution of a batched_background_migration. # Wraps the execution of a batched_background_migration.
...@@ -28,12 +27,12 @@ module Gitlab ...@@ -28,12 +27,12 @@ module Gitlab
raise raise
ensure ensure
track_prometheus_metrics(batch_tracking_record) metrics.track(batch_tracking_record)
end end
private private
attr_reader :connection attr_reader :connection, :metrics
def start_tracking_execution(tracking_record) def start_tracking_execution(tracking_record)
tracking_record.run! tracking_record.run!
...@@ -63,80 +62,6 @@ module Gitlab ...@@ -63,80 +62,6 @@ module Gitlab
job_class.new job_class.new
end end
end end
def track_prometheus_metrics(tracking_record)
migration = tracking_record.batched_migration
base_labels = migration.prometheus_labels
metric_for(:gauge_batch_size).set(base_labels, tracking_record.batch_size)
metric_for(:gauge_sub_batch_size).set(base_labels, tracking_record.sub_batch_size)
metric_for(:gauge_interval).set(base_labels, tracking_record.batched_migration.interval)
metric_for(:gauge_job_duration).set(base_labels, (tracking_record.finished_at - tracking_record.started_at).to_i)
metric_for(:counter_updated_tuples).increment(base_labels, tracking_record.batch_size)
metric_for(:gauge_migrated_tuples).set(base_labels, tracking_record.batched_migration.migrated_tuple_count)
metric_for(:gauge_total_tuple_count).set(base_labels, tracking_record.batched_migration.total_tuple_count)
metric_for(:gauge_last_update_time).set(base_labels, Time.current.to_i)
if metrics = tracking_record.metrics
metrics['timings']&.each do |key, timings|
summary = metric_for(:histogram_timings)
labels = base_labels.merge(operation: key)
timings.each do |timing|
summary.observe(labels, timing)
end
end
end
end
def metric_for(name)
self.class.metrics[name]
end
def self.metrics
strong_memoize(:metrics) do
{
gauge_batch_size: Gitlab::Metrics.gauge(
:batched_migration_job_batch_size,
'Batch size for a batched migration job'
),
gauge_sub_batch_size: Gitlab::Metrics.gauge(
:batched_migration_job_sub_batch_size,
'Sub-batch size for a batched migration job'
),
gauge_interval: Gitlab::Metrics.gauge(
:batched_migration_job_interval_seconds,
'Interval for a batched migration job'
),
gauge_job_duration: Gitlab::Metrics.gauge(
:batched_migration_job_duration_seconds,
'Duration for a batched migration job'
),
counter_updated_tuples: Gitlab::Metrics.counter(
:batched_migration_job_updated_tuples_total,
'Number of tuples updated by batched migration job'
),
gauge_migrated_tuples: Gitlab::Metrics.gauge(
:batched_migration_migrated_tuples_total,
'Total number of tuples migrated by a batched migration'
),
histogram_timings: Gitlab::Metrics.histogram(
:batched_migration_job_query_duration_seconds,
'Query timings for a batched migration job',
{},
[0.1, 0.25, 0.5, 1, 5].freeze
),
gauge_total_tuple_count: Gitlab::Metrics.gauge(
:batched_migration_total_tuple_count,
'Total tuple count the migration needs to touch'
),
gauge_last_update_time: Gitlab::Metrics.gauge(
:batched_migration_last_update_time_seconds,
'Unix epoch time in seconds'
)
}
end
end
end end
end end
end end
......
# frozen_string_literal: true

module Gitlab
  module Database
    module BackgroundMigration
      # Reports Prometheus metrics for a single batched background migration
      # job execution: batch sizes, job duration, migrated/total tuple counts,
      # and per-operation query timing histograms.
      #
      # Metric objects are memoized at the class level (see .metrics) so all
      # instances share the same registered gauges/counters/histograms.
      class PrometheusMetrics
        extend Gitlab::Utils::StrongMemoize

        # Histogram buckets (in seconds) for per-operation query timings.
        QUERY_TIMING_BUCKETS = [0.1, 0.25, 0.5, 1, 5].freeze

        # Records all metrics for the given job tracking record.
        #
        # @param job_record [Object] a batched background migration job record;
        #   must respond to #batched_migration, #batch_size, #sub_batch_size,
        #   #started_at, #finished_at and #metrics.
        # @return [void]
        def track(job_record)
          migration_record = job_record.batched_migration
          base_labels = migration_record.prometheus_labels

          metric_for(:gauge_batch_size).set(base_labels, job_record.batch_size)
          metric_for(:gauge_sub_batch_size).set(base_labels, job_record.sub_batch_size)
          metric_for(:gauge_interval).set(base_labels, migration_record.interval)
          # Duration is truncated to whole seconds.
          metric_for(:gauge_job_duration).set(base_labels, (job_record.finished_at - job_record.started_at).to_i)
          # Updated tuples are currently approximated by the batch size.
          metric_for(:counter_updated_tuples).increment(base_labels, job_record.batch_size)
          metric_for(:gauge_migrated_tuples).set(base_labels, migration_record.migrated_tuple_count)
          metric_for(:gauge_total_tuple_count).set(base_labels, migration_record.total_tuple_count)
          metric_for(:gauge_last_update_time).set(base_labels, Time.current.to_i)

          track_timing_metrics(base_labels, job_record.metrics)
        end

        # Lazily registers and memoizes all metric objects, keyed by a short
        # symbolic name used by #metric_for.
        def self.metrics
          strong_memoize(:metrics) do
            {
              gauge_batch_size: Gitlab::Metrics.gauge(
                :batched_migration_job_batch_size,
                'Batch size for a batched migration job'
              ),
              gauge_sub_batch_size: Gitlab::Metrics.gauge(
                :batched_migration_job_sub_batch_size,
                'Sub-batch size for a batched migration job'
              ),
              gauge_interval: Gitlab::Metrics.gauge(
                :batched_migration_job_interval_seconds,
                'Interval for a batched migration job'
              ),
              gauge_job_duration: Gitlab::Metrics.gauge(
                :batched_migration_job_duration_seconds,
                'Duration for a batched migration job'
              ),
              counter_updated_tuples: Gitlab::Metrics.counter(
                :batched_migration_job_updated_tuples_total,
                'Number of tuples updated by batched migration job'
              ),
              gauge_migrated_tuples: Gitlab::Metrics.gauge(
                :batched_migration_migrated_tuples_total,
                'Total number of tuples migrated by a batched migration'
              ),
              histogram_timings: Gitlab::Metrics.histogram(
                :batched_migration_job_query_duration_seconds,
                'Query timings for a batched migration job',
                {},
                QUERY_TIMING_BUCKETS
              ),
              gauge_total_tuple_count: Gitlab::Metrics.gauge(
                :batched_migration_total_tuple_count,
                'Total tuple count the migration needs to touch'
              ),
              gauge_last_update_time: Gitlab::Metrics.gauge(
                :batched_migration_last_update_time_seconds,
                'Unix epoch time in seconds'
              )
            }
          end
        end

        private

        # Observes one histogram sample per timing, labeled with the operation
        # name. No-op when the record carries no timing metrics.
        #
        # @param base_labels [Hash] labels shared by all metrics for this job
        # @param metrics [Hash, nil] expects { 'timings' => { op => [seconds] } }
        def track_timing_metrics(base_labels, metrics)
          timings = metrics && metrics['timings']
          return unless timings

          # The histogram lookup is loop-invariant, so resolve it once.
          summary = metric_for(:histogram_timings)

          timings.each do |operation, values|
            labels = base_labels.merge(operation: operation)

            values.each do |timing|
              summary.observe(labels, timing)
            end
          end
        end

        # Fetches a memoized metric object by its symbolic name.
        def metric_for(name)
          self.class.metrics[name]
        end
      end
    end
  end
end
...@@ -3,8 +3,9 @@ ...@@ -3,8 +3,9 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '#perform' do RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '#perform' do
subject { described_class.new.perform(job_record) } subject { described_class.new(metrics: metrics_tracker).perform(job_record) }
let(:metrics_tracker) { instance_double('::Gitlab::Database::BackgroundMigration::PrometheusMetrics', track: nil) }
let(:job_class) { Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob } let(:job_class) { Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob }
let_it_be(:pause_ms) { 250 } let_it_be(:pause_ms) { 250 }
...@@ -78,86 +79,6 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, ' ...@@ -78,86 +79,6 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
end end
end end
context 'reporting prometheus metrics' do
let(:labels) { job_record.batched_migration.prometheus_labels }
before do
allow(job_instance).to receive(:perform)
end
it 'reports batch_size' do
expect(described_class.metrics[:gauge_batch_size]).to receive(:set).with(labels, job_record.batch_size)
subject
end
it 'reports sub_batch_size' do
expect(described_class.metrics[:gauge_sub_batch_size]).to receive(:set).with(labels, job_record.sub_batch_size)
subject
end
it 'reports interval' do
expect(described_class.metrics[:gauge_interval]).to receive(:set).with(labels, job_record.batched_migration.interval)
subject
end
it 'reports updated tuples (currently based on batch_size)' do
expect(described_class.metrics[:counter_updated_tuples]).to receive(:increment).with(labels, job_record.batch_size)
subject
end
it 'reports migrated tuples' do
count = double
expect(job_record.batched_migration).to receive(:migrated_tuple_count).and_return(count)
expect(described_class.metrics[:gauge_migrated_tuples]).to receive(:set).with(labels, count)
subject
end
it 'reports summary of query timings' do
metrics = { 'timings' => { 'update_all' => [1, 2, 3, 4, 5] } }
expect(job_instance).to receive(:batch_metrics).and_return(metrics)
metrics['timings'].each do |key, timings|
summary_labels = labels.merge(operation: key)
timings.each do |timing|
expect(described_class.metrics[:histogram_timings]).to receive(:observe).with(summary_labels, timing)
end
end
subject
end
it 'reports job duration' do
freeze_time do
expect(Time).to receive(:current).and_return(Time.zone.now - 5.seconds).ordered
allow(Time).to receive(:current).and_call_original
expect(described_class.metrics[:gauge_job_duration]).to receive(:set).with(labels, 5.seconds)
subject
end
end
it 'reports the total tuple count for the migration' do
expect(described_class.metrics[:gauge_total_tuple_count]).to receive(:set).with(labels, job_record.batched_migration.total_tuple_count)
subject
end
it 'reports last updated at timestamp' do
freeze_time do
expect(described_class.metrics[:gauge_last_update_time]).to receive(:set).with(labels, Time.current.to_i)
subject
end
end
end
context 'when the migration job does not raise an error' do context 'when the migration job does not raise an error' do
it 'marks the tracking record as succeeded' do it 'marks the tracking record as succeeded' do
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, pause_ms, 'id', 'other_id') expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, pause_ms, 'id', 'other_id')
...@@ -171,6 +92,13 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, ' ...@@ -171,6 +92,13 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
expect(reloaded_job_record.finished_at).to eq(Time.current) expect(reloaded_job_record.finished_at).to eq(Time.current)
end end
end end
it 'tracks metrics of the execution' do
expect(job_instance).to receive(:perform)
expect(metrics_tracker).to receive(:track).with(job_record)
subject
end
end end
context 'when the migration job raises an error' do context 'when the migration job raises an error' do
...@@ -189,6 +117,13 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, ' ...@@ -189,6 +117,13 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
expect(reloaded_job_record.finished_at).to eq(Time.current) expect(reloaded_job_record.finished_at).to eq(Time.current)
end end
end end
it 'tracks metrics of the execution' do
expect(job_instance).to receive(:perform).and_raise(error_class)
expect(metrics_tracker).to receive(:track).with(job_record)
expect { subject }.to raise_error(error_class)
end
end end
it_behaves_like 'an error is raised', RuntimeError.new('Something broke!') it_behaves_like 'an error is raised', RuntimeError.new('Something broke!')
......
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Gitlab::Database::BackgroundMigration::PrometheusMetrics, :prometheus do
  describe '#track' do
    # A succeeded job with a 1-minute run time and sample query timings.
    let(:job_record) do
      build(:batched_background_migration_job, :succeeded,
        started_at: Time.current - 2.minutes,
        finished_at: Time.current - 1.minute,
        updated_at: Time.current,
        metrics: { 'timings' => { 'update_all' => [0.05, 0.2, 0.4, 0.9, 4] } })
    end

    let(:labels) { job_record.batched_migration.prometheus_labels }

    subject(:track_job_record_metrics) { described_class.new.track(job_record) }

    it 'reports batch_size' do
      track_job_record_metrics

      expect(metric_for_job_by_name(:gauge_batch_size)).to eq(job_record.batch_size)
    end

    it 'reports sub_batch_size' do
      track_job_record_metrics

      expect(metric_for_job_by_name(:gauge_sub_batch_size)).to eq(job_record.sub_batch_size)
    end

    it 'reports interval' do
      track_job_record_metrics

      expect(metric_for_job_by_name(:gauge_interval)).to eq(job_record.batched_migration.interval)
    end

    it 'reports job duration' do
      freeze_time do
        track_job_record_metrics

        # finished_at - started_at == 1 minute for the fixture above.
        expect(metric_for_job_by_name(:gauge_job_duration)).to eq(1.minute)
      end
    end

    it 'increments updated tuples (currently based on batch_size)' do
      expect(described_class.metrics[:counter_updated_tuples]).to receive(:increment)
        .with(labels, job_record.batch_size)
        .twice
        .and_call_original

      track_job_record_metrics

      expect(metric_for_job_by_name(:counter_updated_tuples)).to eq(job_record.batch_size)

      # Tracking the same record again accumulates (counter semantics).
      described_class.new.track(job_record)

      expect(metric_for_job_by_name(:counter_updated_tuples)).to eq(job_record.batch_size * 2)
    end

    it 'reports migrated tuples' do
      expect(job_record.batched_migration).to receive(:migrated_tuple_count).and_return(20)

      track_job_record_metrics

      expect(metric_for_job_by_name(:gauge_migrated_tuples)).to eq(20)
    end

    it 'reports the total tuple count for the migration' do
      track_job_record_metrics

      expect(metric_for_job_by_name(:gauge_total_tuple_count)).to eq(job_record.batched_migration.total_tuple_count)
    end

    it 'reports last updated at timestamp' do
      freeze_time do
        track_job_record_metrics

        expect(metric_for_job_by_name(:gauge_last_update_time)).to eq(Time.current.to_i)
      end
    end

    it 'reports summary of query timings' do
      summary_labels = labels.merge(operation: 'update_all')

      job_record.metrics['timings']['update_all'].each do |timing|
        expect(described_class.metrics[:histogram_timings]).to receive(:observe)
          .with(summary_labels, timing)
          .and_call_original
      end

      track_job_record_metrics

      # Cumulative bucket counts for samples [0.05, 0.2, 0.4, 0.9, 4].
      expect(metric_for_job_by_name(:histogram_timings, job_labels: summary_labels))
        .to eq({ 0.1 => 1.0, 0.25 => 2.0, 0.5 => 3.0, 1 => 4.0, 5 => 5.0 })
    end

    context 'when the tracking record does not have timing metrics' do
      before do
        job_record.metrics = {}
      end

      it 'does not attempt to report query timings' do
        summary_labels = labels.merge(operation: 'update_all')

        expect(described_class.metrics[:histogram_timings]).not_to receive(:observe)

        track_job_record_metrics

        expect(metric_for_job_by_name(:histogram_timings, job_labels: summary_labels))
          .to eq({ 0.1 => 0.0, 0.25 => 0.0, 0.5 => 0.0, 1 => 0.0, 5 => 0.0 })
      end
    end

    # Reads the current value of a memoized metric for the given label set.
    def metric_for_job_by_name(name, job_labels: labels)
      described_class.metrics[name].values[job_labels].get
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment