Commit 68ef56d4 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'add-sidekiq-metrics-for-gitaly-and-db-time' into 'master'

Add Sidekiq metrics for Gitaly and DB time

See merge request gitlab-org/gitlab!26384
parents e8cade67 a97c30c9
---
title: Add Prometheus metrics for Gitaly and database time in background jobs
merge_request: 26384
author:
type: changed
...@@ -86,13 +86,15 @@ The following metrics are available: ...@@ -86,13 +86,15 @@ The following metrics are available:
| `failed_login_captcha_total` | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login | | | `failed_login_captcha_total` | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login | |
| `successful_login_captcha_total` | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login | | | `successful_login_captcha_total` | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login | |
| `auto_devops_pipelines_completed_total` | Counter | 12.7 | Counter of completed Auto DevOps pipelines, labeled by status | | | `auto_devops_pipelines_completed_total` | Counter | 12.7 | Counter of completed Auto DevOps pipelines, labeled by status | |
| `sidekiq_jobs_cpu_seconds` | Histogram | 12.4 | Seconds of cpu time to run Sidekiq job | | | `sidekiq_jobs_cpu_seconds` | Histogram | 12.4 | Seconds of cpu time to run Sidekiq job | queue, boundary, external_dependencies, feature_category, job_status, urgency |
| `sidekiq_jobs_completion_seconds` | Histogram | 12.2 | Seconds to complete Sidekiq job | | | `sidekiq_jobs_completion_seconds` | Histogram | 12.2 | Seconds to complete Sidekiq job | queue, boundary, external_dependencies, feature_category, job_status, urgency |
| `sidekiq_jobs_queue_duration_seconds` | Histogram | 12.5 | Duration in seconds that a Sidekiq job was queued before being executed | | | `sidekiq_jobs_db_seconds` | Histogram | 12.9 | Seconds of DB time to run Sidekiq job | queue, boundary, external_dependencies, feature_category, job_status, urgency |
| `sidekiq_jobs_failed_total` | Counter | 12.2 | Sidekiq jobs failed | | | `sidekiq_jobs_gitaly_seconds` | Histogram | 12.9 | Seconds of Gitaly time to run Sidekiq job | queue, boundary, external_dependencies, feature_category, job_status, urgency |
| `sidekiq_jobs_retried_total` | Counter | 12.2 | Sidekiq jobs retried | | | `sidekiq_jobs_queue_duration_seconds` | Histogram | 12.5 | Duration in seconds that a Sidekiq job was queued before being executed | queue, boundary, external_dependencies, feature_category, urgency |
| `sidekiq_running_jobs` | Gauge | 12.2 | Number of Sidekiq jobs running | | | `sidekiq_jobs_failed_total` | Counter | 12.2 | Sidekiq jobs failed | queue, boundary, external_dependencies, feature_category, urgency |
| `sidekiq_concurrency` | Gauge | 12.5 | Maximum number of Sidekiq jobs | | | `sidekiq_jobs_retried_total` | Counter | 12.2 | Sidekiq jobs retried | queue, boundary, external_dependencies, feature_category, urgency |
| `sidekiq_running_jobs` | Gauge | 12.2 | Number of Sidekiq jobs running | queue, boundary, external_dependencies, feature_category, urgency |
| `sidekiq_concurrency` | Gauge | 12.5 | Maximum number of Sidekiq jobs | |
## Metrics controlled by a feature flag ## Metrics controlled by a feature flag
......
...@@ -45,6 +45,8 @@ module Gitlab ...@@ -45,6 +45,8 @@ module Gitlab
labels[:job_status] = job_succeeded ? "done" : "fail" labels[:job_status] = job_succeeded ? "done" : "fail"
@metrics[:sidekiq_jobs_cpu_seconds].observe(labels, job_thread_cputime) @metrics[:sidekiq_jobs_cpu_seconds].observe(labels, job_thread_cputime)
@metrics[:sidekiq_jobs_completion_seconds].observe(labels, monotonic_time) @metrics[:sidekiq_jobs_completion_seconds].observe(labels, monotonic_time)
@metrics[:sidekiq_jobs_db_seconds].observe(labels, ActiveRecord::LogSubscriber.runtime / 1000)
@metrics[:sidekiq_jobs_gitaly_seconds].observe(labels, Gitlab::GitalyClient.query_time)
end end
end end
...@@ -54,6 +56,8 @@ module Gitlab ...@@ -54,6 +56,8 @@ module Gitlab
{ {
sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS), sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS), sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_db_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_db_seconds, 'Seconds of database time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_gitaly_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_gitaly_seconds, 'Seconds of Gitaly time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS), sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'), sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'), sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
......
...@@ -20,6 +20,8 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -20,6 +20,8 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do
let(:queue_duration_seconds) { double('queue duration seconds metric') } let(:queue_duration_seconds) { double('queue duration seconds metric') }
let(:completion_seconds_metric) { double('completion seconds metric') } let(:completion_seconds_metric) { double('completion seconds metric') }
let(:user_execution_seconds_metric) { double('user execution seconds metric') } let(:user_execution_seconds_metric) { double('user execution seconds metric') }
let(:db_seconds_metric) { double('db seconds metric') }
let(:gitaly_seconds_metric) { double('gitaly seconds metric') }
let(:failed_total_metric) { double('failed total metric') } let(:failed_total_metric) { double('failed total metric') }
let(:retried_total_metric) { double('retried total metric') } let(:retried_total_metric) { double('retried total metric') }
let(:running_jobs_metric) { double('running jobs metric') } let(:running_jobs_metric) { double('running jobs metric') }
...@@ -28,6 +30,8 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -28,6 +30,8 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_queue_duration_seconds, anything, anything, anything).and_return(queue_duration_seconds) allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_queue_duration_seconds, anything, anything, anything).and_return(queue_duration_seconds)
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything, anything, anything).and_return(completion_seconds_metric) allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything, anything, anything).and_return(completion_seconds_metric)
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_cpu_seconds, anything, anything, anything).and_return(user_execution_seconds_metric) allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_cpu_seconds, anything, anything, anything).and_return(user_execution_seconds_metric)
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_db_seconds, anything, anything, anything).and_return(db_seconds_metric)
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_gitaly_seconds, anything, anything, anything).and_return(gitaly_seconds_metric)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric) allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric) allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric)
allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :all).and_return(running_jobs_metric) allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :all).and_return(running_jobs_metric)
...@@ -55,16 +59,23 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -55,16 +59,23 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do
let(:queue_duration_for_job) { 0.01 } let(:queue_duration_for_job) { 0.01 }
let(:db_duration) { 3 }
let(:gitaly_duration) { 4 }
before do before do
allow(subject).to receive(:get_thread_cputime).and_return(thread_cputime_before, thread_cputime_after) allow(subject).to receive(:get_thread_cputime).and_return(thread_cputime_before, thread_cputime_after)
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(monotonic_time_before, monotonic_time_after) allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(monotonic_time_before, monotonic_time_after)
allow(Gitlab::InstrumentationHelper).to receive(:queue_duration_for_job).with(job).and_return(queue_duration_for_job) allow(Gitlab::InstrumentationHelper).to receive(:queue_duration_for_job).with(job).and_return(queue_duration_for_job)
allow(ActiveRecord::LogSubscriber).to receive(:runtime).and_return(db_duration * 1000)
allow(Gitlab::GitalyClient).to receive(:query_time).and_return(gitaly_duration)
expect(running_jobs_metric).to receive(:increment).with(labels, 1) expect(running_jobs_metric).to receive(:increment).with(labels, 1)
expect(running_jobs_metric).to receive(:increment).with(labels, -1) expect(running_jobs_metric).to receive(:increment).with(labels, -1)
expect(queue_duration_seconds).to receive(:observe).with(labels, queue_duration_for_job) if queue_duration_for_job expect(queue_duration_seconds).to receive(:observe).with(labels, queue_duration_for_job) if queue_duration_for_job
expect(user_execution_seconds_metric).to receive(:observe).with(labels_with_job_status, thread_cputime_duration) expect(user_execution_seconds_metric).to receive(:observe).with(labels_with_job_status, thread_cputime_duration)
expect(db_seconds_metric).to receive(:observe).with(labels_with_job_status, db_duration)
expect(gitaly_seconds_metric).to receive(:observe).with(labels_with_job_status, gitaly_duration)
expect(completion_seconds_metric).to receive(:observe).with(labels_with_job_status, monotonic_time_duration) expect(completion_seconds_metric).to receive(:observe).with(labels_with_job_status, monotonic_time_duration)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment