Commit 37f94825 authored by Stan Hu's avatar Stan Hu

Merge branch 'an-sidekiq-records-failure-durations' into 'master'

Record latencies for sidekiq failures

Closes gitlab-com/gl-infra/scalability#46

See merge request gitlab-org/gitlab!18909
parents 23c170f8 c17922ea
---
title: Record latencies for Sidekiq failures
merge_request: 18909
author:
type: performance
......@@ -21,22 +21,28 @@ module Gitlab
@metrics[:sidekiq_jobs_retried_total].increment(labels, 1)
end
job_succeeded = false
monotonic_time_start = Gitlab::Metrics::System.monotonic_time
job_thread_cputime_start = get_thread_cputime
realtime = Benchmark.realtime do
begin
yield
end
job_succeeded = true
ensure
monotonic_time_end = Gitlab::Metrics::System.monotonic_time
job_thread_cputime_end = get_thread_cputime
monotonic_time = monotonic_time_end - monotonic_time_start
job_thread_cputime = job_thread_cputime_end - job_thread_cputime_start
@metrics[:sidekiq_jobs_cpu_seconds].observe(labels, job_thread_cputime)
@metrics[:sidekiq_jobs_completion_seconds].observe(labels, realtime)
rescue Exception # rubocop: disable Lint/RescueException
@metrics[:sidekiq_jobs_failed_total].increment(labels, 1)
raise
ensure
# sidekiq_running_jobs, sidekiq_jobs_failed_total should not include the job_status label
@metrics[:sidekiq_running_jobs].increment(labels, -1)
@metrics[:sidekiq_jobs_failed_total].increment(labels, 1) unless job_succeeded
# job_status: done, fail match the job_status attribute in structured logging
labels[:job_status] = job_succeeded ? :done : :fail
@metrics[:sidekiq_jobs_cpu_seconds].observe(labels, job_thread_cputime)
@metrics[:sidekiq_jobs_completion_seconds].observe(labels, monotonic_time)
end
end
private
......
......@@ -44,12 +44,14 @@ describe Gitlab::SidekiqMiddleware::Metrics do
it 'sets queue specific metrics' do
labels = { queue: :test }
labels_with_job_status = { queue: :test, job_status: :done }
allow(middleware).to receive(:get_thread_cputime).and_return(1, 3)
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(2, 3)
expect(user_execution_seconds_metric).to receive(:observe).with(labels, 2)
expect(running_jobs_metric).to receive(:increment).with(labels, 1)
expect(running_jobs_metric).to receive(:increment).with(labels, -1)
expect(completion_seconds_metric).to receive(:observe).with(labels, kind_of(Numeric))
expect(user_execution_seconds_metric).to receive(:observe).with(labels_with_job_status, 2)
expect(completion_seconds_metric).to receive(:observe).with(labels_with_job_status, 1)
middleware.call(worker, {}, :test) { nil }
end
......@@ -74,8 +76,18 @@ describe Gitlab::SidekiqMiddleware::Metrics do
context 'when error is raised' do
it 'sets sidekiq_jobs_failed_total and reraises' do
expect(failed_total_metric).to receive(:increment)
expect { middleware.call(worker, {}, :test) { raise } }.to raise_error
labels = { queue: :test }
labels_with_job_status = { queue: :test, job_status: :fail }
allow(middleware).to receive(:get_thread_cputime).and_return(1, 4)
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(2, 6)
expect(running_jobs_metric).to receive(:increment).with(labels, 1)
expect(running_jobs_metric).to receive(:increment).with(labels, -1)
expect(failed_total_metric).to receive(:increment).with(labels, 1)
expect(user_execution_seconds_metric).to receive(:observe).with(labels_with_job_status, 3)
expect(completion_seconds_metric).to receive(:observe).with(labels_with_job_status, 4)
expect { middleware.call(worker, {}, :test) { raise StandardError, "Failed" } }.to raise_error(StandardError, "Failed")
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment