Commit ed4ed51a authored by Kerri Miller's avatar Kerri Miller

Merge branch 'sh-sidekiq-enqueue-latency' into 'master'

Measure Sidekiq enqueue latency for scheduled jobs

See merge request gitlab-org/gitlab!71615
parents 338c9e64 774b16d1
......@@ -296,7 +296,6 @@ Rails/TimeZone:
- 'spec/lib/gitlab/graphql_logger_spec.rb'
- 'spec/lib/gitlab/graphs/commits_spec.rb'
- 'spec/lib/gitlab/import_export/project/relation_factory_spec.rb'
- 'spec/lib/gitlab/instrumentation_helper_spec.rb'
- 'spec/lib/gitlab/json_logger_spec.rb'
- 'spec/lib/gitlab/lfs_token_spec.rb'
- 'spec/lib/gitlab/log_timestamp_formatter_spec.rb'
......@@ -384,10 +383,8 @@ RSpec/TimecopFreeze:
- 'spec/lib/gitlab/checks/timed_logger_spec.rb'
- 'spec/lib/gitlab/cycle_analytics/stage_summary_spec.rb'
- 'spec/lib/gitlab/cycle_analytics/usage_data_spec.rb'
- 'spec/lib/gitlab/instrumentation_helper_spec.rb'
- 'spec/lib/gitlab/omniauth_logging/json_formatter_spec.rb'
- 'spec/lib/gitlab/puma_logging/json_formatter_spec.rb'
- 'spec/lib/gitlab/sidekiq_logging/structured_logger_spec.rb'
- 'spec/lib/json_web_token/hmac_token_spec.rb'
- 'spec/tooling/rspec_flaky/flaky_example_spec.rb'
- 'spec/tooling/rspec_flaky/listener_spec.rb'
......
......@@ -131,18 +131,43 @@ module Gitlab
enqueued_at_time = convert_to_time(enqueued_at)
return unless enqueued_at_time
# Its possible that if theres clock-skew between two nodes
# this value may be less than zero. In that event, we record the value
round_elapsed_time(enqueued_at_time)
end
# Returns the time it took for a scheduled job to be enqueued in seconds, as a float,
# if the `scheduled_at` and `enqueued_at` fields are available.
#
# * If the job doesn't contain sufficient information, returns nil
# * If the job has a start time in the future, returns 0
# * If the job contains an invalid start time value, returns nil
# @param [Hash] job a Sidekiq job, represented as a hash
def self.enqueue_latency_for_scheduled_job(job)
scheduled_at = job['scheduled_at']
enqueued_at = job['enqueued_at']
return unless scheduled_at && enqueued_at
scheduled_at_time = convert_to_time(scheduled_at)
enqueued_at_time = convert_to_time(enqueued_at)
return unless scheduled_at_time && enqueued_at_time
round_elapsed_time(scheduled_at_time, enqueued_at_time)
end
def self.round_elapsed_time(start, end_time = Time.now)
# It's possible that if there is clock-skew between two nodes this
# value may be less than zero. In that event, we record the value
# as zero.
[elapsed_by_absolute_time(enqueued_at_time), 0].max.round(DURATION_PRECISION)
[elapsed_by_absolute_time(start, end_time), 0].max.round(DURATION_PRECISION)
end
# Calculates the time in seconds, as a float, from
# the provided start time until now
#
# @param [Time] start
def self.elapsed_by_absolute_time(start)
(Time.now - start).to_f.round(DURATION_PRECISION)
def self.elapsed_by_absolute_time(start, end_time)
(end_time - start).to_f.round(DURATION_PRECISION)
end
private_class_method :elapsed_by_absolute_time
......
......@@ -55,6 +55,9 @@ module Gitlab
scheduling_latency_s = ::Gitlab::InstrumentationHelper.queue_duration_for_job(payload)
payload['scheduling_latency_s'] = scheduling_latency_s if scheduling_latency_s
enqueue_latency_s = ::Gitlab::InstrumentationHelper.enqueue_latency_for_scheduled_job(payload)
payload['enqueue_latency_s'] = enqueue_latency_s if enqueue_latency_s
payload
end
......
......@@ -16,7 +16,12 @@ module Gitlab
worker_class = worker_class.to_s.safe_constantize
labels = create_labels(worker_class, queue, job)
labels[:scheduling] = job.key?('at') ? 'delayed' : 'immediate'
if job.key?('at')
labels[:scheduling] = 'delayed'
job[:scheduled_at] = job['at']
else
labels[:scheduling] = 'immediate'
end
@metrics.fetch(ENQUEUED).increment(labels, 1)
......
......@@ -149,8 +149,8 @@ RSpec.describe Gitlab::InstrumentationHelper do
end
end
describe '.queue_duration_for_job' do
where(:enqueued_at, :created_at, :time_now, :expected_duration) do
describe 'duration calculations' do
where(:end_time, :start_time, :time_now, :expected_duration) do
"2019-06-01T00:00:00.000+0000" | nil | "2019-06-01T02:00:00.000+0000" | 2.hours.to_f
"2019-06-01T02:00:00.000+0000" | nil | "2019-06-01T02:00:00.001+0000" | 0.001
"2019-06-01T02:00:00.000+0000" | "2019-05-01T02:00:00.000+0000" | "2019-06-01T02:00:01.000+0000" | 1
......@@ -164,15 +164,29 @@ RSpec.describe Gitlab::InstrumentationHelper do
0 | nil | "2019-10-23T12:13:16.000+0200" | nil
-1 | nil | "2019-10-23T12:13:16.000+0200" | nil
"2019-06-01T02:00:00.000+0000" | nil | "2019-06-01T00:00:00.000+0000" | 0
Time.at(1571999233) | nil | "2019-10-25T12:29:16.000+0200" | 123
Time.at(1571999233).utc | nil | "2019-10-25T12:29:16.000+0200" | 123
end
with_them do
let(:job) { { 'enqueued_at' => enqueued_at, 'created_at' => created_at } }
describe '.queue_duration_for_job' do
with_them do
let(:job) { { 'enqueued_at' => end_time, 'created_at' => start_time } }
it "returns the correct duration" do
Timecop.freeze(Time.iso8601(time_now)) do
expect(described_class.queue_duration_for_job(job)).to eq(expected_duration)
it "returns the correct duration" do
travel_to(Time.iso8601(time_now)) do
expect(described_class.queue_duration_for_job(job)).to eq(expected_duration)
end
end
end
end
describe '.enqueue_latency_for_scheduled_job' do
with_them do
let(:job) { { 'enqueued_at' => end_time, 'scheduled_at' => start_time } }
it "returns the correct duration" do
travel_to(Time.iso8601(time_now)) do
expect(described_class.enqueue_latency_for_scheduled_job(job)).to eq(expected_duration)
end
end
end
end
......
......@@ -18,7 +18,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'logs start and end of job' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(start_payload).ordered
expect(logger).to receive(:info).with(end_payload).ordered
expect(subject).to receive(:log_job_start).and_call_original
......@@ -34,7 +34,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
"wrapped" => "TestWorker"
)
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(start_payload).ordered
expect(logger).to receive(:info).with(end_payload).ordered
expect(subject).to receive(:log_job_start).and_call_original
......@@ -45,7 +45,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'logs an exception in job' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(start_payload)
expect(logger).to receive(:warn).with(include(exception_payload))
expect(subject).to receive(:log_job_start).and_call_original
......@@ -60,7 +60,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'logs the root cause of an Sidekiq::JobRetry::Skip exception in the job' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(start_payload)
expect(logger).to receive(:warn).with(include(exception_payload))
expect(subject).to receive(:log_job_start).and_call_original
......@@ -77,7 +77,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'logs the root cause of an Sidekiq::JobRetry::Handled exception in the job' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(start_payload)
expect(logger).to receive(:warn).with(include(exception_payload))
expect(subject).to receive(:log_job_start).and_call_original
......@@ -94,7 +94,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'keeps Sidekiq::JobRetry::Handled exception if the cause does not exist' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(start_payload)
expect(logger).to receive(:warn).with(
include(
......@@ -116,7 +116,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'does not modify the job' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
job_copy = job.deep_dup
allow(logger).to receive(:info)
......@@ -130,7 +130,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'does not modify the wrapped job' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
wrapped_job = job.merge(
"class" => "ActiveJob::QueueAdapters::SidekiqAdapter::JobWrapper",
"wrapped" => "TestWorker"
......@@ -154,7 +154,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'logs start and end of job without args' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(start_payload.except('args')).ordered
expect(logger).to receive(:info).with(end_payload.except('args')).ordered
expect(subject).to receive(:log_job_start).and_call_original
......@@ -165,7 +165,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'logs without created_at and enqueued_at fields' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
excluded_fields = %w(created_at enqueued_at args scheduling_latency_s)
expect(logger).to receive(:info).with(start_payload.except(*excluded_fields)).ordered
......@@ -183,7 +183,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
let(:scheduling_latency_s) { 7200.0 }
it 'logs with scheduling latency' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(start_payload).ordered
expect(logger).to receive(:info).with(end_payload).ordered
expect(subject).to receive(:log_job_start).and_call_original
......@@ -194,6 +194,35 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
end
context 'with enqueue latency' do
let(:expected_start_payload) do
start_payload.merge(
'scheduled_at' => job['scheduled_at'],
'enqueue_latency_s' => 1.hour.to_f
)
end
let(:expected_end_payload) do
end_payload.merge('enqueue_latency_s' => 1.hour.to_f)
end
before do
# enqueued_at is set to created_at
job['scheduled_at'] = created_at - 1.hour
end
it 'logs with scheduling latency' do
travel_to(timestamp) do
expect(logger).to receive(:info).with(expected_start_payload).ordered
expect(logger).to receive(:info).with(expected_end_payload).ordered
expect(subject).to receive(:log_job_start).and_call_original
expect(subject).to receive(:log_job_done).and_call_original
call_subject(job, 'test_queue') { }
end
end
end
context 'with Gitaly, Rugged, and Redis calls' do
let(:timing_data) do
{
......@@ -218,7 +247,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'logs with Gitaly and Rugged timing data', :aggregate_failures do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(start_payload).ordered
expect(logger).to receive(:info).with(expected_end_payload).ordered
......@@ -323,7 +352,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'logs it in the done log' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(expected_start_payload).ordered
expect(logger).to receive(:info).with(expected_end_payload).ordered
......@@ -365,7 +394,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
it 'logs it in the done log' do
Timecop.freeze(timestamp) do
travel_to(timestamp) do
expect(logger).to receive(:info).with(expected_start_payload).ordered
expect(logger).to receive(:info).with(expected_end_payload).ordered
......@@ -390,13 +419,13 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
'message' => 'my-message',
'job_status' => 'my-job-status',
'duration_s' => 0.123123,
'completed_at' => current_utc_time.to_f }
'completed_at' => current_utc_time.to_i }
end
subject { described_class.new }
it 'update payload correctly' do
Timecop.freeze(current_utc_time) do
travel_to(current_utc_time) do
subject.send(:add_time_keys!, time, payload)
expect(payload).to eq(payload_with_time_keys)
......
......@@ -62,6 +62,14 @@ RSpec.describe Gitlab::SidekiqMiddleware::ClientMetrics do
Sidekiq::Testing.inline! { TestWorker.perform_in(1.second) }
end
it 'sets the scheduled_at field' do
job = { 'at' => Time.current }
subject.call('TestWorker', job, 'queue', nil) do
expect(job[:scheduled_at]).to eq(job['at'])
end
end
end
context 'when the worker class cannot be found' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment