Commit b8494c07 authored by Stan Hu's avatar Stan Hu

Merge branch 'add-metrics-for-jobwaiter-timeouts' into 'master'

Record metrics for JobWaiter timeouts

See merge request gitlab-org/gitlab!25599
parents 6979d5d1 c7bfe9fc
...@@ -9,7 +9,7 @@ module WaitableWorker ...@@ -9,7 +9,7 @@ module WaitableWorker
# Short-circuit: it's more efficient to do small numbers of jobs inline # Short-circuit: it's more efficient to do small numbers of jobs inline
return bulk_perform_inline(args_list) if args_list.size <= 3 return bulk_perform_inline(args_list) if args_list.size <= 3
waiter = Gitlab::JobWaiter.new(args_list.size) waiter = Gitlab::JobWaiter.new(args_list.size, worker_label: self.to_s)
# Point all the bulk jobs at the same JobWaiter. Converts, [[1], [2], [3]] # Point all the bulk jobs at the same JobWaiter. Converts, [[1], [2], [3]]
# into [[1, "key"], [2, "key"], [3, "key"]] # into [[1, "key"], [2, "key"], [3, "key"]]
......
...@@ -19,6 +19,9 @@ module Gitlab ...@@ -19,6 +19,9 @@ module Gitlab
class JobWaiter class JobWaiter
KEY_PREFIX = "gitlab:job_waiter" KEY_PREFIX = "gitlab:job_waiter"
STARTED_METRIC = :gitlab_job_waiter_started_total
TIMEOUTS_METRIC = :gitlab_job_waiter_timeouts_total
def self.notify(key, jid) def self.notify(key, jid)
Gitlab::Redis::SharedState.with { |redis| redis.lpush(key, jid) } Gitlab::Redis::SharedState.with { |redis| redis.lpush(key, jid) }
end end
...@@ -27,15 +30,16 @@ module Gitlab ...@@ -27,15 +30,16 @@ module Gitlab
key.is_a?(String) && key =~ /\A#{KEY_PREFIX}:\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/ key.is_a?(String) && key =~ /\A#{KEY_PREFIX}:\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/
end end
attr_reader :key, :finished attr_reader :key, :finished, :worker_label
attr_accessor :jobs_remaining attr_accessor :jobs_remaining
# jobs_remaining - the number of jobs left to wait for # jobs_remaining - the number of jobs left to wait for
# key - The key of this waiter. # key - The key of this waiter.
def initialize(jobs_remaining = 0, key = "#{KEY_PREFIX}:#{SecureRandom.uuid}") def initialize(jobs_remaining = 0, key = "#{KEY_PREFIX}:#{SecureRandom.uuid}", worker_label: nil)
@key = key @key = key
@jobs_remaining = jobs_remaining @jobs_remaining = jobs_remaining
@finished = [] @finished = []
@worker_label = worker_label
end end
# Waits for all the jobs to be completed. # Waits for all the jobs to be completed.
...@@ -45,6 +49,7 @@ module Gitlab ...@@ -45,6 +49,7 @@ module Gitlab
# long to process, or is never processed. # long to process, or is never processed.
def wait(timeout = 10) def wait(timeout = 10)
deadline = Time.now.utc + timeout deadline = Time.now.utc + timeout
increment_counter(STARTED_METRIC)
Gitlab::Redis::SharedState.with do |redis| Gitlab::Redis::SharedState.with do |redis|
# Fallback key expiry: allow a long grace period to reduce the chance of # Fallback key expiry: allow a long grace period to reduce the chance of
...@@ -60,7 +65,12 @@ module Gitlab ...@@ -60,7 +65,12 @@ module Gitlab
break if seconds_left <= 0 break if seconds_left <= 0
list, jid = redis.blpop(key, timeout: seconds_left) list, jid = redis.blpop(key, timeout: seconds_left)
break unless list && jid # timed out
# timed out
unless list && jid
increment_counter(TIMEOUTS_METRIC)
break
end
@finished << jid @finished << jid
@jobs_remaining -= 1 @jobs_remaining -= 1
...@@ -72,5 +82,20 @@ module Gitlab ...@@ -72,5 +82,20 @@ module Gitlab
finished finished
end end
private
def increment_counter(metric)
return unless worker_label
metrics[metric].increment(worker: worker_label)
end
def metrics
@metrics ||= {
STARTED_METRIC => Gitlab::Metrics.counter(STARTED_METRIC, 'JobWaiter attempts started'),
TIMEOUTS_METRIC => Gitlab::Metrics.counter(TIMEOUTS_METRIC, 'JobWaiter attempts timed out')
}
end
end end
end end
...@@ -37,5 +37,40 @@ describe Gitlab::JobWaiter do ...@@ -37,5 +37,40 @@ describe Gitlab::JobWaiter do
expect(result).to contain_exactly('a') expect(result).to contain_exactly('a')
end end
context 'when a label is provided' do
let(:waiter) { described_class.new(2, worker_label: 'Foo') }
let(:started_total) { double(:started_total) }
let(:timeouts_total) { double(:timeouts_total) }
before do
allow(Gitlab::Metrics).to receive(:counter)
.with(described_class::STARTED_METRIC, anything)
.and_return(started_total)
allow(Gitlab::Metrics).to receive(:counter)
.with(described_class::TIMEOUTS_METRIC, anything)
.and_return(timeouts_total)
end
it 'increments just job_waiter_started_total when all jobs complete' do
expect(started_total).to receive(:increment).with(worker: 'Foo')
expect(timeouts_total).not_to receive(:increment)
described_class.notify(waiter.key, 'a')
described_class.notify(waiter.key, 'b')
result = nil
expect { Timeout.timeout(1) { result = waiter.wait(2) } }.not_to raise_error
end
it 'increments job_waiter_started_total and job_waiter_timeouts_total when it times out' do
expect(started_total).to receive(:increment).with(worker: 'Foo')
expect(timeouts_total).to receive(:increment).with(worker: 'Foo')
result = nil
expect { Timeout.timeout(2) { result = waiter.wait(1) } }.not_to raise_error
end
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment