Commit c7bfe9fc authored by Sean McGivern's avatar Sean McGivern

Record metrics for JobWaiter timeouts

When a JobWaiter times out, the caller doesn't know that all the jobs
have completed. This is particularly important for the
AuthorizedProjectsWorker, which is called with a timeout of 10 seconds
from various controllers.

In order to improve that worker, we first want to track how often these
waiters time out, as compared to how often we attempt to wait. Based on
the results, we can consider different approaches to making this meet
its timeout more frequently.
parent 7dd5296d
......@@ -9,7 +9,7 @@ module WaitableWorker
# Short-circuit: it's more efficient to do small numbers of jobs inline
return bulk_perform_inline(args_list) if args_list.size <= 3
waiter = Gitlab::JobWaiter.new(args_list.size)
waiter = Gitlab::JobWaiter.new(args_list.size, worker_label: self.to_s)
# Point all the bulk jobs at the same JobWaiter. Converts, [[1], [2], [3]]
# into [[1, "key"], [2, "key"], [3, "key"]]
......
......@@ -19,6 +19,9 @@ module Gitlab
class JobWaiter
KEY_PREFIX = "gitlab:job_waiter"
STARTED_METRIC = :gitlab_job_waiter_started_total
TIMEOUTS_METRIC = :gitlab_job_waiter_timeouts_total
def self.notify(key, jid)
Gitlab::Redis::SharedState.with { |redis| redis.lpush(key, jid) }
end
......@@ -27,15 +30,16 @@ module Gitlab
key.is_a?(String) && key =~ /\A#{KEY_PREFIX}:\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/
end
attr_reader :key, :finished
attr_reader :key, :finished, :worker_label
attr_accessor :jobs_remaining
# jobs_remaining - the number of jobs left to wait for
# key - The key of this waiter.
def initialize(jobs_remaining = 0, key = "#{KEY_PREFIX}:#{SecureRandom.uuid}")
def initialize(jobs_remaining = 0, key = "#{KEY_PREFIX}:#{SecureRandom.uuid}", worker_label: nil)
@key = key
@jobs_remaining = jobs_remaining
@finished = []
@worker_label = worker_label
end
# Waits for all the jobs to be completed.
......@@ -45,6 +49,7 @@ module Gitlab
# long to process, or is never processed.
def wait(timeout = 10)
deadline = Time.now.utc + timeout
increment_counter(STARTED_METRIC)
Gitlab::Redis::SharedState.with do |redis|
# Fallback key expiry: allow a long grace period to reduce the chance of
......@@ -60,7 +65,12 @@ module Gitlab
break if seconds_left <= 0
list, jid = redis.blpop(key, timeout: seconds_left)
break unless list && jid # timed out
# timed out
unless list && jid
increment_counter(TIMEOUTS_METRIC)
break
end
@finished << jid
@jobs_remaining -= 1
......@@ -72,5 +82,20 @@ module Gitlab
finished
end
private
def increment_counter(metric)
return unless worker_label
metrics[metric].increment(worker: worker_label)
end
def metrics
@metrics ||= {
STARTED_METRIC => Gitlab::Metrics.counter(STARTED_METRIC, 'JobWaiter attempts started'),
TIMEOUTS_METRIC => Gitlab::Metrics.counter(TIMEOUTS_METRIC, 'JobWaiter attempts timed out')
}
end
end
end
......@@ -37,5 +37,40 @@ describe Gitlab::JobWaiter do
expect(result).to contain_exactly('a')
end
context 'when a label is provided' do
let(:waiter) { described_class.new(2, worker_label: 'Foo') }
let(:started_total) { double(:started_total) }
let(:timeouts_total) { double(:timeouts_total) }
before do
allow(Gitlab::Metrics).to receive(:counter)
.with(described_class::STARTED_METRIC, anything)
.and_return(started_total)
allow(Gitlab::Metrics).to receive(:counter)
.with(described_class::TIMEOUTS_METRIC, anything)
.and_return(timeouts_total)
end
it 'increments just job_waiter_started_total when all jobs complete' do
expect(started_total).to receive(:increment).with(worker: 'Foo')
expect(timeouts_total).not_to receive(:increment)
described_class.notify(waiter.key, 'a')
described_class.notify(waiter.key, 'b')
result = nil
expect { Timeout.timeout(1) { result = waiter.wait(2) } }.not_to raise_error
end
it 'increments job_waiter_started_total and job_waiter_timeouts_total when it times out' do
expect(started_total).to receive(:increment).with(worker: 'Foo')
expect(timeouts_total).to receive(:increment).with(worker: 'Foo')
result = nil
expect { Timeout.timeout(2) { result = waiter.wait(1) } }.not_to raise_error
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment