Commit 430e7671 authored by Bob Van Landuyt

Implement backoff for the circuitbreaker

The circuitbreaker now has 2 failure modes:

- Backing off: This will raise the `Gitlab::Git::Storage::Failing`
  exception. Access to the shard is blocked temporarily.
- Circuit broken: This will raise the
  `Gitlab::Git::Storage::CircuitOpen` exception. Access to the shard
  will be blocked until the failures are reset.
parent 1881d4f8
......@@ -16,17 +16,16 @@ module StorageHealthHelper
def message_for_circuit_breaker(circuit_breaker)
maximum_failures = circuit_breaker.failure_count_threshold
current_failures = circuit_breaker.failure_count
permanently_broken = circuit_breaker.circuit_broken? && current_failures >= maximum_failures
translation_params = { number_of_failures: current_failures,
maximum_failures: maximum_failures,
number_of_seconds: circuit_breaker.failure_wait_time }
if permanently_broken
if circuit_breaker.circuit_broken?
s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\
"retry automatically. Reset storage information when the problem is "\
"resolved.") % translation_params
elsif circuit_breaker.circuit_broken?
elsif circuit_breaker.backing_off?
_("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
"block access for %{number_of_seconds} seconds.") % translation_params
else
......
......@@ -12,6 +12,7 @@ module Gitlab
# Raised by `check_storage_accessible!` when the circuit for a storage is broken.
CircuitOpen = Class.new(Inaccessible)
Misconfiguration = Class.new(Inaccessible)
# Raised by `check_storage_accessible!` while access to a storage is backing off.
Failing = Class.new(Inaccessible)
# Prefix of the Redis keys built by `cache_key`.
REDIS_KEY_PREFIX = 'storage_accessible:'.freeze
......
......@@ -64,12 +64,20 @@ module Gitlab
# Reports whether the circuit is broken: failures have been recorded and their
# count is above `failure_count_threshold`.
#
# @return [Boolean] false when `no_failures?` holds; otherwise the result of
#   comparing `failure_count` with `failure_count_threshold`
def circuit_broken?
  !no_failures? && failure_count > failure_count_threshold
end
# Reports whether access to the storage should currently be backed off.
#
# Backing off requires BOTH a failure more recent than `failure_wait_time`
# seconds ago AND a failure count above `backoff_threshold`.
#
# @return [Boolean] false when `no_failures?` holds; otherwise whether both
#   conditions above are met
def backing_off?
  return false if no_failures?

  recent_failure = last_failure > failure_wait_time.seconds.ago
  # Compared against the backoff threshold, not `failure_count_threshold`:
  # the latter decides when the circuit is broken, not when to back off.
  too_many_failures = failure_count > backoff_threshold

  recent_failure && too_many_failures
end
private
# Memoized failure details: `get_failure_info` is only called while no truthy
# value has been cached in `@failure_info`.
def failure_info
  return @failure_info if @failure_info

  @failure_info = get_failure_info
end
......@@ -94,7 +102,11 @@ module Gitlab
def check_storage_accessible!
if circuit_broken?
raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_wait_time)
raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time)
end
if backing_off?
raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time)
end
unless storage_available?
......@@ -131,12 +143,6 @@ module Gitlab
end
end
# Key passed to Redis when reading the failure information: the shared prefix
# followed by the storage and the hostname, separated by a colon. Memoized.
def cache_key
  return @cache_key if @cache_key

  @cache_key = "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
private
def get_failure_info
last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
redis.hmget(cache_key, :last_failure, :failure_count)
......@@ -146,6 +152,10 @@ module Gitlab
FailureInfo.new(last_failure, failure_count.to_i)
end
# Key passed to Redis when reading the failure information: the shared prefix
# followed by the storage and the hostname, separated by a colon. Memoized.
def cache_key
  return @cache_key if @cache_key

  @cache_key = "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
end
end
end
......
......@@ -18,6 +18,14 @@ module Gitlab
application_settings.circuitbreaker_storage_timeout
end
# Value of the `circuitbreaker_access_retries` application setting.
def access_retries
  settings = application_settings
  settings.circuitbreaker_access_retries
end
# Value of the `circuitbreaker_backoff_threshold` application setting.
def backoff_threshold
  settings = application_settings
  settings.circuitbreaker_backoff_threshold
end
private
def application_settings
......
......@@ -25,6 +25,10 @@ module Gitlab
!!@error
end
# Always returns false: this implementation never reports that it is backing off.
def backing_off?
false
end
# Time of the last failure: the current time while `circuit_broken?` holds,
# otherwise nil.
def last_failure
  Time.now if circuit_broken?
end
......
......@@ -65,17 +65,6 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do
ours = described_class.public_instance_methods
theirs = Gitlab::Git::Storage::CircuitBreaker.public_instance_methods
# These methods are not part of the public API, but are public to allow the
# CircuitBreaker specs to operate. They should be made private over time.
exceptions = %i[
cache_key
check_storage_accessible!
no_failures?
storage_available?
track_storage_accessible
track_storage_inaccessible
]
expect(theirs - ours).to contain_exactly(*exceptions)
expect(theirs - ours).to be_empty
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment