Commit 256a3072 authored by Kamil Trzciński's avatar Kamil Trzciński

Improve service level health-checks

This improves the behavior of health-checks
introducing two levels of them:

- service level: checks served by `sidekiq|web_exporter`,
  exposed on a separate port

- application level: checks served by `/-/metrics` endpoint
  of `HealthController`

With these changes:

- `/liveness` always indicates that the application is ready
  and not deadlocked
- `/readiness` indicates:

The changes for `/readiness`:

- on service level: the status of the service if it can accept
  and process connections

- on application level: the status of external components if they
  are accessible

The service-level and application-level probes do not share
any checks; each level exposes its own, different set of checks.
parent c7c4ac07
...@@ -4,18 +4,31 @@ class HealthController < ActionController::Base ...@@ -4,18 +4,31 @@ class HealthController < ActionController::Base
protect_from_forgery with: :exception, prepend: true protect_from_forgery with: :exception, prepend: true
include RequiresWhitelistedMonitoringClient include RequiresWhitelistedMonitoringClient
CHECKS = [
Gitlab::HealthChecks::DbCheck,
Gitlab::HealthChecks::Redis::RedisCheck,
Gitlab::HealthChecks::Redis::CacheCheck,
Gitlab::HealthChecks::Redis::QueuesCheck,
Gitlab::HealthChecks::Redis::SharedStateCheck,
Gitlab::HealthChecks::GitalyCheck
].freeze
def readiness def readiness
render_probe(::Gitlab::HealthChecks::Probes::Readiness) # readiness check is a collection with all above application-level checks
render_checks(*CHECKS)
end end
def liveness def liveness
render_probe(::Gitlab::HealthChecks::Probes::Liveness) # liveness check is a collection without additional checks
render_checks
end end
private private
def render_probe(probe_class) def render_checks(*checks)
result = probe_class.new.execute result = Gitlab::HealthChecks::Probes::Collection
.new(*checks)
.execute
# disable static error pages at the gitlab-workhorse level, we want to see this error response even in production # disable static error pages at the gitlab-workhorse level, we want to see this error response even in production
headers["X-GitLab-Custom-Error"] = 1 unless result.success? headers["X-GitLab-Custom-Error"] = 1 unless result.success?
......
# frozen_string_literal: true
module Gitlab
module HealthChecks
# Frozen list of health-check classes covering the database, the Redis
# stores (general, cache, queues, shared state) and Gitaly.
# The `Probes::Readiness` initializer shown elsewhere in this diff seeds its
# check list from this constant before appending any additional checks.
CHECKS = [
Gitlab::HealthChecks::DbCheck,
Gitlab::HealthChecks::Redis::RedisCheck,
Gitlab::HealthChecks::Redis::CacheCheck,
Gitlab::HealthChecks::Redis::QueuesCheck,
Gitlab::HealthChecks::Redis::SharedStateCheck,
Gitlab::HealthChecks::GitalyCheck
].freeze
end
end
...@@ -3,14 +3,13 @@ ...@@ -3,14 +3,13 @@
module Gitlab module Gitlab
module HealthChecks module HealthChecks
module Probes module Probes
class Readiness class Collection
attr_reader :checks attr_reader :checks
# This accepts an array of objects implementing `:readiness` # This accepts an array of objects implementing `:readiness`
# that returns `::Gitlab::HealthChecks::Result` # that returns `::Gitlab::HealthChecks::Result`
def initialize(*additional_checks) def initialize(*checks)
@checks = ::Gitlab::HealthChecks::CHECKS @checks = checks
@checks += additional_checks
end end
def execute def execute
......
# frozen_string_literal: true
module Gitlab
module HealthChecks
module Probes
# Liveness probe that runs no checks at all: it reports success
# unconditionally.
class Liveness
# Builds the probe result.
#
# Returns a `Probes::Status` constructed with HTTP code 200 and a
# `status: 'ok'` payload.
def execute
Probes::Status.new(200, status: 'ok')
end
end
end
end
end
...@@ -6,7 +6,7 @@ module Gitlab ...@@ -6,7 +6,7 @@ module Gitlab
class BaseExporter < Daemon class BaseExporter < Daemon
attr_reader :server attr_reader :server
attr_accessor :additional_checks attr_accessor :readiness_checks
def enabled? def enabled?
settings.enabled settings.enabled
...@@ -73,11 +73,11 @@ module Gitlab ...@@ -73,11 +73,11 @@ module Gitlab
end end
def readiness_probe def readiness_probe
::Gitlab::HealthChecks::Probes::Readiness.new(*additional_checks) ::Gitlab::HealthChecks::Probes::Collection.new(*readiness_checks)
end end
def liveness_probe def liveness_probe
::Gitlab::HealthChecks::Probes::Liveness.new ::Gitlab::HealthChecks::Probes::Collection.new
end end
def render_probe(probe, req, res) def render_probe(probe, req, res)
......
...@@ -20,7 +20,7 @@ module Gitlab ...@@ -20,7 +20,7 @@ module Gitlab
def initialize def initialize
super super
self.additional_checks = [ self.readiness_checks = [
WebExporter::ExporterCheck.new(self), WebExporter::ExporterCheck.new(self),
Gitlab::HealthChecks::PumaCheck, Gitlab::HealthChecks::PumaCheck,
Gitlab::HealthChecks::UnicornCheck Gitlab::HealthChecks::UnicornCheck
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the generic probe that aggregates an arbitrary list of checks.
describe Gitlab::HealthChecks::Probes::Collection do
  let(:readiness) { described_class.new(*checks) }

  # The method under test is `#execute` (see `subject` below), so the group
  # is labelled after it rather than after a non-existent `#call`.
  describe '#execute' do
    subject { readiness.execute }

    context 'with all checks' do
      # The full set of application-level checks, passed in explicitly.
      let(:checks) do
        [
          Gitlab::HealthChecks::DbCheck,
          Gitlab::HealthChecks::Redis::RedisCheck,
          Gitlab::HealthChecks::Redis::CacheCheck,
          Gitlab::HealthChecks::Redis::QueuesCheck,
          Gitlab::HealthChecks::Redis::SharedStateCheck,
          Gitlab::HealthChecks::GitalyCheck
        ]
      end

      it 'responds with readiness checks data' do
        expect(subject.http_status).to eq(200)

        expect(subject.json[:status]).to eq('ok')
        expect(subject.json['db_check']).to contain_exactly(status: 'ok')
        expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
        expect(subject.json['queues_check']).to contain_exactly(status: 'ok')
        expect(subject.json['shared_state_check']).to contain_exactly(status: 'ok')
        expect(subject.json['gitaly_check']).to contain_exactly(
          status: 'ok', labels: { shard: 'default' })
      end

      context 'when Redis fails' do
        # Force only the Redis check to report a failure; the other checks
        # still run for real.
        before do
          allow(Gitlab::HealthChecks::Redis::RedisCheck).to receive(:readiness).and_return(
            Gitlab::HealthChecks::Result.new('redis_check', false, "check error"))
        end

        it 'responds with failure' do
          expect(subject.http_status).to eq(503)

          expect(subject.json[:status]).to eq('failed')
          expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
          expect(subject.json['redis_check']).to contain_exactly(
            status: 'failed', message: 'check error')
        end
      end
    end

    context 'without checks' do
      # An empty collection is expected to succeed.
      let(:checks) { [] }

      it 'responds with success' do
        expect(subject.http_status).to eq(200)

        expect(subject.json).to eq(status: 'ok')
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the liveness probe, which is expected to succeed unconditionally.
describe Gitlab::HealthChecks::Probes::Liveness do
  let(:liveness) { described_class.new }

  # The method under test is `#execute` (see `subject` below), so the group
  # is labelled after it rather than after a non-existent `#call`.
  describe '#execute' do
    subject { liveness.execute }

    it 'responds with liveness checks data' do
      expect(subject.http_status).to eq(200)

      expect(subject.json[:status]).to eq('ok')
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the readiness probe built without any additional checks.
describe Gitlab::HealthChecks::Probes::Readiness do
  let(:readiness) { described_class.new }

  # The method under test is `#execute` (see `subject` below), so the group
  # is labelled after it rather than after a non-existent `#call`.
  describe '#execute' do
    subject { readiness.execute }

    it 'responds with readiness checks data' do
      expect(subject.http_status).to eq(200)

      expect(subject.json[:status]).to eq('ok')
      expect(subject.json['db_check']).to contain_exactly(status: 'ok')
      expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
      expect(subject.json['queues_check']).to contain_exactly(status: 'ok')
      expect(subject.json['shared_state_check']).to contain_exactly(status: 'ok')
      expect(subject.json['gitaly_check']).to contain_exactly(
        status: 'ok', labels: { shard: 'default' })
    end

    context 'when Redis fails' do
      # Force only the Redis check to report a failure.
      before do
        allow(Gitlab::HealthChecks::Redis::RedisCheck).to receive(:readiness).and_return(
          Gitlab::HealthChecks::Result.new('redis_check', false, "check error"))
      end

      it 'responds with failure' do
        expect(subject.http_status).to eq(503)

        expect(subject.json[:status]).to eq('failed')
        expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
        expect(subject.json['redis_check']).to contain_exactly(
          status: 'failed', message: 'check error')
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment