Commit 256a3072 authored by Kamil Trzciński's avatar Kamil Trzciński

Improve service level health-checks

This improves the behavior of health-checks
introducing two levels of them:

- service level: checks served by `sidekiq|web_exporter:'
  served on separate port

- application level: checks served by `/-/metrics` endpoint
  of `HealthController`

The changes makes the:

- `/liveness` always indicate that application is ready
  and not deadlocked
- `/readiness` to indicate:

The changes for `/readiness`:

- on service level: the status of the service if it can accept
  and process connections

- on application level: the status of external components if they
  are accessible

The `on service level` and `on application level` do not share
any checks, they have different checks exposed.
parent c7c4ac07
......@@ -4,18 +4,31 @@ class HealthController < ActionController::Base
protect_from_forgery with: :exception, prepend: true
include RequiresWhitelistedMonitoringClient
CHECKS = [
Gitlab::HealthChecks::DbCheck,
Gitlab::HealthChecks::Redis::RedisCheck,
Gitlab::HealthChecks::Redis::CacheCheck,
Gitlab::HealthChecks::Redis::QueuesCheck,
Gitlab::HealthChecks::Redis::SharedStateCheck,
Gitlab::HealthChecks::GitalyCheck
].freeze
def readiness
render_probe(::Gitlab::HealthChecks::Probes::Readiness)
# readiness check is a collection with all above application-level checks
render_checks(*CHECKS)
end
def liveness
render_probe(::Gitlab::HealthChecks::Probes::Liveness)
# liveness check is a collection without additional checks
render_checks
end
private
def render_probe(probe_class)
result = probe_class.new.execute
def render_checks(*checks)
result = Gitlab::HealthChecks::Probes::Collection
.new(*checks)
.execute
# disable static error pages at the gitlab-workhorse level, we want to see this error response even in production
headers["X-GitLab-Custom-Error"] = 1 unless result.success?
......
# frozen_string_literal: true
module Gitlab
module HealthChecks
CHECKS = [
Gitlab::HealthChecks::DbCheck,
Gitlab::HealthChecks::Redis::RedisCheck,
Gitlab::HealthChecks::Redis::CacheCheck,
Gitlab::HealthChecks::Redis::QueuesCheck,
Gitlab::HealthChecks::Redis::SharedStateCheck,
Gitlab::HealthChecks::GitalyCheck
].freeze
end
end
......@@ -3,14 +3,13 @@
module Gitlab
module HealthChecks
module Probes
class Readiness
class Collection
attr_reader :checks
# This accepts an array of objects implementing `:readiness`
# that returns `::Gitlab::HealthChecks::Result`
def initialize(*additional_checks)
@checks = ::Gitlab::HealthChecks::CHECKS
@checks += additional_checks
def initialize(*checks)
@checks = checks
end
def execute
......
# frozen_string_literal: true
module Gitlab
module HealthChecks
module Probes
class Liveness
def execute
Probes::Status.new(200, status: 'ok')
end
end
end
end
end
......@@ -6,7 +6,7 @@ module Gitlab
class BaseExporter < Daemon
attr_reader :server
attr_accessor :additional_checks
attr_accessor :readiness_checks
def enabled?
settings.enabled
......@@ -73,11 +73,11 @@ module Gitlab
end
def readiness_probe
::Gitlab::HealthChecks::Probes::Readiness.new(*additional_checks)
::Gitlab::HealthChecks::Probes::Collection.new(*readiness_checks)
end
def liveness_probe
::Gitlab::HealthChecks::Probes::Liveness.new
::Gitlab::HealthChecks::Probes::Collection.new
end
def render_probe(probe, req, res)
......
......@@ -20,7 +20,7 @@ module Gitlab
def initialize
super
self.additional_checks = [
self.readiness_checks = [
WebExporter::ExporterCheck.new(self),
Gitlab::HealthChecks::PumaCheck,
Gitlab::HealthChecks::UnicornCheck
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::HealthChecks::Probes::Collection do
let(:readiness) { described_class.new(*checks) }
describe '#call' do
subject { readiness.execute }
context 'with all checks' do
let(:checks) do
[
Gitlab::HealthChecks::DbCheck,
Gitlab::HealthChecks::Redis::RedisCheck,
Gitlab::HealthChecks::Redis::CacheCheck,
Gitlab::HealthChecks::Redis::QueuesCheck,
Gitlab::HealthChecks::Redis::SharedStateCheck,
Gitlab::HealthChecks::GitalyCheck
]
end
it 'responds with readiness checks data' do
expect(subject.http_status).to eq(200)
expect(subject.json[:status]).to eq('ok')
expect(subject.json['db_check']).to contain_exactly(status: 'ok')
expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
expect(subject.json['queues_check']).to contain_exactly(status: 'ok')
expect(subject.json['shared_state_check']).to contain_exactly(status: 'ok')
expect(subject.json['gitaly_check']).to contain_exactly(
status: 'ok', labels: { shard: 'default' })
end
context 'when Redis fails' do
before do
allow(Gitlab::HealthChecks::Redis::RedisCheck).to receive(:readiness).and_return(
Gitlab::HealthChecks::Result.new('redis_check', false, "check error"))
end
it 'responds with failure' do
expect(subject.http_status).to eq(503)
expect(subject.json[:status]).to eq('failed')
expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
expect(subject.json['redis_check']).to contain_exactly(
status: 'failed', message: 'check error')
end
end
end
context 'without checks' do
let(:checks) { [] }
it 'responds with success' do
expect(subject.http_status).to eq(200)
expect(subject.json).to eq(status: 'ok')
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::HealthChecks::Probes::Liveness do
let(:liveness) { described_class.new }
describe '#call' do
subject { liveness.execute }
it 'responds with liveness checks data' do
expect(subject.http_status).to eq(200)
expect(subject.json[:status]).to eq('ok')
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::HealthChecks::Probes::Readiness do
let(:readiness) { described_class.new }
describe '#call' do
subject { readiness.execute }
it 'responds with readiness checks data' do
expect(subject.http_status).to eq(200)
expect(subject.json[:status]).to eq('ok')
expect(subject.json['db_check']).to contain_exactly(status: 'ok')
expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
expect(subject.json['queues_check']).to contain_exactly(status: 'ok')
expect(subject.json['shared_state_check']).to contain_exactly(status: 'ok')
expect(subject.json['gitaly_check']).to contain_exactly(
status: 'ok', labels: { shard: 'default' })
end
context 'when Redis fails' do
before do
allow(Gitlab::HealthChecks::Redis::RedisCheck).to receive(:readiness).and_return(
Gitlab::HealthChecks::Result.new('redis_check', false, "check error"))
end
it 'responds with failure' do
expect(subject.http_status).to eq(503)
expect(subject.json[:status]).to eq('failed')
expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
expect(subject.json['redis_check']).to contain_exactly(
status: 'failed', message: 'check error')
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment