Commit a261590f authored by Kamil Trzciński's avatar Kamil Trzciński Committed by Grzegorz Bizon

Improve liveness and readiness probes

This adds liveness and readiness probes
on sidekiq|web_exporters.

This also improves readiness probe
to return a success if at least one
probe from the group (like Gitaly)
returns success.
parent 2590ff59
...@@ -5,23 +5,21 @@ class HealthController < ActionController::Base ...@@ -5,23 +5,21 @@ class HealthController < ActionController::Base
include RequiresWhitelistedMonitoringClient include RequiresWhitelistedMonitoringClient
def readiness def readiness
results = checks.flat_map(&:readiness) render_probe(::Gitlab::HealthChecks::Probes::Readiness)
success = results.all?(&:success)
# disable static error pages at the gitlab-workhorse level, we want to see this error response even in production
headers["X-GitLab-Custom-Error"] = 1 unless success
response = results.map { |result| [result.name, result.payload] }.to_h
render json: response, status: success ? :ok : :service_unavailable
end end
def liveness def liveness
render json: { status: 'ok' }, status: :ok render_probe(::Gitlab::HealthChecks::Probes::Liveness)
end end
private private
def checks def render_probe(probe_class)
::Gitlab::HealthChecks::CHECKS result = probe_class.new.execute
# disable static error pages at the gitlab-workhorse level, we want to see this error response even in production
headers["X-GitLab-Custom-Error"] = 1 unless result.success?
render json: result.json, status: result.http_status
end end
end end
---
title: Export liveness and readiness probes
merge_request:
author:
type: changed
# frozen_string_literal: true
module Gitlab
module HealthChecks
module Probes
class Liveness
def execute
Probes::Status.new(200, status: 'ok')
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module HealthChecks
module Probes
class Readiness
attr_reader :checks
# This accepts an array of Proc
# that returns `::Gitlab::HealthChecks::Result`
def initialize(*additional_checks)
@checks = ::Gitlab::HealthChecks::CHECKS.map { |check| check.method(:readiness) }
@checks += additional_checks
end
def execute
readiness = probe_readiness
success = all_succeeded?(readiness)
Probes::Status.new(
success ? 200 : 503,
status(success).merge(payload(readiness))
)
end
private
def all_succeeded?(readiness)
readiness.all? do |name, probes|
probes.any?(&:success)
end
end
def status(success)
{ status: success ? 'ok' : 'failed' }
end
def payload(readiness)
readiness.transform_values do |probes|
probes.map(&:payload)
end
end
def probe_readiness
checks
.flat_map(&:call)
.compact
.group_by(&:name)
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module HealthChecks
module Probes
Status = Struct.new(:http_status, :json) do
# We accept 2xx
def success?
http_status / 100 == 2
end
end
end
end
end
...@@ -31,7 +31,15 @@ module Gitlab ...@@ -31,7 +31,15 @@ module Gitlab
@server = ::WEBrick::HTTPServer.new( @server = ::WEBrick::HTTPServer.new(
Port: settings.port, BindAddress: settings.address, Port: settings.port, BindAddress: settings.address,
Logger: logger, AccessLog: access_log) Logger: logger, AccessLog: access_log)
server.mount "/", Rack::Handler::WEBrick, rack_app server.mount_proc '/readiness' do |req, res|
render_probe(
::Gitlab::HealthChecks::Probes::Readiness.new, req, res)
end
server.mount_proc '/liveness' do |req, res|
render_probe(
::Gitlab::HealthChecks::Probes::Liveness.new, req, res)
end
server.mount '/', Rack::Handler::WEBrick, rack_app
server.start server.start
end end
...@@ -51,6 +59,14 @@ module Gitlab ...@@ -51,6 +59,14 @@ module Gitlab
run -> (env) { [404, {}, ['']] } run -> (env) { [404, {}, ['']] }
end end
end end
def render_probe(probe, req, res)
result = probe.execute
res.status = result.http_status
res.content_type = 'application/json; charset=utf-8'
res.body = result.json.to_json
end
end end
end end
end end
......
...@@ -24,11 +24,12 @@ describe HealthController do ...@@ -24,11 +24,12 @@ describe HealthController do
it 'responds with readiness checks data' do it 'responds with readiness checks data' do
subject subject
expect(json_response['db_check']['status']).to eq('ok') expect(json_response['db_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['cache_check']['status']).to eq('ok') expect(json_response['cache_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['queues_check']['status']).to eq('ok') expect(json_response['queues_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['shared_state_check']['status']).to eq('ok') expect(json_response['shared_state_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['gitaly_check']['status']).to eq('ok') expect(json_response['gitaly_check']).to contain_exactly(
{ 'status' => 'ok', 'labels' => { 'shard' => 'default' } })
end end
it 'responds with readiness checks data when a failure happens' do it 'responds with readiness checks data when a failure happens' do
...@@ -37,9 +38,9 @@ describe HealthController do ...@@ -37,9 +38,9 @@ describe HealthController do
subject subject
expect(json_response['redis_check']['status']).to eq('failed') expect(json_response['cache_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['redis_check']['message']).to eq('check error') expect(json_response['redis_check']).to contain_exactly(
expect(json_response['cache_check']['status']).to eq('ok') { 'status' => 'failed', 'message' => 'check error' })
expect(response.status).to eq(503) expect(response.status).to eq(503)
expect(response.headers['X-GitLab-Custom-Error']).to eq(1) expect(response.headers['X-GitLab-Custom-Error']).to eq(1)
...@@ -90,7 +91,7 @@ describe HealthController do ...@@ -90,7 +91,7 @@ describe HealthController do
it 'responds with liveness checks data' do it 'responds with liveness checks data' do
subject subject
expect(json_response['status']).to eq('ok') expect(json_response).to eq('status' => 'ok')
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::HealthChecks::Probes::Liveness do
let(:liveness) { described_class.new }
describe '#call' do
subject { liveness.execute }
it 'responds with liveness checks data' do
expect(subject.http_status).to eq(200)
expect(subject.json[:status]).to eq('ok')
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::HealthChecks::Probes::Readiness do
let(:readiness) { described_class.new }
describe '#call' do
subject { readiness.execute }
it 'responds with readiness checks data' do
expect(subject.http_status).to eq(200)
expect(subject.json[:status]).to eq('ok')
expect(subject.json['db_check']).to contain_exactly(status: 'ok')
expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
expect(subject.json['queues_check']).to contain_exactly(status: 'ok')
expect(subject.json['shared_state_check']).to contain_exactly(status: 'ok')
expect(subject.json['gitaly_check']).to contain_exactly(
status: 'ok', labels: { shard: 'default' })
end
context 'when Redis fails' do
before do
allow(Gitlab::HealthChecks::Redis::RedisCheck).to receive(:readiness).and_return(
Gitlab::HealthChecks::Result.new('redis_check', false, "check error"))
end
it 'responds with failure' do
expect(subject.http_status).to eq(503)
expect(subject.json[:status]).to eq('failed')
expect(subject.json['cache_check']).to contain_exactly(status: 'ok')
expect(subject.json['redis_check']).to contain_exactly(
status: 'failed', message: 'check error')
end
end
end
end
...@@ -4,35 +4,42 @@ require 'spec_helper' ...@@ -4,35 +4,42 @@ require 'spec_helper'
describe Gitlab::Metrics::Exporter::BaseExporter do describe Gitlab::Metrics::Exporter::BaseExporter do
let(:exporter) { described_class.new } let(:exporter) { described_class.new }
let(:server) { double('server') }
let(:socket) { double('socket') }
let(:log_filename) { File.join(Rails.root, 'log', 'sidekiq_exporter.log') } let(:log_filename) { File.join(Rails.root, 'log', 'sidekiq_exporter.log') }
let(:settings) { double('settings') } let(:settings) { double('settings') }
before do before do
allow(::WEBrick::HTTPServer).to receive(:new).and_return(server)
allow(server).to receive(:mount)
allow(server).to receive(:start)
allow(server).to receive(:shutdown)
allow(server).to receive(:listeners) { [socket] }
allow(socket).to receive(:close)
allow_any_instance_of(described_class).to receive(:log_filename).and_return(log_filename) allow_any_instance_of(described_class).to receive(:log_filename).and_return(log_filename)
allow_any_instance_of(described_class).to receive(:settings).and_return(settings) allow_any_instance_of(described_class).to receive(:settings).and_return(settings)
end end
describe 'when exporter is enabled' do describe 'when exporter is enabled' do
before do before do
allow(::WEBrick::HTTPServer).to receive(:new).with(
Port: anything,
BindAddress: anything,
Logger: anything,
AccessLog: anything
).and_wrap_original do |m, *args|
m.call(DoNotListen: true, Logger: args.first[:Logger])
end
allow_any_instance_of(::WEBrick::HTTPServer).to receive(:start)
allow(settings).to receive(:enabled).and_return(true) allow(settings).to receive(:enabled).and_return(true)
allow(settings).to receive(:port).and_return(3707) allow(settings).to receive(:port).and_return(8082)
allow(settings).to receive(:address).and_return('localhost') allow(settings).to receive(:address).and_return('localhost')
end end
after do
exporter.stop
end
describe 'when exporter is stopped' do describe 'when exporter is stopped' do
describe '#start' do describe '#start' do
it 'starts the exporter' do it 'starts the exporter' do
expect { exporter.start.join }.to change { exporter.thread? }.from(false).to(true) expect_any_instance_of(::WEBrick::HTTPServer).to receive(:start)
expect(server).to have_received(:start) expect { exporter.start.join }.to change { exporter.thread? }.from(false).to(true)
end end
describe 'with custom settings' do describe 'with custom settings' do
...@@ -45,23 +52,25 @@ describe Gitlab::Metrics::Exporter::BaseExporter do ...@@ -45,23 +52,25 @@ describe Gitlab::Metrics::Exporter::BaseExporter do
end end
it 'starts server with port and address from settings' do it 'starts server with port and address from settings' do
exporter.start.join expect(::WEBrick::HTTPServer).to receive(:new).with(
expect(::WEBrick::HTTPServer).to have_received(:new).with(
Port: port, Port: port,
BindAddress: address, BindAddress: address,
Logger: anything, Logger: anything,
AccessLog: anything AccessLog: anything
) ).and_wrap_original do |m, *args|
m.call(DoNotListen: true, Logger: args.first[:Logger])
end
exporter.start.join
end end
end end
end end
describe '#stop' do describe '#stop' do
it "doesn't shutdown stopped server" do it "doesn't shutdown stopped server" do
expect { exporter.stop }.not_to change { exporter.thread? } expect_any_instance_of(::WEBrick::HTTPServer).not_to receive(:shutdown)
expect(server).not_to have_received(:shutdown) expect { exporter.stop }.not_to change { exporter.thread? }
end end
end end
end end
...@@ -73,19 +82,65 @@ describe Gitlab::Metrics::Exporter::BaseExporter do ...@@ -73,19 +82,65 @@ describe Gitlab::Metrics::Exporter::BaseExporter do
describe '#start' do describe '#start' do
it "doesn't start running server" do it "doesn't start running server" do
expect { exporter.start.join }.not_to change { exporter.thread? } expect_any_instance_of(::WEBrick::HTTPServer).not_to receive(:start)
expect(server).to have_received(:start).once expect { exporter.start.join }.not_to change { exporter.thread? }
end end
end end
describe '#stop' do describe '#stop' do
it 'shutdowns server' do it 'shutdowns server' do
expect_any_instance_of(::WEBrick::HTTPServer).to receive(:shutdown)
expect { exporter.stop }.to change { exporter.thread? }.from(true).to(false) expect { exporter.stop }.to change { exporter.thread? }.from(true).to(false)
end
end
end
end
describe 'request handling' do
using RSpec::Parameterized::TableSyntax
expect(socket).to have_received(:close) where(:method_class, :path, :http_status) do
expect(server).to have_received(:shutdown) Net::HTTP::Get | '/metrics' | 200
Net::HTTP::Get | '/liveness' | 200
Net::HTTP::Get | '/readiness' | 200
Net::HTTP::Get | '/' | 404
end end
before do
allow(settings).to receive(:enabled).and_return(true)
allow(settings).to receive(:port).and_return(0)
allow(settings).to receive(:address).and_return('127.0.0.1')
# We want to wrap original method
# and run handling of requests
# in separate thread
allow_any_instance_of(::WEBrick::HTTPServer)
.to receive(:start).and_wrap_original do |m, *args|
Thread.new do
m.call(*args)
rescue IOError
# is raised as we close listeners
end
end
exporter.start.join
end
after do
exporter.stop
end
with_them do
let(:config) { exporter.server.config }
let(:request) { method_class.new(path) }
it 'responds with proper http_status' do
http = Net::HTTP.new(config[:BindAddress], config[:Port])
response = http.request(request)
expect(response.code).to eq(http_status.to_s)
end end
end end
end end
...@@ -97,18 +152,18 @@ describe Gitlab::Metrics::Exporter::BaseExporter do ...@@ -97,18 +152,18 @@ describe Gitlab::Metrics::Exporter::BaseExporter do
describe '#start' do describe '#start' do
it "doesn't start" do it "doesn't start" do
expect_any_instance_of(::WEBrick::HTTPServer).not_to receive(:start)
expect(exporter.start).to be_nil expect(exporter.start).to be_nil
expect { exporter.start }.not_to change { exporter.thread? } expect { exporter.start }.not_to change { exporter.thread? }
expect(server).not_to have_received(:start)
end end
end end
describe '#stop' do describe '#stop' do
it "doesn't shutdown" do it "doesn't shutdown" do
expect { exporter.stop }.not_to change { exporter.thread? } expect_any_instance_of(::WEBrick::HTTPServer).not_to receive(:shutdown)
expect(server).not_to have_received(:shutdown) expect { exporter.stop }.not_to change { exporter.thread? }
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment