Commit 03cba0fe authored by Kamil Trzciński's avatar Kamil Trzciński

Add unicorn/puma health checks

These checks validate the number of workers
available to process requests.

This allows the readiness probe to indicate when
requests can be safely sent to the web server.
parent 86bb8213
...@@ -5,7 +5,7 @@ module Gitlab ...@@ -5,7 +5,7 @@ module Gitlab
class GitalyCheck class GitalyCheck
extend BaseAbstractCheck extend BaseAbstractCheck
METRIC_PREFIX = 'gitaly_health_check' METRIC_PREFIX = 'gitaly_health_check'.freeze
class << self class << self
def readiness def readiness
......
...@@ -6,10 +6,10 @@ module Gitlab ...@@ -6,10 +6,10 @@ module Gitlab
class Readiness class Readiness
attr_reader :checks attr_reader :checks
# This accepts an array of Proc # This accepts an array of objects implementing `:readiness`
# that returns `::Gitlab::HealthChecks::Result` # that returns `::Gitlab::HealthChecks::Result`
def initialize(*additional_checks) def initialize(*additional_checks)
@checks = ::Gitlab::HealthChecks::CHECKS.map { |check| check.method(:readiness) } @checks = ::Gitlab::HealthChecks::CHECKS
@checks += additional_checks @checks += additional_checks
end end
...@@ -43,7 +43,7 @@ module Gitlab ...@@ -43,7 +43,7 @@ module Gitlab
def probe_readiness def probe_readiness
checks checks
.flat_map(&:call) .flat_map(&:readiness)
.compact .compact
.group_by(&:name) .group_by(&:name)
end end
......
# frozen_string_literal: true
module Gitlab
  module HealthChecks
    # Health check that reports the number of Puma workers.
    #
    # This check can only be run on Puma `master` process
    class PumaCheck
      extend SimpleAbstractCheck

      class << self
        private

        # Prefix used when building the metric names of this check.
        def metric_prefix
          'puma_check'
        end

        # The check passes when at least one worker is reported.
        def successful?(result)
          result > 0
        end

        # Returns the worker count taken from the Puma stats, `nil` when Puma
        # is not loaded, or `0` when reading the stats raises `NoMethodError`.
        def check
          return unless defined?(::Puma)

          # Use the same absolute constant as the guard above, so the object
          # that was checked is the object that gets called.
          stats = JSON.parse(::Puma.stats)

          # If `workers` is missing this means that
          # Puma server is running in single mode
          stats.fetch('workers', 1)
        rescue NoMethodError
          # server is not ready
          0
        end
      end
    end
  end
end
...@@ -7,6 +7,8 @@ module Gitlab ...@@ -7,6 +7,8 @@ module Gitlab
def readiness def readiness
check_result = check check_result = check
return if check_result.nil?
if successful?(check_result) if successful?(check_result)
HealthChecks::Result.new(name, true) HealthChecks::Result.new(name, true)
elsif check_result.is_a?(Timeout::Error) elsif check_result.is_a?(Timeout::Error)
...@@ -20,6 +22,8 @@ module Gitlab ...@@ -20,6 +22,8 @@ module Gitlab
def metrics def metrics
result, elapsed = with_timing(&method(:check)) result, elapsed = with_timing(&method(:check))
return if result.nil?
Rails.logger.error("#{human_name} check returned unexpected result #{result}") unless successful?(result) # rubocop:disable Gitlab/RailsLogger Rails.logger.error("#{human_name} check returned unexpected result #{result}") unless successful?(result) # rubocop:disable Gitlab/RailsLogger
[ [
metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0), metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0),
......
# frozen_string_literal: true
module Gitlab
  module HealthChecks
    # Health check that sums `worker_processes` over the Unicorn HTTP servers
    # found in this process.
    #
    # This check can only be run on Unicorn `master` process
    class UnicornCheck
      extend SimpleAbstractCheck

      class << self
        include Gitlab::Utils::StrongMemoize

        private

        # Prefix used when building the metric names of this check.
        def metric_prefix
          'unicorn_check'
        end

        # The check passes when at least one worker process is configured.
        def successful?(result)
          result > 0
        end

        # Returns the total of `worker_processes` across all known servers,
        # or `nil` when Unicorn is not loaded.
        def check
          servers = http_servers
          return unless servers

          servers.sum(&:worker_processes) # rubocop: disable CodeReuse/ActiveRecord
        end

        # Walking ObjectSpace is expensive: on a fully loaded application it
        # takes around 80ms. The HttpServer instances are not expected to
        # change, so the list is looked up once and then memoized.
        def http_servers
          strong_memoize(:http_servers) do
            ObjectSpace.each_object(::Unicorn::HttpServer).to_a if defined?(::Unicorn::HttpServer)
          end
        end
      end
    end
  end
end
...@@ -6,6 +6,8 @@ module Gitlab ...@@ -6,6 +6,8 @@ module Gitlab
class BaseExporter < Daemon class BaseExporter < Daemon
attr_reader :server attr_reader :server
attr_accessor :additional_checks
def enabled? def enabled?
settings.enabled settings.enabled
end end
...@@ -32,12 +34,10 @@ module Gitlab ...@@ -32,12 +34,10 @@ module Gitlab
Port: settings.port, BindAddress: settings.address, Port: settings.port, BindAddress: settings.address,
Logger: logger, AccessLog: access_log) Logger: logger, AccessLog: access_log)
server.mount_proc '/readiness' do |req, res| server.mount_proc '/readiness' do |req, res|
render_probe( render_probe(readiness_probe, req, res)
::Gitlab::HealthChecks::Probes::Readiness.new, req, res)
end end
server.mount_proc '/liveness' do |req, res| server.mount_proc '/liveness' do |req, res|
render_probe( render_probe(liveness_probe, req, res)
::Gitlab::HealthChecks::Probes::Liveness.new, req, res)
end end
server.mount '/', Rack::Handler::WEBrick, rack_app server.mount '/', Rack::Handler::WEBrick, rack_app
server.start server.start
...@@ -45,8 +45,10 @@ module Gitlab ...@@ -45,8 +45,10 @@ module Gitlab
def stop_working def stop_working
if server if server
# We close the sockets if the thread is no longer running;
# this happens when the process forks.
server.listeners.each(&:close) unless thread.alive?
server.shutdown server.shutdown
server.listeners.each(&:close)
end end
@server = nil @server = nil
...@@ -60,6 +62,14 @@ module Gitlab ...@@ -60,6 +62,14 @@ module Gitlab
end end
end end
# Builds the probe rendered for `/readiness`, passing along `additional_checks`.
def readiness_probe
  extra_checks = additional_checks

  ::Gitlab::HealthChecks::Probes::Readiness.new(*extra_checks)
end
# Builds the probe rendered for `/liveness`.
def liveness_probe
  probe_class = ::Gitlab::HealthChecks::Probes::Liveness

  probe_class.new
end
def render_probe(probe, req, res) def render_probe(probe, req, res)
result = probe.execute result = probe.execute
......
...@@ -7,6 +7,16 @@ module Gitlab ...@@ -7,6 +7,16 @@ module Gitlab
module Metrics module Metrics
module Exporter module Exporter
class WebExporter < BaseExporter class WebExporter < BaseExporter
# This exporter is always run on master process, so the web server
# checks (Puma and Unicorn) are registered as additional checks.
def initialize
  super

  web_server_checks = [
    Gitlab::HealthChecks::PumaCheck,
    Gitlab::HealthChecks::UnicornCheck
  ]

  self.additional_checks = web_server_checks
end
def settings def settings
Settings.monitoring.web_exporter Settings.monitoring.web_exporter
end end
......
require 'spec_helper'
describe Gitlab::HealthChecks::PumaCheck do
  let(:result_class) { Gitlab::HealthChecks::Result }
  let(:readiness) { described_class.readiness }
  let(:metrics) { described_class.metrics }

  # Shared assertions: the readiness result and the success/latency metrics
  # must reflect the given `state` (and optional failure `message`).
  shared_examples 'with state' do |(state, message)|
    it "does provide readiness" do
      expected_result = result_class.new('puma_check', state, message)

      expect(readiness).to eq(expected_result)
    end

    it "does provide metrics" do
      success_metric = an_object_having_attributes(name: 'puma_check_success', value: state ? 1 : 0)
      latency_metric = an_object_having_attributes(name: 'puma_check_latency_seconds', value: be >= 0)

      expect(metrics).to include(success_metric)
      expect(metrics).to include(latency_metric)
    end
  end

  context 'when Puma is not loaded' do
    before do
      hide_const('Puma')
    end

    it "does not provide readiness and metrics" do
      expect(readiness).to be_nil
      expect(metrics).to be_nil
    end
  end

  context 'when Puma is loaded' do
    before do
      stub_const('Puma', Module.new)
    end

    context 'when stats are missing' do
      before do
        expect(Puma).to receive(:stats).and_raise(NoMethodError)
      end

      it_behaves_like 'with state', [false, 'unexpected Puma check result: 0']
    end

    # Stats without a `workers` key.
    context 'for Single mode' do
      before do
        expect(Puma).to receive(:stats).and_return('{}')
      end

      it_behaves_like 'with state', true
    end

    # Stats that report a `workers` count.
    context 'for Cluster mode' do
      before do
        expect(Puma).to receive(:stats).and_return('{"workers":2}')
      end

      it_behaves_like 'with state', true
    end
  end
end
require 'spec_helper'
describe Gitlab::HealthChecks::UnicornCheck do
  let(:result_class) { Gitlab::HealthChecks::Result }
  let(:readiness) { described_class.readiness }
  let(:metrics) { described_class.metrics }

  # The check memoizes the list of servers, so reset it for every example.
  before do
    described_class.clear_memoization(:http_servers)
  end

  # Shared assertions: the readiness result and the success/latency metrics
  # must reflect the given `state` (and optional failure `message`).
  shared_examples 'with state' do |(state, message)|
    it "does provide readiness" do
      expect(readiness).to eq(result_class.new('unicorn_check', state, message))
    end

    it "does provide metrics" do
      expect(metrics).to include(
        an_object_having_attributes(name: 'unicorn_check_success', value: state ? 1 : 0))
      expect(metrics).to include(
        an_object_having_attributes(name: 'unicorn_check_latency_seconds', value: be >= 0))
    end
  end

  context 'when Unicorn is not loaded' do
    before do
      hide_const('Unicorn')
    end

    it "does not provide readiness and metrics" do
      expect(readiness).to be_nil
      expect(metrics).to be_nil
    end
  end

  context 'when Unicorn is loaded' do
    # Stand-in for `Unicorn::HttpServer` exposing only `worker_processes`.
    let(:http_server_class) { Struct.new(:worker_processes) }

    before do
      stub_const('Unicorn::HttpServer', http_server_class)
    end

    context 'when no servers are running' do
      it_behaves_like 'with state', [false, 'unexpected Unicorn check result: 0']
    end

    context 'when servers without workers are running' do
      # The check discovers servers through ObjectSpace, so the instance must
      # stay referenced for the whole example; an unreferenced object could be
      # garbage-collected before the check runs.
      let!(:http_server) { http_server_class.new(0) }

      it_behaves_like 'with state', [false, 'unexpected Unicorn check result: 0']
    end

    context 'when servers with workers are running' do
      # Kept referenced for the same reason as above.
      let!(:http_server) { http_server_class.new(1) }

      it_behaves_like 'with state', true
    end
  end
end
...@@ -64,6 +64,18 @@ describe Gitlab::Metrics::Exporter::BaseExporter do ...@@ -64,6 +64,18 @@ describe Gitlab::Metrics::Exporter::BaseExporter do
exporter.start.join exporter.start.join
end end
end end
# Covers stopping the exporter after its thread has been started and joined.
describe 'when thread is not alive' do
it 'does close listeners' do
# `start` is set up without `and_call_original`, so the real WEBrick
# `start` is not invoked here.
expect_any_instance_of(::WEBrick::HTTPServer).to receive(:start)
# `listeners` must be requested, and the real implementation is still used.
expect_any_instance_of(::WEBrick::HTTPServer).to receive(:listeners)
.and_call_original
# Start the exporter and wait for its thread to finish.
expect { exporter.start.join }.to change { exporter.thread? }.from(false).to(true)
# NOTE(review): presumably the thread is no longer alive at this point, so
# stopping should hit the listener-closing path; confirm against `stop_working`.
exporter.stop
end
end
end end
describe '#stop' do describe '#stop' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment