Commit 63da77f8 authored by Kamil Trzciński's avatar Kamil Trzciński Committed by Sean McGivern

Improve `/-/health` to look at state of master

1. This moves the blackout period setting to `shutdown: blackout_seconds:`
1. This moves the blackout period logic to `LifecycleEvents`
1. This makes `web_exporter` use lifecycle events
   for the blackout period
1. This adds an additional health check that looks at the state
   of the master process
1. Exempt `/liveness` and `/readiness` from Rack Attack
1. Make `/liveness` and `/readiness` not hit DB/Redis
1. Optimise Rack Attack handling
parent 1dd0e8c2
...@@ -5,6 +5,11 @@ class HealthController < ActionController::Base ...@@ -5,6 +5,11 @@ class HealthController < ActionController::Base
include RequiresWhitelistedMonitoringClient include RequiresWhitelistedMonitoringClient
CHECKS = [ CHECKS = [
Gitlab::HealthChecks::MasterCheck
].freeze
ALL_CHECKS = [
*CHECKS,
Gitlab::HealthChecks::DbCheck, Gitlab::HealthChecks::DbCheck,
Gitlab::HealthChecks::Redis::RedisCheck, Gitlab::HealthChecks::Redis::RedisCheck,
Gitlab::HealthChecks::Redis::CacheCheck, Gitlab::HealthChecks::Redis::CacheCheck,
...@@ -14,8 +19,9 @@ class HealthController < ActionController::Base ...@@ -14,8 +19,9 @@ class HealthController < ActionController::Base
].freeze ].freeze
def readiness def readiness
# readiness check is a collection with all above application-level checks # readiness check is a collection of application-level checks
render_checks(*CHECKS) # and optionally all service checks
render_checks(params[:all] ? ALL_CHECKS : CHECKS)
end end
def liveness def liveness
...@@ -25,7 +31,7 @@ class HealthController < ActionController::Base ...@@ -25,7 +31,7 @@ class HealthController < ActionController::Base
private private
def render_checks(*checks) def render_checks(checks = [])
result = Gitlab::HealthChecks::Probes::Collection result = Gitlab::HealthChecks::Probes::Collection
.new(*checks) .new(*checks)
.execute .execute
......
...@@ -1032,12 +1032,6 @@ production: &base ...@@ -1032,12 +1032,6 @@ production: &base
# enabled: true # enabled: true
# address: localhost # address: localhost
# port: 8083 # port: 8083
# # blackout_seconds:
# # defines an interval to block healthcheck,
# # but continue accepting application requests
# # this allows Load Balancer to notice service
# # being shutdown and not interrupt any of the clients
# blackout_seconds: 10
## Prometheus settings ## Prometheus settings
# Do not modify these settings here. They should be modified in /etc/gitlab/gitlab.rb # Do not modify these settings here. They should be modified in /etc/gitlab/gitlab.rb
...@@ -1049,6 +1043,14 @@ production: &base ...@@ -1049,6 +1043,14 @@ production: &base
# enable: true # enable: true
# listen_address: 'localhost:9090' # listen_address: 'localhost:9090'
shutdown:
# # blackout_seconds:
# # defines an interval to block healthcheck,
# # but continue accepting application requests
# # this allows Load Balancer to notice service
# # being shutdown and not interrupt any of the clients
# blackout_seconds: 10
# #
# 5. Extra customization # 5. Extra customization
# ========================== # ==========================
......
...@@ -676,7 +676,12 @@ Settings.monitoring['web_exporter'] ||= Settingslogic.new({}) ...@@ -676,7 +676,12 @@ Settings.monitoring['web_exporter'] ||= Settingslogic.new({})
Settings.monitoring.web_exporter['enabled'] ||= false Settings.monitoring.web_exporter['enabled'] ||= false
Settings.monitoring.web_exporter['address'] ||= 'localhost' Settings.monitoring.web_exporter['address'] ||= 'localhost'
Settings.monitoring.web_exporter['port'] ||= 8083 Settings.monitoring.web_exporter['port'] ||= 8083
Settings.monitoring.web_exporter['blackout_seconds'] ||= 10
#
# Shutdown settings
#
Settings['shutdown'] ||= Settingslogic.new({})
Settings.shutdown['blackout_seconds'] ||= 10
# #
# Testing settings # Testing settings
......
...@@ -70,6 +70,13 @@ if defined?(::Unicorn) || defined?(::Puma) ...@@ -70,6 +70,13 @@ if defined?(::Unicorn) || defined?(::Puma)
Gitlab::Metrics::Exporter::WebExporter.instance.start Gitlab::Metrics::Exporter::WebExporter.instance.start
end end
# DEPRECATED: TO BE REMOVED
# This is needed to implement blackout period of `web_exporter`
# https://gitlab.com/gitlab-org/gitlab/issues/35343#note_238479057
Gitlab::Cluster::LifecycleEvents.on_before_blackout_period do
Gitlab::Metrics::Exporter::WebExporter.instance.mark_as_not_running!
end
Gitlab::Cluster::LifecycleEvents.on_before_graceful_shutdown do Gitlab::Cluster::LifecycleEvents.on_before_graceful_shutdown do
# We need to ensure that before we re-exec or shutdown server # We need to ensure that before we re-exec or shutdown server
# we do stop the exporter # we do stop the exporter
......
...@@ -8,3 +8,15 @@ HealthCheck.setup do |config| ...@@ -8,3 +8,15 @@ HealthCheck.setup do |config|
end end
end end
end end
# Wire the master health check into the cluster lifecycle.
#
# Before forking: the master creates the pipe that workers will later
# inspect to find out whether the master is still there.
Gitlab::Cluster::LifecycleEvents.on_before_fork do
Gitlab::HealthChecks::MasterCheck.register_master
end
# Before the blackout period: the master closes both ends of the pipe,
# so the master check starts failing from this point on.
Gitlab::Cluster::LifecycleEvents.on_before_blackout_period do
Gitlab::HealthChecks::MasterCheck.finish_master
end
# On worker start: the worker closes its copy of the write end,
# keeping only the read end used by the check.
Gitlab::Cluster::LifecycleEvents.on_worker_start do
Gitlab::HealthChecks::MasterCheck.register_worker
end
...@@ -39,45 +39,62 @@ module Gitlab::Throttle ...@@ -39,45 +39,62 @@ module Gitlab::Throttle
end end
class Rack::Attack class Rack::Attack
# Order conditions by how expensive they are:
# 1. The most expensive is the `req.unauthenticated?` and
# `req.authenticated_user_id` as it performs an expensive
# DB/Redis query to validate the request
# 2. Slightly less expensive is the need to query DB/Redis
# to unmarshal settings (`Gitlab::Throttle.settings`)
#
# We deliberately exempt `/-/health|liveness|readiness`
# from Rack Attack, as they must always be accessible
# by the load balancer; additional measures (token and
# whitelisting) are in place to prevent abuse.
throttle('throttle_unauthenticated', Gitlab::Throttle.unauthenticated_options) do |req| throttle('throttle_unauthenticated', Gitlab::Throttle.unauthenticated_options) do |req|
Gitlab::Throttle.settings.throttle_unauthenticated_enabled && if !req.should_be_skipped? &&
req.unauthenticated? && Gitlab::Throttle.settings.throttle_unauthenticated_enabled &&
!req.should_be_skipped? && req.unauthenticated?
req.ip req.ip
end
end end
throttle('throttle_authenticated_api', Gitlab::Throttle.authenticated_api_options) do |req| throttle('throttle_authenticated_api', Gitlab::Throttle.authenticated_api_options) do |req|
Gitlab::Throttle.settings.throttle_authenticated_api_enabled && if req.api_request? &&
req.api_request? && Gitlab::Throttle.settings.throttle_authenticated_api_enabled
req.authenticated_user_id([:api]) req.authenticated_user_id([:api])
end
end end
throttle('throttle_authenticated_web', Gitlab::Throttle.authenticated_web_options) do |req| throttle('throttle_authenticated_web', Gitlab::Throttle.authenticated_web_options) do |req|
Gitlab::Throttle.settings.throttle_authenticated_web_enabled && if req.web_request? &&
req.web_request? && Gitlab::Throttle.settings.throttle_authenticated_web_enabled
req.authenticated_user_id([:api, :rss, :ics]) req.authenticated_user_id([:api, :rss, :ics])
end
end end
throttle('throttle_unauthenticated_protected_paths', Gitlab::Throttle.protected_paths_options) do |req| throttle('throttle_unauthenticated_protected_paths', Gitlab::Throttle.protected_paths_options) do |req|
Gitlab::Throttle.protected_paths_enabled? && if !req.should_be_skipped? &&
req.unauthenticated? && req.protected_path? &&
!req.should_be_skipped? && Gitlab::Throttle.protected_paths_enabled? &&
req.protected_path? && req.unauthenticated?
req.ip req.ip
end
end end
throttle('throttle_authenticated_protected_paths_api', Gitlab::Throttle.protected_paths_options) do |req| throttle('throttle_authenticated_protected_paths_api', Gitlab::Throttle.protected_paths_options) do |req|
Gitlab::Throttle.protected_paths_enabled? && if req.api_request? &&
req.api_request? && Gitlab::Throttle.protected_paths_enabled? &&
req.protected_path? && req.protected_path?
req.authenticated_user_id([:api]) req.authenticated_user_id([:api])
end
end end
throttle('throttle_authenticated_protected_paths_web', Gitlab::Throttle.protected_paths_options) do |req| throttle('throttle_authenticated_protected_paths_web', Gitlab::Throttle.protected_paths_options) do |req|
Gitlab::Throttle.protected_paths_enabled? && if req.web_request? &&
req.web_request? && Gitlab::Throttle.protected_paths_enabled? &&
req.protected_path? && req.protected_path?
req.authenticated_user_id([:api, :rss, :ics]) req.authenticated_user_id([:api, :rss, :ics])
end
end end
class Request class Request
...@@ -97,12 +114,16 @@ class Rack::Attack ...@@ -97,12 +114,16 @@ class Rack::Attack
path =~ %r{^/api/v\d+/internal/} path =~ %r{^/api/v\d+/internal/}
end end
# Tells whether the request targets one of the health endpoints
# (`/-/health`, `/-/liveness`, `/-/readiness`).
#
# The pattern is anchored with `\A` (start of string) rather than `^`
# (start of any line), so that a path containing an embedded newline
# followed by `/-/health` is not mistaken for a health check.
#
# Returns 0 (truthy) when the path starts with a health endpoint,
# nil otherwise.
def health_check_request?
  path =~ %r{\A/-/(health|liveness|readiness)}
end
def should_be_skipped? def should_be_skipped?
api_internal_request? api_internal_request? || health_check_request?
end end
def web_request? def web_request?
!api_request? !api_request? && !health_check_request?
end end
def protected_path? def protected_path?
......
...@@ -39,7 +39,11 @@ GET http://localhost/-/liveness ...@@ -39,7 +39,11 @@ GET http://localhost/-/liveness
## Health ## Health
Checks whether the application server is running. It does not verify the database or other services are running. Checks whether the application server is running.
It does not verify that the database or other services
are running. This endpoint circumvents Rails Controllers
and is implemented as an additional middleware, `BasicHealthCheck`,
that runs very early in the request processing lifecycle.
```text ```text
GET /-/health GET /-/health
...@@ -59,10 +63,17 @@ GitLab OK ...@@ -59,10 +63,17 @@ GitLab OK
## Readiness ## Readiness
The readiness probe checks whether the GitLab instance is ready to use. It checks the dependent services (Database, Redis, Gitaly etc.) and gives a status for each. The readiness probe checks whether the GitLab instance is ready
to accept traffic via Rails Controllers. By default, the check
validates only instance-level checks.
If the `all=1` parameter is specified, the check also validates
the dependent services (Database, Redis, Gitaly etc.)
and gives a status for each.
```text ```text
GET /-/readiness GET /-/readiness
GET /-/readiness?all=1
``` ```
Example request: Example request:
...@@ -75,37 +86,30 @@ Example response: ...@@ -75,37 +86,30 @@ Example response:
```json ```json
{ {
"db_check":{ "master_check":[{
"status":"failed", "status":"failed",
"message": "unexpected Db check result: 0" "message": "unexpected Master check result: false"
}, }],
"redis_check":{ ...
"status":"ok" }
},
"cache_check":{
"status":"ok"
},
"queues_check":{
"status":"ok"
},
"shared_state_check":{
"status":"ok"
},
"gitaly_check":{
"status":"ok",
"labels":{
"shard":"default"
}
}
}
``` ```
On failure, the endpoint will return a `503` HTTP status code.
This check hits the database and Redis if authenticated via `token`.
This check is exempt from Rack Attack.
## Liveness ## Liveness
DANGER: **Warning:** DANGER: **Warning:**
In Gitlab [12.4](https://about.gitlab.com/upcoming-releases/) the response body of the Liveness check will change to match the example below. In Gitlab [12.4](https://about.gitlab.com/upcoming-releases/)
the response body of the Liveness check was changed
to match the example below.
The liveness probe checks whether the application server is alive. Unlike the [`health`](#health) check, this check hits the database. Checks whether the application server is running.
This probe is used to verify that Rails Controllers
are not deadlocked due to multi-threading.
```text ```text
GET /-/liveness GET /-/liveness
...@@ -127,7 +131,9 @@ On success, the endpoint will return a `200` HTTP status code, and a response li ...@@ -127,7 +131,9 @@ On success, the endpoint will return a `200` HTTP status code, and a response li
} }
``` ```
On failure, the endpoint will return a `500` HTTP status code. On failure, the endpoint will return a `503` HTTP status code.
This check is exempt from Rack Attack.
## Access token (Deprecated) ## Access token (Deprecated)
......
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
module EE::Gitlab::Rack::Attack module EE::Gitlab::Rack::Attack
Rack::Attack.throttle('throttle_incident_management_notification_web', EE::Gitlab::Throttle.incident_management_options) do |req| Rack::Attack.throttle('throttle_incident_management_notification_web', EE::Gitlab::Throttle.incident_management_options) do |req|
EE::Gitlab::Throttle.settings.throttle_incident_management_notification_enabled && if req.web_request? &&
req.web_request? && req.path.include?('alerts/notify') &&
req.path.include?('alerts/notify') && EE::Gitlab::Throttle.settings.throttle_incident_management_notification_enabled
req.path req.path
end
end end
end end
...@@ -10,38 +10,39 @@ module Gitlab ...@@ -10,38 +10,39 @@ module Gitlab
# #
# We have the following lifecycle events. # We have the following lifecycle events.
# #
# - on_master_start: # - on_before_fork (on master process):
# #
# Unicorn/Puma Cluster: This will be called exactly once, # Unicorn/Puma Cluster: This will be called exactly once,
# on startup, before the workers are forked. This is # on startup, before the workers are forked. This is
# called in the PARENT/MASTER process. # called in the PARENT/MASTER process.
# #
# Sidekiq/Puma Single: This is called immediately. # Sidekiq/Puma Single: This is not called.
# #
# - on_before_fork: # - on_master_start (on master process):
# #
# Unicorn/Puma Cluster: This will be called exactly once, # Unicorn/Puma Cluster: This will be called exactly once,
# on startup, before the workers are forked. This is # on startup, before the workers are forked. This is
# called in the PARENT/MASTER process. # called in the PARENT/MASTER process.
# #
# Sidekiq/Puma Single: This is not called. # Sidekiq/Puma Single: This is called immediately.
# #
# - on_worker_start: # - on_before_blackout_period (on master process):
# #
# Unicorn/Puma Cluster: This is called in the worker process # Unicorn/Puma Cluster: This will be called before a blackout
# exactly once before processing requests. # period when performing graceful shutdown of master.
# This is called on `master` process.
# #
# Sidekiq/Puma Single: This is called immediately. # Sidekiq/Puma Single: This is not called.
# #
# - on_before_graceful_shutdown: # - on_before_graceful_shutdown (on master process):
# #
# Unicorn/Puma Cluster: This will be called before a graceful # Unicorn/Puma Cluster: This will be called before a graceful
# shutdown of workers starts happening. # shutdown of workers starts happening, but after blackout period.
# This is called on `master` process. # This is called on `master` process.
# #
# Sidekiq/Puma Single: This is not called. # Sidekiq/Puma Single: This is not called.
# #
# - on_before_master_restart: # - on_before_master_restart (on master process):
# #
# Unicorn: This will be called before a new master is spun up. # Unicorn: This will be called before a new master is spun up.
# This is called on forked master before `execve` to become # This is called on forked master before `execve` to become
...@@ -53,6 +54,13 @@ module Gitlab ...@@ -53,6 +54,13 @@ module Gitlab
# #
# Sidekiq/Puma Single: This is not called. # Sidekiq/Puma Single: This is not called.
# #
# - on_worker_start (on worker process):
#
# Unicorn/Puma Cluster: This is called in the worker process
# exactly once before processing requests.
#
# Sidekiq/Puma Single: This is called immediately.
#
# Blocks will be executed in the order in which they are registered. # Blocks will be executed in the order in which they are registered.
# #
class LifecycleEvents class LifecycleEvents
...@@ -74,6 +82,12 @@ module Gitlab ...@@ -74,6 +82,12 @@ module Gitlab
(@before_fork_hooks ||= []) << block (@before_fork_hooks ||= []) << block
end end
# Read the config/initializers/cluster_events_before_phased_restart.rb
# Registers a hook for the blackout period phase.
# The block is only stored here; it is not executed at registration time.
# Returns the list of hooks registered so far.
def on_before_blackout_period(&block)
  @master_blackout_period ||= []
  @master_blackout_period << block
end
# Read the config/initializers/cluster_events_before_phased_restart.rb # Read the config/initializers/cluster_events_before_phased_restart.rb
def on_before_graceful_shutdown(&block) def on_before_graceful_shutdown(&block)
# Defer block execution # Defer block execution
...@@ -97,27 +111,24 @@ module Gitlab ...@@ -97,27 +111,24 @@ module Gitlab
# Lifecycle integration methods (called from unicorn.rb, puma.rb, etc.) # Lifecycle integration methods (called from unicorn.rb, puma.rb, etc.)
# #
def do_worker_start def do_worker_start
@worker_start_hooks&.each do |block| call(@worker_start_hooks)
block.call
end
end end
def do_before_fork def do_before_fork
@before_fork_hooks&.each do |block| call(@before_fork_hooks)
block.call
end
end end
def do_before_graceful_shutdown def do_before_graceful_shutdown
@master_graceful_shutdown&.each do |block| call(@master_blackout_period)
block.call
end blackout_seconds = ::Settings.shutdown.blackout_seconds.to_i
sleep(blackout_seconds) if blackout_seconds > 0
call(@master_graceful_shutdown)
end end
def do_before_master_restart def do_before_master_restart
@master_restart_hooks&.each do |block| call(@master_restart_hooks)
block.call
end
end end
# DEPRECATED # DEPRECATED
...@@ -132,6 +143,10 @@ module Gitlab ...@@ -132,6 +143,10 @@ module Gitlab
private private
# Runs every registered hook in registration order.
# A nil list (nothing was ever registered) is a no-op and yields nil;
# otherwise the list itself is returned.
def call(hooks)
  return if hooks.nil?

  hooks.each { |hook| hook.call }
end
def in_clustered_environment? def in_clustered_environment?
# Sidekiq doesn't fork # Sidekiq doesn't fork
return false if Sidekiq.server? return false if Sidekiq.server?
......
# frozen_string_literal: true

module Gitlab
  module HealthChecks
    # Health check that lets a worker find out whether its master
    # process is still there.
    #
    # This check is registered on master, and validated by worker:
    # the master opens a pipe before forking, each worker keeps only
    # the read end, and the state of that read end tells the worker
    # whether the write end is still held open.
    class MasterCheck
      # NOTE(review): SimpleAbstractCheck is defined elsewhere; it presumably
      # drives `metric_prefix`, `check` and `successful?` below - confirm.
      extend SimpleAbstractCheck

      class << self
        # Creates the pipe on the master process.
        #
        # When we fork, we pass the read pipe to the child;
        # the child can then react on whether the other end
        # of the pipe is still available.
        def register_master
          @pipe_read, @pipe_write = IO.pipe
        end

        # Closes both ends of the pipe held by this process.
        # After this, `check` in this process returns false (no pipe).
        def finish_master
          close_read
          close_write
        end

        # Called in a forked worker: the fork needs to close its copy
        # of the write end, so only the read end remains in the worker.
        def register_worker
          close_write
        end

        private

        # Closes and forgets the read end, if any.
        def close_read
          @pipe_read&.close
          @pipe_read = nil
        end

        # Closes and forgets the write end, if any.
        def close_write
          @pipe_write&.close
          @pipe_write = nil
        end

        def metric_prefix
          'master_check'
        end

        # The check result is already a boolean; pass it through as-is.
        def successful?(result)
          result
        end

        # Probes the read end of the pipe without blocking.
        #
        # Returns true when the pipe is open (or a byte could be read),
        # false when there is no pipe or the pipe reached end-of-file.
        def check
          # the lack of pipe is a legitimate failure of check
          return false unless @pipe_read

          @pipe_read.read_nonblock(1)

          true
        rescue IO::WaitReadable
          # if it is blocked, it means that the pipe is still open
          # and there's no data waiting on it.
          # IO::WaitReadable covers both the EAGAIN and the EWOULDBLOCK
          # variants, so this does not depend on the platform's errno.
          true
        rescue EOFError
          # the pipe is closed
          false
        end
      end
    end
  end
end
...@@ -20,6 +20,10 @@ module Gitlab ...@@ -20,6 +20,10 @@ module Gitlab
def initialize def initialize
super super
# DEPRECATED:
# these `readiness_checks` are deprecated
# as presenting no value in a way how we run
# application: https://gitlab.com/gitlab-org/gitlab/issues/35343
self.readiness_checks = [ self.readiness_checks = [
WebExporter::ExporterCheck.new(self), WebExporter::ExporterCheck.new(self),
Gitlab::HealthChecks::PumaCheck, Gitlab::HealthChecks::PumaCheck,
...@@ -35,6 +39,10 @@ module Gitlab ...@@ -35,6 +39,10 @@ module Gitlab
File.join(Rails.root, 'log', 'web_exporter.log') File.join(Rails.root, 'log', 'web_exporter.log')
end end
# Flags the exporter as no longer running by setting `@running` to false.
# It only flips the flag; nothing else is stopped here.
def mark_as_not_running!
@running = false
end
private private
def start_working def start_working
...@@ -43,24 +51,9 @@ module Gitlab ...@@ -43,24 +51,9 @@ module Gitlab
end end
def stop_working def stop_working
@running = false mark_as_not_running!
wait_in_blackout_period if server && thread.alive?
super super
end end
def wait_in_blackout_period
return unless blackout_seconds > 0
@server.logger.info(
message: 'starting blackout...',
duration_s: blackout_seconds)
sleep(blackout_seconds)
end
def blackout_seconds
settings['blackout_seconds'].to_i
end
end end
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
# Controller-level spec for HealthController.
# Covers the `readiness` and `liveness` actions and their access rules:
# requests from a whitelisted IP or carrying a valid token get data,
# anything else gets a 404.
describe HealthController do
include StubENV
let(:token) { Gitlab::CurrentSettings.health_check_access_token }
let(:whitelisted_ip) { '127.0.0.1' }
let(:not_whitelisted_ip) { '127.0.0.2' }
before do
# Only `whitelisted_ip` is allowed to reach the monitoring endpoints
allow(Settings.monitoring).to receive(:ip_whitelist).and_return([whitelisted_ip])
stub_storage_settings({}) # Hide the broken storage
stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'false')
end
describe '#readiness' do
# Shared expectations for any request that is allowed to see readiness data.
# `request_params` can be overridden by the including context (e.g. to pass a token).
shared_context 'endpoint responding with readiness data' do
let(:request_params) { {} }
subject { get :readiness, params: request_params }
it 'responds with readiness checks data' do
subject
expect(json_response['db_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['cache_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['queues_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['shared_state_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['gitaly_check']).to contain_exactly(
{ 'status' => 'ok', 'labels' => { 'shard' => 'default' } })
end
it 'responds with readiness checks data when a failure happens' do
# Force the Redis check to fail; the other checks are left untouched
allow(Gitlab::HealthChecks::Redis::RedisCheck).to receive(:readiness).and_return(
Gitlab::HealthChecks::Result.new('redis_check', false, "check error"))
subject
expect(json_response['cache_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['redis_check']).to contain_exactly(
{ 'status' => 'failed', 'message' => 'check error' })
# A single failing check turns the whole response into a 503
expect(response.status).to eq(503)
expect(response.headers['X-GitLab-Custom-Error']).to eq(1)
end
end
context 'accessed from whitelisted ip' do
before do
allow(Gitlab::RequestContext).to receive(:client_ip).and_return(whitelisted_ip)
end
it_behaves_like 'endpoint responding with readiness data'
end
context 'accessed from not whitelisted ip' do
before do
allow(Gitlab::RequestContext).to receive(:client_ip).and_return(not_whitelisted_ip)
end
it 'responds with resource not found' do
get :readiness
expect(response.status).to eq(404)
end
# A valid token grants access even from a non-whitelisted IP
context 'accessed with valid token' do
context 'token passed in request header' do
before do
request.headers['TOKEN'] = token
end
it_behaves_like 'endpoint responding with readiness data'
end
end
context 'token passed as URL param' do
it_behaves_like 'endpoint responding with readiness data' do
let(:request_params) { { token: token } }
end
end
end
end
describe '#liveness' do
# Shared expectations for any request that is allowed to see liveness data
shared_context 'endpoint responding with liveness data' do
subject { get :liveness }
it 'responds with liveness checks data' do
subject
expect(json_response).to eq('status' => 'ok')
end
end
context 'accessed from whitelisted ip' do
before do
allow(Gitlab::RequestContext).to receive(:client_ip).and_return(whitelisted_ip)
end
it_behaves_like 'endpoint responding with liveness data'
end
context 'accessed from not whitelisted ip' do
before do
allow(Gitlab::RequestContext).to receive(:client_ip).and_return(not_whitelisted_ip)
end
it 'responds with resource not found' do
get :liveness
expect(response.status).to eq(404)
end
# A valid token grants access even from a non-whitelisted IP
context 'accessed with valid token' do
context 'token passed in request header' do
before do
request.headers['TOKEN'] = token
end
it_behaves_like 'endpoint responding with liveness data'
end
context 'token passed as URL param' do
it_behaves_like 'endpoint responding with liveness data' do
subject { get :liveness, params: { token: token } }
end
end
end
end
end
end
...@@ -77,7 +77,7 @@ describe Gitlab::Cluster::Mixins::PumaCluster do ...@@ -77,7 +77,7 @@ describe Gitlab::Cluster::Mixins::PumaCluster do
mutex = Mutex.new mutex = Mutex.new
Gitlab::Cluster::LifecycleEvents.on_before_graceful_shutdown do Gitlab::Cluster::LifecycleEvents.on_before_blackout_period do
mutex.synchronize do mutex.synchronize do
exit(140) exit(140)
end end
......
...@@ -75,7 +75,7 @@ describe Gitlab::Cluster::Mixins::UnicornHttpServer do ...@@ -75,7 +75,7 @@ describe Gitlab::Cluster::Mixins::UnicornHttpServer do
mutex = Mutex.new mutex = Mutex.new
Gitlab::Cluster::LifecycleEvents.on_before_graceful_shutdown do Gitlab::Cluster::LifecycleEvents.on_before_blackout_period do
mutex.synchronize do mutex.synchronize do
exit(140) exit(140)
end end
......
# frozen_string_literal: true

require 'spec_helper'
require_relative './simple_check_shared'

# Exercises MasterCheck across a real fork: the parent plays the master,
# the forked child plays the worker and reports the check result through
# its exit status.
describe Gitlab::HealthChecks::MasterCheck do
  let(:result_class) { Gitlab::HealthChecks::Result }

  before do
    # Exit codes used by the forked worker to report the readiness result.
    # They are stubbed rather than assigned inside the `describe` block,
    # so they do not leak as global constants into other specs.
    stub_const('SUCCESS_CODE', 100)
    stub_const('FAILURE_CODE', 101)

    described_class.register_master
  end

  after do
    described_class.finish_master
  end

  describe '#readiness' do
    context 'when master is running' do
      it 'worker does return success' do
        _, child_status = run_worker

        expect(child_status.exitstatus).to eq(SUCCESS_CODE)
      end
    end

    context 'when master finishes early' do
      before do
        # Closing the write end in the parent before forking makes
        # the worker see a closed pipe
        described_class.send(:close_write)
      end

      it 'worker does return failure' do
        _, child_status = run_worker

        expect(child_status.exitstatus).to eq(FAILURE_CODE)
      end
    end

    # Forks a worker, runs the readiness check in it and waits for it.
    # Returns the `[pid, status]` pair from `Process.wait2`.
    def run_worker
      pid = fork do
        described_class.register_worker

        exit(described_class.readiness.success ? SUCCESS_CODE : FAILURE_CODE)
      end

      Process.wait2(pid)
    end
  end
end
...@@ -4,61 +4,41 @@ require 'spec_helper' ...@@ -4,61 +4,41 @@ require 'spec_helper'
describe Gitlab::Metrics::Exporter::WebExporter do describe Gitlab::Metrics::Exporter::WebExporter do
let(:exporter) { described_class.new } let(:exporter) { described_class.new }
let(:readiness_probe) { exporter.send(:readiness_probe).execute }
context 'when blackout seconds is used' do
let(:blackout_seconds) { 0 } before do
let(:readiness_probe) { exporter.send(:readiness_probe).execute } stub_config(
monitoring: {
before do web_exporter: {
stub_config( enabled: true,
monitoring: { port: 0,
web_exporter: { address: '127.0.0.1'
enabled: true,
port: 0,
address: '127.0.0.1',
blackout_seconds: blackout_seconds
}
} }
) }
)
exporter.start
end
after do
exporter.stop
end
context 'when running server' do exporter.start
it 'readiness probe returns succesful status' do end
expect(readiness_probe.http_status).to eq(200)
expect(readiness_probe.json).to include(status: 'ok')
expect(readiness_probe.json).to include('web_exporter' => [{ 'status': 'ok' }])
end
end
context 'when blackout seconds is 10s' do
let(:blackout_seconds) { 10 }
it 'readiness probe returns a failure status' do after do
# during sleep we check the status of readiness probe exporter.stop
expect(exporter).to receive(:sleep).with(10) do end
expect(readiness_probe.http_status).to eq(503)
expect(readiness_probe.json).to include(status: 'failed')
expect(readiness_probe.json).to include('web_exporter' => [{ 'status': 'failed' }])
end
exporter.stop context 'when running server' do
end it 'readiness probe returns succesful status' do
expect(readiness_probe.http_status).to eq(200)
expect(readiness_probe.json).to include(status: 'ok')
expect(readiness_probe.json).to include('web_exporter' => [{ 'status': 'ok' }])
end end
end
context 'when blackout is disabled' do describe '#mark_as_not_running!' do
let(:blackout_seconds) { 0 } it 'readiness probe returns a failure status' do
exporter.mark_as_not_running!
it 'readiness probe returns a failure status' do
expect(exporter).not_to receive(:sleep)
exporter.stop expect(readiness_probe.http_status).to eq(503)
end expect(readiness_probe.json).to include(status: 'failed')
expect(readiness_probe.json).to include('web_exporter' => [{ 'status': 'failed' }])
end end
end end
end end
# frozen_string_literal: true
require 'spec_helper'
# Request-level spec for the health endpoints (`/-/health`, `/-/readiness`,
# `/-/liveness`). Besides response bodies and access rules, it asserts
# whether serving each endpoint issues any database query.
describe HealthController do
include StubENV
let(:token) { Gitlab::CurrentSettings.health_check_access_token }
let(:whitelisted_ip) { '1.1.1.1' }
let(:not_whitelisted_ip) { '2.2.2.2' }
# Request params and headers; contexts below override or mutate these
let(:params) { {} }
let(:headers) { {} }
before do
# Only `whitelisted_ip` is allowed to reach the monitoring endpoints
allow(Settings.monitoring).to receive(:ip_whitelist).and_return([whitelisted_ip])
stub_storage_settings({}) # Hide the broken storage
stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'false')
end
# Asserts that performing `subject` issues at least one database query
shared_context 'endpoint querying database' do
it 'does query database' do
control_count = ActiveRecord::QueryRecorder.new { subject }.count
expect(control_count).not_to be_zero
end
end
# Asserts that performing `subject` issues no database query at all
shared_context 'endpoint not querying database' do
it 'does not query database' do
control_count = ActiveRecord::QueryRecorder.new { subject }.count
expect(control_count).to be_zero
end
end
shared_context 'endpoint not found' do
it 'responds with resource not found' do
subject
expect(response.status).to eq(404)
end
end
describe 'GET /-/health' do
subject { get '/-/health', params: params, headers: headers }
shared_context 'endpoint responding with health data' do
it 'responds with health checks data' do
subject
expect(response.status).to eq(200)
expect(response.body).to eq('GitLab OK')
end
end
context 'accessed from whitelisted ip' do
before do
stub_remote_addr(whitelisted_ip)
end
it_behaves_like 'endpoint responding with health data'
it_behaves_like 'endpoint not querying database'
end
context 'accessed from not whitelisted ip' do
before do
stub_remote_addr(not_whitelisted_ip)
end
it_behaves_like 'endpoint not querying database'
it_behaves_like 'endpoint not found'
end
end
describe 'GET /-/readiness' do
subject { get '/-/readiness', params: params, headers: headers }
shared_context 'endpoint responding with readiness data' do
# Default request: only the master check is expected in the response
context 'when requesting instance-checks' do
it 'responds with readiness checks data' do
expect(Gitlab::HealthChecks::MasterCheck).to receive(:check) { true }
subject
expect(json_response).to include({ 'status' => 'ok' })
expect(json_response['master_check']).to contain_exactly({ 'status' => 'ok' })
end
it 'responds with readiness checks data when a failure happens' do
expect(Gitlab::HealthChecks::MasterCheck).to receive(:check) { false }
subject
expect(json_response).to include({ 'status' => 'failed' })
expect(json_response['master_check']).to contain_exactly(
{ 'status' => 'failed', 'message' => 'unexpected Master check result: false' })
expect(response.status).to eq(503)
expect(response.headers['X-GitLab-Custom-Error']).to eq(1)
end
end
# With the `all` param the service checks (DB, Redis, Gitaly, ...) are included
context 'when requesting all checks' do
before do
params.merge!(all: true)
end
it 'responds with readiness checks data' do
subject
expect(json_response['db_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['cache_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['queues_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['shared_state_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['gitaly_check']).to contain_exactly(
{ 'status' => 'ok', 'labels' => { 'shard' => 'default' } })
end
it 'responds with readiness checks data when a failure happens' do
# Force the Redis check to fail; the other checks are left untouched
allow(Gitlab::HealthChecks::Redis::RedisCheck).to receive(:readiness).and_return(
Gitlab::HealthChecks::Result.new('redis_check', false, "check error"))
subject
expect(json_response['cache_check']).to contain_exactly({ 'status' => 'ok' })
expect(json_response['redis_check']).to contain_exactly(
{ 'status' => 'failed', 'message' => 'check error' })
expect(response.status).to eq(503)
expect(response.headers['X-GitLab-Custom-Error']).to eq(1)
end
end
end
context 'accessed from whitelisted ip' do
before do
stub_remote_addr(whitelisted_ip)
end
it_behaves_like 'endpoint not querying database'
it_behaves_like 'endpoint responding with readiness data'
context 'when requesting all checks' do
before do
params.merge!(all: true)
end
it_behaves_like 'endpoint querying database'
end
end
context 'accessed from not whitelisted ip' do
before do
stub_remote_addr(not_whitelisted_ip)
end
it_behaves_like 'endpoint not querying database'
it_behaves_like 'endpoint not found'
end
# No remote address is stubbed here: access relies on the token alone.
# Token-authenticated requests are expected to query the database.
context 'accessed with valid token' do
context 'token passed in request header' do
let(:headers) { { TOKEN: token } }
it_behaves_like 'endpoint responding with readiness data'
it_behaves_like 'endpoint querying database'
end
context 'token passed as URL param' do
let(:params) { { token: token } }
it_behaves_like 'endpoint responding with readiness data'
it_behaves_like 'endpoint querying database'
end
end
end
describe 'GET /-/liveness' do
subject { get '/-/liveness', params: params, headers: headers }
shared_context 'endpoint responding with liveness data' do
it 'responds with liveness checks data' do
subject
expect(json_response).to eq('status' => 'ok')
end
end
context 'accessed from whitelisted ip' do
before do
stub_remote_addr(whitelisted_ip)
end
it_behaves_like 'endpoint not querying database'
it_behaves_like 'endpoint responding with liveness data'
end
context 'accessed from not whitelisted ip' do
before do
stub_remote_addr(not_whitelisted_ip)
end
it_behaves_like 'endpoint not querying database'
it_behaves_like 'endpoint not found'
# A valid token grants access even from a non-whitelisted IP;
# token-authenticated requests are expected to query the database.
context 'accessed with valid token' do
context 'token passed in request header' do
let(:headers) { { TOKEN: token } }
it_behaves_like 'endpoint responding with liveness data'
it_behaves_like 'endpoint querying database'
end
context 'token passed as URL param' do
let(:params) { { token: token } }
it_behaves_like 'endpoint responding with liveness data'
it_behaves_like 'endpoint querying database'
end
end
end
end
# Sets the client address of the request by mutating the shared `headers` hash
def stub_remote_addr(ip)
headers.merge!(REMOTE_ADDR: ip)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment