Commit 51c4ba46 authored by Stan Hu

Disable Prometheus metrics if initialization fails

Previously, if the underlying filesystem ran out of space, reads and writes
to mmap() regions would throw an ugly SIGBUS error and crash.

With prometheus-client-mmap v0.10.0,
`Prometheus::Client.reinitialize_on_pid_change` will now throw an
IOError if initialization fails for some reason. If this happens, we
disable internal Prometheus metrics to ensure the system stays up.

Closes https://gitlab.com/gitlab-org/gitlab/issues/24425
parent 649a9ebf
......@@ -327,7 +327,7 @@ group :metrics do
gem 'influxdb', '~> 0.2', require: false
# Prometheus
gem 'prometheus-client-mmap', '~> 0.9.10'
gem 'prometheus-client-mmap', '~> 0.10.0'
gem 'raindrops', '~> 0.18'
end
......
......@@ -749,7 +749,7 @@ GEM
parser
unparser
procto (0.0.3)
prometheus-client-mmap (0.9.10)
prometheus-client-mmap (0.10.0)
pry (0.11.3)
coderay (~> 1.1.0)
method_source (~> 0.9.0)
......@@ -1292,7 +1292,7 @@ DEPENDENCIES
pg (~> 1.1)
png_quantizator (~> 0.2.1)
premailer-rails (~> 1.10.3)
prometheus-client-mmap (~> 0.9.10)
prometheus-client-mmap (~> 0.10.0)
pry-byebug (~> 3.5.1)
pry-rails (~> 0.3.4)
rack (~> 2.0.7)
......
---
title: Disable Prometheus metrics if initialization fails
merge_request: 22355
author:
type: fixed
......@@ -43,6 +43,9 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change
Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start
rescue IOError => e
Gitlab::ErrorTracking.track_exception(e)
Gitlab::Metrics.error_detected!
end
Gitlab::Cluster::LifecycleEvents.on_master_start do
......@@ -55,6 +58,9 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
end
Gitlab::Metrics::RequestsRackMiddleware.initialize_http_request_duration_seconds
rescue IOError => e
Gitlab::ErrorTracking.track_exception(e)
Gitlab::Metrics.error_detected!
end
end
......
......@@ -5,8 +5,14 @@ module Gitlab
include Gitlab::Metrics::InfluxDb
include Gitlab::Metrics::Prometheus
# Error flag, initially false; `self.error?` below returns its current value.
# NOTE(review): presumably flipped to true by `error_detected!` in
# Gitlab::Metrics::Prometheus — confirm both refer to the same ivar.
@error = false
# Returns a truthy value when either metrics backend is enabled.
# The InfluxDB check is evaluated first; only when it is falsy does the
# Prometheus check decide the result.
def self.enabled?
influx_metrics_enabled? || prometheus_metrics_enabled?
end
# Returns the current value of the @error flag.
def self.error?
@error
end
end
end
......@@ -61,6 +61,14 @@ module Gitlab
safe_provide_metric(:histogram, name, docstring, base_labels, buckets)
end
# Records that an error was detected so that Prometheus metrics get disabled.
#
# The flag is written first (under PROVIDER_MUTEX) and the memoized
# `prometheus_metrics_enabled` value is dropped only afterwards. Clearing the
# memoized value before the flag is set leaves a window in which a concurrent
# caller can recompute the value while @error is still false and memoize
# "enabled" again.
#
# Returns true.
def error_detected!
  PROVIDER_MUTEX.synchronize do
    @error = true
  end

  # Force the next lookup to be recomputed now that the flag is visible.
  clear_memoization(:prometheus_metrics_enabled)

  true
end
private
def safe_provide_metric(method, name, *args)
......@@ -81,7 +89,7 @@ module Gitlab
end
# Computes, without memoization, whether Prometheus metrics are enabled.
# The trailing `|| false` turns a nil/false chain result into false.
def prometheus_metrics_enabled_unmemoized
# NOTE(review): the value of this first expression is discarded. It reads like
# the pre-change line of the diff kept next to its replacement below — confirm.
metrics_folder_present? && Gitlab::CurrentSettings.prometheus_metrics_enabled || false
# Effective result: a truthy error? short-circuits the chain, so metrics are
# reported as disabled regardless of the folder check and the setting.
!error? && metrics_folder_present? && Gitlab::CurrentSettings.prometheus_metrics_enabled || false
end
end
end
......
......@@ -17,4 +17,21 @@ describe Gitlab::Metrics::Prometheus, :prometheus do
expect(all_metrics.registry.metrics.count).to eq(0)
end
end
# Verifies that calling error_detected! turns Prometheus metrics off.
describe '#error_detected!' do
before do
# Stub the metrics-folder check and the application setting to true so that
# the baseline expectation below (metrics enabled) holds before the error
# is flagged.
allow(all_metrics).to receive(:metrics_folder_present?).and_return(true)
stub_application_setting(prometheus_metrics_enabled: true)
end
it 'disables Prometheus metrics' do
# Baseline: no error recorded and metrics reported as enabled.
expect(all_metrics.error?).to be_falsey
expect(all_metrics.prometheus_metrics_enabled?).to be_truthy
all_metrics.error_detected!
# After flagging the error: metrics reported as disabled and the flag is set.
expect(all_metrics.prometheus_metrics_enabled?).to be_falsey
expect(all_metrics.error?).to be_truthy
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment