Commit f50f508b authored by Matthias Käppler's avatar Matthias Käppler Committed by James Fargher

Sidekiq: Fork into metrics-server instead of `exec`ing

parent 92b65146
......@@ -3,14 +3,10 @@
require_relative '../metrics_server/metrics_server'
begin
target = ENV['METRICS_SERVER_TARGET']
raise "Required: METRICS_SERVER_TARGET=[sidekiq]" unless target == 'sidekiq'
target = ENV['METRICS_SERVER_TARGET']
raise "METRICS_SERVER_TARGET cannot be blank" if target.blank?
metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/#{target}")
wipe_metrics_dir = Gitlab::Utils.to_boolean(ENV['WIPE_METRICS_DIR']) || false
metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/#{target}")
wipe_metrics_dir = Gitlab::Utils.to_boolean(ENV['WIPE_METRICS_DIR']) || false
# Re-raise exceptions in threads on the main thread.
Thread.abort_on_exception = true
MetricsServer.new(target, metrics_dir, wipe_metrics_dir).start
end
Process.wait(MetricsServer.spawn(target, metrics_dir: metrics_dir, wipe_metrics_dir: wipe_metrics_dir))
......@@ -2,12 +2,6 @@
module Gitlab
module ProcessManagement
# The signals that should terminate both the master and workers.
TERMINATE_SIGNALS = %i(INT TERM).freeze
# The signals that should simply be forwarded to the workers.
FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze
# Traps the given signals and yields the block whenever these signals are
# received.
#
......@@ -26,12 +20,13 @@ module Gitlab
end
end
def self.trap_terminate(&block)
trap_signals(TERMINATE_SIGNALS, &block)
end
def self.trap_forward(&block)
trap_signals(FORWARD_SIGNALS, &block)
# Traps the given signals with the given command.
#
# Example:
#
# modify_signals(%i(HUP TERM), 'DEFAULT')
def self.modify_signals(signals, command)
signals.each { |signal| trap(signal, command) }
end
def self.signal(pid, signal)
......
......@@ -22,5 +22,6 @@ require_relative '../lib/gitlab/metrics/exporter/base_exporter'
require_relative '../lib/gitlab/metrics/exporter/sidekiq_exporter'
require_relative '../lib/gitlab/health_checks/probes/collection'
require_relative '../lib/gitlab/health_checks/probes/status'
require_relative '../lib/gitlab/process_management'
# rubocop:enable Naming/FileName
......@@ -6,17 +6,28 @@ require_relative 'dependencies'
class MetricsServer # rubocop:disable Gitlab/NamespacedClass
class << self
def spawn(target, gitlab_config: nil, wipe_metrics_dir: false)
cmd = "#{Rails.root}/bin/metrics-server"
env = {
'METRICS_SERVER_TARGET' => target,
'GITLAB_CONFIG' => gitlab_config,
'WIPE_METRICS_DIR' => wipe_metrics_dir.to_s
}
Process.spawn(env, cmd, err: $stderr, out: $stdout).tap do |pid|
def spawn(target, metrics_dir:, wipe_metrics_dir: false, trapped_signals: [])
raise "The only valid target is 'sidekiq' currently" unless target == 'sidekiq'
pid = Process.fork
if pid.nil? # nil means we're inside the fork
# Remove any custom signal handlers the parent process had registered, since we do
# not want to inherit them, and Ruby forks with a `clone` that has the `CLONE_SIGHAND`
# flag set.
Gitlab::ProcessManagement.modify_signals(trapped_signals, 'DEFAULT')
server = MetricsServer.new(target, metrics_dir, wipe_metrics_dir)
# This rewrites /proc/cmdline, since otherwise tools like `top` will show the
# parent process `cmdline` which is really confusing.
$0 = server.name
server.start
else
Process.detach(pid)
end
pid
end
end
......@@ -34,10 +45,15 @@ class MetricsServer # rubocop:disable Gitlab/NamespacedClass
FileUtils.mkdir_p(@metrics_dir, mode: 0700)
::Prometheus::CleanupMultiprocDirService.new.execute if @wipe_metrics_dir
settings = Settings.monitoring.sidekiq_exporter
settings = Settings.new(Settings.monitoring[name])
exporter_class = "Gitlab::Metrics::Exporter::#{@target.camelize}Exporter".constantize
server = exporter_class.instance(settings, synchronous: true)
server.start
end
def name
"#{@target}_exporter"
end
end
......@@ -20,6 +20,14 @@ require_relative 'sidekiq_cluster'
module Gitlab
module SidekiqCluster
class CLI
THREAD_NAME = 'supervisor'
# The signals that should terminate both the master and workers.
TERMINATE_SIGNALS = %i(INT TERM).freeze
# The signals that should simply be forwarded to the workers.
FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze
CommandError = Class.new(StandardError)
def initialize(log_output = $stderr)
......@@ -27,6 +35,7 @@ module Gitlab
@max_concurrency = 50
@min_concurrency = 0
@environment = ENV['RAILS_ENV'] || 'development'
@metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/sidekiq")
@pid = nil
@interval = 5
@alive = true
......@@ -39,6 +48,8 @@ module Gitlab
end
def run(argv = ARGV)
Thread.current.name = THREAD_NAME
if argv.empty?
raise CommandError,
'You must specify at least one queue to start a worker for'
......@@ -144,13 +155,13 @@ module Gitlab
end
def trap_signals
ProcessManagement.trap_terminate do |signal|
ProcessManagement.trap_signals(TERMINATE_SIGNALS) do |signal|
@alive = false
ProcessManagement.signal_processes(@processes, signal)
wait_for_termination
end
ProcessManagement.trap_forward do |signal|
ProcessManagement.trap_signals(FORWARD_SIGNALS) do |signal|
ProcessManagement.signal_processes(@processes, signal)
end
end
......@@ -180,7 +191,12 @@ module Gitlab
return unless metrics_server_enabled?
@logger.info("Starting metrics server on port #{sidekiq_exporter_port}")
@metrics_server_pid = MetricsServer.spawn('sidekiq', wipe_metrics_dir: wipe_metrics_dir)
@metrics_server_pid = MetricsServer.spawn(
'sidekiq',
metrics_dir: @metrics_dir,
wipe_metrics_dir: wipe_metrics_dir,
trapped_signals: TERMINATE_SIGNALS + FORWARD_SIGNALS
)
end
def sidekiq_exporter_enabled?
......
......@@ -29,17 +29,27 @@ RSpec.describe 'bin/metrics-server', :aggregate_failures do
config_file.write(YAML.dump(config))
config_file.close
@pid = MetricsServer.spawn('sidekiq', gitlab_config: config_file.path, wipe_metrics_dir: true)
env = {
'GITLAB_CONFIG' => config_file.path,
'METRICS_SERVER_TARGET' => 'sidekiq',
'WIPE_METRICS_DIR' => '1'
}
@pid = Process.spawn(env, 'bin/metrics-server', pgroup: true)
end
after do
webmock_enable!
if @pid
pgrp = Process.getpgid(@pid)
Timeout.timeout(5) do
Process.kill('TERM', @pid)
Process.kill('TERM', -pgrp)
Process.waitpid(@pid)
end
expect(Gitlab::ProcessManagement.process_alive?(@pid)).to be(false)
end
rescue Errno::ESRCH => _
# 'No such process' means the process died before
......
......@@ -258,6 +258,17 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
end
context 'metrics server' do
let(:trapped_signals) { described_class::TERMINATE_SIGNALS + described_class::FORWARD_SIGNALS }
let(:metrics_dir) { Dir.mktmpdir }
before do
stub_env('prometheus_multiproc_dir', metrics_dir)
end
after do
FileUtils.rm_rf(metrics_dir, secure: true)
end
context 'starting the server' do
context 'without --dryrun' do
context 'when there are no sidekiq_health_checks settings set' do
......@@ -342,6 +353,7 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
end
end
context 'with valid settings' do
using RSpec::Parameterized::TableSyntax
where(:sidekiq_exporter_enabled, :sidekiq_exporter_port, :sidekiq_health_checks_port, :start_metrics_server) do
......@@ -361,7 +373,7 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
specify do
if start_metrics_server
expect(MetricsServer).to receive(:spawn).with('sidekiq', wipe_metrics_dir: true)
expect(MetricsServer).to receive(:spawn).with('sidekiq', metrics_dir: metrics_dir, wipe_metrics_dir: true, trapped_signals: trapped_signals)
else
expect(MetricsServer).not_to receive(:spawn)
end
......@@ -370,6 +382,7 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
end
end
end
end
context 'with --dryrun set' do
let(:sidekiq_exporter_enabled) { true }
......@@ -388,7 +401,7 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
before do
allow(cli).to receive(:sleep).with(a_kind_of(Numeric))
allow(MetricsServer).to receive(:spawn).with('sidekiq', wipe_metrics_dir: false).and_return(99)
allow(MetricsServer).to receive(:spawn).and_return(99)
cli.start_metrics_server
end
......@@ -407,7 +420,9 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
allow(Gitlab::ProcessManagement).to receive(:all_alive?).with(an_instance_of(Array)).and_return(false)
allow(cli).to receive(:stop_metrics_server)
expect(MetricsServer).to receive(:spawn).with('sidekiq', wipe_metrics_dir: false)
expect(MetricsServer).to receive(:spawn).with(
'sidekiq', metrics_dir: metrics_dir, wipe_metrics_dir: false, trapped_signals: trapped_signals
)
cli.start_loop
end
......@@ -484,9 +499,9 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
end
describe '#trap_signals' do
it 'traps the termination and forwarding signals' do
expect(Gitlab::ProcessManagement).to receive(:trap_terminate)
expect(Gitlab::ProcessManagement).to receive(:trap_forward)
it 'traps termination and sidekiq specific signals' do
expect(Gitlab::ProcessManagement).to receive(:trap_signals).with(%i[INT TERM])
expect(Gitlab::ProcessManagement).to receive(:trap_signals).with(%i[TTIN USR1 USR2 HUP])
cli.trap_signals
end
......
......@@ -12,21 +12,12 @@ RSpec.describe Gitlab::ProcessManagement do
end
end
describe '.trap_terminate' do
it 'traps the termination signals' do
expect(described_class).to receive(:trap_signals)
.with(described_class::TERMINATE_SIGNALS)
describe '.modify_signals' do
it 'traps the given signals with the given command' do
expect(described_class).to receive(:trap).ordered.with(:INT, 'DEFAULT')
expect(described_class).to receive(:trap).ordered.with(:HUP, 'DEFAULT')
described_class.trap_terminate { }
end
end
describe '.trap_forward' do
it 'traps the signals to forward' do
expect(described_class).to receive(:trap_signals)
.with(described_class::FORWARD_SIGNALS)
described_class.trap_forward { }
described_class.modify_signals(%i(INT HUP), 'DEFAULT')
end
end
......
......@@ -8,52 +8,67 @@ require_relative '../support/helpers/next_instance_of'
RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath
include NextInstanceOf
describe '.spawn' do
let(:env) do
{
'METRICS_SERVER_TARGET' => 'sidekiq',
'GITLAB_CONFIG' => nil,
'WIPE_METRICS_DIR' => 'false'
}
before do
# We do not want this to have knock-on effects on the test process.
allow(Gitlab::ProcessManagement).to receive(:modify_signals)
end
it 'spawns a process with the correct environment variables and detaches it' do
expect(Process).to receive(:spawn).with(env, anything, err: $stderr, out: $stdout).and_return(99)
describe '.spawn' do
context 'when in parent process' do
it 'forks into a new process and detaches it' do
expect(Process).to receive(:fork).and_return(99)
expect(Process).to receive(:detach).with(99)
described_class.spawn('sidekiq')
described_class.spawn('sidekiq', metrics_dir: 'path/to/metrics')
end
end
context 'when in child process' do
before do
# This signals the process that it's "inside" the fork
expect(Process).to receive(:fork).and_return(nil)
expect(Process).not_to receive(:detach)
end
it 'starts the metrics server with the given arguments' do
expect_next_instance_of(MetricsServer) do |server|
expect(server).to receive(:start)
end
described_class.spawn('sidekiq', metrics_dir: 'path/to/metrics')
end
it 'resets signal handlers from parent process' do
expect(Gitlab::ProcessManagement).to receive(:modify_signals).with(%i[A B], 'DEFAULT')
described_class.spawn('sidekiq', metrics_dir: 'path/to/metrics', trapped_signals: %i[A B])
end
end
end
describe '#start' do
let(:exporter_class) { Class.new(Gitlab::Metrics::Exporter::BaseExporter) }
let(:exporter_double) { double('fake_exporter', start: true) }
let(:prometheus_client_double) { double(::Prometheus::Client) }
let(:prometheus_config) { ::Prometheus::Client::Configuration.new }
let(:prometheus_config) { ::Prometheus::Client.configuration }
let(:metrics_dir) { Dir.mktmpdir }
let(:settings_double) { double(:settings, sidekiq_exporter: {}) }
let!(:old_metrics_dir) { ::Prometheus::Client.configuration.multiprocess_files_dir }
let(:settings) { { "fake_exporter" => { "enabled" => true } } }
let!(:old_metrics_dir) { prometheus_config.multiprocess_files_dir }
subject(:metrics_server) { described_class.new('fake', metrics_dir, true)}
before do
stub_env('prometheus_multiproc_dir', metrics_dir)
stub_const('Gitlab::Metrics::Exporter::FakeExporter', exporter_class)
allow(exporter_class).to receive(:instance).with({}, synchronous: true).and_return(exporter_double)
allow(Settings).to receive(:monitoring).and_return(settings_double)
expect(exporter_class).to receive(:instance).with(settings['fake_exporter'], synchronous: true).and_return(exporter_double)
expect(Settings).to receive(:monitoring).and_return(settings)
end
after do
Gitlab::Metrics.reset_registry!
::Prometheus::CleanupMultiprocDirService.new.execute
Dir.rmdir(metrics_dir)
::Prometheus::Client.configuration.multiprocess_files_dir = old_metrics_dir
FileUtils.rm_rf(metrics_dir, secure: true)
prometheus_config.multiprocess_files_dir = old_metrics_dir
end
it 'configures ::Prometheus::Client' do
allow(prometheus_client_double).to receive(:configuration).and_return(prometheus_config)
metrics_server.start
expect(prometheus_config.multiprocess_files_dir).to eq metrics_dir
......@@ -90,12 +105,5 @@ RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath
metrics_server.start
end
it 'sends the correct Settings to the exporter instance' do
expect(Settings).to receive(:monitoring).and_return(settings_double)
expect(settings_double).to receive(:sidekiq_exporter)
metrics_server.start
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment