Commit 679c0048 authored by Robert Speicher

Merge branch 'an-multithreading' into 'master'

Puma in GDK and rack server lifecycle event abstractions

Closes #52762

See merge request gitlab-org/gitlab-ce!22372
parents 08d946e2 1065f8ce
@@ -40,6 +40,7 @@ eslint-report.html
/config/redis.queues.yml
/config/redis.shared_state.yml
/config/unicorn.rb
/config/puma.rb
/config/secrets.yml
/config/sidekiq.yml
/config/registry.key
@@ -153,6 +153,11 @@ group :unicorn do
gem 'unicorn-worker-killer', '~> 0.4.4'
end
group :puma do
gem 'puma', '~> 3.12', require: false
gem 'puma_worker_killer', require: false
end
# State machine
gem 'state_machines-activerecord', '~> 0.5.1'
@@ -598,6 +598,10 @@ GEM
pry-rails (0.3.6)
pry (>= 0.10.4)
public_suffix (3.0.3)
puma (3.12.0)
puma_worker_killer (0.1.0)
get_process_mem (~> 0.2)
puma (>= 2.7, < 4)
pyu-ruby-sasl (0.0.3.3)
rack (1.6.10)
rack-accept (0.4.5)
@@ -1076,6 +1080,8 @@ DEPENDENCIES
prometheus-client-mmap (~> 0.9.4)
pry-byebug (~> 3.4.1)
pry-rails (~> 0.3.4)
puma (~> 3.12)
puma_worker_killer
rack-attack (~> 4.4.1)
rack-cors (~> 1.0.0)
rack-oauth2 (~> 1.2.1)
@@ -602,6 +602,10 @@ GEM
pry-rails (0.3.6)
pry (>= 0.10.4)
public_suffix (3.0.3)
puma (3.12.0)
puma_worker_killer (0.1.0)
get_process_mem (~> 0.2)
puma (>= 2.7, < 4)
pyu-ruby-sasl (0.0.3.3)
rack (2.0.5)
rack-accept (0.4.5)
@@ -1085,6 +1089,8 @@ DEPENDENCIES
prometheus-client-mmap (~> 0.9.4)
pry-byebug (~> 3.4.1)
pry-rails (~> 0.3.4)
puma (~> 3.12)
puma_worker_killer
rack-attack (~> 4.4.1)
rack-cors (~> 1.0.0)
rack-oauth2 (~> 1.2.1)
@@ -3,6 +3,11 @@
cd $(dirname $0)/..
app_root=$(pwd)
# Switch to experimental PUMA configuration
if [ -n "${EXPERIMENTAL_PUMA}" ]; then
exec bin/web_puma "$@"
fi
unicorn_pidfile="$app_root/tmp/pids/unicorn.pid"
unicorn_config="$app_root/config/unicorn.rb"
unicorn_cmd="bundle exec unicorn_rails -c $unicorn_config -E $RAILS_ENV"
#!/bin/sh
set -e
cd $(dirname $0)/..
app_root=$(pwd)
puma_pidfile="$app_root/tmp/pids/puma.pid"
puma_config="$app_root/config/puma.rb"
spawn_puma()
{
exec bundle exec puma --config "${puma_config}" "$@"
}
get_puma_pid()
{
pid=$(cat "${puma_pidfile}")
if [ -z "$pid" ] ; then
echo "Could not find a PID in $puma_pidfile"
exit 1
fi
echo "${pid}"
}
start()
{
spawn_puma -d
}
start_foreground()
{
spawn_puma
}
stop()
{
get_puma_pid
kill -QUIT "$(get_puma_pid)"
}
reload()
{
kill -USR2 "$(get_puma_pid)"
}
case "$1" in
start)
start
;;
start_foreground)
start_foreground
;;
stop)
stop
;;
reload)
reload
;;
*)
echo "Usage: RAILS_ENV=your_env $0 {start|stop|reload}"
;;
esac
---
title: Experimental support for running the Puma multithreaded web server
merge_request: 22372
author:
type: performance
@@ -26,9 +26,25 @@ Sidekiq.configure_server do |config|
end
if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
unless Sidekiq.server?
Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
Gitlab::Cluster::LifecycleEvents.on_worker_start do
defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change
unless Sidekiq.server?
Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
end
Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start
end
end
Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start
Gitlab::Cluster::LifecycleEvents.on_master_restart do
# The following is necessary to ensure stale Prometheus metrics don't
# accumulate over time. It needs to be done in this hook as opposed to
# inside an init script to ensure metrics files aren't deleted after new
# unicorn workers start after a SIGUSR2 is received.
prometheus_multiproc_dir = ENV['prometheus_multiproc_dir']
if prometheus_multiproc_dir
old_metrics = Dir[File.join(prometheus_multiproc_dir, '*.db')]
FileUtils.rm_rf(old_metrics)
end
end
@@ -158,7 +158,9 @@ if Gitlab::Metrics.enabled? && !Rails.env.test?
GC::Profiler.enable
Gitlab::Metrics::Samplers::InfluxSampler.initialize_instance.start
Gitlab::Cluster::LifecycleEvents.on_worker_start do
Gitlab::Metrics::Samplers::InfluxSampler.initialize_instance.start
end
module TrackNewRedisConnections
def connect(*args)
# frozen_string_literal: true
# Don't handle sidekiq configuration as it
# has its own special active record configuration here
if defined?(ActiveRecord::Base) && !Sidekiq.server?
Gitlab::Cluster::LifecycleEvents.on_worker_start do
ActiveSupport.on_load(:active_record) do
ActiveRecord::Base.establish_connection
Rails.logger.debug("ActiveRecord connection established")
end
end
end
if defined?(ActiveRecord::Base)
Gitlab::Cluster::LifecycleEvents.on_before_fork do
# the following is highly recommended for Rails + "preload_app true"
# as there's no need for the master process to hold a connection
ActiveRecord::Base.connection.disconnect!
Rails.logger.debug("ActiveRecord connection disconnected")
end
end
# frozen_string_literal: true
if /darwin/ =~ RUBY_PLATFORM
Gitlab::Cluster::LifecycleEvents.on_before_fork do
require 'fiddle'
# Dynamically load Foundation.framework, ~implicitly~ initialising
# the Objective-C runtime before any forking happens in Unicorn
#
# From https://bugs.ruby-lang.org/issues/14009
Fiddle.dlopen '/System/Library/Frameworks/Foundation.framework/Foundation'
end
end
# frozen_string_literal: true
if ENV['ENABLE_RBTRACE']
Gitlab::Cluster::LifecycleEvents.on_worker_start do
# Unicorn clears out signals before it forks, so rbtrace won't work
# unless it is enabled after the fork.
require 'rbtrace'
end
end
@@ -14,8 +14,6 @@ Sidekiq.default_worker_options = { retry: 3 }
enable_json_logs = Gitlab.config.sidekiq.log_format == 'json'
Sidekiq.configure_server do |config|
require 'rbtrace' if ENV['ENABLE_RBTRACE']
config.redis = queues_config_hash
config.server_middleware do |chain|
# frozen_string_literal: true
# -----------------------------------------------------------------------
# This file is used by the GDK to generate a default config/puma.rb file
# Note that `/home/git` will be substituted for the actual GDK root
# directory when this file is generated
# -----------------------------------------------------------------------
# Load "path" as a rackup file.
#
# The default is "config.ru".
#
rackup 'config.ru'
pidfile '/home/git/gitlab/tmp/pids/puma.pid'
state_path '/home/git/gitlab/tmp/pids/puma.state'
stdout_redirect '/home/git/gitlab/log/puma.stdout.log',
'/home/git/gitlab/log/puma.stderr.log',
true
# Configure "min" to be the minimum number of threads to use to answer
# requests and "max" the maximum.
#
# The default is "0, 16".
#
threads 1, 4
# By default, workers accept all requests and queue them to pass to handlers.
# When false, workers accept the number of simultaneous requests configured.
#
# Queueing requests generally improves performance, but can cause deadlocks if
# the app is waiting on a request to itself. See https://github.com/puma/puma/issues/612
#
# When set to false this may require a reverse proxy to handle slow clients and
# queue requests before they reach puma. This is due to disabling HTTP keepalive
queue_requests false
# Bind the server to "url". "tcp://", "unix://" and "ssl://" are the only
# accepted protocols.
bind 'unix:///home/git/gitlab.socket'
workers 2
require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events"
require_relative "/home/git/gitlab/lib/gitlab/cluster/puma_worker_killer_initializer"
on_restart do
# Signal application hooks that we're about to restart
Gitlab::Cluster::LifecycleEvents.do_master_restart
end
before_fork do
# Signal to the puma killer
Gitlab::Cluster::PumaWorkerKillerInitializer.start @config.options unless ENV['DISABLE_PUMA_WORKER_KILLER']
# Signal application hooks that we're about to fork
Gitlab::Cluster::LifecycleEvents.do_before_fork
end
Gitlab::Cluster::LifecycleEvents.set_puma_options @config.options
on_worker_boot do
# Signal application hooks of worker start
Gitlab::Cluster::LifecycleEvents.do_worker_start
end
# Preload the application before starting the workers; this conflicts with
# phased restart feature. (off by default)
preload_app!
tag 'gitlab-puma-worker'
# Verifies that all workers have checked in to the master process within
# the given timeout. If not the worker process will be restarted. Default
# value is 60 seconds.
#
worker_timeout 60
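
Worth noting: the `set_puma_options @config.options` call above is how the lifecycle abstraction tells single-mode Puma apart from a cluster. The options hash carries the worker count, which `in_clustered_puma?` in the LifecycleEvents class further down inspects. A minimal sketch of that interaction (illustrative, not part of the commit):

# With `workers 2` in the config above, Puma's DSL records workers: 2
# in @config.options, so after this call the class treats the server
# as a forking (clustered) environment:
Gitlab::Cluster::LifecycleEvents.set_puma_options(workers: 2)
# This block is therefore deferred and runs once inside each worker
# after fork, rather than immediately at registration time:
Gitlab::Cluster::LifecycleEvents.on_worker_start { puts "worker #{Process.pid} up" }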
@@ -81,22 +81,16 @@ preload_app true
# fast LAN.
check_client_connection false
require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events"
before_exec do |server|
# The following is necessary to ensure stale Prometheus metrics don't
# accumulate over time. It needs to be done in this hook as opposed to
# inside an init script to ensure metrics files aren't deleted after new
# unicorn workers start after a SIGUSR2 is received.
if ENV['prometheus_multiproc_dir']
old_metrics = Dir[File.join(ENV['prometheus_multiproc_dir'], '*.db')]
FileUtils.rm_rf(old_metrics)
end
# Signal application hooks that we're about to restart
Gitlab::Cluster::LifecycleEvents.do_master_restart
end
before_fork do |server, worker|
# the following is highly recommended for Rails + "preload_app true"
# as there's no need for the master process to hold a connection
defined?(ActiveRecord::Base) &&
ActiveRecord::Base.connection.disconnect!
# Signal application hooks that we're about to fork
Gitlab::Cluster::LifecycleEvents.do_before_fork
# The following is only recommended for memory/DB-constrained
# installations. It is not needed if your system can house
@@ -124,25 +118,10 @@ before_fork do |server, worker|
end
after_fork do |server, worker|
# Unicorn clears out signals before it forks, so rbtrace won't work
# unless it is enabled after the fork.
require 'rbtrace' if ENV['ENABLE_RBTRACE']
# Signal application hooks of worker start
Gitlab::Cluster::LifecycleEvents.do_worker_start
# per-process listener ports for debugging/admin/migrations
# addr = "127.0.0.1:#{9293 + worker.nr}"
# server.listen(addr, :tries => -1, :delay => 5, :tcp_nopush => true)
# the following is *required* for Rails + "preload_app true",
defined?(ActiveRecord::Base) &&
ActiveRecord::Base.establish_connection
# reset prometheus client, this will cause any opened metrics files to be closed
defined?(::Prometheus::Client.reinitialize_on_pid_change) &&
Prometheus::Client.reinitialize_on_pid_change
# if preload_app is true, then you may also want to check and
# restart any other shared sockets/descriptors such as Memcached,
# and Redis. TokyoCabinet file handles are safe to reuse
# between any number of forked children (assuming your kernel
# correctly implements pread()/pwrite() system calls)
end
# frozen_string_literal: true
# -------------------------------------------------------------------------
# This file is used by the GDK to generate a default config/unicorn.rb file
# Note that `/home/git` will be substituted for the actual GDK root
# directory when this file is generated
# -------------------------------------------------------------------------
worker_processes 2
timeout 60
listen '/home/git/gitlab.socket'
preload_app true
check_client_connection false
require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events"
before_exec do |server|
# Signal application hooks that we're about to restart
Gitlab::Cluster::LifecycleEvents.do_master_restart
end
before_fork do |server, worker|
# the following is highly recommended for Rails + "preload_app true"
# as there's no need for the master process to hold a connection
defined?(ActiveRecord::Base) &&
ActiveRecord::Base.connection.disconnect!
if /darwin/ =~ RUBY_PLATFORM
require 'fiddle'
# Dynamically load Foundation.framework, ~implicitly~ initialising
# the Objective-C runtime before any forking happens in Unicorn
#
# From https://bugs.ruby-lang.org/issues/14009
Fiddle.dlopen '/System/Library/Frameworks/Foundation.framework/Foundation'
# Signal application hooks that we're about to fork
Gitlab::Cluster::LifecycleEvents.do_before_fork
# The following is only recommended for memory/DB-constrained
# installations. It is not needed if your system can house
# twice as many worker_processes as you have configured.
#
# This allows a new master process to incrementally
# phase out the old master process with SIGTTOU to avoid a
# thundering herd (especially in the "preload_app false" case)
# when doing a transparent upgrade. The last worker spawned
# will then kill off the old master process with a SIGQUIT.
old_pid = "#{server.config[:pid]}.oldbin"
if old_pid != server.pid
begin
sig = (worker.nr + 1) >= server.worker_processes ? :QUIT : :TTOU
Process.kill(sig, File.read(old_pid).to_i)
rescue Errno::ENOENT, Errno::ESRCH
end
end
#
# Throttle the master from forking too quickly by sleeping. Due
# to the implementation of standard Unix signal handlers, this
# helps (but does not completely) prevent identical, repeated signals
# from being lost when the receiving process is busy.
# sleep 1
end
after_fork do |server, worker|
# Unicorn clears out signals before it forks, so rbtrace won't work
# unless it is enabled after the fork.
require 'rbtrace' if ENV['ENABLE_RBTRACE']
# Signal application hooks of worker start
Gitlab::Cluster::LifecycleEvents.do_worker_start
# the following is *required* for Rails + "preload_app true",
defined?(ActiveRecord::Base) &&
ActiveRecord::Base.establish_connection
# per-process listener ports for debugging/admin/migrations
# addr = "127.0.0.1:#{9293 + worker.nr}"
# server.listen(addr, :tries => -1, :delay => 5, :tcp_nopush => true)
end
This diff is collapsed.
# frozen_string_literal: true
module Gitlab
module Cluster
#
# LifecycleEvents lets Rails initializers register application startup hooks
# that are sensitive to forking. For example, to defer the creation of
# watchdog threads. This lets us abstract away the Unix process
# lifecycles of Unicorn, Sidekiq, Puma, Puma Cluster, etc.
#
# We have three lifecycle events.
#
# - before_fork (only in forking processes)
# - worker_start
# - master_restart (only in forking processes; fires before the master restarts)
#
# Blocks will be executed in the order in which they are registered.
#
class LifecycleEvents
class << self
#
# Hook registration methods (called from initializers)
#
def on_worker_start(&block)
if in_clustered_environment?
# Defer block execution
(@worker_start_hooks ||= []) << block
else
yield
end
end
def on_before_fork(&block)
return unless in_clustered_environment?
# Defer block execution
(@before_fork_hooks ||= []) << block
end
def on_master_restart(&block)
return unless in_clustered_environment?
# Defer block execution
(@master_restart_hooks ||= []) << block
end
#
# Lifecycle integration methods (called from unicorn.rb, puma.rb, etc.)
#
def do_worker_start
@worker_start_hooks&.each do |block|
block.call
end
end
def do_before_fork
@before_fork_hooks&.each do |block|
block.call
end
end
def do_master_restart
@master_restart_hooks&.each do |block|
block.call
end
end
# Puma doesn't use singletons (which is good) but
# this means we need to pass through whether the
# puma server is running in single mode or cluster mode
def set_puma_options(options)
@puma_options = options
end
private
def in_clustered_environment?
# Sidekiq doesn't fork
return false if Sidekiq.server?
# Unicorn always forks
return true if defined?(::Unicorn)
# Puma sometimes forks
return true if in_clustered_puma?
# Default assumption is that we don't fork
false
end
def in_clustered_puma?
return false unless defined?(::Puma)
@puma_options && @puma_options[:workers] && @puma_options[:workers] > 0
end
end
end
end
end
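
Taken together, a typical consumer of this class is an initializer that must not start threads or hold sockets in the pre-fork master. A short usage sketch (the `Watchdog` class is hypothetical, purely for illustration):

# Hypothetical initializer built on the hooks above.
Gitlab::Cluster::LifecycleEvents.on_before_fork do
  Watchdog.stop   # nothing thread- or socket-bound should survive into the fork
end

Gitlab::Cluster::LifecycleEvents.on_worker_start do
  Watchdog.start  # runs immediately when not forking, once per worker otherwise
end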
# frozen_string_literal: true
module Gitlab
module Cluster
class PumaWorkerKillerInitializer
def self.start(puma_options, puma_per_worker_max_memory_mb: 650)
require 'puma_worker_killer'
PumaWorkerKiller.config do |config|
# Note! ram is expressed in megabytes (whereas GITLAB_UNICORN_MEMORY_MAX is in bytes)
# Importantly, RAM is for _all_ workers (i.e., the cluster),
# not each worker, as is the case with GITLAB_UNICORN_MEMORY_MAX
worker_count = puma_options[:workers] || 1
config.ram = worker_count * puma_per_worker_max_memory_mb
config.frequency = 20 # seconds
# We just want to limit to a fixed maximum, unrelated to the total amount
# of available RAM.
config.percent_usage = 0.98
# Ideally we'll never hit the maximum amount of memory. If so the worker
# is restarted already, thus periodically restarting workers shouldn't be
# needed.
config.rolling_restart_frequency = false
end
PumaWorkerKiller.start
end
end
end
end
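
As a worked example of the arithmetic above, assuming the GDK default of two workers (sketch only):

worker_count = 2
cluster_ram  = worker_count * 650   # => 1300 MB budget for the whole cluster
trigger_at   = cluster_ram * 0.98   # => 1274.0 MB; reaping starts around here

So unlike GITLAB_UNICORN_MEMORY_MAX, which caps each Unicorn worker individually, the Puma killer watches the cluster's combined memory against one shared budget.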
# frozen_string_literal: true
app = proc do |env|
if env['REQUEST_METHOD'] == 'GET'
[200, {}, ["#{Process.pid}"]]
else
Process.kill(env['QUERY_STRING'], Process.pid)
[200, {}, ['Bye!']]
end
end
run app
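
For context, the spec further down drives this tiny app over a Unix socket: a GET returns the worker's PID, and any other verb forwards the query string to `Process.kill` so the worker signals itself. Roughly (Excon, as used in that spec):

response = Excon.get('unix://', socket: socket_path)  # body is the worker PID
Excon.post('unix://?SIGTERM', socket: socket_path)    # worker sends itself SIGTERM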
# frozen_string_literal: true
# Note: this file is used for testing puma in `spec/rack_servers/puma_spec.rb` only
# Note: as per the convention in `config/puma.example.development.rb`,
# this file will replace `/home/git` with the actual working directory
directory '/home/git'
threads 1, 10
queue_requests false
pidfile '/home/git/gitlab/tmp/pids/puma.pid'
bind 'unix:///home/git/gitlab/tmp/tests/puma.socket'
workers 1
preload_app!
worker_timeout 60
require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events"
require_relative "/home/git/gitlab/lib/gitlab/cluster/puma_worker_killer_initializer"
before_fork do
Gitlab::Cluster::PumaWorkerKillerInitializer.start @config.options
Gitlab::Cluster::LifecycleEvents.do_before_fork
end
Gitlab::Cluster::LifecycleEvents.set_puma_options @config.options
on_worker_boot do
Gitlab::Cluster::LifecycleEvents.do_worker_start
File.write('/home/git/gitlab/tmp/tests/puma-worker-ready', Process.pid)
end
on_restart do
Gitlab::Cluster::LifecycleEvents.do_master_restart
end
# frozen_string_literal: true
require 'fileutils'
require 'excon'
require 'spec_helper'
describe 'Puma' do
before(:all) do
project_root = File.expand_path('../..', __dir__)
config_lines = File.read('spec/rack_servers/configs/puma.rb')
.gsub('/home/git/gitlab', project_root)
.gsub('/home/git', project_root)
config_path = File.join(project_root, "tmp/tests/puma.rb")
@socket_path = File.join(project_root, 'tmp/tests/puma.socket')
File.write(config_path, config_lines)
cmd = %W[puma -e test -C #{config_path} #{File.join(__dir__, 'configs/config.ru')}]
@puma_master_pid = spawn(*cmd)
wait_puma_boot!(@puma_master_pid, File.join(project_root, 'tmp/tests/puma-worker-ready'))
WebMock.allow_net_connect!
end
%w[SIGQUIT SIGTERM SIGKILL].each do |signal|
it "has a worker that self-terminates on signal #{signal}" do
response = Excon.get('unix://', socket: @socket_path)
expect(response.status).to eq(200)
worker_pid = response.body.to_i
expect(worker_pid).to be > 0
begin
Excon.post("unix://?#{signal}", socket: @socket_path)
rescue Excon::Error::Socket
# The connection may be closed abruptly
end
expect(pid_gone?(worker_pid)).to eq(true)
end
end
after(:all) do
begin
WebMock.disable_net_connect!(allow_localhost: true)
Process.kill('TERM', @puma_master_pid)
rescue Errno::ESRCH
end
end
def wait_puma_boot!(master_pid, ready_file)
# We have seen the boot timeout after 2 minutes in CI so let's set it to 5 minutes.
timeout = 5 * 60
timeout.times do
return if File.exist?(ready_file)
pid = Process.waitpid(master_pid, Process::WNOHANG)
raise "puma failed to boot: #{$?}" unless pid.nil?
sleep 1
end
raise "puma boot timed out after #{timeout} seconds"
end
def pid_gone?(pid)
# Worker termination should take less than a second. That makes 10
# seconds a generous timeout.
10.times do
begin
Process.kill(0, pid)
rescue Errno::ESRCH
return true
end
sleep 1
end
false
end
end
# frozen_string_literal: true
require 'fileutils'
require 'excon'
@@ -6,12 +8,16 @@ require 'spec_helper'
describe 'Unicorn' do
before(:all) do
config_lines = File.read('config/unicorn.rb.example').split("\n")
project_root = File.expand_path('../..', __dir__)
config_lines = File.read('config/unicorn.rb.example')
.gsub('/home/git/gitlab', project_root)
.gsub('/home/git', project_root)
.split("\n")
# Remove these because they make setup harder.
config_lines = config_lines.reject do |line|
%w[
working_directory
worker_processes
listen
pid
@@ -26,33 +32,18 @@ describe 'Unicorn' do
# predictable which process will handle our requests.
config_lines << 'worker_processes 1'
@socket_path = File.join(Dir.pwd, 'tmp/tests/unicorn.socket')
@socket_path = File.join(project_root, 'tmp/tests/unicorn.socket')
config_lines << "listen '#{@socket_path}'"
ready_file = 'tmp/tests/unicorn-worker-ready'
ready_file = File.join(project_root, 'tmp/tests/unicorn-worker-ready')
FileUtils.rm_f(ready_file)
after_fork_index = config_lines.index { |l| l.start_with?('after_fork') }
config_lines.insert(after_fork_index + 1, "File.write('#{ready_file}', Process.pid)")
config_path = 'tmp/tests/unicorn.rb'
config_path = File.join(project_root, 'tmp/tests/unicorn.rb')
File.write(config_path, config_lines.join("\n") + "\n")
rackup_path = 'tmp/tests/config.ru'
File.write(rackup_path, <<~EOS)
app =
proc do |env|
if env['REQUEST_METHOD'] == 'GET'
[200, {}, [Process.pid]]
else
Process.kill(env['QUERY_STRING'], Process.pid)
[200, {}, ['Bye!']]
end
end
run app
EOS
cmd = %W[unicorn -E test -c #{config_path} #{rackup_path}]
cmd = %W[unicorn -E test -c #{config_path} spec/rack_servers/configs/config.ru]
@unicorn_master_pid = spawn(*cmd)
wait_unicorn_boot!(@unicorn_master_pid, ready_file)
WebMock.allow_net_connect!