Added rack-timeout for Puma

It assures that requests are aborted after 60 seconds, otherwise an exception is raised. This exception is logged by Sentry, also there is a Prometheus counter for measuring number of requests in each state.

Added rack-timeout for Puma
It assures that requests are aborted after 60 seconds, otherwise an exception is raised. This exception is logged by Sentry, also there is a Prometheus counter for measuring number of requests in each state.
6d6bae66 · Jan Provaznik · Mayra Cabrera · 70d1537d · 6d6bae66 · 6d6bae66
Commit 6d6bae66 authored May 29, 2019 by Jan Provaznik Committed by Mayra Cabrera May 29, 2019
6 changed files
--- a/Gemfile
+++ b/Gemfile
@@ -154,6 +154,7 @@ end
 group :puma do
  gem 'puma', '~> 3.12', require: false
  gem 'puma_worker_killer', require: false
+  gem 'rack-timeout', require: false
 end
 # State machine

--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -680,6 +680,7 @@ GEM
      rack
    rack-test (1.1.0)
      rack (>= 1.0, < 3)
+    rack-timeout (0.5.1)
    rails (5.1.7)
      actioncable (= 5.1.7)
      actionmailer (= 5.1.7)
@@ -1174,6 +1175,7 @@ DEPENDENCIES
  rack-cors (~> 1.0.0)
  rack-oauth2 (~> 1.9.3)
  rack-proxy (~> 0.6.0)
+  rack-timeout
  rails (= 5.1.7)
  rails-controller-testing
  rails-i18n (~> 5.1)

--- a/config/initializers/rack_timeout.rb
+++ b/config/initializers/rack_timeout.rb
+# frozen_string_literal: true
+# Unicorn terminates any request which runs longer than 60 seconds.
+# Puma doesn't have any timeout mechanism for terminating long-running
+# requests, to make sure that server is not paralyzed by long-running
+# or stuck queries, we add a request timeout which terminates the
+# request after 60 seconds. This may be dangerous in some situations
+# (https://github.com/heroku/rack-timeout/blob/master/doc/exceptions.md)
+# and it's used only as the last resort. In such case this termination is
+# logged and we should fix the potential timeout issue in the code itself.
+if defined?(::Puma) && !Rails.env.test?
+  require 'rack/timeout/base'
+  Gitlab::Application.configure do |config|
+    config.middleware.insert_before(Rack::Runtime, Rack::Timeout,
+                                    service_timeout: 60,
+                                    wait_timeout: 90)
+  end
+  observer = Gitlab::RackTimeoutObserver.new
+  Rack::Timeout.register_state_change_observer(:gitlab_rack_timeout, &observer.callback)
+end
--- a/doc/administration/monitoring/prometheus/gitlab_metrics.md
+++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md
@@ -109,17 +109,17 @@ When Puma is used instead of Unicorn, following metrics are available:
 | Metric                                       | Type    | Since | Description |
 |:-------------------------------------------- |:------- |:----- |:----------- |
-| puma_workers                                 | Gauge   | 12.0 | Total number of workers |
+| puma_workers                                 | Gauge   | 12.0  | Total number of workers |
-| puma_running_workers                         | Gauge   | 12.0 | Number of booted workers |
+| puma_running_workers                         | Gauge   | 12.0  | Number of booted workers |
-| puma_stale_workers                           | Gauge   | 12.0 | Number of old workers |
+| puma_stale_workers                           | Gauge   | 12.0  | Number of old workers |
-| puma_phase                                   | Gauge   | 12.0 | Phase number (increased during phased restarts) |
+| puma_phase                                   | Gauge   | 12.0  | Phase number (increased during phased restarts) |
-| puma_running                                 | Gauge   | 12.0 | Number of running threads |
+| puma_running                                 | Gauge   | 12.0  | Number of running threads |
-| puma_queued_connections                      | Gauge   | 12.0 | Number of connections in that worker's "todo" set waiting for a worker thread |
+| puma_queued_connections                      | Gauge   | 12.0  | Number of connections in that worker's "todo" set waiting for a worker thread |
-| puma_active_connections                      | Gauge   | 12.0 | Number of threads processing a request |
+| puma_active_connections                      | Gauge   | 12.0  | Number of threads processing a request |
-| puma_pool_capacity                           | Gauge   | 12.0 | Number of requests the worker is capable of taking right now |
+| puma_pool_capacity                           | Gauge   | 12.0  | Number of requests the worker is capable of taking right now |
-| puma_max_threads                             | Gauge   | 12.0 | Maximum number of worker threads |
+| puma_max_threads                             | Gauge   | 12.0  | Maximum number of worker threads |
-| puma_idle_threads                            | Gauge   | 12.0 | Number of spawned threads which are not processing a request |
+| puma_idle_threads                            | Gauge   | 12.0  | Number of spawned threads which are not processing a request |
+| rack_state_total                             | Gauge   | 12.0  | Number of requests in a given rack state |
 ## Metrics shared directory

--- a/lib/gitlab/rack_timeout_observer.rb
+++ b/lib/gitlab/rack_timeout_observer.rb
+# frozen_string_literal: true
+module Gitlab
+  class RackTimeoutObserver
+    def initialize
+      @counter = Gitlab::Metrics.counter(:rack_state_total, 'Number of requests in a given rack state')
+    end
+    # returns the Proc to be used as the observer callback block
+    def callback
+      method(:log_timeout_exception)
+    end
+    private
+    def log_timeout_exception(env)
+      info = env[::Rack::Timeout::ENV_INFO_KEY]
+      return unless info
+      @counter.increment(labels(info, env))
+    end
+    def labels(info, env)
+      params = controller_params(env) || grape_params(env) || {}
+      {
+        controller: params['controller'],
+        action: params['action'],
+        route: params['route'],
+        state: info.state
+      }
+    end
+    def controller_params(env)
+      env['action_dispatch.request.parameters']
+    end
+    def grape_params(env)
+      endpoint = env[Grape::Env::API_ENDPOINT]
+      route = endpoint&.route&.pattern&.origin
+      return unless route
+      { 'route' => route }
+    end
+  end
+end
--- a/spec/lib/gitlab/rack_timeout_observer_spec.rb
+++ b/spec/lib/gitlab/rack_timeout_observer_spec.rb
+# frozen_string_literal: true
+require 'spec_helper'
+describe Gitlab::RackTimeoutObserver do
+  let(:counter) { Gitlab::Metrics::NullMetric.instance }
+  before do
+    allow(Gitlab::Metrics).to receive(:counter)
+      .with(any_args)
+      .and_return(counter)
+  end
+  describe '#callback' do
+    context 'when request times out' do
+      let(:env) do
+        {
+          ::Rack::Timeout::ENV_INFO_KEY => double(state: :timed_out),
+          'action_dispatch.request.parameters' => {
+            'controller' => 'foo',
+            'action' => 'bar'
+          }
+        }
+      end
+      subject { described_class.new }
+      it 'increments timeout counter' do
+        expect(counter)
+          .to receive(:increment)
+          .with({ controller: 'foo', action: 'bar', route: nil, state: :timed_out })
+        subject.callback.call(env)
+      end
+    end
+    context 'when request expires' do
+      let(:endpoint) { double }
+      let(:env) do
+        {
+          ::Rack::Timeout::ENV_INFO_KEY => double(state: :expired),
+          Grape::Env::API_ENDPOINT => endpoint
+        }
+      end
+      subject { described_class.new }
+      it 'increments timeout counter' do
+        allow(endpoint).to receive_message_chain('route.pattern.origin') { 'foobar' }
+        expect(counter)
+          .to receive(:increment)
+          .with({ controller: nil, action: nil, route: 'foobar', state: :expired })
+        subject.callback.call(env)
+      end
+    end
+  end
+end