Add unicorn/puma health checks

These checks validate the number of workers available to process requests. This allows the readiness probe to indicate when requests can be safely sent to the web server.

Add unicorn/puma health checks
These checks validate the number of workers available to process requests. This allows the readiness probe to indicate when requests can be safely sent to the web server.
03cba0fe · Kamil Trzciński · 86bb8213 · 03cba0fe · 03cba0fe · 03cba0fe
Commit 03cba0fe authored Oct 15, 2019 by Kamil Trzciński
10 changed files
--- a/lib/gitlab/health_checks/gitaly_check.rb
+++ b/lib/gitlab/health_checks/gitaly_check.rb
@@ -5,7 +5,7 @@ module Gitlab
    class GitalyCheck
      extend BaseAbstractCheck
-      METRIC_PREFIX = 'gitaly_health_check'
+      METRIC_PREFIX = 'gitaly_health_check'.freeze
      class << self
        def readiness

--- a/lib/gitlab/health_checks/probes/readiness.rb
+++ b/lib/gitlab/health_checks/probes/readiness.rb
@@ -6,10 +6,10 @@ module Gitlab
      class Readiness
        attr_reader :checks
-        # This accepts an array of Proc
+        # This accepts an array of objects implementing `:readiness`
        # that returns `::Gitlab::HealthChecks::Result`
        def initialize(*additional_checks)
-          @checks = ::Gitlab::HealthChecks::CHECKS.map { |check| check.method(:readiness) }
+          @checks = ::Gitlab::HealthChecks::CHECKS
          @checks += additional_checks
        end
@@ -43,7 +43,7 @@ module Gitlab
        def probe_readiness
          checks
-            .flat_map(&:call)
+            .flat_map(&:readiness)
            .compact
            .group_by(&:name)
        end

--- a/lib/gitlab/health_checks/puma_check.rb
+++ b/lib/gitlab/health_checks/puma_check.rb
+# frozen_string_literal: true
+module Gitlab
+  module HealthChecks
+    # This check can only be run on Puma `master` process
+    class PumaCheck
+      extend SimpleAbstractCheck
+      class << self
+        private
+        def metric_prefix
+          'puma_check'
+        end
+        def successful?(result)
+          result > 0
+        end
+        def check
+          return unless defined?(::Puma)
+          stats = Puma.stats
+          stats = JSON.parse(stats)
+          # If `workers` is missing this means that
+          # Puma server is running in single mode
+          stats.fetch('workers', 1)
+        rescue NoMethodError
+          # server is not ready
+          0
+        end
+      end
+    end
+  end
+end
--- a/lib/gitlab/health_checks/simple_abstract_check.rb
+++ b/lib/gitlab/health_checks/simple_abstract_check.rb
@@ -7,6 +7,8 @@ module Gitlab
      def readiness
        check_result = check
+        return if check_result.nil?
        if successful?(check_result)
          HealthChecks::Result.new(name, true)
        elsif check_result.is_a?(Timeout::Error)
@@ -20,6 +22,8 @@ module Gitlab
      def metrics
        result, elapsed = with_timing(&method(:check))
+        return if result.nil?
        Rails.logger.error("#{human_name} check returned unexpected result #{result}") unless successful?(result) # rubocop:disable Gitlab/RailsLogger
        [
          metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0),

--- a/lib/gitlab/health_checks/unicorn_check.rb
+++ b/lib/gitlab/health_checks/unicorn_check.rb
+# frozen_string_literal: true
+module Gitlab
+  module HealthChecks
+    # This check can only be run on Unicorn `master` process
+    class UnicornCheck
+      extend SimpleAbstractCheck
+      class << self
+        include Gitlab::Utils::StrongMemoize
+        private
+        def metric_prefix
+          'unicorn_check'
+        end
+        def successful?(result)
+          result > 0
+        end
+        def check
+          return unless http_servers
+          http_servers.sum(&:worker_processes) # rubocop: disable CodeReuse/ActiveRecord
+        end
+        # Traversal of ObjectSpace is expensive: on a fully loaded application
+        # it takes around 80ms. The set of HttpServer instances is not subject
+        # to change, so we can cache the list of servers.
+        def http_servers
+          strong_memoize(:http_servers) do
+            next unless defined?(::Unicorn::HttpServer)
+            ObjectSpace.each_object(::Unicorn::HttpServer).to_a
+          end
+        end
+      end
+    end
+  end
+end
--- a/lib/gitlab/metrics/exporter/base_exporter.rb
+++ b/lib/gitlab/metrics/exporter/base_exporter.rb
@@ -6,6 +6,8 @@ module Gitlab
      class BaseExporter < Daemon
        attr_reader :server
+        attr_accessor :additional_checks
        def enabled?
          settings.enabled
        end
@@ -32,12 +34,10 @@ module Gitlab
            Port: settings.port, BindAddress: settings.address,
            Logger: logger, AccessLog: access_log)
          server.mount_proc '/readiness' do |req, res|
-            render_probe(
+            render_probe(readiness_probe, req, res)
-              ::Gitlab::HealthChecks::Probes::Readiness.new, req, res)
          end
          server.mount_proc '/liveness' do |req, res|
-            render_probe(
+            render_probe(liveness_probe, req, res)
-              ::Gitlab::HealthChecks::Probes::Liveness.new, req, res)
          end
          server.mount '/', Rack::Handler::WEBrick, rack_app
          server.start
@@ -45,8 +45,10 @@ module Gitlab
        def stop_working
          if server
+            # We close the sockets only if the thread is no longer running;
+            # this happens when the process forks.
+            server.listeners.each(&:close) unless thread.alive?
            server.shutdown
-            server.listeners.each(&:close)
          end
          @server = nil
@@ -60,6 +62,14 @@ module Gitlab
          end
        end
+        def readiness_probe
+          ::Gitlab::HealthChecks::Probes::Readiness.new(*additional_checks)
+        end
+        def liveness_probe
+          ::Gitlab::HealthChecks::Probes::Liveness.new
+        end
        def render_probe(probe, req, res)
          result = probe.execute

--- a/lib/gitlab/metrics/exporter/web_exporter.rb
+++ b/lib/gitlab/metrics/exporter/web_exporter.rb
@@ -7,6 +7,16 @@ module Gitlab
  module Metrics
    module Exporter
      class WebExporter < BaseExporter
+        # This exporter is always run on master process
+        def initialize
+          super
+          self.additional_checks = [
+            Gitlab::HealthChecks::PumaCheck,
+            Gitlab::HealthChecks::UnicornCheck
+          ]
+        end
        def settings
          Settings.monitoring.web_exporter
        end

--- a/spec/lib/gitlab/health_checks/puma_check_spec.rb
+++ b/spec/lib/gitlab/health_checks/puma_check_spec.rb
+require 'spec_helper'
+describe Gitlab::HealthChecks::PumaCheck do
+  let(:result_class) { Gitlab::HealthChecks::Result }
+  let(:readiness) { described_class.readiness }
+  let(:metrics) { described_class.metrics }
+  shared_examples 'with state' do |(state, message)|
+    it "does provide readiness" do
+      expect(readiness).to eq(result_class.new('puma_check', state, message))
+    end
+    it "does provide metrics" do
+      expect(metrics).to include(
+        an_object_having_attributes(name: 'puma_check_success', value: state ? 1 : 0))
+      expect(metrics).to include(
+        an_object_having_attributes(name: 'puma_check_latency_seconds', value: be >= 0))
+    end
+  end
+  context 'when Puma is not loaded' do
+    before do
+      hide_const('Puma')
+    end
+    it "does not provide readiness and metrics" do
+      expect(readiness).to be_nil
+      expect(metrics).to be_nil
+    end
+  end
+  context 'when Puma is loaded' do
+    before do
+      stub_const('Puma', Module.new)
+    end
+    context 'when stats are missing' do
+      before do
+        expect(Puma).to receive(:stats).and_raise(NoMethodError)
+      end
+      it_behaves_like 'with state', [false, 'unexpected Puma check result: 0']
+    end
+    context 'for Single mode' do
+      before do
+        expect(Puma).to receive(:stats) do
+          '{}'
+        end
+      end
+      it_behaves_like 'with state', true
+    end
+    context 'for Cluster mode' do
+      before do
+        expect(Puma).to receive(:stats) do
+          '{"workers":2}'
+        end
+      end
+      it_behaves_like 'with state', true
+    end
+  end
+end
--- a/spec/lib/gitlab/health_checks/unicorn_check_spec.rb
+++ b/spec/lib/gitlab/health_checks/unicorn_check_spec.rb
+require 'spec_helper'
+describe Gitlab::HealthChecks::UnicornCheck do
+  let(:result_class) { Gitlab::HealthChecks::Result }
+  let(:readiness) { described_class.readiness }
+  let(:metrics) { described_class.metrics }
+  before do
+    described_class.clear_memoization(:http_servers)
+  end
+  shared_examples 'with state' do |(state, message)|
+    it "does provide readiness" do
+      expect(readiness).to eq(result_class.new('unicorn_check', state, message))
+    end
+    it "does provide metrics" do
+      expect(metrics).to include(
+        an_object_having_attributes(name: 'unicorn_check_success', value: state ? 1 : 0))
+      expect(metrics).to include(
+        an_object_having_attributes(name: 'unicorn_check_latency_seconds', value: be >= 0))
+    end
+  end
+  context 'when Unicorn is not loaded' do
+    before do
+      hide_const('Unicorn')
+    end
+    it "does not provide readiness and metrics" do
+      expect(readiness).to be_nil
+      expect(metrics).to be_nil
+    end
+  end
+  context 'when Unicorn is loaded' do
+    let(:http_server_class) { Struct.new(:worker_processes) }
+    before do
+      stub_const('Unicorn::HttpServer', http_server_class)
+    end
+    context 'when no servers are running' do
+      it_behaves_like 'with state', [false, 'unexpected Unicorn check result: 0']
+    end
+    context 'when servers without workers are running' do
+      before do
+        http_server_class.new(0)
+      end
+      it_behaves_like 'with state', [false, 'unexpected Unicorn check result: 0']
+    end
+    context 'when servers with workers are running' do
+      before do
+        http_server_class.new(1)
+      end
+      it_behaves_like 'with state', true
+    end
+  end
+end
--- a/spec/lib/gitlab/metrics/exporter/base_exporter_spec.rb
+++ b/spec/lib/gitlab/metrics/exporter/base_exporter_spec.rb
@@ -64,6 +64,18 @@ describe Gitlab::Metrics::Exporter::BaseExporter do
            exporter.start.join
          end
        end
+        describe 'when thread is not alive' do
+          it 'does close listeners' do
+            expect_any_instance_of(::WEBrick::HTTPServer).to receive(:start)
+            expect_any_instance_of(::WEBrick::HTTPServer).to receive(:listeners)
+              .and_call_original
+            expect { exporter.start.join }.to change { exporter.thread? }.from(false).to(true)
+            exporter.stop
+          end
+        end
      end
      describe '#stop' do