Commit be08202b authored by Douwe Maan's avatar Douwe Maan

Merge branch 'bjk/metric_names' into 'master'

Update metric names to match Prometheus guidelines.

Closes #35031

See merge request !12812
parents 0f393724 2d0741e5
...@@ -48,7 +48,7 @@ class SessionsController < Devise::SessionsController ...@@ -48,7 +48,7 @@ class SessionsController < Devise::SessionsController
private private
def login_counter def login_counter
@login_counter ||= Gitlab::Metrics.counter(:user_session_logins, 'User sign in count') @login_counter ||= Gitlab::Metrics.counter(:user_session_logins_total, 'User sign in count')
end end
# Handle an "initial setup" state, where there's only one user, it's an admin, # Handle an "initial setup" state, where there's only one user, it's an admin,
......
...@@ -123,7 +123,7 @@ Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_ ...@@ -123,7 +123,7 @@ Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_
Gitlab::Application.configure do |config| Gitlab::Application.configure do |config|
# 0 should be Sentry to catch errors in this middleware # 0 should be Sentry to catch errors in this middleware
config.middleware.insert(1, Gitlab::Metrics::ConnectionRackMiddleware) config.middleware.insert(1, Gitlab::Metrics::RequestsRackMiddleware)
end end
if Gitlab::Metrics.enabled? if Gitlab::Metrics.enabled?
......
...@@ -24,20 +24,23 @@ Currently the embedded Prometheus server is not automatically configured to coll ...@@ -24,20 +24,23 @@ Currently the embedded Prometheus server is not automatically configured to coll
In this experimental phase, only a few metrics are available: In this experimental phase, only a few metrics are available:
| Metric | Type | Description | | Metric | Type | Description |
| ------ | ---- | ----------- | | --------------------------------- | --------- | ----------- |
| db_ping_timeout | Gauge | Whether or not the last database ping timed out | | db_ping_timeout | Gauge | Whether or not the last database ping timed out |
| db_ping_success | Gauge | Whether or not the last database ping succeeded | | db_ping_success | Gauge | Whether or not the last database ping succeeded |
| db_ping_latency | Gauge | Round trip time of the database ping | | db_ping_latency_seconds | Gauge | Round trip time of the database ping |
| filesystem_access_latency_seconds | Gauge | Latency in accessing a specific filesystem |
| filesystem_accessible | Gauge | Whether or not a specific filesystem is accessible |
| filesystem_write_latency_seconds | Gauge | Write latency of a specific filesystem |
| filesystem_writable | Gauge | Whether or not the filesystem is writable |
| filesystem_read_latency_seconds | Gauge | Read latency of a specific filesystem |
| filesystem_readable | Gauge | Whether or not the filesystem is readable |
| http_requests_total | Counter | Rack request count |
| http_request_duration_seconds | Histogram | HTTP response time from rack middleware |
| rack_uncaught_errors_total | Counter | Rack connections handling uncaught errors count |
| redis_ping_timeout | Gauge | Whether or not the last redis ping timed out | | redis_ping_timeout | Gauge | Whether or not the last redis ping timed out |
| redis_ping_success | Gauge | Whether or not the last redis ping succeeded | | redis_ping_success | Gauge | Whether or not the last redis ping succeeded |
| redis_ping_latency | Gauge | Round trip time of the redis ping | | redis_ping_latency_seconds | Gauge | Round trip time of the redis ping |
| filesystem_access_latency | gauge | Latency in accessing a specific filesystem | | user_session_logins_total | Counter | Counter of how many users have logged in |
| filesystem_accessible | gauge | Whether or not a specific filesystem is accessible |
| filesystem_write_latency | gauge | Write latency of a specific filesystem |
| filesystem_writable | gauge | Whether or not the filesystem is writable |
| filesystem_read_latency | gauge | Read latency of a specific filesystem |
| filesystem_readable | gauge | Whether or not the filesystem is readable |
| user_sessions_logins | Counter | Counter of how many users have logged in |
[← Back to the main Prometheus page](index.md) [← Back to the main Prometheus page](index.md)
......
...@@ -35,9 +35,9 @@ module Gitlab ...@@ -35,9 +35,9 @@ module Gitlab
repository_storages.flat_map do |storage_name| repository_storages.flat_map do |storage_name|
tmp_file_path = tmp_file_path(storage_name) tmp_file_path = tmp_file_path(storage_name)
[ [
operation_metrics(:filesystem_accessible, :filesystem_access_latency, -> { storage_stat_test(storage_name) }, shard: storage_name), operation_metrics(:filesystem_accessible, :filesystem_access_latency_seconds, -> { storage_stat_test(storage_name) }, shard: storage_name),
operation_metrics(:filesystem_writable, :filesystem_write_latency, -> { storage_write_test(tmp_file_path) }, shard: storage_name), operation_metrics(:filesystem_writable, :filesystem_write_latency_seconds, -> { storage_write_test(tmp_file_path) }, shard: storage_name),
operation_metrics(:filesystem_readable, :filesystem_read_latency, -> { storage_read_test(tmp_file_path) }, shard: storage_name) operation_metrics(:filesystem_readable, :filesystem_read_latency_seconds, -> { storage_read_test(tmp_file_path) }, shard: storage_name)
].flatten ].flatten
end end
end end
......
...@@ -20,7 +20,7 @@ module Gitlab ...@@ -20,7 +20,7 @@ module Gitlab
[ [
metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0), metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0),
metric("#{metric_prefix}_success", is_successful?(result) ? 1 : 0), metric("#{metric_prefix}_success", is_successful?(result) ? 1 : 0),
metric("#{metric_prefix}_latency", elapsed) metric("#{metric_prefix}_latency_seconds", elapsed)
] ]
end end
end end
......
module Gitlab module Gitlab
module Metrics module Metrics
class ConnectionRackMiddleware class RequestsRackMiddleware
def initialize(app) def initialize(app)
@app = app @app = app
end end
def self.rack_request_count def self.http_request_total
@rack_request_count ||= Gitlab::Metrics.counter(:rack_request, 'Rack request count') @http_request_total ||= Gitlab::Metrics.counter(:http_requests_total, 'Request count')
end
def self.rack_response_count
@rack_response_count ||= Gitlab::Metrics.counter(:rack_response, 'Rack response count')
end end
def self.rack_uncaught_errors_count def self.rack_uncaught_errors_count
@rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors, 'Rack connections handling uncaught errors count') @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors_total, 'Request handling uncaught errors count')
end end
def self.rack_execution_time def self.http_request_duration_seconds
@rack_execution_time ||= Gitlab::Metrics.histogram(:rack_execution_time, 'Rack connection handling execution time', @http_request_duration_seconds ||= Gitlab::Metrics.histogram(:http_request_duration_seconds, 'Request handling execution time',
{}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 1.5, 2, 2.5, 3, 5, 7, 10]) {}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 2.5, 5, 10, 25])
end end
def call(env) def call(env)
method = env['REQUEST_METHOD'].downcase method = env['REQUEST_METHOD'].downcase
started = Time.now.to_f started = Time.now.to_f
begin begin
ConnectionRackMiddleware.rack_request_count.increment(method: method) RequestsRackMiddleware.http_request_total.increment(method: method)
status, headers, body = @app.call(env) status, headers, body = @app.call(env)
ConnectionRackMiddleware.rack_response_count.increment(method: method, status: status) elapsed = Time.now.to_f - started
RequestsRackMiddleware.http_request_duration_seconds.observe({ method: method, status: status }, elapsed)
[status, headers, body] [status, headers, body]
rescue rescue
ConnectionRackMiddleware.rack_uncaught_errors_count.increment RequestsRackMiddleware.rack_uncaught_errors_count.increment
raise raise
ensure
elapsed = Time.now.to_f - started
ConnectionRackMiddleware.rack_execution_time.observe({}, elapsed)
end end
end end
end end
......
...@@ -24,7 +24,7 @@ describe MetricsController do ...@@ -24,7 +24,7 @@ describe MetricsController do
expect(response.body).to match(/^db_ping_timeout 0$/) expect(response.body).to match(/^db_ping_timeout 0$/)
expect(response.body).to match(/^db_ping_success 1$/) expect(response.body).to match(/^db_ping_success 1$/)
expect(response.body).to match(/^db_ping_latency [0-9\.]+$/) expect(response.body).to match(/^db_ping_latency_seconds [0-9\.]+$/)
end end
it 'returns Redis ping metrics' do it 'returns Redis ping metrics' do
...@@ -32,7 +32,7 @@ describe MetricsController do ...@@ -32,7 +32,7 @@ describe MetricsController do
expect(response.body).to match(/^redis_ping_timeout 0$/) expect(response.body).to match(/^redis_ping_timeout 0$/)
expect(response.body).to match(/^redis_ping_success 1$/) expect(response.body).to match(/^redis_ping_success 1$/)
expect(response.body).to match(/^redis_ping_latency [0-9\.]+$/) expect(response.body).to match(/^redis_ping_latency_seconds [0-9\.]+$/)
end end
it 'returns Caching ping metrics' do it 'returns Caching ping metrics' do
...@@ -40,7 +40,7 @@ describe MetricsController do ...@@ -40,7 +40,7 @@ describe MetricsController do
expect(response.body).to match(/^redis_cache_ping_timeout 0$/) expect(response.body).to match(/^redis_cache_ping_timeout 0$/)
expect(response.body).to match(/^redis_cache_ping_success 1$/) expect(response.body).to match(/^redis_cache_ping_success 1$/)
expect(response.body).to match(/^redis_cache_ping_latency [0-9\.]+$/) expect(response.body).to match(/^redis_cache_ping_latency_seconds [0-9\.]+$/)
end end
it 'returns Queues ping metrics' do it 'returns Queues ping metrics' do
...@@ -48,7 +48,7 @@ describe MetricsController do ...@@ -48,7 +48,7 @@ describe MetricsController do
expect(response.body).to match(/^redis_queues_ping_timeout 0$/) expect(response.body).to match(/^redis_queues_ping_timeout 0$/)
expect(response.body).to match(/^redis_queues_ping_success 1$/) expect(response.body).to match(/^redis_queues_ping_success 1$/)
expect(response.body).to match(/^redis_queues_ping_latency [0-9\.]+$/) expect(response.body).to match(/^redis_queues_ping_latency_seconds [0-9\.]+$/)
end end
it 'returns SharedState ping metrics' do it 'returns SharedState ping metrics' do
...@@ -56,17 +56,17 @@ describe MetricsController do ...@@ -56,17 +56,17 @@ describe MetricsController do
expect(response.body).to match(/^redis_shared_state_ping_timeout 0$/) expect(response.body).to match(/^redis_shared_state_ping_timeout 0$/)
expect(response.body).to match(/^redis_shared_state_ping_success 1$/) expect(response.body).to match(/^redis_shared_state_ping_success 1$/)
expect(response.body).to match(/^redis_shared_state_ping_latency [0-9\.]+$/) expect(response.body).to match(/^redis_shared_state_ping_latency_seconds [0-9\.]+$/)
end end
it 'returns file system check metrics' do it 'returns file system check metrics' do
get :index get :index
expect(response.body).to match(/^filesystem_access_latency{shard="default"} [0-9\.]+$/) expect(response.body).to match(/^filesystem_access_latency_seconds{shard="default"} [0-9\.]+$/)
expect(response.body).to match(/^filesystem_accessible{shard="default"} 1$/) expect(response.body).to match(/^filesystem_accessible{shard="default"} 1$/)
expect(response.body).to match(/^filesystem_write_latency{shard="default"} [0-9\.]+$/) expect(response.body).to match(/^filesystem_write_latency_seconds{shard="default"} [0-9\.]+$/)
expect(response.body).to match(/^filesystem_writable{shard="default"} 1$/) expect(response.body).to match(/^filesystem_writable{shard="default"} 1$/)
expect(response.body).to match(/^filesystem_read_latency{shard="default"} [0-9\.]+$/) expect(response.body).to match(/^filesystem_read_latency_seconds{shard="default"} [0-9\.]+$/)
expect(response.body).to match(/^filesystem_readable{shard="default"} 1$/) expect(response.body).to match(/^filesystem_readable{shard="default"} 1$/)
end end
......
...@@ -109,9 +109,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do ...@@ -109,9 +109,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do
expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
end end
end end
...@@ -127,9 +127,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do ...@@ -127,9 +127,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do
expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 1)) expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 1))
expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 1)) expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 1))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
end end
end end
end end
...@@ -159,9 +159,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do ...@@ -159,9 +159,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do
expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
end end
end end
end end
......
...@@ -8,7 +8,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result| ...@@ -8,7 +8,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result|
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 1)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 1)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) }
end end
context 'Check is misbehaving' do context 'Check is misbehaving' do
...@@ -18,7 +18,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result| ...@@ -18,7 +18,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result|
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) }
end end
context 'Check is timeouting' do context 'Check is timeouting' do
...@@ -28,7 +28,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result| ...@@ -28,7 +28,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result|
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 1)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 1)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) }
end end
end end
......
require 'spec_helper' require 'spec_helper'
describe Gitlab::Metrics::ConnectionRackMiddleware do describe Gitlab::Metrics::RequestsRackMiddleware do
let(:app) { double('app') } let(:app) { double('app') }
subject { described_class.new(app) } subject { described_class.new(app) }
...@@ -22,14 +22,8 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do ...@@ -22,14 +22,8 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do
allow(app).to receive(:call).and_return([200, nil, nil]) allow(app).to receive(:call).and_return([200, nil, nil])
end end
it 'increments response count with status label' do
expect(described_class).to receive_message_chain(:rack_response_count, :increment).with(include(status: 200, method: 'get'))
subject.call(env)
end
it 'increments requests count' do it 'increments requests count' do
expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get') expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get')
subject.call(env) subject.call(env)
end end
...@@ -38,20 +32,21 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do ...@@ -38,20 +32,21 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do
execution_time = 10 execution_time = 10
allow(app).to receive(:call) do |*args| allow(app).to receive(:call) do |*args|
Timecop.freeze(execution_time.seconds) Timecop.freeze(execution_time.seconds)
[200, nil, nil]
end end
expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time) expect(described_class).to receive_message_chain(:http_request_duration_seconds, :observe).with({ status: 200, method: 'get' }, execution_time)
subject.call(env) subject.call(env)
end end
end end
context '@app.call throws exception' do context '@app.call throws exception' do
let(:rack_response_count) { double('rack_response_count') } let(:http_request_duration_seconds) { double('http_request_duration_seconds') }
before do before do
allow(app).to receive(:call).and_raise(StandardError) allow(app).to receive(:call).and_raise(StandardError)
allow(described_class).to receive(:rack_response_count).and_return(rack_response_count) allow(described_class).to receive(:http_request_duration_seconds).and_return(http_request_duration_seconds)
end end
it 'increments exceptions count' do it 'increments exceptions count' do
...@@ -61,25 +56,13 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do ...@@ -61,25 +56,13 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do
end end
it 'increments requests count' do it 'increments requests count' do
expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get') expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get')
expect { subject.call(env) }.to raise_error(StandardError)
end
it "does't increment response count" do
expect(described_class.rack_response_count).not_to receive(:increment)
expect { subject.call(env) }.to raise_error(StandardError) expect { subject.call(env) }.to raise_error(StandardError)
end end
it 'measures execution time' do it "does't measure request execution time" do
execution_time = 10 expect(described_class.http_request_duration_seconds).not_to receive(:increment)
allow(app).to receive(:call) do |*args|
Timecop.freeze(execution_time.seconds)
raise StandardError
end
expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time)
expect { subject.call(env) }.to raise_error(StandardError) expect { subject.call(env) }.to raise_error(StandardError)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment