Commit 17ec208c authored by Bob Van Landuyt's avatar Bob Van Landuyt

Add request urgency to logs and metrics

This adds the request urgency to logs by adding a `request_urgency`
and a `request_urgency_s` fields. The duration in seconds will make it
easier to build dashboards showing SLIs that don't meet SLO without
having to copy the information in the runbooks.

The way fields are added to logs is slighty refacotred and tests
added extended.

The `rails_request_apdex` metrics get initialized and emitted with an
`urgency` label.
parent 78cce600
...@@ -163,7 +163,8 @@ class ApplicationController < ActionController::Base ...@@ -163,7 +163,8 @@ class ApplicationController < ActionController::Base
payload[Labkit::Correlation::CorrelationId::LOG_KEY] = Labkit::Correlation::CorrelationId.current_id payload[Labkit::Correlation::CorrelationId::LOG_KEY] = Labkit::Correlation::CorrelationId.current_id
payload[:metadata] = @current_context payload[:metadata] = @current_context
payload[:request_urgency] = urgency&.name
payload[:target_duration_s] = urgency&.duration
logged_user = auth_user logged_user = auth_user
if logged_user.present? if logged_user.present?
payload[:user_id] = logged_user.try(:id) payload[:user_id] = logged_user.try(:id)
......
...@@ -27,7 +27,8 @@ module API ...@@ -27,7 +27,8 @@ module API
Gitlab::GrapeLogging::Loggers::PerfLogger.new, Gitlab::GrapeLogging::Loggers::PerfLogger.new,
Gitlab::GrapeLogging::Loggers::CorrelationIdLogger.new, Gitlab::GrapeLogging::Loggers::CorrelationIdLogger.new,
Gitlab::GrapeLogging::Loggers::ContextLogger.new, Gitlab::GrapeLogging::Loggers::ContextLogger.new,
Gitlab::GrapeLogging::Loggers::ContentLogger.new Gitlab::GrapeLogging::Loggers::ContentLogger.new,
Gitlab::GrapeLogging::Loggers::UrgencyLogger.new
] ]
allow_access_with_scope :api allow_access_with_scope :api
......
# frozen_string_literal: true
module Gitlab
module GrapeLogging
module Loggers
class UrgencyLogger < ::GrapeLogging::Loggers::Base
def parameters(request, _)
endpoint = request.env['api.endpoint']
return {} unless endpoint
urgency = endpoint.options[:for].try(:urgency_for_app, endpoint)
return {} unless urgency
{ request_urgency: urgency.name, target_duration_s: urgency.duration }
end
end
end
end
end
...@@ -7,6 +7,8 @@ module Gitlab ...@@ -7,6 +7,8 @@ module Gitlab
LIMITED_ARRAY_SENTINEL = { key: 'truncated', value: '...' }.freeze LIMITED_ARRAY_SENTINEL = { key: 'truncated', value: '...' }.freeze
IGNORE_PARAMS = Set.new(%w(controller action format)).freeze IGNORE_PARAMS = Set.new(%w(controller action format)).freeze
KNOWN_PAYLOAD_PARAMS = [:remote_ip, :user_id, :username, :ua, :queue_duration_s,
:etag_route, :request_urgency, :target_duration_s] + CLOUDFLARE_CUSTOM_HEADERS.values
def self.call(event) def self.call(event)
params = event params = event
...@@ -14,24 +16,17 @@ module Gitlab ...@@ -14,24 +16,17 @@ module Gitlab
.each_with_object([]) { |(k, v), array| array << { key: k, value: v } unless IGNORE_PARAMS.include?(k) } .each_with_object([]) { |(k, v), array| array << { key: k, value: v } unless IGNORE_PARAMS.include?(k) }
payload = { payload = {
time: Time.now.utc.iso8601(3), time: Time.now.utc.iso8601(3),
params: Gitlab::Utils::LogLimitedArray.log_limited_array(params, sentinel: LIMITED_ARRAY_SENTINEL), params: Gitlab::Utils::LogLimitedArray.log_limited_array(params, sentinel: LIMITED_ARRAY_SENTINEL)
remote_ip: event.payload[:remote_ip],
user_id: event.payload[:user_id],
username: event.payload[:username],
ua: event.payload[:ua]
} }
payload.merge!(event.payload[:metadata]) if event.payload[:metadata] payload.merge!(event.payload[:metadata]) if event.payload[:metadata]
optional_payload_params = event.payload.slice(*KNOWN_PAYLOAD_PARAMS).compact
payload.merge!(optional_payload_params)
::Gitlab::InstrumentationHelper.add_instrumentation_data(payload) ::Gitlab::InstrumentationHelper.add_instrumentation_data(payload)
payload[:queue_duration_s] = event.payload[:queue_duration_s] if event.payload[:queue_duration_s]
payload[:etag_route] = event.payload[:etag_route] if event.payload[:etag_route]
payload[Labkit::Correlation::CorrelationId::LOG_KEY] = event.payload[Labkit::Correlation::CorrelationId::LOG_KEY] || Labkit::Correlation::CorrelationId.current_id payload[Labkit::Correlation::CorrelationId::LOG_KEY] = event.payload[Labkit::Correlation::CorrelationId::LOG_KEY] || Labkit::Correlation::CorrelationId.current_id
CLOUDFLARE_CUSTOM_HEADERS.each do |_, value|
payload[value] = event.payload[value] if event.payload[value]
end
# https://github.com/roidrage/lograge#logging-errors--exceptions # https://github.com/roidrage/lograge#logging-errors--exceptions
exception = event.payload[:exception_object] exception = event.payload[:exception_object]
......
...@@ -30,10 +30,12 @@ module Gitlab ...@@ -30,10 +30,12 @@ module Gitlab
endpoint_id = API::Base.endpoint_id_for_route(route) endpoint_id = API::Base.endpoint_id_for_route(route)
route_class = route.app.options[:for] route_class = route.app.options[:for]
feature_category = route_class.feature_category_for_app(route.app) feature_category = route_class.feature_category_for_app(route.app)
request_urgency = route_class.urgency_for_app(route.app)
{ {
endpoint_id: endpoint_id, endpoint_id: endpoint_id,
feature_category: feature_category feature_category: feature_category,
request_urgency: request_urgency.name
} }
end end
end end
...@@ -42,7 +44,8 @@ module Gitlab ...@@ -42,7 +44,8 @@ module Gitlab
Gitlab::RequestEndpoints.all_controller_actions.map do |controller, action| Gitlab::RequestEndpoints.all_controller_actions.map do |controller, action|
{ {
endpoint_id: controller.endpoint_id_for_action(action), endpoint_id: controller.endpoint_id_for_action(action),
feature_category: controller.feature_category_for_action(action) feature_category: controller.feature_category_for_action(action),
request_urgency: controller.urgency_for_action(action).name
} }
end end
end end
......
...@@ -116,9 +116,11 @@ module Gitlab ...@@ -116,9 +116,11 @@ module Gitlab
def record_apdex_if_needed(env, elapsed) def record_apdex_if_needed(env, elapsed)
return unless Gitlab::Metrics::RailsSlis.request_apdex_counters_enabled? return unless Gitlab::Metrics::RailsSlis.request_apdex_counters_enabled?
urgency = urgency_for_env(env)
Gitlab::Metrics::RailsSlis.request_apdex.increment( Gitlab::Metrics::RailsSlis.request_apdex.increment(
labels: labels_from_context, labels: labels_from_context.merge(request_urgency: urgency.name),
success: satisfactory?(env, elapsed) success: elapsed < urgency.duration
) )
end end
...@@ -129,17 +131,15 @@ module Gitlab ...@@ -129,17 +131,15 @@ module Gitlab
} }
end end
def satisfactory?(env, elapsed) def urgency_for_env(env)
target = endpoint_urgency =
if env['api.endpoint'].present? if env['api.endpoint'].present?
env['api.endpoint'].options[:for].try(:urgency_for_app, env['api.endpoint']) env['api.endpoint'].options[:for].try(:urgency_for_app, env['api.endpoint'])
elsif env['action_controller.instance'].present? && env['action_controller.instance'].respond_to?(:urgency) elsif env['action_controller.instance'].present? && env['action_controller.instance'].respond_to?(:urgency)
env['action_controller.instance'].urgency env['action_controller.instance'].urgency
end end
target ||= Gitlab::EndpointAttributes::DEFAULT_URGENCY endpoint_urgency || Gitlab::EndpointAttributes::DEFAULT_URGENCY
elapsed < target.duration
end end
end end
end end
......
...@@ -501,11 +501,16 @@ RSpec.describe ApplicationController do ...@@ -501,11 +501,16 @@ RSpec.describe ApplicationController do
describe '#append_info_to_payload' do describe '#append_info_to_payload' do
controller(described_class) do controller(described_class) do
attr_reader :last_payload attr_reader :last_payload
urgency :high, [:foo]
def index def index
render html: 'authenticated' render html: 'authenticated'
end end
def foo
render html: ''
end
def append_info_to_payload(payload) def append_info_to_payload(payload)
super super
...@@ -513,6 +518,13 @@ RSpec.describe ApplicationController do ...@@ -513,6 +518,13 @@ RSpec.describe ApplicationController do
end end
end end
before do
routes.draw do
get 'index' => 'anonymous#index'
get 'foo' => 'anonymous#foo'
end
end
it 'does not log errors with a 200 response' do it 'does not log errors with a 200 response' do
get :index get :index
...@@ -534,6 +546,22 @@ RSpec.describe ApplicationController do ...@@ -534,6 +546,22 @@ RSpec.describe ApplicationController do
expect(controller.last_payload[:metadata]).to include('meta.user' => user.username) expect(controller.last_payload[:metadata]).to include('meta.user' => user.username)
end end
context 'urgency information' do
it 'adds default urgency information to the payload' do
get :index
expect(controller.last_payload[:request_urgency]).to eq(:default)
expect(controller.last_payload[:target_duration_s]).to eq(1)
end
it 'adds customized urgency information to the payload' do
get :foo
expect(controller.last_payload[:request_urgency]).to eq(:high)
expect(controller.last_payload[:target_duration_s]).to eq(0.25)
end
end
end end
describe '#access_denied' do describe '#access_denied' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::GrapeLogging::Loggers::UrgencyLogger do
def endpoint(options, namespace: '')
Struct.new(:options, :namespace).new(options, namespace)
end
let(:api_class) do
Class.new(API::Base) do
namespace 'testing' do
# rubocop:disable Rails/HttpPositionalArguments
# This is not the get that performs a request, but the one from Grape
get 'test', urgency: :high do
{}
end
# rubocop:enable Rails/HttpPositionalArguments
end
end
end
describe ".parameters" do
where(:request_env, :expected_parameters) do
[
[{}, {}],
[{ 'api.endpoint' => endpoint({}) }, {}],
[{ 'api.endpoint' => endpoint({ for: 'something weird' }) }, {}],
[
{ 'api.endpoint' => endpoint({ for: api_class, path: [] }) },
{ request_urgency: :default, target_duration_s: 1 }
],
[
{ 'api.endpoint' => endpoint({ for: api_class, path: ['test'] }, namespace: '/testing') },
{ request_urgency: :high, target_duration_s: 0.25 }
]
]
end
with_them do
let(:request) { double('request', env: request_env) }
subject { described_class.new.parameters(request, nil) }
it { is_expected.to eq(expected_parameters) }
end
end
end
...@@ -19,7 +19,13 @@ RSpec.describe Gitlab::Lograge::CustomOptions do ...@@ -19,7 +19,13 @@ RSpec.describe Gitlab::Lograge::CustomOptions do
user_id: 'test', user_id: 'test',
cf_ray: SecureRandom.hex, cf_ray: SecureRandom.hex,
cf_request_id: SecureRandom.hex, cf_request_id: SecureRandom.hex,
metadata: { 'meta.user' => 'jane.doe' } metadata: { 'meta.user' => 'jane.doe' },
request_urgency: :default,
target_duration_s: 1,
remote_ip: '192.168.1.2',
ua: 'Nyxt',
queue_duration_s: 0.2,
etag_route: '/etag'
} }
end end
...@@ -66,6 +72,18 @@ RSpec.describe Gitlab::Lograge::CustomOptions do ...@@ -66,6 +72,18 @@ RSpec.describe Gitlab::Lograge::CustomOptions do
end end
end end
context 'trusted payload' do
it { is_expected.to include(event_payload.slice(*described_class::KNOWN_PAYLOAD_PARAMS)) }
context 'payload with rejected fields' do
let(:event_payload) { { params: {}, request_urgency: :high, something: 'random', username: nil } }
it { is_expected.to include({ request_urgency: :high }) }
it { is_expected.not_to include({ something: 'random' }) }
it { is_expected.not_to include({ username: nil }) }
end
end
context 'when correlation_id is overridden' do context 'when correlation_id is overridden' do
let(:correlation_id_key) { Labkit::Correlation::CorrelationId::LOG_KEY } let(:correlation_id_key) { Labkit::Correlation::CorrelationId::LOG_KEY }
......
...@@ -17,11 +17,13 @@ RSpec.describe Gitlab::Metrics::RailsSlis do ...@@ -17,11 +17,13 @@ RSpec.describe Gitlab::Metrics::RailsSlis do
possible_labels = [ possible_labels = [
{ {
endpoint_id: "GET /api/:version/version", endpoint_id: "GET /api/:version/version",
feature_category: :not_owned feature_category: :not_owned,
request_urgency: :default
}, },
{ {
endpoint_id: "ProjectsController#show", endpoint_id: "ProjectsController#show",
feature_category: :projects feature_category: :projects,
request_urgency: :default
} }
] ]
......
...@@ -36,7 +36,8 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do ...@@ -36,7 +36,8 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do
it 'tracks request count and duration' do it 'tracks request count and duration' do
expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'unknown') expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'unknown')
expect(described_class).to receive_message_chain(:http_request_duration_seconds, :observe).with({ method: 'get' }, a_positive_execution_time) expect(described_class).to receive_message_chain(:http_request_duration_seconds, :observe).with({ method: 'get' }, a_positive_execution_time)
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(labels: { feature_category: 'unknown', endpoint_id: 'unknown' }, success: true) expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment)
.with(labels: { feature_category: 'unknown', endpoint_id: 'unknown', request_urgency: :default }, success: true)
subject.call(env) subject.call(env)
end end
...@@ -122,7 +123,7 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do ...@@ -122,7 +123,7 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do
expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'issue_tracking') expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'issue_tracking')
expect(described_class).not_to receive(:http_health_requests_total) expect(described_class).not_to receive(:http_health_requests_total)
expect(Gitlab::Metrics::RailsSlis.request_apdex) expect(Gitlab::Metrics::RailsSlis.request_apdex)
.to receive(:increment).with(labels: { feature_category: 'issue_tracking', endpoint_id: 'IssuesController#show' }, success: true) .to receive(:increment).with(labels: { feature_category: 'issue_tracking', endpoint_id: 'IssuesController#show', request_urgency: :default }, success: true)
subject.call(env) subject.call(env)
end end
...@@ -156,7 +157,8 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do ...@@ -156,7 +157,8 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do
it 'sets the required labels to unknown' do it 'sets the required labels to unknown' do
expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'unknown') expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'unknown')
expect(described_class).not_to receive(:http_health_requests_total) expect(described_class).not_to receive(:http_health_requests_total)
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(labels: { feature_category: 'unknown', endpoint_id: 'unknown' }, success: true) expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment)
.with(labels: { feature_category: 'unknown', endpoint_id: 'unknown', request_urgency: :default }, success: true)
subject.call(env) subject.call(env)
end end
...@@ -206,7 +208,11 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do ...@@ -206,7 +208,11 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do
it "captures SLI metrics" do it "captures SLI metrics" do
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with( expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(
labels: { feature_category: 'hello_world', endpoint_id: 'GET /projects/:id/archive' }, labels: {
feature_category: 'hello_world',
endpoint_id: 'GET /projects/:id/archive',
request_urgency: request_urgency_name
},
success: success success: success
) )
subject.call(env) subject.call(env)
...@@ -235,7 +241,11 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do ...@@ -235,7 +241,11 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do
it "captures SLI metrics" do it "captures SLI metrics" do
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with( expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(
labels: { feature_category: 'hello_world', endpoint_id: 'AnonymousController#index' }, labels: {
feature_category: 'hello_world',
endpoint_id: 'AnonymousController#index',
request_urgency: request_urgency_name
},
success: success success: success
) )
subject.call(env) subject.call(env)
...@@ -255,17 +265,25 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do ...@@ -255,17 +265,25 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do
let(:api_handler) { Class.new(::API::Base) } let(:api_handler) { Class.new(::API::Base) }
it "falls back request's expectation to medium (1 second)" do it "falls back request's expectation to default (1 second)" do
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 100.9) allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 100.9)
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with( expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(
labels: { feature_category: 'unknown', endpoint_id: 'unknown' }, labels: {
feature_category: 'unknown',
endpoint_id: 'unknown',
request_urgency: :default
},
success: true success: true
) )
subject.call(env) subject.call(env)
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 101) allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 101)
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with( expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(
labels: { feature_category: 'unknown', endpoint_id: 'unknown' }, labels: {
feature_category: 'unknown',
endpoint_id: 'unknown',
request_urgency: :default
},
success: false success: false
) )
subject.call(env) subject.call(env)
...@@ -281,17 +299,25 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do ...@@ -281,17 +299,25 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do
{ 'action_controller.instance' => controller_instance, 'REQUEST_METHOD' => 'GET' } { 'action_controller.instance' => controller_instance, 'REQUEST_METHOD' => 'GET' }
end end
it "falls back request's expectation to medium (1 second)" do it "falls back request's expectation to default (1 second)" do
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 100.9) allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 100.9)
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with( expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(
labels: { feature_category: 'unknown', endpoint_id: 'unknown' }, labels: {
feature_category: 'unknown',
endpoint_id: 'unknown',
request_urgency: :default
},
success: true success: true
) )
subject.call(env) subject.call(env)
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 101) allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 101)
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with( expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(
labels: { feature_category: 'unknown', endpoint_id: 'unknown' }, labels: {
feature_category: 'unknown',
endpoint_id: 'unknown',
request_urgency: :default
},
success: false success: false
) )
subject.call(env) subject.call(env)
...@@ -303,17 +329,25 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do ...@@ -303,17 +329,25 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do
{ 'REQUEST_METHOD' => 'GET' } { 'REQUEST_METHOD' => 'GET' }
end end
it "falls back request's expectation to medium (1 second)" do it "falls back request's expectation to default (1 second)" do
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 100.9) allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 100.9)
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with( expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(
labels: { feature_category: 'unknown', endpoint_id: 'unknown' }, labels: {
feature_category: 'unknown',
endpoint_id: 'unknown',
request_urgency: :default
},
success: true success: true
) )
subject.call(env) subject.call(env)
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 101) allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100, 101)
expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with( expect(Gitlab::Metrics::RailsSlis.request_apdex).to receive(:increment).with(
labels: { feature_category: 'unknown', endpoint_id: 'unknown' }, labels: {
feature_category: 'unknown',
endpoint_id: 'unknown',
request_urgency: :default
},
success: false success: false
) )
subject.call(env) subject.call(env)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment