Commit e9319486 authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch 'fix_sidekiq_logging_load_balancing_counters' into 'master'

Add db_load_balancing_counters to structured_log

See merge request gitlab-org/gitlab!57849
parents 2e647463 0e32ea23
......@@ -17,6 +17,11 @@ module EE
class_methods do
extend ::Gitlab::Utils::Override
override :known_payload_keys
# Extends the CE list of structured-log payload keys with the EE-only
# DB load-balancing counters (DB_LOAD_BALANCING_COUNTERS — presumably
# db_replica_count etc., as asserted in the specs below; constant is
# defined outside this view).
def known_payload_keys
super + DB_LOAD_BALANCING_COUNTERS
end
override :db_counter_payload
def db_counter_payload
super.tap do |payload|
......@@ -25,7 +30,7 @@ module EE
payload[counter] = ::Gitlab::SafeRequestStore[counter].to_i
end
DB_LOAD_BALANCING_DURATIONS.each do |duration|
payload[duration] = ::Gitlab::SafeRequestStore[duration].to_f.round(6)
payload[duration] = ::Gitlab::SafeRequestStore[duration].to_f.round(3)
end
end
end
......
......@@ -8,6 +8,38 @@ RSpec.describe Gitlab::InstrumentationHelper do
subject { described_class.add_instrumentation_data(payload) }
context 'when load balancing is enabled' do
include_context 'clear DB Load Balancing configuration'
before do
allow(Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
end
# With load balancing stubbed on, the payload must carry the replica/
# primary counters; they are zero here because no queries were issued.
it 'includes DB counts' do
subject
expect(payload).to include(db_replica_count: 0,
db_replica_cached_count: 0,
db_primary_count: 0,
db_primary_cached_count: 0)
end
end
context 'when load balancing is disabled' do
  before do
    allow(Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(false)
  end

  # Fixed example description: the original said 'includes DB counts'
  # while the expectation asserts the counters are absent.
  it 'does not include DB counts' do
    subject

    expect(payload).not_to include(db_replica_count: 0,
                                   db_replica_cached_count: 0,
                                   db_primary_count: 0,
                                   db_primary_cached_count: 0)
  end
end
# We don't want to interact with Elasticsearch in GitLab FOSS so we test
# this in ee/ only. The code exists in FOSS and won't do anything.
context 'when Elasticsearch calls are made', :elastic do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
before do
# We disable a memory instrumentation feature
# as this requires a special patched Ruby
allow(Gitlab::Memory::Instrumentation).to receive(:available?) { false }
end
describe '#call', :request_store do
include_context 'structured_logger'
RSpec.shared_examples 'performs database queries' do |load_balancing|
  include_context 'clear DB Load Balancing configuration'

  before do
    allow(Time).to receive(:now).and_return(timestamp)
    allow(Process).to receive(:clock_gettime).and_call_original
    allow(Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(load_balancing)
  end

  let(:expected_start_payload) { start_payload }

  let(:expected_end_payload) do
    end_payload.merge('cpu_s' => a_value >= 0)
  end

  # NOTE: ':aggregate_failures' is the RSpec metadata that aggregates
  # expectation failures; the original ':aggregate_errors' tag is inert.
  it 'logs the database time', :aggregate_failures do
    expect(logger).to receive(:info).with(expected_start_payload).ordered
    expect(logger).to receive(:info).with(expected_end_payload_with_db).ordered

    call_subject(job, 'test_queue') do
      ActiveRecord::Base.connection.execute('SELECT pg_sleep(0.1);')
    end
  end

  it 'prevents database time from leaking to the next job', :aggregate_failures do
    expect(logger).to receive(:info).with(expected_start_payload).ordered
    expect(logger).to receive(:info).with(expected_end_payload_with_db).ordered
    expect(logger).to receive(:info).with(expected_start_payload).ordered
    expect(logger).to receive(:info).with(expected_end_payload).ordered

    call_subject(job.dup, 'test_queue') do
      ActiveRecord::Base.connection.execute('SELECT pg_sleep(0.1);')
    end

    Gitlab::SafeRequestStore.clear!

    call_subject(job.dup, 'test_queue') { }
  end
end
context 'when the job performs database queries' do
context 'when load balancing is disabled' do
# Only the base DB counters are expected when load balancing is off.
let(:expected_end_payload_with_db) do
expected_end_payload.merge(
'db_duration_s' => a_value >= 0.1,
'db_count' => a_value >= 1,
'db_cached_count' => 0,
'db_write_count' => 0
)
end
include_examples 'performs database queries', false
end
context 'when load balancing is enabled' do
# With load balancing on, the per-host counters appear too; the
# SELECT issued by the shared example goes to the primary.
let(:expected_end_payload_with_db) do
expected_end_payload.merge(
'db_duration_s' => a_value >= 0.1,
'db_count' => a_value >= 1,
'db_cached_count' => 0,
'db_write_count' => 0,
'db_replica_count' => 0,
'db_replica_cached_count' => 0,
'db_primary_count' => a_value >= 1,
'db_primary_cached_count' => 0
)
end
# Overrides the shared context's end_payload so the zeroed
# load-balancing counters are part of the 'done' log line as well.
let(:end_payload) do
start_payload.merge(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: done: 0.0 sec',
'job_status' => 'done',
'duration_s' => 0.0,
'completed_at' => timestamp.to_f,
'cpu_s' => 1.111112,
'db_duration_s' => 0.0,
'db_cached_count' => 0,
'db_count' => 0,
'db_write_count' => 0,
'db_replica_count' => 0,
'db_replica_cached_count' => 0,
'db_primary_count' => 0,
'db_primary_cached_count' => 0
)
end
include_examples 'performs database queries', true
end
end
# Runs the job through the structured logger wrapped in the
# InstrumentationLogger middleware, which the logger depends on.
def call_subject(job, queue, &block)
  subject.call(job, queue) do
    ::Gitlab::SidekiqMiddleware::InstrumentationLogger.new.call('worker', job, queue, &block)
  end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SidekiqMiddleware::InstrumentationLogger do
  describe '.keys' do
    it 'contains load balancer keys' do
      # The load-balancing counters must be part of the payload whitelist.
      expect(described_class.keys).to include(
        :db_replica_count,
        :db_replica_cached_count,
        :db_primary_count,
        :db_primary_cached_count
      )
    end
  end
end
......@@ -6,25 +6,6 @@ module Gitlab
DURATION_PRECISION = 6 # microseconds
def keys
@keys ||= [
:cpu_s,
:gitaly_calls,
:gitaly_duration_s,
:rugged_calls,
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
:elasticsearch_timed_out_count,
:worker_data_consistency,
*::Gitlab::Memory::Instrumentation::KEY_MAPPING.values,
*::Gitlab::Instrumentation::Redis.known_payload_keys,
*::Gitlab::Metrics::Subscribers::ActiveRecord::DB_COUNTERS,
*::Gitlab::Metrics::Subscribers::ExternalHttp::KNOWN_PAYLOAD_KEYS,
*::Gitlab::Metrics::Subscribers::RackAttack::PAYLOAD_KEYS
]
end
def init_instrumentation_data(request_ip: nil)
# Set `request_start_time` only if this is request
# This is done, as `request_start_time` imply `request_deadline`
......
......@@ -51,6 +51,10 @@ module Gitlab
payload
end
# Payload keys this subscriber can contribute to structured logs.
# Exposed so middleware can build its whitelist from it (see
# Gitlab::SidekiqMiddleware::InstrumentationLogger.keys).
def self.known_payload_keys
DB_COUNTERS
end
private
def ignored_query?(payload)
......
......@@ -39,9 +39,7 @@ module Gitlab
private
# Merges instrumentation values into the structured-log payload.
def add_instrumentation_keys!(job, output_payload)
# NOTE(review): the next two lines appear to be pre-refactor residue from
# the diff (slicing whitelisted keys straight off the job hash); the
# refactored source of truth is the job[:instrumentation] hash below —
# confirm against the applied version of this file.
instrumentation_values = job.slice(*::Gitlab::InstrumentationHelper.keys).stringify_keys
output_payload.merge!(instrumentation_values)
output_payload.merge!(job[:instrumentation].stringify_keys)
end
def add_logging_extras!(job, output_payload)
......
......@@ -3,6 +3,25 @@
module Gitlab
module SidekiqMiddleware
class InstrumentationLogger
# Whitelist of instrumentation payload keys allowed into a Sidekiq job's
# structured log entry. Aggregates static keys plus the keys published by
# each instrumentation subscriber. Memoized — the set is fixed for the
# process lifetime.
def self.keys
@keys ||= [
:cpu_s,
:gitaly_calls,
:gitaly_duration_s,
:rugged_calls,
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
:elasticsearch_timed_out_count,
:worker_data_consistency,
*::Gitlab::Memory::Instrumentation::KEY_MAPPING.values,
*::Gitlab::Instrumentation::Redis.known_payload_keys,
*::Gitlab::Metrics::Subscribers::ActiveRecord.known_payload_keys,
*::Gitlab::Metrics::Subscribers::ExternalHttp::KNOWN_PAYLOAD_KEYS,
*::Gitlab::Metrics::Subscribers::RackAttack::PAYLOAD_KEYS
]
end
def call(worker, job, queue)
::Gitlab::InstrumentationHelper.init_instrumentation_data
......@@ -17,7 +36,10 @@ module Gitlab
# because Sidekiq keeps a pristine copy of the original hash
# before sending it to the middleware:
# https://github.com/mperham/sidekiq/blob/53bd529a0c3f901879925b8390353129c465b1f2/lib/sidekiq/processor.rb#L115-L118
::Gitlab::InstrumentationHelper.add_instrumentation_data(job)
job[:instrumentation] = {}.tap do |instrumentation_values|
::Gitlab::InstrumentationHelper.add_instrumentation_data(instrumentation_values)
instrumentation_values.slice!(*self.class.keys)
end
end
end
end
......
......@@ -50,14 +50,15 @@ module Gitlab
# job_status: done, fail match the job_status attribute in structured logging
labels[:job_status] = job_succeeded ? "done" : "fail"
instrumentation = job[:instrumentation] || {}
@metrics[:sidekiq_jobs_cpu_seconds].observe(labels, job_thread_cputime)
@metrics[:sidekiq_jobs_completion_seconds].observe(labels, monotonic_time)
@metrics[:sidekiq_jobs_db_seconds].observe(labels, ActiveRecord::LogSubscriber.runtime / 1000)
@metrics[:sidekiq_jobs_gitaly_seconds].observe(labels, get_gitaly_time(job))
@metrics[:sidekiq_redis_requests_total].increment(labels, get_redis_calls(job))
@metrics[:sidekiq_redis_requests_duration_seconds].observe(labels, get_redis_time(job))
@metrics[:sidekiq_elasticsearch_requests_total].increment(labels, get_elasticsearch_calls(job))
@metrics[:sidekiq_elasticsearch_requests_duration_seconds].observe(labels, get_elasticsearch_time(job))
@metrics[:sidekiq_jobs_gitaly_seconds].observe(labels, get_gitaly_time(instrumentation))
@metrics[:sidekiq_redis_requests_total].increment(labels, get_redis_calls(instrumentation))
@metrics[:sidekiq_redis_requests_duration_seconds].observe(labels, get_redis_time(instrumentation))
@metrics[:sidekiq_elasticsearch_requests_total].increment(labels, get_elasticsearch_calls(instrumentation))
@metrics[:sidekiq_elasticsearch_requests_duration_seconds].observe(labels, get_elasticsearch_time(instrumentation))
end
end
......@@ -85,24 +86,24 @@ module Gitlab
defined?(Process::CLOCK_THREAD_CPUTIME_ID) ? Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID) : 0
end
def get_redis_time(job)
job.fetch(:redis_duration_s, 0)
# Redis time (seconds) from the instrumentation payload; 0 when absent.
def get_redis_time(payload)
payload.fetch(:redis_duration_s, 0)
end
def get_redis_calls(job)
job.fetch(:redis_calls, 0)
# Redis call count from the instrumentation payload; 0 when absent.
def get_redis_calls(payload)
payload.fetch(:redis_calls, 0)
end
def get_elasticsearch_time(job)
job.fetch(:elasticsearch_duration_s, 0)
# Elasticsearch time (seconds) from the instrumentation payload; 0 when absent.
def get_elasticsearch_time(payload)
payload.fetch(:elasticsearch_duration_s, 0)
end
def get_elasticsearch_calls(job)
job.fetch(:elasticsearch_calls, 0)
# Elasticsearch call count from the instrumentation payload; 0 when absent.
def get_elasticsearch_calls(payload)
payload.fetch(:elasticsearch_calls, 0)
end
def get_gitaly_time(job)
job.fetch(:gitaly_duration_s, 0)
# Gitaly time (seconds) from the instrumentation payload; 0 when absent.
def get_gitaly_time(payload)
payload.fetch(:gitaly_duration_s, 0)
end
end
end
......
......@@ -29,6 +29,7 @@ ALLOWED = [
'config/initializers/lograge.rb',
'lib/gitlab/grape_logging/loggers/perf_logger.rb',
'lib/gitlab/instrumentation_helper.rb',
'lib/gitlab/sidekiq_middleware/instrumentation_logger.rb',
'lib/gitlab/rugged_instrumentation.rb',
'lib/peek/views/rugged.rb'
].freeze
......
......@@ -6,54 +6,6 @@ require 'rspec-parameterized'
RSpec.describe Gitlab::InstrumentationHelper do
using RSpec::Parameterized::TableSyntax
describe '.keys' do
it 'returns all available payload keys' do
expected_keys = [
:cpu_s,
:gitaly_calls,
:gitaly_duration_s,
:rugged_calls,
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
:elasticsearch_timed_out_count,
:worker_data_consistency,
:mem_objects,
:mem_bytes,
:mem_mallocs,
:redis_calls,
:redis_duration_s,
:redis_read_bytes,
:redis_write_bytes,
:redis_action_cable_calls,
:redis_action_cable_duration_s,
:redis_action_cable_read_bytes,
:redis_action_cable_write_bytes,
:redis_cache_calls,
:redis_cache_duration_s,
:redis_cache_read_bytes,
:redis_cache_write_bytes,
:redis_queues_calls,
:redis_queues_duration_s,
:redis_queues_read_bytes,
:redis_queues_write_bytes,
:redis_shared_state_calls,
:redis_shared_state_duration_s,
:redis_shared_state_read_bytes,
:redis_shared_state_write_bytes,
:db_count,
:db_write_count,
:db_cached_count,
:external_http_count,
:external_http_duration_s,
:rack_attack_redis_count,
:rack_attack_redis_duration_s
]
expect(described_class.keys).to eq(expected_keys)
end
end
describe '.add_instrumentation_data', :request_store do
let(:payload) { {} }
......
......@@ -10,80 +10,7 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
describe '#call', :request_store do
let(:timestamp) { Time.iso8601('2018-01-01T12:00:00.000Z') }
let(:created_at) { timestamp - 1.second }
let(:scheduling_latency_s) { 1.0 }
let(:job) do
{
"class" => "TestWorker",
"args" => [1234, 'hello', { 'key' => 'value' }],
"retry" => false,
"queue" => "cronjob:test_queue",
"queue_namespace" => "cronjob",
"jid" => "da883554ee4fe414012f5f42",
"created_at" => created_at.to_f,
"enqueued_at" => created_at.to_f,
"correlation_id" => 'cid',
"error_message" => "wrong number of arguments (2 for 3)",
"error_class" => "ArgumentError",
"error_backtrace" => []
}
end
let(:logger) { double }
let(:clock_realtime_start) { 0.222222299 }
let(:clock_realtime_end) { 1.333333799 }
let(:clock_thread_cputime_start) { 0.222222299 }
let(:clock_thread_cputime_end) { 1.333333799 }
let(:start_payload) do
job.except('error_backtrace', 'error_class', 'error_message').merge(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: start',
'job_status' => 'start',
'pid' => Process.pid,
'created_at' => created_at.to_f,
'enqueued_at' => created_at.to_f,
'scheduling_latency_s' => scheduling_latency_s,
'job_size_bytes' => be > 0
)
end
let(:end_payload) do
start_payload.merge(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: done: 0.0 sec',
'job_status' => 'done',
'duration_s' => 0.0,
'completed_at' => timestamp.to_f,
'cpu_s' => 1.111112,
'db_duration_s' => 0.0,
'db_cached_count' => 0,
'db_count' => 0,
'db_write_count' => 0
)
end
let(:exception_payload) do
end_payload.merge(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: fail: 0.0 sec',
'job_status' => 'fail',
'error_class' => 'ArgumentError',
'error_message' => 'Something went wrong',
'error_backtrace' => be_a(Array).and(be_present)
)
end
before do
allow(Sidekiq).to receive(:logger).and_return(logger)
allow(subject).to receive(:current_time).and_return(timestamp.to_f)
allow(Process).to receive(:clock_gettime).with(Process::CLOCK_REALTIME, :float_second)
.and_return(clock_realtime_start, clock_realtime_end)
allow(Process).to receive(:clock_gettime).with(Process::CLOCK_THREAD_CPUTIME_ID, :float_second)
.and_return(clock_thread_cputime_start, clock_thread_cputime_end)
end
subject { described_class.new }
include_context 'structured_logger'
context 'with SIDEKIQ_LOG_ARGUMENTS enabled' do
before do
......@@ -283,14 +210,19 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end_payload.merge(timing_data.stringify_keys)
end
it 'logs with Gitaly and Rugged timing data' do
before do
allow(::Gitlab::InstrumentationHelper).to receive(:add_instrumentation_data).and_wrap_original do |method, values|
method.call(values)
values.merge!(timing_data)
end
end
it 'logs with Gitaly and Rugged timing data', :aggregate_failures do
Timecop.freeze(timestamp) do
expect(logger).to receive(:info).with(start_payload).ordered
expect(logger).to receive(:info).with(expected_end_payload).ordered
call_subject(job, 'test_queue') do
job.merge!(timing_data)
end
call_subject(job, 'test_queue') { }
end
end
end
......@@ -361,15 +293,6 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end
end
end
def call_subject(job, queue)
# This structured logger strongly depends on execution of `InstrumentationLogger`
subject.call(job, queue) do
::Gitlab::SidekiqMiddleware::InstrumentationLogger.new.call('worker', job, queue) do
yield
end
end
end
end
describe '#add_time_keys!' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SidekiqMiddleware::InstrumentationLogger do
let(:job) { { 'jid' => 123 } }
let(:queue) { 'test_queue' }
let(:worker) do
  Class.new do
    # Keep the class's self-reported name consistent with the
    # `stub_const('TestWorker', worker)` below; the original returned
    # 'TestDWorker', which looks like a typo.
    def self.name
      'TestWorker'
    end

    include ApplicationWorker

    def perform(*args)
    end
  end
end
subject { described_class.new }
before do
stub_const('TestWorker', worker)
end
describe '.keys' do
  it 'returns all available payload keys' do
    # Symbol-array literal keeps the whitelist easy to scan and diff.
    expected_keys = %i[
      cpu_s
      gitaly_calls
      gitaly_duration_s
      rugged_calls
      rugged_duration_s
      elasticsearch_calls
      elasticsearch_duration_s
      elasticsearch_timed_out_count
      worker_data_consistency
      mem_objects
      mem_bytes
      mem_mallocs
      redis_calls
      redis_duration_s
      redis_read_bytes
      redis_write_bytes
      redis_action_cable_calls
      redis_action_cable_duration_s
      redis_action_cable_read_bytes
      redis_action_cable_write_bytes
      redis_cache_calls
      redis_cache_duration_s
      redis_cache_read_bytes
      redis_cache_write_bytes
      redis_queues_calls
      redis_queues_duration_s
      redis_queues_read_bytes
      redis_queues_write_bytes
      redis_shared_state_calls
      redis_shared_state_duration_s
      redis_shared_state_read_bytes
      redis_shared_state_write_bytes
      db_count
      db_write_count
      db_cached_count
      external_http_count
      external_http_duration_s
      rack_attack_redis_count
      rack_attack_redis_duration_s
    ]

    expect(described_class.keys).to include(*expected_keys)
  end
end
describe '#call', :request_store do
# Includes one key (:unknown_attribute) that is NOT in the `.keys`
# whitelist, to prove the middleware filters it out.
let(:instrumentation_values) do
{
cpu_s: 10,
unknown_attribute: 123,
db_count: 0,
db_cached_count: 0,
db_write_count: 0,
gitaly_calls: 0,
redis_calls: 0
}
end
before do
allow(::Gitlab::InstrumentationHelper).to receive(:add_instrumentation_data) do |values|
values.merge!(instrumentation_values)
end
end
# Whitelisted values must land on job[:instrumentation]; unknown keys
# must be dropped.
it 'merges correct instrumentation data in the job' do
expect { |b| subject.call(worker, job, queue, &b) }.to yield_control
expected_values = instrumentation_values.except(:unknown_attribute)
expect(job[:instrumentation]).to eq(expected_values)
end
end
end
......@@ -84,18 +84,22 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do
let(:elasticsearch_calls) { 8 }
let(:elasticsearch_duration) { 0.54 }
let(:instrumentation) do
{
gitaly_duration_s: gitaly_duration,
redis_calls: redis_calls,
redis_duration_s: redis_duration,
elasticsearch_calls: elasticsearch_calls,
elasticsearch_duration_s: elasticsearch_duration
}
end
before do
allow(subject).to receive(:get_thread_cputime).and_return(thread_cputime_before, thread_cputime_after)
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(monotonic_time_before, monotonic_time_after)
allow(Gitlab::InstrumentationHelper).to receive(:queue_duration_for_job).with(job).and_return(queue_duration_for_job)
allow(ActiveRecord::LogSubscriber).to receive(:runtime).and_return(db_duration * 1000)
job[:gitaly_duration_s] = gitaly_duration
job[:redis_calls] = redis_calls
job[:redis_duration_s] = redis_duration
job[:elasticsearch_calls] = elasticsearch_calls
job[:elasticsearch_duration_s] = elasticsearch_duration
job[:instrumentation] = instrumentation
allow(running_jobs_metric).to receive(:increment)
allow(redis_requests_total).to receive(:increment)
......
# frozen_string_literal: true
# Shared fixtures and stubs for StructuredLogger specs: a canonical Sidekiq
# job hash plus the start/done/fail payloads the logger is expected to emit.
RSpec.shared_context 'structured_logger' do
let(:timestamp) { Time.iso8601('2018-01-01T12:00:00.000Z') }
let(:created_at) { timestamp - 1.second }
let(:scheduling_latency_s) { 1.0 }
# A representative job hash, including error fields that only the
# exception payload is expected to surface.
let(:job) do
{
"class" => "TestWorker",
"args" => [1234, 'hello', { 'key' => 'value' }],
"retry" => false,
"queue" => "cronjob:test_queue",
"queue_namespace" => "cronjob",
"jid" => "da883554ee4fe414012f5f42",
"created_at" => created_at.to_f,
"enqueued_at" => created_at.to_f,
"correlation_id" => 'cid',
"error_message" => "wrong number of arguments (2 for 3)",
"error_class" => "ArgumentError",
"error_backtrace" => []
}
end
let(:logger) { double }
let(:clock_realtime_start) { 0.222222299 }
let(:clock_realtime_end) { 1.333333799 }
let(:clock_thread_cputime_start) { 0.222222299 }
let(:clock_thread_cputime_end) { 1.333333799 }
# Payload for the 'start' log line: the job minus error fields.
let(:start_payload) do
job.except('error_backtrace', 'error_class', 'error_message').merge(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: start',
'job_status' => 'start',
'pid' => Process.pid,
'created_at' => created_at.to_f,
'enqueued_at' => created_at.to_f,
'scheduling_latency_s' => scheduling_latency_s,
'job_size_bytes' => be > 0
)
end
# Payload for the 'done' log line; cpu_s matches the difference between
# the stubbed thread-CPU clock readings below (rounded).
let(:end_payload) do
start_payload.merge(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: done: 0.0 sec',
'job_status' => 'done',
'duration_s' => 0.0,
'completed_at' => timestamp.to_f,
'cpu_s' => 1.111112,
'db_duration_s' => 0.0,
'db_cached_count' => 0,
'db_count' => 0,
'db_write_count' => 0
)
end
let(:exception_payload) do
end_payload.merge(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: fail: 0.0 sec',
'job_status' => 'fail',
'error_class' => 'ArgumentError',
'error_message' => 'Something went wrong',
'error_backtrace' => be_a(Array).and(be_present)
)
end
before do
allow(Sidekiq).to receive(:logger).and_return(logger)
allow(subject).to receive(:current_time).and_return(timestamp.to_f)
# Each clock is stubbed to return start then end on successive calls.
allow(Process).to receive(:clock_gettime).with(Process::CLOCK_REALTIME, :float_second)
.and_return(clock_realtime_start, clock_realtime_end)
allow(Process).to receive(:clock_gettime).with(Process::CLOCK_THREAD_CPUTIME_ID, :float_second)
.and_return(clock_thread_cputime_start, clock_thread_cputime_end)
end
subject { described_class.new }
# The structured logger strongly depends on InstrumentationLogger having
# run, so wrap job execution in it here as well.
def call_subject(job, queue)
subject.call(job, queue) do
::Gitlab::SidekiqMiddleware::InstrumentationLogger.new.call('worker', job, queue) do
yield
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment