Commit 8b3a22a6 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch '326125-add-load-balancer-host-selection-metrics' into 'master'

Add metrics on searching for caught up replicas [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!64698
parents 4aa58875 79fcc98d
......@@ -149,7 +149,15 @@ if Gitlab::Metrics.enabled? && !Rails.env.test? && !(Rails.env.development? && d
require_dependency 'gitlab/metrics/subscribers/rails_cache'
Gitlab::Application.configure do |config|
# We want to track certain metrics during the Load Balancing host resolving process.
# Because of that, we need to have metrics code available earlier for Load Balancing.
if Gitlab::Database::LoadBalancing.enable?
config.middleware.insert_before Gitlab::Database::LoadBalancing::RackMiddleware,
Gitlab::Metrics::RackMiddleware
else
config.middleware.use(Gitlab::Metrics::RackMiddleware)
end
config.middleware.use(Gitlab::Middleware::RailsQueueDuration)
config.middleware.use(Gitlab::Metrics::ElasticsearchRackMiddleware)
end
......
......@@ -265,9 +265,10 @@ configuration option in `gitlab.yml`. These metrics are served from the
The following metrics are available:
| Metric | Type | Since | Description | Labels |
|:--------------------------------- |:--------- |:------------------------------------------------------------- |:-------------------------------------- |:--------------------------------------------------------- |
|:-------------------------------------------------------- |:--------- |:------------------------------------------------------------- |:---------------------------------------------------------------------------------- |:---------------------------------------------------------------------------------------------------------------------------------------- |
| `db_load_balancing_hosts` | Gauge | [12.3](https://gitlab.com/gitlab-org/gitlab/-/issues/13630) | Current number of load balancing hosts | |
| `sidekiq_load_balancing_count` | Counter | 13.11 | Sidekiq jobs using load balancing with data consistency set to :sticky or :delayed | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency`, `data_consistency`, `load_balancing_strategy` |
| `gitlab_transaction_caught_up_replica_pick_count_total` | Counter | 14.1 | Number of search attempts for caught up replica | `result` |
## Database partitioning metrics **(PREMIUM SELF)**
......
......@@ -39,6 +39,8 @@ module Gitlab
result = @app.call(env)
ActiveSupport::Notifications.instrument('web_transaction_completed.load_balancing')
stick_if_necessary(env)
result
......
......@@ -34,6 +34,8 @@ module Gitlab
return true unless location
load_balancer.all_caught_up?(location).tap do |caught_up|
ActiveSupport::Notifications.instrument('caught_up_replica_pick.load_balancing', { result: caught_up } )
unstick(namespace, id) if caught_up
end
end
......
......@@ -29,6 +29,7 @@ module Gitlab
instrument_rack_attack(payload)
instrument_cpu(payload)
instrument_thread_memory_allocations(payload)
instrument_load_balancing(payload)
end
def instrument_gitaly(payload)
......@@ -104,6 +105,12 @@ module Gitlab
payload.merge!(counters) if counters
end
def instrument_load_balancing(payload)
load_balancing_payload = ::Gitlab::Metrics::Subscribers::LoadBalancing.load_balancing_payload
payload.merge!(load_balancing_payload)
end
# Returns the queuing duration for a Sidekiq job in seconds, as a float, if the
# `enqueued_at` field or `created_at` field is available.
#
......
# frozen_string_literal: true
module Gitlab
module Metrics
module Subscribers
class LoadBalancing < ActiveSupport::Subscriber
attach_to :load_balancing
PROMETHEUS_COUNTER = :gitlab_transaction_caught_up_replica_pick_count_total
LOG_COUNTERS = { true => :caught_up_replica_pick_ok, false => :caught_up_replica_pick_fail }.freeze
def caught_up_replica_pick(event)
return unless Gitlab::SafeRequestStore.active? && ::Gitlab::Database::LoadBalancing.enable?
result = event.payload[:result]
counter_name = counter(result)
increment(counter_name)
end
# we want to update Prometheus counter after the controller/action are set
def web_transaction_completed(_event)
return unless Gitlab::SafeRequestStore.active? && ::Gitlab::Database::LoadBalancing.enable?
LOG_COUNTERS.keys.each { |result| increment_prometheus_for_result_label(result) }
end
def self.load_balancing_payload
return {} unless Gitlab::SafeRequestStore.active? && ::Gitlab::Database::LoadBalancing.enable?
{}.tap do |payload|
LOG_COUNTERS.values.each do |counter|
value = Gitlab::SafeRequestStore[counter]
payload[counter] = value.to_i if value
end
end
end
private
def increment(counter)
Gitlab::SafeRequestStore[counter] = Gitlab::SafeRequestStore[counter].to_i + 1
end
def increment_prometheus_for_result_label(label_value)
counter_name = counter(label_value)
return unless (counter_value = Gitlab::SafeRequestStore[counter_name])
increment_prometheus(labels: { result: label_value }, value: counter_value.to_i)
end
def increment_prometheus(labels:, value:)
current_transaction&.increment(PROMETHEUS_COUNTER, value, labels) do
docstring 'Caught up replica pick result'
label_keys labels.keys
end
end
def counter(result)
LOG_COUNTERS[result]
end
def current_transaction
::Gitlab::Metrics::WebTransaction.current
end
end
end
end
end
......@@ -71,6 +71,11 @@ RSpec.describe Gitlab::Database::LoadBalancing::RackMiddleware, :redis do
expect(app).to receive(:call).with(env).and_return(10)
expect(ActiveSupport::Notifications)
.to receive(:instrument)
.with('web_transaction_completed.load_balancing')
.and_call_original
expect(middleware.call(env)).to eq(10)
end
end
......
......@@ -46,12 +46,20 @@ RSpec.describe Gitlab::Database::LoadBalancing::Sticking, :redis do
describe '.all_caught_up?' do
let(:lb) { double(:lb) }
let(:last_write_location) { 'foo' }
before do
allow(described_class).to receive(:load_balancer).and_return(lb)
allow(described_class).to receive(:last_write_location_for)
.with(:user, 42)
.and_return(last_write_location)
end
it 'returns true if no write location could be found' do
context 'when no write location could be found' do
let(:last_write_location) { nil }
it 'returns true' do
allow(described_class).to receive(:last_write_location_for)
.with(:user, 42)
.and_return(nil)
......@@ -60,28 +68,47 @@ RSpec.describe Gitlab::Database::LoadBalancing::Sticking, :redis do
expect(described_class.all_caught_up?(:user, 42)).to eq(true)
end
end
it 'returns true, and unsticks if all secondaries have caught up' do
allow(described_class).to receive(:last_write_location_for)
.with(:user, 42)
.and_return('foo')
context 'when all secondaries have caught up' do
before do
allow(lb).to receive(:all_caught_up?).with('foo').and_return(true)
end
it 'returns true, and unsticks' do
expect(described_class).to receive(:unstick).with(:user, 42)
expect(described_class.all_caught_up?(:user, 42)).to eq(true)
end
it 'return false if the secondaries have not yet caught up' do
allow(described_class).to receive(:last_write_location_for)
.with(:user, 42)
.and_return('foo')
it 'notifies with the proper event payload' do
expect(ActiveSupport::Notifications)
.to receive(:instrument)
.with('caught_up_replica_pick.load_balancing', { result: true })
.and_call_original
described_class.all_caught_up?(:user, 42)
end
end
context 'when the secondaries have not yet caught up' do
before do
allow(lb).to receive(:all_caught_up?).with('foo').and_return(false)
end
it 'returns false' do
expect(described_class.all_caught_up?(:user, 42)).to eq(false)
end
it 'notifies with the proper event payload' do
expect(ActiveSupport::Notifications)
.to receive(:instrument)
.with('caught_up_replica_pick.load_balancing', { result: false })
.and_call_original
described_class.all_caught_up?(:user, 42)
end
end
end
describe '.unstick_or_continue_sticking' do
......
......@@ -137,6 +137,34 @@ RSpec.describe Gitlab::InstrumentationHelper do
db_primary_wal_cached_count: 0,
db_replica_wal_cached_count: 0)
end
context 'when replica caught up search was made' do
before do
Gitlab::SafeRequestStore[:caught_up_replica_pick_ok] = 2
Gitlab::SafeRequestStore[:caught_up_replica_pick_fail] = 1
end
it 'includes related metrics' do
subject
expect(payload).to include(caught_up_replica_pick_ok: 2)
expect(payload).to include(caught_up_replica_pick_fail: 1)
end
end
context 'when only a single counter was updated' do
before do
Gitlab::SafeRequestStore[:caught_up_replica_pick_ok] = 1
Gitlab::SafeRequestStore[:caught_up_replica_pick_fail] = nil
end
it 'includes only that counter into logging' do
subject
expect(payload).to include(caught_up_replica_pick_ok: 1)
expect(payload).not_to include(:caught_up_replica_pick_fail)
end
end
end
context 'when load balancing is disabled' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Metrics::Subscribers::LoadBalancing, :request_store do
let(:subscriber) { described_class.new }
before do
allow(Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
end
describe '#caught_up_replica_pick' do
shared_examples 'having payload result value' do |result, counter_name|
subject { subscriber.caught_up_replica_pick(event) }
let(:payload) { { result: result } }
let(:event) do
double(
:event,
name: 'load_balancing.caught_up_replica_pick',
payload: payload
)
end
it 'stores per-request caught up replica search result' do
subject
expect(Gitlab::SafeRequestStore[counter_name]).to eq(1)
end
end
it_behaves_like 'having payload result value', true, :caught_up_replica_pick_ok
it_behaves_like 'having payload result value', false, :caught_up_replica_pick_fail
end
describe "#web_transaction_completed" do
subject { subscriber.web_transaction_completed(event) }
let(:event) do
double(
:event,
name: 'load_balancing.web_transaction_completed',
payload: {}
)
end
let(:web_transaction) { double('Gitlab::Metrics::WebTransaction') }
before do
allow(::Gitlab::Metrics::WebTransaction).to receive(:current)
.and_return(web_transaction)
end
context 'when no data in request store' do
before do
Gitlab::SafeRequestStore[:caught_up_replica_pick] = nil
end
it 'does not change the counters' do
expect(web_transaction).not_to receive(:increment)
end
end
context 'when request store was updated' do
before do
Gitlab::SafeRequestStore[:caught_up_replica_pick_ok] = 2
Gitlab::SafeRequestStore[:caught_up_replica_pick_fail] = 1
end
it 'increments :caught_up_replica_pick count with proper label' do
expect(web_transaction).to receive(:increment).with(:gitlab_transaction_caught_up_replica_pick_count_total, 2, { result: true })
expect(web_transaction).to receive(:increment).with(:gitlab_transaction_caught_up_replica_pick_count_total, 1, { result: false })
subject
end
end
end
describe '.load_balancing_payload' do
subject { described_class.load_balancing_payload }
context 'when no data in request store' do
before do
Gitlab::SafeRequestStore[:caught_up_replica_pick_ok] = nil
Gitlab::SafeRequestStore[:caught_up_replica_pick_fail] = nil
end
it 'returns empty hash' do
expect(subject).to eq({})
end
end
context 'when request store was updated for a single counter' do
before do
Gitlab::SafeRequestStore[:caught_up_replica_pick_ok] = 2
end
it 'returns proper payload with only that counter' do
expect(subject).to eq({ caught_up_replica_pick_ok: 2 })
end
end
context 'when both counters were updated' do
before do
Gitlab::SafeRequestStore[:caught_up_replica_pick_ok] = 2
Gitlab::SafeRequestStore[:caught_up_replica_pick_fail] = 1
end
it 'return proper payload' do
expect(subject).to eq({ caught_up_replica_pick_ok: 2, caught_up_replica_pick_fail: 1 })
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment