Commit eeec86d3 authored by Dylan Griffith's avatar Dylan Griffith

Add per DB counters to logs and refactor

This MR adds multiple new DB metrics to our logging when the
`multiple_database_counters` feature flag is enabled.

```diff
+ "db_primary_primary_cached_count": 0,
+ "db_primary_primary_count": 0,
+ "db_primary_primary_duration_s": 0,
+ "db_replica_primary_cached_count": 0,
+ "db_replica_primary_count": 0,
+ "db_replica_primary_duration_s": 0,
+ "db_primary_primary_wal_count": 0,
+ "db_primary_primary_wal_cached_count": 0,
+ "db_replica_primary_wal_cached_count": 0,
+ "db_replica_primary_wal_count": 0,
+ "db_replica_primary_duration_s": 0,
+ "db_primary_primary_duration_s": 0,
```

These new fields indicate how many queries went to the database named
`primary`. `primary` happens to be the default name Rails gives to the
database connections described in `config/database.yml`. This is
unfortunate as it conflicts with the `primary/replica` language we use
to distinguish between read/write and read-only servers (hence
`primary_primary` in the key) but this will change when we start
explicitly setting the 2 databases we configure to be called `main` and
`ci`. The feature flag exists to alleviate some of this confusion as
described in https://gitlab.com/gitlab-org/gitlab/-/merge_requests/66515
 as only the ~group::sharding will need to use this for testing in the
meantime.

Prior to this change we already supported logging the `duration_s` data
per database but we did not log the count of queries. When implementing
this count of queries it became obvious it was much easier to implement
these metrics in the same way that all the other metrics were
implemented. In particular that they are logged as `0` even if there was
no matching query. This led to quite a lot of refactoring because the
list of `database_config_names` is dynamic. It's what is configured in
`config/database.yml` and we had a tonne of tests that needed the full
list of keys and values being logged.
parent 1716bf84
...@@ -158,6 +158,10 @@ module Gitlab ...@@ -158,6 +158,10 @@ module Gitlab
end end
end end
def self.db_config_names
::ActiveRecord::Base.configurations.configs_for(env_name: Rails.env).map(&:name)
end
def self.db_config_name(ar_connection) def self.db_config_name(ar_connection)
if ar_connection.respond_to?(:pool) && if ar_connection.respond_to?(:pool) &&
ar_connection.pool.respond_to?(:db_config) && ar_connection.pool.respond_to?(:db_config) &&
......
...@@ -8,17 +8,15 @@ module Gitlab ...@@ -8,17 +8,15 @@ module Gitlab
attach_to :active_record attach_to :active_record
IGNORABLE_SQL = %w{BEGIN COMMIT}.freeze IGNORABLE_SQL = %w{BEGIN COMMIT}.freeze
DB_COUNTERS = %i{db_count db_write_count db_cached_count}.freeze DB_COUNTERS = %i{count write_count cached_count}.freeze
SQL_COMMANDS_WITH_COMMENTS_REGEX = %r{\A(/\*.*\*/\s)?((?!(.*[^\w'"](DELETE|UPDATE|INSERT INTO)[^\w'"])))(WITH.*)?(SELECT)((?!(FOR UPDATE|FOR SHARE)).)*$}i.freeze SQL_COMMANDS_WITH_COMMENTS_REGEX = %r{\A(/\*.*\*/\s)?((?!(.*[^\w'"](DELETE|UPDATE|INSERT INTO)[^\w'"])))(WITH.*)?(SELECT)((?!(FOR UPDATE|FOR SHARE)).)*$}i.freeze
SQL_DURATION_BUCKET = [0.05, 0.1, 0.25].freeze SQL_DURATION_BUCKET = [0.05, 0.1, 0.25].freeze
TRANSACTION_DURATION_BUCKET = [0.1, 0.25, 1].freeze TRANSACTION_DURATION_BUCKET = [0.1, 0.25, 1].freeze
DB_LOAD_BALANCING_COUNTERS = %i{ DB_LOAD_BALANCING_ROLES = %i{replica primary}.freeze
db_replica_count db_replica_cached_count db_replica_wal_count db_replica_wal_cached_count DB_LOAD_BALANCING_COUNTERS = %i{count cached_count wal_count wal_cached_count}.freeze
db_primary_count db_primary_cached_count db_primary_wal_count db_primary_wal_cached_count DB_LOAD_BALANCING_DURATIONS = %i{duration_s}.freeze
}.freeze
DB_LOAD_BALANCING_DURATIONS = %i{db_primary_duration_s db_replica_duration_s}.freeze
SQL_WAL_LOCATION_REGEX = /(pg_current_wal_insert_lsn\(\)::text|pg_last_wal_replay_lsn\(\)::text)/.freeze SQL_WAL_LOCATION_REGEX = /(pg_current_wal_insert_lsn\(\)::text|pg_last_wal_replay_lsn\(\)::text)/.freeze
...@@ -40,9 +38,10 @@ module Gitlab ...@@ -40,9 +38,10 @@ module Gitlab
payload = event.payload payload = event.payload
return if ignored_query?(payload) return if ignored_query?(payload)
increment(:db_count) db_config_name = db_config_name(event.payload)
increment(:db_cached_count) if cached_query?(payload) increment(:count, db_config_name: db_config_name)
increment(:db_write_count) unless select_sql_command?(payload) increment(:cached_count, db_config_name: db_config_name) if cached_query?(payload)
increment(:write_count, db_config_name: db_config_name) unless select_sql_command?(payload)
observe(:gitlab_sql_duration_seconds, event) do observe(:gitlab_sql_duration_seconds, event) do
buckets SQL_DURATION_BUCKET buckets SQL_DURATION_BUCKET
...@@ -61,24 +60,17 @@ module Gitlab ...@@ -61,24 +60,17 @@ module Gitlab
return {} unless Gitlab::SafeRequestStore.active? return {} unless Gitlab::SafeRequestStore.active?
{}.tap do |payload| {}.tap do |payload|
DB_COUNTERS.each do |counter| db_counter_keys.each do |key|
payload[counter] = Gitlab::SafeRequestStore[counter].to_i payload[key] = Gitlab::SafeRequestStore[key].to_i
end end
if ::Gitlab::SafeRequestStore.active? && ::Gitlab::Database::LoadBalancing.enable? if ::Gitlab::SafeRequestStore.active? && ::Gitlab::Database::LoadBalancing.enable?
DB_LOAD_BALANCING_COUNTERS.each do |counter| load_balancing_metric_counter_keys.each do |counter|
payload[counter] = ::Gitlab::SafeRequestStore[counter].to_i payload[counter] = ::Gitlab::SafeRequestStore[counter].to_i
end end
DB_LOAD_BALANCING_DURATIONS.each do |duration|
payload[duration] = ::Gitlab::SafeRequestStore[duration].to_f.round(3)
end
if Feature.enabled?(:multiple_database_metrics, default_enabled: :yaml) load_balancing_metric_duration_keys.each do |duration|
::Gitlab::SafeRequestStore[:duration_by_database]&.each do |name, duration_by_role| payload[duration] = ::Gitlab::SafeRequestStore[duration].to_f.round(3)
duration_by_role.each do |db_role, duration|
payload[:"db_#{db_role}_#{name}_duration_s"] = duration.to_f.round(3)
end
end
end end
end end
end end
...@@ -92,12 +84,15 @@ module Gitlab ...@@ -92,12 +84,15 @@ module Gitlab
def increment_db_role_counters(db_role, payload) def increment_db_role_counters(db_role, payload)
cached = cached_query?(payload) cached = cached_query?(payload)
increment("db_#{db_role}_count".to_sym)
increment("db_#{db_role}_cached_count".to_sym) if cached db_config_name = db_config_name(payload)
increment(:count, db_role: db_role, db_config_name: db_config_name)
increment(:cached_count, db_role: db_role, db_config_name: db_config_name) if cached
if wal_command?(payload) if wal_command?(payload)
increment("db_#{db_role}_wal_count".to_sym) increment(:wal_count, db_role: db_role, db_config_name: db_config_name)
increment("db_#{db_role}_wal_cached_count".to_sym) if cached increment(:wal_cached_count, db_role: db_role, db_config_name: db_config_name) if cached
end end
end end
...@@ -109,15 +104,13 @@ module Gitlab ...@@ -109,15 +104,13 @@ module Gitlab
return unless ::Gitlab::SafeRequestStore.active? return unless ::Gitlab::SafeRequestStore.active?
duration = event.duration / 1000.0 duration = event.duration / 1000.0
duration_key = "db_#{db_role}_duration_s".to_sym duration_key = compose_metric_key(:duration_s, db_role)
::Gitlab::SafeRequestStore[duration_key] = (::Gitlab::SafeRequestStore[duration_key].presence || 0) + duration ::Gitlab::SafeRequestStore[duration_key] = (::Gitlab::SafeRequestStore[duration_key].presence || 0) + duration
# Per database metrics # Per database metrics
name = ::Gitlab::Database.db_config_name(event.payload[:connection]) db_config_name = db_config_name(event.payload)
::Gitlab::SafeRequestStore[:duration_by_database] ||= {} duration_key = compose_metric_key(:duration_s, db_role, db_config_name)
::Gitlab::SafeRequestStore[:duration_by_database][name] ||= {} ::Gitlab::SafeRequestStore[duration_key] = (::Gitlab::SafeRequestStore[duration_key].presence || 0) + duration
::Gitlab::SafeRequestStore[:duration_by_database][name][db_role] ||= 0
::Gitlab::SafeRequestStore[:duration_by_database][name][db_role] += duration
end end
def ignored_query?(payload) def ignored_query?(payload)
...@@ -132,10 +125,25 @@ module Gitlab ...@@ -132,10 +125,25 @@ module Gitlab
payload[:sql].match(SQL_COMMANDS_WITH_COMMENTS_REGEX) payload[:sql].match(SQL_COMMANDS_WITH_COMMENTS_REGEX)
end end
def increment(counter) def increment(counter, db_config_name:, db_role: nil)
current_transaction&.increment("gitlab_transaction_#{counter}_total".to_sym, 1) log_key = compose_metric_key(counter, db_role)
prometheus_key = if db_role
:"gitlab_transaction_db_#{db_role}_#{counter}_total"
else
:"gitlab_transaction_db_#{counter}_total"
end
Gitlab::SafeRequestStore[counter] = Gitlab::SafeRequestStore[counter].to_i + 1 current_transaction&.increment(prometheus_key, 1)
Gitlab::SafeRequestStore[log_key] = Gitlab::SafeRequestStore[log_key].to_i + 1
# To avoid confusing log keys we only log the db_config_name metrics
# when we are also logging the db_role. Otherwise it will be hard to
# tell if the log key is referring to a db_role OR a db_config_name.
if db_role.present? && db_config_name.present?
log_key = compose_metric_key(counter, db_role, db_config_name)
Gitlab::SafeRequestStore[log_key] = Gitlab::SafeRequestStore[log_key].to_i + 1
end
end end
def observe(histogram, event, &block) def observe(histogram, event, &block)
...@@ -145,6 +153,45 @@ module Gitlab ...@@ -145,6 +153,45 @@ module Gitlab
def current_transaction def current_transaction
::Gitlab::Metrics::WebTransaction.current || ::Gitlab::Metrics::BackgroundTransaction.current ::Gitlab::Metrics::WebTransaction.current || ::Gitlab::Metrics::BackgroundTransaction.current
end end
def db_config_name(payload)
::Gitlab::Database.db_config_name(payload[:connection])
end
def self.db_counter_keys
DB_COUNTERS.map { |c| compose_metric_key(c) }
end
def self.load_balancing_metric_counter_keys
load_balancing_metric_keys(DB_LOAD_BALANCING_COUNTERS)
end
def self.load_balancing_metric_duration_keys
load_balancing_metric_keys(DB_LOAD_BALANCING_DURATIONS)
end
def self.load_balancing_metric_keys(metrics)
[].tap do |counters|
DB_LOAD_BALANCING_ROLES.each do |role|
metrics.each do |metric|
counters << compose_metric_key(metric, role)
next unless ::Feature.enabled?(:multiple_database_metrics, default_enabled: :yaml)
::Gitlab::Database.db_config_names.each do |config_name|
counters << compose_metric_key(metric, role, config_name)
end
end
end
end
end
def compose_metric_key(metric, db_role = nil, db_config_name = nil)
self.class.compose_metric_key(metric, db_role, db_config_name)
end
def self.compose_metric_key(metric, db_role = nil, db_config_name = nil)
[:db, db_role, db_config_name, metric].compact.join("_").to_sym
end
end end
end end
end end
......
...@@ -300,47 +300,38 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do ...@@ -300,47 +300,38 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
let(:db_config_name) { ::Gitlab::Database.db_config_name(ApplicationRecord.connection) } let(:db_config_name) { ::Gitlab::Database.db_config_name(ApplicationRecord.connection) }
let(:expected_db_payload_defaults) do
metrics =
::Gitlab::Metrics::Subscribers::ActiveRecord.load_balancing_metric_counter_keys +
::Gitlab::Metrics::Subscribers::ActiveRecord.load_balancing_metric_duration_keys +
::Gitlab::Metrics::Subscribers::ActiveRecord.db_counter_keys +
[:db_duration_s]
metrics.each_with_object({}) do |key, result|
result[key.to_s] = 0
end
end
let(:expected_end_payload_with_db) do let(:expected_end_payload_with_db) do
expected_end_payload.merge( expected_end_payload.merge(expected_db_payload_defaults).merge(
'db_duration_s' => a_value >= 0.1, 'db_duration_s' => a_value >= 0.1,
'db_count' => a_value >= 1, 'db_count' => a_value >= 1,
'db_cached_count' => 0, "db_replica_#{db_config_name}_count" => 0,
'db_write_count' => 0,
'db_replica_count' => 0,
'db_replica_cached_count' => 0,
'db_replica_wal_count' => 0,
'db_replica_duration_s' => a_value >= 0, 'db_replica_duration_s' => a_value >= 0,
'db_primary_count' => a_value >= 1, 'db_primary_count' => a_value >= 1,
'db_primary_cached_count' => 0, "db_primary_#{db_config_name}_count" => a_value >= 1,
'db_primary_wal_count' => 0,
'db_primary_duration_s' => a_value > 0, 'db_primary_duration_s' => a_value > 0,
"db_primary_#{db_config_name}_duration_s" => a_value > 0, "db_primary_#{db_config_name}_duration_s" => a_value > 0
'db_primary_wal_cached_count' => 0,
'db_replica_wal_cached_count' => 0
) )
end end
let(:end_payload) do let(:end_payload) do
start_payload.merge( start_payload.merge(expected_db_payload_defaults).merge(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: done: 0.0 sec', 'message' => 'TestWorker JID-da883554ee4fe414012f5f42: done: 0.0 sec',
'job_status' => 'done', 'job_status' => 'done',
'duration_s' => 0.0, 'duration_s' => 0.0,
'completed_at' => timestamp.to_f, 'completed_at' => timestamp.to_f,
'cpu_s' => 1.111112, 'cpu_s' => 1.111112
'db_duration_s' => 0.0,
'db_cached_count' => 0,
'db_count' => 0,
'db_write_count' => 0,
'db_replica_count' => 0,
'db_replica_cached_count' => 0,
'db_replica_wal_count' => 0,
'db_replica_duration_s' => 0,
'db_primary_count' => 0,
'db_primary_cached_count' => 0,
'db_primary_wal_count' => 0,
'db_primary_wal_cached_count' => 0,
'db_replica_wal_cached_count' => 0,
'db_primary_duration_s' => 0
) )
end end
......
# frozen_string_literal: true # frozen_string_literal: true
RSpec.shared_examples 'store ActiveRecord info in RequestStore' do |db_role| RSpec.shared_examples 'store ActiveRecord info in RequestStore' do |db_role|
let(:db_config_name) { ::Gitlab::Database.db_config_name(ApplicationRecord.connection) }
let(:expected_payload_defaults) do
metrics =
::Gitlab::Metrics::Subscribers::ActiveRecord.load_balancing_metric_counter_keys +
::Gitlab::Metrics::Subscribers::ActiveRecord.load_balancing_metric_duration_keys +
::Gitlab::Metrics::Subscribers::ActiveRecord.db_counter_keys
metrics.each_with_object({}) do |key, result|
result[key] = 0
end
end
it 'prevents db counters from leaking to the next transaction' do it 'prevents db counters from leaking to the next transaction' do
2.times do 2.times do
Gitlab::WithRequestStore.with_request_store do Gitlab::WithRequestStore.with_request_store do
subscriber.sql(event) subscriber.sql(event)
connection = event.payload[:connection]
# rubocop:disable Style/ConditionalAssignment
if db_role == :primary if db_role == :primary
expected = { expected = expected_payload_defaults.merge({
db_count: record_query ? 1 : 0, db_count: record_query ? 1 : 0,
db_write_count: record_write_query ? 1 : 0, db_write_count: record_write_query ? 1 : 0,
db_cached_count: record_cached_query ? 1 : 0, db_cached_count: record_cached_query ? 1 : 0,
db_primary_cached_count: record_cached_query ? 1 : 0, db_primary_cached_count: record_cached_query ? 1 : 0,
db_primary_count: record_query ? 1 : 0, "db_primary_#{db_config_name}_cached_count": record_cached_query ? 1 : 0,
db_primary_duration_s: record_query ? 0.002 : 0, db_primary_count: record_query ? 1 : 0,
db_replica_cached_count: 0, "db_primary_#{db_config_name}_count": record_query ? 1 : 0,
db_replica_count: 0, db_primary_duration_s: record_query ? 0.002 : 0,
db_replica_duration_s: 0.0, "db_primary_#{db_config_name}_duration_s": record_query ? 0.002 : 0,
db_primary_wal_count: record_wal_query ? 1 : 0, db_primary_wal_count: record_wal_query ? 1 : 0,
"db_primary_#{db_config_name}_wal_count": record_wal_query ? 1 : 0,
db_primary_wal_cached_count: record_wal_query && record_cached_query ? 1 : 0, db_primary_wal_cached_count: record_wal_query && record_cached_query ? 1 : 0,
db_replica_wal_cached_count: 0, "db_primary_#{db_config_name}_wal_cached_count": record_wal_query && record_cached_query ? 1 : 0
db_replica_wal_count: 0 })
}
expected[:"db_primary_#{::Gitlab::Database.db_config_name(connection)}_duration_s"] = 0.002 if record_query
elsif db_role == :replica elsif db_role == :replica
expected = { expected = expected_payload_defaults.merge({
db_count: record_query ? 1 : 0, db_count: record_query ? 1 : 0,
db_write_count: record_write_query ? 1 : 0, db_write_count: record_write_query ? 1 : 0,
db_cached_count: record_cached_query ? 1 : 0, db_cached_count: record_cached_query ? 1 : 0,
db_primary_cached_count: 0, db_replica_cached_count: record_cached_query ? 1 : 0,
db_primary_count: 0, "db_replica_#{db_config_name}_cached_count": record_cached_query ? 1 : 0,
db_primary_duration_s: 0.0, db_replica_count: record_query ? 1 : 0,
db_replica_cached_count: record_cached_query ? 1 : 0, "db_replica_#{db_config_name}_count": record_query ? 1 : 0,
db_replica_count: record_query ? 1 : 0, db_replica_duration_s: record_query ? 0.002 : 0,
db_replica_duration_s: record_query ? 0.002 : 0, "db_replica_#{db_config_name}_duration_s": record_query ? 0.002 : 0,
db_replica_wal_count: record_wal_query ? 1 : 0, db_replica_wal_count: record_wal_query ? 1 : 0,
"db_replica_#{db_config_name}_wal_count": record_wal_query ? 1 : 0,
db_replica_wal_cached_count: record_wal_query && record_cached_query ? 1 : 0, db_replica_wal_cached_count: record_wal_query && record_cached_query ? 1 : 0,
db_primary_wal_cached_count: 0, "db_replica_#{db_config_name}_wal_cached_count": record_wal_query && record_cached_query ? 1 : 0
db_primary_wal_count: 0 })
}
expected[:"db_replica_#{::Gitlab::Database.db_config_name(connection)}_duration_s"] = 0.002 if record_query
else else
expected = { expected = {
db_count: record_query ? 1 : 0, db_count: record_query ? 1 : 0,
...@@ -48,6 +59,7 @@ RSpec.shared_examples 'store ActiveRecord info in RequestStore' do |db_role| ...@@ -48,6 +59,7 @@ RSpec.shared_examples 'store ActiveRecord info in RequestStore' do |db_role|
db_cached_count: record_cached_query ? 1 : 0 db_cached_count: record_cached_query ? 1 : 0
} }
end end
# rubocop:enable Style/ConditionalAssignment
expect(described_class.db_counter_payload).to eq(expected) expect(described_class.db_counter_payload).to eq(expected)
end end
...@@ -62,9 +74,9 @@ RSpec.shared_examples 'store ActiveRecord info in RequestStore' do |db_role| ...@@ -62,9 +74,9 @@ RSpec.shared_examples 'store ActiveRecord info in RequestStore' do |db_role|
it 'does not include per database metrics' do it 'does not include per database metrics' do
Gitlab::WithRequestStore.with_request_store do Gitlab::WithRequestStore.with_request_store do
subscriber.sql(event) subscriber.sql(event)
connection = event.payload[:connection]
expect(described_class.db_counter_payload).not_to include(:"db_replica_#{::Gitlab::Database.db_config_name(connection)}_duration_s") expect(described_class.db_counter_payload).not_to include(:"db_replica_#{db_config_name}_duration_s")
expect(described_class.db_counter_payload).not_to include(:"db_replica_#{db_config_name}_count")
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment