Commit eeec86d3 authored by Dylan Griffith's avatar Dylan Griffith

Add per DB counters to logs and refactor

This MR adds multiple new DB metrics to our logging when the
`multiple_database_counters` feature flag is enabled.

```diff
+ "db_primary_primary_cached_count": 0,
+ "db_primary_primary_count": 0,
+ "db_primary_primary_duration_s": 0,
+ "db_replica_primary_cached_count": 0,
+ "db_replica_primary_count": 0,
+ "db_replica_primary_duration_s": 0,
+ "db_primary_primary_wal_count": 0,
+ "db_primary_primary_wal_cached_count": 0,
+ "db_replica_primary_wal_cached_count": 0,
+ "db_replica_primary_wal_count": 0,
+ "db_replica_primary_duration_s": 0,
+ "db_primary_primary_duration_s": 0,
```

These new fields indicate how many queries went to the database named
`primary`. `primary` happens to be the default name Rails gives to the
database connections described in `config/database.yml`. This is
unfortunate as it conflicts with the `primary/replica` language we use
to distinguish between read/write and read-only servers (hence
`primary_primary` in the key) but this will change when we start
explicitly setting the 2 databases we configure to be called `main` and
`ci`. The feature flag exists to alleviate some of this confusion as
described in https://gitlab.com/gitlab-org/gitlab/-/merge_requests/66515
 as only the ~group::sharding will need to use this for testing in the
meantime.

Prior to this change we already supported logging the `duration_s` data
per database but we did not log the count of queries. When implementing
this count of queries it became obvious it was much easier to implement
these metrics in the same way that all the other metrics were
implemented. In particular that they are logged as `0` even if there was
no matching query. This led to quite a lot of refactoring because the
list of `database_config_names` is dynamic. It's what is configured in
`config/database.yml` and we had a tonne of tests that needed the full
list of keys and values being logged.
parent 1716bf84
......@@ -158,6 +158,10 @@ module Gitlab
end
end
def self.db_config_names
::ActiveRecord::Base.configurations.configs_for(env_name: Rails.env).map(&:name)
end
def self.db_config_name(ar_connection)
if ar_connection.respond_to?(:pool) &&
ar_connection.pool.respond_to?(:db_config) &&
......
......@@ -8,17 +8,15 @@ module Gitlab
attach_to :active_record
IGNORABLE_SQL = %w{BEGIN COMMIT}.freeze
DB_COUNTERS = %i{db_count db_write_count db_cached_count}.freeze
DB_COUNTERS = %i{count write_count cached_count}.freeze
SQL_COMMANDS_WITH_COMMENTS_REGEX = %r{\A(/\*.*\*/\s)?((?!(.*[^\w'"](DELETE|UPDATE|INSERT INTO)[^\w'"])))(WITH.*)?(SELECT)((?!(FOR UPDATE|FOR SHARE)).)*$}i.freeze
SQL_DURATION_BUCKET = [0.05, 0.1, 0.25].freeze
TRANSACTION_DURATION_BUCKET = [0.1, 0.25, 1].freeze
DB_LOAD_BALANCING_COUNTERS = %i{
db_replica_count db_replica_cached_count db_replica_wal_count db_replica_wal_cached_count
db_primary_count db_primary_cached_count db_primary_wal_count db_primary_wal_cached_count
}.freeze
DB_LOAD_BALANCING_DURATIONS = %i{db_primary_duration_s db_replica_duration_s}.freeze
DB_LOAD_BALANCING_ROLES = %i{replica primary}.freeze
DB_LOAD_BALANCING_COUNTERS = %i{count cached_count wal_count wal_cached_count}.freeze
DB_LOAD_BALANCING_DURATIONS = %i{duration_s}.freeze
SQL_WAL_LOCATION_REGEX = /(pg_current_wal_insert_lsn\(\)::text|pg_last_wal_replay_lsn\(\)::text)/.freeze
......@@ -40,9 +38,10 @@ module Gitlab
payload = event.payload
return if ignored_query?(payload)
increment(:db_count)
increment(:db_cached_count) if cached_query?(payload)
increment(:db_write_count) unless select_sql_command?(payload)
db_config_name = db_config_name(event.payload)
increment(:count, db_config_name: db_config_name)
increment(:cached_count, db_config_name: db_config_name) if cached_query?(payload)
increment(:write_count, db_config_name: db_config_name) unless select_sql_command?(payload)
observe(:gitlab_sql_duration_seconds, event) do
buckets SQL_DURATION_BUCKET
......@@ -61,24 +60,17 @@ module Gitlab
return {} unless Gitlab::SafeRequestStore.active?
{}.tap do |payload|
DB_COUNTERS.each do |counter|
payload[counter] = Gitlab::SafeRequestStore[counter].to_i
db_counter_keys.each do |key|
payload[key] = Gitlab::SafeRequestStore[key].to_i
end
if ::Gitlab::SafeRequestStore.active? && ::Gitlab::Database::LoadBalancing.enable?
DB_LOAD_BALANCING_COUNTERS.each do |counter|
load_balancing_metric_counter_keys.each do |counter|
payload[counter] = ::Gitlab::SafeRequestStore[counter].to_i
end
DB_LOAD_BALANCING_DURATIONS.each do |duration|
payload[duration] = ::Gitlab::SafeRequestStore[duration].to_f.round(3)
end
if Feature.enabled?(:multiple_database_metrics, default_enabled: :yaml)
::Gitlab::SafeRequestStore[:duration_by_database]&.each do |name, duration_by_role|
duration_by_role.each do |db_role, duration|
payload[:"db_#{db_role}_#{name}_duration_s"] = duration.to_f.round(3)
end
end
load_balancing_metric_duration_keys.each do |duration|
payload[duration] = ::Gitlab::SafeRequestStore[duration].to_f.round(3)
end
end
end
......@@ -92,12 +84,15 @@ module Gitlab
def increment_db_role_counters(db_role, payload)
cached = cached_query?(payload)
increment("db_#{db_role}_count".to_sym)
increment("db_#{db_role}_cached_count".to_sym) if cached
db_config_name = db_config_name(payload)
increment(:count, db_role: db_role, db_config_name: db_config_name)
increment(:cached_count, db_role: db_role, db_config_name: db_config_name) if cached
if wal_command?(payload)
increment("db_#{db_role}_wal_count".to_sym)
increment("db_#{db_role}_wal_cached_count".to_sym) if cached
increment(:wal_count, db_role: db_role, db_config_name: db_config_name)
increment(:wal_cached_count, db_role: db_role, db_config_name: db_config_name) if cached
end
end
......@@ -109,15 +104,13 @@ module Gitlab
return unless ::Gitlab::SafeRequestStore.active?
duration = event.duration / 1000.0
duration_key = "db_#{db_role}_duration_s".to_sym
duration_key = compose_metric_key(:duration_s, db_role)
::Gitlab::SafeRequestStore[duration_key] = (::Gitlab::SafeRequestStore[duration_key].presence || 0) + duration
# Per database metrics
name = ::Gitlab::Database.db_config_name(event.payload[:connection])
::Gitlab::SafeRequestStore[:duration_by_database] ||= {}
::Gitlab::SafeRequestStore[:duration_by_database][name] ||= {}
::Gitlab::SafeRequestStore[:duration_by_database][name][db_role] ||= 0
::Gitlab::SafeRequestStore[:duration_by_database][name][db_role] += duration
db_config_name = db_config_name(event.payload)
duration_key = compose_metric_key(:duration_s, db_role, db_config_name)
::Gitlab::SafeRequestStore[duration_key] = (::Gitlab::SafeRequestStore[duration_key].presence || 0) + duration
end
def ignored_query?(payload)
......@@ -132,10 +125,25 @@ module Gitlab
payload[:sql].match(SQL_COMMANDS_WITH_COMMENTS_REGEX)
end
def increment(counter)
current_transaction&.increment("gitlab_transaction_#{counter}_total".to_sym, 1)
def increment(counter, db_config_name:, db_role: nil)
log_key = compose_metric_key(counter, db_role)
Gitlab::SafeRequestStore[counter] = Gitlab::SafeRequestStore[counter].to_i + 1
prometheus_key = if db_role
:"gitlab_transaction_db_#{db_role}_#{counter}_total"
else
:"gitlab_transaction_db_#{counter}_total"
end
current_transaction&.increment(prometheus_key, 1)
Gitlab::SafeRequestStore[log_key] = Gitlab::SafeRequestStore[log_key].to_i + 1
# To avoid confusing log keys we only log the db_config_name metrics
# when we are also logging the db_role. Otherwise it will be hard to
# tell if the log key is referring to a db_role OR a db_config_name.
if db_role.present? && db_config_name.present?
log_key = compose_metric_key(counter, db_role, db_config_name)
Gitlab::SafeRequestStore[log_key] = Gitlab::SafeRequestStore[log_key].to_i + 1
end
end
def observe(histogram, event, &block)
......@@ -145,6 +153,45 @@ module Gitlab
def current_transaction
::Gitlab::Metrics::WebTransaction.current || ::Gitlab::Metrics::BackgroundTransaction.current
end
def db_config_name(payload)
::Gitlab::Database.db_config_name(payload[:connection])
end
def self.db_counter_keys
DB_COUNTERS.map { |c| compose_metric_key(c) }
end
def self.load_balancing_metric_counter_keys
load_balancing_metric_keys(DB_LOAD_BALANCING_COUNTERS)
end
def self.load_balancing_metric_duration_keys
load_balancing_metric_keys(DB_LOAD_BALANCING_DURATIONS)
end
def self.load_balancing_metric_keys(metrics)
[].tap do |counters|
DB_LOAD_BALANCING_ROLES.each do |role|
metrics.each do |metric|
counters << compose_metric_key(metric, role)
next unless ::Feature.enabled?(:multiple_database_metrics, default_enabled: :yaml)
::Gitlab::Database.db_config_names.each do |config_name|
counters << compose_metric_key(metric, role, config_name)
end
end
end
end
end
def compose_metric_key(metric, db_role = nil, db_config_name = nil)
self.class.compose_metric_key(metric, db_role, db_config_name)
end
def self.compose_metric_key(metric, db_role = nil, db_config_name = nil)
[:db, db_role, db_config_name, metric].compact.join("_").to_sym
end
end
end
end
......
......@@ -300,47 +300,38 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
let(:db_config_name) { ::Gitlab::Database.db_config_name(ApplicationRecord.connection) }
let(:expected_db_payload_defaults) do
metrics =
::Gitlab::Metrics::Subscribers::ActiveRecord.load_balancing_metric_counter_keys +
::Gitlab::Metrics::Subscribers::ActiveRecord.load_balancing_metric_duration_keys +
::Gitlab::Metrics::Subscribers::ActiveRecord.db_counter_keys +
[:db_duration_s]
metrics.each_with_object({}) do |key, result|
result[key.to_s] = 0
end
end
let(:expected_end_payload_with_db) do
expected_end_payload.merge(
expected_end_payload.merge(expected_db_payload_defaults).merge(
'db_duration_s' => a_value >= 0.1,
'db_count' => a_value >= 1,
'db_cached_count' => 0,
'db_write_count' => 0,
'db_replica_count' => 0,
'db_replica_cached_count' => 0,
'db_replica_wal_count' => 0,
"db_replica_#{db_config_name}_count" => 0,
'db_replica_duration_s' => a_value >= 0,
'db_primary_count' => a_value >= 1,
'db_primary_cached_count' => 0,
'db_primary_wal_count' => 0,
"db_primary_#{db_config_name}_count" => a_value >= 1,
'db_primary_duration_s' => a_value > 0,
"db_primary_#{db_config_name}_duration_s" => a_value > 0,
'db_primary_wal_cached_count' => 0,
'db_replica_wal_cached_count' => 0
"db_primary_#{db_config_name}_duration_s" => a_value > 0
)
end
let(:end_payload) do
start_payload.merge(
start_payload.merge(expected_db_payload_defaults).merge(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: done: 0.0 sec',
'job_status' => 'done',
'duration_s' => 0.0,
'completed_at' => timestamp.to_f,
'cpu_s' => 1.111112,
'db_duration_s' => 0.0,
'db_cached_count' => 0,
'db_count' => 0,
'db_write_count' => 0,
'db_replica_count' => 0,
'db_replica_cached_count' => 0,
'db_replica_wal_count' => 0,
'db_replica_duration_s' => 0,
'db_primary_count' => 0,
'db_primary_cached_count' => 0,
'db_primary_wal_count' => 0,
'db_primary_wal_cached_count' => 0,
'db_replica_wal_cached_count' => 0,
'db_primary_duration_s' => 0
'cpu_s' => 1.111112
)
end
......
# frozen_string_literal: true
RSpec.shared_examples 'store ActiveRecord info in RequestStore' do |db_role|
let(:db_config_name) { ::Gitlab::Database.db_config_name(ApplicationRecord.connection) }
let(:expected_payload_defaults) do
metrics =
::Gitlab::Metrics::Subscribers::ActiveRecord.load_balancing_metric_counter_keys +
::Gitlab::Metrics::Subscribers::ActiveRecord.load_balancing_metric_duration_keys +
::Gitlab::Metrics::Subscribers::ActiveRecord.db_counter_keys
metrics.each_with_object({}) do |key, result|
result[key] = 0
end
end
it 'prevents db counters from leaking to the next transaction' do
2.times do
Gitlab::WithRequestStore.with_request_store do
subscriber.sql(event)
connection = event.payload[:connection]
# rubocop:disable Style/ConditionalAssignment
if db_role == :primary
expected = {
expected = expected_payload_defaults.merge({
db_count: record_query ? 1 : 0,
db_write_count: record_write_query ? 1 : 0,
db_cached_count: record_cached_query ? 1 : 0,
db_primary_cached_count: record_cached_query ? 1 : 0,
"db_primary_#{db_config_name}_cached_count": record_cached_query ? 1 : 0,
db_primary_count: record_query ? 1 : 0,
"db_primary_#{db_config_name}_count": record_query ? 1 : 0,
db_primary_duration_s: record_query ? 0.002 : 0,
db_replica_cached_count: 0,
db_replica_count: 0,
db_replica_duration_s: 0.0,
"db_primary_#{db_config_name}_duration_s": record_query ? 0.002 : 0,
db_primary_wal_count: record_wal_query ? 1 : 0,
"db_primary_#{db_config_name}_wal_count": record_wal_query ? 1 : 0,
db_primary_wal_cached_count: record_wal_query && record_cached_query ? 1 : 0,
db_replica_wal_cached_count: 0,
db_replica_wal_count: 0
}
expected[:"db_primary_#{::Gitlab::Database.db_config_name(connection)}_duration_s"] = 0.002 if record_query
"db_primary_#{db_config_name}_wal_cached_count": record_wal_query && record_cached_query ? 1 : 0
})
elsif db_role == :replica
expected = {
expected = expected_payload_defaults.merge({
db_count: record_query ? 1 : 0,
db_write_count: record_write_query ? 1 : 0,
db_cached_count: record_cached_query ? 1 : 0,
db_primary_cached_count: 0,
db_primary_count: 0,
db_primary_duration_s: 0.0,
db_replica_cached_count: record_cached_query ? 1 : 0,
"db_replica_#{db_config_name}_cached_count": record_cached_query ? 1 : 0,
db_replica_count: record_query ? 1 : 0,
"db_replica_#{db_config_name}_count": record_query ? 1 : 0,
db_replica_duration_s: record_query ? 0.002 : 0,
"db_replica_#{db_config_name}_duration_s": record_query ? 0.002 : 0,
db_replica_wal_count: record_wal_query ? 1 : 0,
"db_replica_#{db_config_name}_wal_count": record_wal_query ? 1 : 0,
db_replica_wal_cached_count: record_wal_query && record_cached_query ? 1 : 0,
db_primary_wal_cached_count: 0,
db_primary_wal_count: 0
}
expected[:"db_replica_#{::Gitlab::Database.db_config_name(connection)}_duration_s"] = 0.002 if record_query
"db_replica_#{db_config_name}_wal_cached_count": record_wal_query && record_cached_query ? 1 : 0
})
else
expected = {
db_count: record_query ? 1 : 0,
......@@ -48,6 +59,7 @@ RSpec.shared_examples 'store ActiveRecord info in RequestStore' do |db_role|
db_cached_count: record_cached_query ? 1 : 0
}
end
# rubocop:enable Style/ConditionalAssignment
expect(described_class.db_counter_payload).to eq(expected)
end
......@@ -62,9 +74,9 @@ RSpec.shared_examples 'store ActiveRecord info in RequestStore' do |db_role|
it 'does not include per database metrics' do
Gitlab::WithRequestStore.with_request_store do
subscriber.sql(event)
connection = event.payload[:connection]
expect(described_class.db_counter_payload).not_to include(:"db_replica_#{::Gitlab::Database.db_config_name(connection)}_duration_s")
expect(described_class.db_counter_payload).not_to include(:"db_replica_#{db_config_name}_duration_s")
expect(described_class.db_counter_payload).not_to include(:"db_replica_#{db_config_name}_count")
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment