Commit 71dfd193 authored by Kamil Trzciński's avatar Kamil Trzciński

Add `QueryAnalyzers::GitlabSchemasMetrics` to observe used schemas

This analyzer's output is a Prometheus metric that records, per connection,
which GitLab schemas the executed queries access.

It shows, in a single metric, how many connections run queries that
join across schemas or are misplaced once additional databases
are configured.
parent 717a1010
---
name: query_analyzer_gitlab_schema_metrics
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73839
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/345034
milestone: '14.5'
type: development
group: group::sharding
default_enabled: false
......@@ -3,6 +3,7 @@
# Currently we register validator only for `dev` or `test` environment
if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean('GITLAB_ENABLE_QUERY_ANALYZERS', default: false)
Gitlab::Database::QueryAnalyzer.instance.hook!
Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics)
Gitlab::Application.configure do |config|
config.middleware.use(Gitlab::Middleware::QueryAnalyzer)
......
# frozen_string_literal: true
module Gitlab
  module Database
    module QueryAnalyzers
      # Observes, via a Prometheus counter, the set of unique GitLab schemas
      # that each analyzed query accesses on a given connection.
      #
      # This makes it possible to sample a small share of queries (for example
      # 1% or 0.01%) and detect connections that receive misaligned queries,
      # e.g. `ci_replica` seeing queries that access only `gitlab_main`.
      class GitlabSchemasMetrics < Base
        class << self
          # The analyzer runs only while the
          # `query_analyzer_gitlab_schema_metrics` feature flag is enabled.
          # The connection argument is not consulted.
          def enabled?(_connection)
            Feature.enabled?(:query_analyzer_gitlab_schema_metrics)
          end

          # Increments the counter once for the parsed query, labelled with
          # the connection's db config name and the schemas of the tables the
          # query references.
          #
          # Nothing is recorded when the connection has no db config name or
          # when the query's tables resolve to no schemas.
          def analyze(parsed)
            config_name = ::Gitlab::Database.db_config_name(parsed.connection)
            return unless config_name

            schemas = ::Gitlab::Database::GitlabSchema.table_schemas(parsed.pg.tables)
            return if schemas.empty?

            # Sort the schemas so the same combination always yields the same
            # label value, which keeps the number of distinct labels down.
            labels = {
              gitlab_schemas: schemas.to_a.sort.join(","),
              db_config_name: config_name
            }

            schemas_metrics.increment(labels)
          end

          # Memoized counter that `analyze` increments.
          def schemas_metrics
            return @schemas_metrics if @schemas_metrics

            @schemas_metrics = ::Gitlab::Metrics.counter(
              :gitlab_database_decomposition_gitlab_schemas_used,
              'The number of observed schemas dependent on connection'
            )
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Verifies that GitlabSchemasMetrics is skipped while its feature flag is
# disabled and that, when it runs, the counter is incremented with the
# expected `gitlab_schemas` / `db_config_name` labels.
RSpec.describe Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics do
let(:analyzer) { described_class }
# Limit the analyzers in play to the one under test.
# NOTE(review): the initializer registers analyzers through
# `QueryAnalyzer.instance.all_analyzers`, while this spec stubs the `ANALYZERS`
# constant and calls `QueryAnalyzer.new` - confirm both paths are supported.
before do
stub_const('Gitlab::Database::QueryAnalyzer::ANALYZERS', [analyzer])
end
# With the flag off, `analyze` must never be called for a processed query.
it 'does not increment metrics if feature flag is disabled' do
stub_feature_flags(query_analyzer_gitlab_schema_metrics: false)
expect(analyzer).not_to receive(:analyze)
process_sql(ActiveRecord::Base, "SELECT 1 FROM projects")
end
# The `:mocked_ci_connection` tag selects an around hook that re-establishes
# Ci::ApplicationRecord with a database config named `ci`.
context 'properly observes all queries', :mocked_ci_connection do
using RSpec::Parameterized::TableSyntax
# Each entry maps a scenario name to the model whose connection is used,
# the SQL handed to the analyzer, and the labels expected on `increment`.
where do
{
"for simple query observes schema correctly" => {
model: ApplicationRecord,
sql: "SELECT 1 FROM projects",
expectations: {
gitlab_schemas: "gitlab_main",
db_config_name: "main"
}
},
"for query accessing gitlab_ci and gitlab_main" => {
model: ApplicationRecord,
sql: "SELECT 1 FROM projects LEFT JOIN ci_builds ON ci_builds.project_id=projects.id",
expectations: {
gitlab_schemas: "gitlab_ci,gitlab_main",
db_config_name: "main"
}
},
# Same tables as above in the opposite order: the label must not change.
"for query accessing gitlab_ci and gitlab_main the gitlab_schemas is always ordered" => {
model: ApplicationRecord,
sql: "SELECT 1 FROM ci_builds LEFT JOIN projects ON ci_builds.project_id=projects.id",
expectations: {
gitlab_schemas: "gitlab_ci,gitlab_main",
db_config_name: "main"
}
},
"for query accessing CI database" => {
model: Ci::ApplicationRecord,
sql: "SELECT 1 FROM ci_builds",
expectations: {
gitlab_schemas: "gitlab_ci",
db_config_name: "ci"
}
}
}
end
with_them do
it do
# `and_call_original` lets the real counter increment run as well.
expect(described_class.schemas_metrics).to receive(:increment)
.with(expectations).and_call_original
process_sql(model, sql)
end
end
end
# Passes `sql` to a new QueryAnalyzer together with the connection yielded
# by the `read_write` block of the model's load balancer.
def process_sql(model, sql)
model.connection.load_balancer.read_write do |connection|
Gitlab::Database::QueryAnalyzer.new.process_sql(sql, connection)
end
end
end
......@@ -6,6 +6,18 @@ module Database
skip 'Skipping because multiple databases not set up' unless Gitlab::Database.has_config?(:ci)
end
# Re-establishes `model`'s connection using a database config derived from
# an existing one.
#
# The base config is taken from `config_model` when given, otherwise from
# `model` itself. The new config keeps the base env name, uses `name`
# (converted to a string) when provided or the base name otherwise, and
# merges `config_hash` over the base configuration hash.
#
# Returns whatever `model.establish_connection` returns.
def reconfigure_db_connection(name: nil, config_hash: {}, model: ActiveRecord::Base, config_model: nil)
  source_config = (config_model || model).connection_db_config

  env_name = source_config.env_name
  target_name = name ? name.to_s : source_config.name
  merged_settings = source_config.configuration_hash.merge(config_hash)

  model.establish_connection(
    ActiveRecord::DatabaseConfigurations::HashConfig.new(env_name, target_name, merged_settings)
  )
end
# The usage of this method switches temporarily used `connection_handler`
# allowing full manipulation of ActiveRecord::Base connections without
# having side effects like:
......@@ -56,6 +68,19 @@ RSpec.configure do |config|
example.run
end
end
# For examples tagged `:mocked_ci_connection`: inside
# `with_reestablished_active_record_base`, re-establish Ci::ApplicationRecord
# with a database config named `ci` that is derived from ActiveRecord::Base's
# config, then run the example.
config.around(:each, :mocked_ci_connection) do |example|
with_reestablished_active_record_base(reconnect: true) do
reconfigure_db_connection(
name: :ci,
model: Ci::ApplicationRecord,
config_model: ActiveRecord::Base
)
example.run
end
end
end
# Prepend the module onto ActiveRecord::Base's singleton class so that its
# methods take precedence over ActiveRecord::Base's own class-level methods.
# NOTE(review): judging by the module name this presumably wraps
# `establish_connection` - confirm against the module definition.
ActiveRecord::Base.singleton_class.prepend(::Database::ActiveRecordBaseEstablishConnection) # rubocop:disable Database/MultipleDatabases
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment