Commit 0d273430 authored by Thong Kuah's avatar Thong Kuah

Merge branch 'restrict-migrations' into 'master'

Add `restrict_gitlab_migration` to limit when a given migration is executed in context of decomposed databases

See merge request gitlab-org/gitlab!73756
parents 05425d79 6e59c54e
...@@ -231,12 +231,26 @@ module Gitlab ...@@ -231,12 +231,26 @@ module Gitlab
::ActiveRecord::Base.configurations.configs_for(env_name: Rails.env).map(&:name) ::ActiveRecord::Base.configurations.configs_for(env_name: Rails.env).map(&:name)
end end
# This returns all matching schemas that a given connection can use
# Since the `ActiveRecord::Base` might change the connection (from main to ci)
# This does not look at literal connection names, but rather compares
# models that are holders for a given db_config_name
def self.gitlab_schemas_for_connection(connection)
connection_name = self.db_config_name(connection)
primary_model = self.database_base_models.fetch(connection_name)
self.schemas_to_base_models
.select { |_, models| models.include?(primary_model) }
.keys
.map!(&:to_sym)
end
def self.db_config_for_connection(connection) def self.db_config_for_connection(connection)
return unless connection return unless connection
# The LB connection proxy does not have a direct db_config if connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy)
# that can be referenced return connection.load_balancer.configuration.primary_db_config
return if connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy) end
# During application init we might receive `NullPool` # During application init we might receive `NullPool`
return unless connection.respond_to?(:pool) && return unless connection.respond_to?(:pool) &&
......
...@@ -5,6 +5,8 @@ module Gitlab ...@@ -5,6 +5,8 @@ module Gitlab
module AsyncIndexes module AsyncIndexes
module MigrationHelpers module MigrationHelpers
def unprepare_async_index(table_name, column_name, **options) def unprepare_async_index(table_name, column_name, **options)
Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_ddl_mode!
return unless async_index_creation_available? return unless async_index_creation_available?
index_name = options[:name] || index_name(table_name, column_name) index_name = options[:name] || index_name(table_name, column_name)
...@@ -15,6 +17,8 @@ module Gitlab ...@@ -15,6 +17,8 @@ module Gitlab
end end
def unprepare_async_index_by_name(table_name, index_name, **options) def unprepare_async_index_by_name(table_name, index_name, **options)
Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_ddl_mode!
return unless async_index_creation_available? return unless async_index_creation_available?
PostgresAsyncIndex.find_by(name: index_name).try do |async_index| PostgresAsyncIndex.find_by(name: index_name).try do |async_index|
...@@ -32,6 +36,8 @@ module Gitlab ...@@ -32,6 +36,8 @@ module Gitlab
# If the requested index has already been created, it is not stored in the table for # If the requested index has already been created, it is not stored in the table for
# asynchronous creation. # asynchronous creation.
def prepare_async_index(table_name, column_name, **options) def prepare_async_index(table_name, column_name, **options)
Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_ddl_mode!
return unless async_index_creation_available? return unless async_index_creation_available?
index_name = options[:name] || index_name(table_name, column_name) index_name = options[:name] || index_name(table_name, column_name)
......
...@@ -95,6 +95,10 @@ module Gitlab ...@@ -95,6 +95,10 @@ module Gitlab
def self.tables_to_schema def self.tables_to_schema
@tables_to_schema ||= YAML.load_file(Rails.root.join('lib/gitlab/database/gitlab_schemas.yml')) @tables_to_schema ||= YAML.load_file(Rails.root.join('lib/gitlab/database/gitlab_schemas.yml'))
end end
def self.schema_names
@schema_names ||= self.tables_to_schema.values.to_set
end
end end
end end
end end
...@@ -47,6 +47,8 @@ module Gitlab ...@@ -47,6 +47,8 @@ module Gitlab
# Returns the role (primary/replica) of the database the connection is # Returns the role (primary/replica) of the database the connection is
# connecting to. # connecting to.
def self.db_role_for_connection(connection) def self.db_role_for_connection(connection)
return ROLE_UNKNOWN if connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy)
db_config = Database.db_config_for_connection(connection) db_config = Database.db_config_for_connection(connection)
return ROLE_UNKNOWN unless db_config return ROLE_UNKNOWN unless db_config
......
...@@ -94,6 +94,10 @@ module Gitlab ...@@ -94,6 +94,10 @@ module Gitlab
end end
end end
def primary_db_config
primary_model_or_model_if_enabled.connection_db_config
end
def replica_db_config def replica_db_config
@model.connection_db_config @model.connection_db_config
end end
......
# frozen_string_literal: true
module Gitlab
module Database
module MigrationHelpers
module RestrictGitlabSchema
extend ActiveSupport::Concern
MigrationSkippedError = Class.new(StandardError)
included do
class_attribute :allowed_gitlab_schemas
end
class_methods do
def restrict_gitlab_migration(gitlab_schema:)
unless Gitlab::Database::GitlabSchema.schema_names.include?(gitlab_schema)
raise "Unknown 'gitlab_schema: #{gitlab_schema}' specified. It needs to be one of: " \
"#{Gitlab::Database::GitlabSchema.schema_names.to_a}"
end
self.allowed_gitlab_schemas = [gitlab_schema]
end
end
def migrate(direction)
if unmatched_schemas.any?
# TODO: Today skipping migration would raise an exception.
# Ideally, skipped migration should be ignored (not loaded), or softly ignored.
# Read more in: https://gitlab.com/gitlab-org/gitlab/-/issues/355014
raise MigrationSkippedError, "Current migration is skipped since it modifies "\
"'#{self.class.allowed_gitlab_schemas}' which is outside of '#{allowed_schemas_for_connection}'"
end
Gitlab::Database::QueryAnalyzer.instance.within([validator_class]) do
validator_class.allowed_gitlab_schemas = self.allowed_gitlab_schemas
super
end
end
private
def validator_class
Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas
end
def unmatched_schemas
(self.allowed_gitlab_schemas || []) - allowed_schemas_for_connection
end
def allowed_schemas_for_connection
Gitlab::Database.gitlab_schemas_for_connection(connection)
end
end
end
end
end
...@@ -30,13 +30,17 @@ module Gitlab ...@@ -30,13 +30,17 @@ module Gitlab
end end
end end
def within def within(user_analyzers = nil)
# Due to singleton nature of analyzers # Due to singleton nature of analyzers
# only an outer invocation of the `.within` # only an outer invocation of the `.within`
# is allowed to initialize them # is allowed to initialize them
return yield if already_within? if already_within?
raise 'Query analyzers are already defined, cannot re-define them.' if user_analyzers
begin! return yield
end
begin!(user_analyzers || all_analyzers)
begin begin
yield yield
...@@ -61,21 +65,21 @@ module Gitlab ...@@ -61,21 +65,21 @@ module Gitlab
next if analyzer.suppressed? && !analyzer.requires_tracking?(parsed) next if analyzer.suppressed? && !analyzer.requires_tracking?(parsed)
analyzer.analyze(parsed) analyzer.analyze(parsed)
rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e rescue StandardError, ::Gitlab::Database::QueryAnalyzers::Base::QueryAnalyzerError => e
# We catch all standard errors to prevent validation errors to introduce fatal errors in production # We catch all standard errors to prevent validation errors to introduce fatal errors in production
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end end
end end
# Enable query analyzers # Enable query analyzers
def begin! def begin!(analyzers = all_analyzers)
analyzers = all_analyzers.select do |analyzer| analyzers = analyzers.select do |analyzer|
if analyzer.enabled? if analyzer.enabled?
analyzer.begin! analyzer.begin!
true true
end end
rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e rescue StandardError, ::Gitlab::Database::QueryAnalyzers::Base::QueryAnalyzerError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
false false
...@@ -88,7 +92,7 @@ module Gitlab ...@@ -88,7 +92,7 @@ module Gitlab
def end! def end!
enabled_analyzers.select do |analyzer| enabled_analyzers.select do |analyzer|
analyzer.end! analyzer.end!
rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e rescue StandardError, ::Gitlab::Database::QueryAnalyzers::Base::QueryAnalyzerError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end end
......
# frozen_string_literal: true
module Gitlab
module Database
module QueryAnalyzers
class RestrictAllowedSchemas < Base
UnsupportedSchemaError = Class.new(QueryAnalyzerError)
DDLNotAllowedError = Class.new(UnsupportedSchemaError)
DMLNotAllowedError = Class.new(UnsupportedSchemaError)
DMLAccessDeniedError = Class.new(UnsupportedSchemaError)
IGNORED_SCHEMAS = %i[gitlab_shared].freeze
class << self
def enabled?
true
end
def allowed_gitlab_schemas
self.context[:allowed_gitlab_schemas]
end
def allowed_gitlab_schemas=(value)
self.context[:allowed_gitlab_schemas] = value
end
def analyze(parsed)
# If list of schemas is empty, we allow only DDL changes
if self.dml_mode?
self.restrict_to_dml_only(parsed)
else
self.restrict_to_ddl_only(parsed)
end
end
def require_ddl_mode!(message = "")
return unless self.context
self.raise_dml_not_allowed_error(message) if self.dml_mode?
end
def require_dml_mode!(message = "")
return unless self.context
self.raise_ddl_not_allowed_error(message) if self.ddl_mode?
end
private
def restrict_to_ddl_only(parsed)
tables = self.dml_tables(parsed)
schemas = self.dml_schemas(tables)
if schemas.any?
self.raise_dml_not_allowed_error("Modifying of '#{tables}' (#{schemas.to_a}) with '#{parsed.sql}'")
end
end
def restrict_to_dml_only(parsed)
if parsed.pg.ddl_tables.any?
self.raise_ddl_not_allowed_error("Modifying of '#{parsed.pg.ddl_tables}' with '#{parsed.sql}'")
end
if parsed.pg.ddl_functions.any?
self.raise_ddl_not_allowed_error("Modifying of '#{parsed.pg.ddl_functions}' with '#{parsed.sql}'")
end
tables = self.dml_tables(parsed)
schemas = self.dml_schemas(tables)
if (schemas - self.allowed_gitlab_schemas).any?
raise DMLAccessDeniedError, "Select/DML queries (SELECT/UPDATE/DELETE) do access '#{tables}' (#{schemas.to_a}) " \
"which is outside of list of allowed schemas: '#{self.allowed_gitlab_schemas}'."
end
end
def dml_mode?
self.allowed_gitlab_schemas&.any?
end
def ddl_mode?
!self.dml_mode?
end
def dml_tables(parsed)
parsed.pg.select_tables + parsed.pg.dml_tables
end
def dml_schemas(tables)
extra_schemas = ::Gitlab::Database::GitlabSchema.table_schemas(tables)
extra_schemas.subtract(IGNORED_SCHEMAS)
extra_schemas
end
def raise_dml_not_allowed_error(message)
raise DMLNotAllowedError, "Select/DML queries (SELECT/UPDATE/DELETE) are disallowed in the DDL (structure) mode. #{message}"
end
def raise_ddl_not_allowed_error(message)
raise DDLNotAllowedError, "DDL queries (structure) are disallowed in the Select/DML (SELECT/UPDATE/DELETE) mode. #{message}"
end
end
end
end
end
end
...@@ -92,6 +92,18 @@ RSpec.describe Gitlab::Database::LoadBalancing do ...@@ -92,6 +92,18 @@ RSpec.describe Gitlab::Database::LoadBalancing do
end end
end end
context 'when an invalid connection is used' do
it 'returns :unknown' do
expect(described_class.db_role_for_connection(:invalid)).to eq(:unknown)
end
end
context 'when a null connection is used' do
it 'returns :unknown' do
expect(described_class.db_role_for_connection(nil)).to eq(:unknown)
end
end
context 'when a read connection is used' do context 'when a read connection is used' do
it 'returns :replica' do it 'returns :replica' do
load_balancer.read do |connection| load_balancer.read do |connection|
......
...@@ -4,6 +4,7 @@ require 'spec_helper' ...@@ -4,6 +4,7 @@ require 'spec_helper'
RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do
let(:analyzer) { double(:query_analyzer) } let(:analyzer) { double(:query_analyzer) }
let(:user_analyzer) { double(:query_analyzer) }
let(:disabled_analyzer) { double(:disabled_query_analyzer) } let(:disabled_analyzer) { double(:disabled_query_analyzer) }
before do before do
...@@ -53,6 +54,10 @@ RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do ...@@ -53,6 +54,10 @@ RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do
expect { |b| described_class.instance.within(&b) }.to yield_control expect { |b| described_class.instance.within(&b) }.to yield_control
end end
it 'raises exception when trying to re-define analyzers' do
expect { |b| described_class.instance.within([user_analyzer], &b) }.to raise_error /Query analyzers are already defined, cannot re-define them/
end
end end
context 'when initializer is enabled' do context 'when initializer is enabled' do
...@@ -75,6 +80,18 @@ RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do ...@@ -75,6 +80,18 @@ RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do
expect { |b| described_class.instance.within(&b) }.to yield_control expect { |b| described_class.instance.within(&b) }.to yield_control
end end
end end
context 'when user analyzers are used' do
it 'calls begin! and end!' do
expect(analyzer).not_to receive(:begin!)
allow(user_analyzer).to receive(:enabled?).and_return(true)
allow(user_analyzer).to receive(:suppressed?).and_return(false)
expect(user_analyzer).to receive(:begin!)
expect(user_analyzer).to receive(:end!)
expect { |b| described_class.instance.within([user_analyzer], &b) }.to yield_control
end
end
end end
describe '#process_sql' do describe '#process_sql' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas, query_analyzers: false do
let(:analyzer) { described_class }
context 'properly analyzes queries' do
using RSpec::Parameterized::TableSyntax
where do
examples = {
"for SELECT on projects" => {
sql: "SELECT 1 FROM projects",
expected_allowed_gitlab_schemas: {
no_schema: :dml_not_allowed,
gitlab_main: :success,
gitlab_ci: :dml_access_denied # cross-schema access
}
},
"for INSERT" => {
sql: "INSERT INTO projects VALUES (1)",
expected_allowed_gitlab_schemas: {
no_schema: :dml_not_allowed,
gitlab_main: :success,
gitlab_ci: :dml_access_denied # cross-schema access
}
},
"for CREATE INDEX" => {
sql: "CREATE INDEX index_projects_on_hidden ON projects (hidden)",
expected_allowed_gitlab_schemas: {
no_schema: :success,
gitlab_main: :ddl_not_allowed,
gitlab_ci: :ddl_not_allowed
}
},
"for CREATE SCHEMA" => {
sql: "CREATE SCHEMA __test_schema",
expected_allowed_gitlab_schemas: {
no_schema: :success,
# TODO: This is currently not properly detected
gitlab_main: :success,
gitlab_ci: :success
}
},
"for CREATE FUNCTION" => {
sql: "CREATE FUNCTION add(integer, integer) RETURNS integer AS 'select $1 + $2;' LANGUAGE SQL",
expected_allowed_gitlab_schemas: {
no_schema: :success,
gitlab_main: :ddl_not_allowed,
gitlab_ci: :ddl_not_allowed
}
},
"for CREATE TRIGGER" => {
sql: "CREATE TRIGGER check_projects BEFORE UPDATE ON projects FOR EACH ROW EXECUTE PROCEDURE check_projects_update()",
expected_allowed_gitlab_schemas: {
no_schema: :success,
gitlab_main: :ddl_not_allowed,
gitlab_ci: :ddl_not_allowed
}
}
}
# Expands all examples into individual tests
examples.flat_map do |name, configuration|
configuration[:expected_allowed_gitlab_schemas].map do |allowed_gitlab_schema, expectation|
[
"#{name} for allowed_gitlab_schema=#{allowed_gitlab_schema}",
{
sql: configuration[:sql],
allowed_gitlab_schema: allowed_gitlab_schema, # nil, gitlab_main
expectation: expectation # success, dml_access_denied, ...
}
]
end
end.to_h
end
with_them do
subject do
process_sql(sql) do
analyzer.allowed_gitlab_schemas = [allowed_gitlab_schema] unless allowed_gitlab_schema == :no_schema
end
end
it do
case expectation
when :success
expect { subject }.not_to raise_error
when :ddl_not_allowed
expect { subject }.to raise_error(described_class::DDLNotAllowedError)
when :dml_not_allowed
expect { subject }.to raise_error(described_class::DMLNotAllowedError)
when :dml_access_denied
expect { subject }.to raise_error(described_class::DMLAccessDeniedError)
else
raise "invalid expectation: #{expectation}"
end
end
end
end
describe '.require_ddl_mode!' do
subject { described_class.require_ddl_mode! }
it "when not configured does not raise exception" do
expect { subject }.not_to raise_error
end
it "when no schemas are configured does not raise exception (DDL mode)" do
with_analyzer do
expect { subject }.not_to raise_error
end
end
it "with schemas configured does raise exception (DML mode)" do
with_analyzer do
analyzer.allowed_gitlab_schemas = %i[gitlab_main]
expect { subject }.to raise_error(described_class::DMLNotAllowedError)
end
end
end
describe '.require_dml_mode!' do
subject { described_class.require_dml_mode! }
it "when not configured does not raise exception" do
expect { subject }.not_to raise_error
end
it "when no schemas are configured does raise exception (DDL mode)" do
with_analyzer do
expect { subject }.to raise_error(described_class::DDLNotAllowedError)
end
end
it "with schemas configured does raise exception (DML mode)" do
with_analyzer do
analyzer.allowed_gitlab_schemas = %i[gitlab_main]
expect { subject }.not_to raise_error
end
end
end
def with_analyzer
Gitlab::Database::QueryAnalyzer.instance.within([analyzer]) do
yield
end
end
def process_sql(sql, model = ActiveRecord::Base)
with_analyzer do
yield if block_given?
# Skip load balancer and retrieve connection assigned to model
Gitlab::Database::QueryAnalyzer.instance.process_sql(sql, model.retrieve_connection)
end
end
end
...@@ -205,12 +205,12 @@ RSpec.describe Gitlab::Database do ...@@ -205,12 +205,12 @@ RSpec.describe Gitlab::Database do
end end
context 'when the connection is LoadBalancing::ConnectionProxy' do context 'when the connection is LoadBalancing::ConnectionProxy' do
it 'returns nil' do it 'returns primary_db_config' do
lb_config = ::Gitlab::Database::LoadBalancing::Configuration.new(ActiveRecord::Base) lb_config = ::Gitlab::Database::LoadBalancing::Configuration.new(ActiveRecord::Base)
lb = ::Gitlab::Database::LoadBalancing::LoadBalancer.new(lb_config) lb = ::Gitlab::Database::LoadBalancing::LoadBalancer.new(lb_config)
proxy = ::Gitlab::Database::LoadBalancing::ConnectionProxy.new(lb) proxy = ::Gitlab::Database::LoadBalancing::ConnectionProxy.new(lb)
expect(described_class.db_config_for_connection(proxy)).to be_nil expect(described_class.db_config_for_connection(proxy)).to eq(lb_config.primary_db_config)
end end
end end
...@@ -229,7 +229,7 @@ RSpec.describe Gitlab::Database do ...@@ -229,7 +229,7 @@ RSpec.describe Gitlab::Database do
# This is a ConnectionProxy # This is a ConnectionProxy
expect(described_class.db_config_name(model.connection)) expect(described_class.db_config_name(model.connection))
.to eq('unknown') .to eq('main')
# This is an actual connection # This is an actual connection
expect(described_class.db_config_name(model.retrieve_connection)) expect(described_class.db_config_name(model.retrieve_connection))
...@@ -245,6 +245,31 @@ RSpec.describe Gitlab::Database do ...@@ -245,6 +245,31 @@ RSpec.describe Gitlab::Database do
end end
end end
describe '.gitlab_schemas_for_connection' do
it 'does raise exception for invalid connection' do
expect { described_class.gitlab_schemas_for_connection(:invalid) }.to raise_error /key not found: "unknown"/
end
it 'does return a valid schema depending on a base model used', :request_store do
# This is currently required as otherwise the `Ci::Build.connection` == `Project.connection`
# ENV due to lib/gitlab/database/load_balancing/setup.rb:93
stub_env('GITLAB_USE_MODEL_LOAD_BALANCING', '1')
# FF due to lib/gitlab/database/load_balancing/configuration.rb:92
stub_feature_flags(force_no_sharing_primary_model: true)
expect(described_class.gitlab_schemas_for_connection(Project.connection)).to include(:gitlab_main, :gitlab_shared)
expect(described_class.gitlab_schemas_for_connection(Ci::Build.connection)).to include(:gitlab_ci, :gitlab_shared)
end
it 'does return gitlab_ci when a ActiveRecord::Base is using CI connection' do
with_reestablished_active_record_base do
reconfigure_db_connection(model: ActiveRecord::Base, config_model: Ci::Build)
expect(described_class.gitlab_schemas_for_connection(ActiveRecord::Base.connection)).to include(:gitlab_ci, :gitlab_shared)
end
end
end
describe '#true_value' do describe '#true_value' do
it 'returns correct value' do it 'returns correct value' do
expect(described_class.true_value).to eq "'t'" expect(described_class.true_value).to eq "'t'"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment