Commit 79afc9a0 authored by Yorick Peterse's avatar Yorick Peterse

Refactor Gitlab::Database to support multiple DBs

This refactors the module Gitlab::Database so it supports multiple
databases. Logic tied to a connection is now scoped to instances of
Gitlab::Database::Connection. For every database, an instance of this
class is created and stored in Gitlab::Database::DATABASES.

The module Gitlab::Database still exposes various methods that forward
the call to a corresponding instance of Database::Connection. These
wrappers and their use will be changed in separate commits, as otherwise
the diffs become far too large for anybody to make sense of.

See https://gitlab.com/gitlab-org/gitlab/-/issues/331776 for more
information.
parent 208e818a
......@@ -23,8 +23,9 @@ end
db_config = Gitlab::Database.config ||
Rails.application.config.database_configuration[Rails.env]
db_config['pool'] = Gitlab::Database.default_pool_size
ActiveRecord::Base.establish_connection(db_config)
ActiveRecord::Base.establish_connection(
db_config.merge(pool: Gitlab::Database.default_pool_size)
)
Gitlab.ee do
if Gitlab::Runtime.sidekiq? && Gitlab::Geo.geo_database_configured?
......
......@@ -18,7 +18,7 @@ module Geo
def perform
return if Gitlab::Database.read_only?
return unless Gitlab::Database.healthy?
return unless Gitlab::Database.main.healthy?
unless ::GeoNode.secondary_nodes.any?
Geo::PruneEventLogService.new(:all).execute
......
......@@ -182,7 +182,7 @@ module Geo
def update_pending_resources
if reload_queue?
@pending_resources = Gitlab::Database.geo_uncached_queries { load_pending_resources }
@pending_resources = Gitlab::Database.main.geo_uncached_queries { load_pending_resources }
set_backoff_time! if should_apply_backoff?
end
end
......
......@@ -3,9 +3,8 @@
module EE
module Gitlab
module Database
extend ActiveSupport::Concern
class_methods do
module Connection
extend ActiveSupport::Concern
extend ::Gitlab::Utils::Override
override :read_only?
......@@ -20,7 +19,7 @@ module EE
def geo_uncached_queries(&block)
raise 'No block given' unless block_given?
ActiveRecord::Base.uncached do
scope.uncached do
if ::Gitlab::Geo.secondary?
Geo::TrackingBase.uncached(&block)
else
......
......@@ -2,10 +2,12 @@
require 'spec_helper'
RSpec.describe Gitlab::Database do
RSpec.describe Gitlab::Database::Connection do
include ::EE::GeoHelpers
describe '.read_only?' do
let(:connection) { described_class.new }
describe '#read_only?' do
context 'with Geo enabled' do
before do
allow(Gitlab::Geo).to receive(:enabled?) { true }
......@@ -16,7 +18,7 @@ RSpec.describe Gitlab::Database do
let(:geo_node) { create(:geo_node) }
it 'returns true' do
expect(described_class.read_only?).to be_truthy
expect(connection.read_only?).to be_truthy
end
end
......@@ -24,14 +26,14 @@ RSpec.describe Gitlab::Database do
let(:geo_node) { create(:geo_node, :primary) }
it 'returns false when is Geo primary node' do
expect(described_class.read_only?).to be_falsey
expect(connection.read_only?).to be_falsey
end
end
end
context 'with Geo disabled' do
it 'returns false' do
expect(described_class.read_only?).to be_falsey
expect(connection.read_only?).to be_falsey
end
end
......@@ -41,30 +43,30 @@ RSpec.describe Gitlab::Database do
end
it 'returns true' do
expect(described_class.read_only?).to be_truthy
expect(connection.read_only?).to be_truthy
end
end
end
describe '.healthy?' do
describe '#healthy?' do
it 'returns true when replication lag is not too great' do
allow(Postgresql::ReplicationSlot).to receive(:lag_too_great?).and_return(false)
expect(described_class.healthy?).to be_truthy
expect(connection.healthy?).to be_truthy
end
it 'returns false when replication lag is too great' do
allow(Postgresql::ReplicationSlot).to receive(:lag_too_great?).and_return(true)
expect(described_class.healthy?).to be_falsey
expect(connection.healthy?).to be_falsey
end
end
describe '.geo_uncached_queries' do
describe '#geo_uncached_queries' do
context 'when no block is given' do
it 'raises error' do
expect do
described_class.geo_uncached_queries
connection.geo_uncached_queries
end.to raise_error('No block given')
end
end
......@@ -79,7 +81,7 @@ RSpec.describe Gitlab::Database do
expect(ActiveRecord::Base).to receive(:uncached).and_call_original
expect do |b|
described_class.geo_uncached_queries(&b)
connection.geo_uncached_queries(&b)
end.to yield_control
end
end
......@@ -95,7 +97,7 @@ RSpec.describe Gitlab::Database do
expect(ActiveRecord::Base).to receive(:uncached).and_call_original
expect do |b|
described_class.geo_uncached_queries(&b)
connection.geo_uncached_queries(&b)
end.to yield_control
end
end
......@@ -106,7 +108,7 @@ RSpec.describe Gitlab::Database do
expect(ActiveRecord::Base).to receive(:uncached).and_call_original
expect do |b|
described_class.geo_uncached_queries(&b)
connection.geo_uncached_queries(&b)
end.to yield_control
end
end
......
......@@ -840,7 +840,7 @@ RSpec.describe Group do
context 'in read-only mode' do
before do
allow(Gitlab::Database).to receive(:read_only?).and_return(true)
allow(Gitlab::Database.main).to receive(:read_only?).and_return(true)
allow(group).to receive(:create_or_update).and_raise(ActiveRecord::ReadOnlyRecord)
end
......
......@@ -287,7 +287,7 @@ RSpec.describe User do
end
it 'does not clear remember_created_at when in a GitLab read-only instance' do
allow(Gitlab::Database).to receive(:read_only?) { true }
allow(Gitlab::Database.main).to receive(:read_only?) { true }
expect { subject.forget_me! }.not_to change(subject, :remember_created_at)
end
......@@ -303,7 +303,7 @@ RSpec.describe User do
end
it 'does not update remember_created_at when in a Geo read-only instance' do
allow(Gitlab::Database).to receive(:read_only?) { true }
allow(Gitlab::Database.main).to receive(:read_only?) { true }
expect { subject.remember_me! }.not_to change(subject, :remember_created_at)
end
......
......@@ -336,7 +336,7 @@ RSpec.describe AuditEventService, :request_store do
context 'on a read-only instance' do
before do
allow(Gitlab::Database).to receive(:read_only?).and_return(true)
allow(Gitlab::Database.main).to receive(:read_only?).and_return(true)
end
it 'does not create an event record in the database' do
......
......@@ -29,7 +29,7 @@ RSpec.describe Geo::PruneEventLogWorker, :geo do
end
it 'does nothing when database is not feeling healthy' do
allow(EE::Gitlab::Database).to receive(:healthy?).and_return(false)
allow(Gitlab::Database.main).to receive(:healthy?).and_return(false)
expect(Geo::PruneEventLogService).not_to receive(:new)
......
This diff is collapsed.
# frozen_string_literal: true
module Gitlab
module Database
# Configuration settings and methods for interacting with a PostgreSQL
# database, with support for multiple databases.
class Connection
DEFAULT_POOL_HEADROOM = 10
attr_reader :scope
# Builds a new `Connection`.
#
# scope - The object used to retrieve connections and connection pools, such
#         as `ActiveRecord::Base` (the default).
def initialize(scope = ActiveRecord::Base)
  @scope = scope

  # Nothing has been looked up yet.
  @config = nil
  @version = nil

  # Start from zero until a baseline is recorded explicitly.
  @open_transactions_baseline = 0
end
# The pool size is derived automatically from the configured concurrency
# (`Gitlab::Runtime.max_threads`) plus some headroom, so that we don't run
# out of connections when threads other than the 'user-facing' ones are
# running.
#
# The headroom is read from the DB_POOL_HEADROOM environment variable and
# falls back to DEFAULT_POOL_HEADROOM when that variable is not set or blank.
#
# Read more about this in
# doc/development/database/client_side_connection_pool.md
def default_pool_size
  custom_headroom = ENV["DB_POOL_HEADROOM"].presence
  headroom = (custom_headroom || DEFAULT_POOL_HEADROOM).to_i

  Gitlab::Runtime.max_threads + headroom
end
# Returns the configuration of the connection exposed by `scope`, as a Hash
# with indifferent access.
#
# The result is deliberately not memoized. The connection can be
# re-established with different settings (for example by
# `disable_prepared_statements`), and callers expect to see those changes.
# A cached copy would keep returning the settings that were in place when
# the method was first called.
def config
  scope.connection_db_config.configuration_hash.with_indifferent_access
end
# Returns the pool size from `config`, or `default_pool_size` when the
# configuration does not specify one.
def pool_size
  configured = config[:pool]
  return configured if configured

  default_pool_size
end
# Returns the username from `config`, falling back to the USER environment
# variable when the configuration does not specify one.
def username
  configured = config[:username]
  return configured if configured

  ENV['USER']
end
# Returns the value stored under the `:database` key of `config`.
def database_name
config[:database]
end
# Returns the value stored under the `:adapter` key of `config`.
def adapter_name
config[:adapter]
end
# Returns a human readable name for the adapter: 'PostgreSQL' when
# `postgresql?` is true, 'Unknown' otherwise.
def human_adapter_name
  postgresql? ? 'PostgreSQL' : 'Unknown'
end
# Returns true when the configured adapter is PostgreSQL, compared
# case-insensitively.
#
# The adapter name is converted to a String first so that a configuration
# without an adapter yields false instead of raising NoMethodError. This
# lets `human_adapter_name` report 'Unknown' in that case.
def postgresql?
  adapter_name.to_s.casecmp('postgresql') == 0
end
# Disables prepared statements for the current database connection, by
# re-establishing the connection of `scope` using the current configuration
# with `prepared_statements` set to false.
def disable_prepared_statements
  settings = config.merge(prepared_statements: false)

  scope.establish_connection(settings)
end
# Always returns false in this class.
#
# NOTE(review): the module prepended at the bottom of this file presumably
# overrides this method (e.g. for Geo) - confirm against that module.
def read_only?
false
end
# Returns the negation of `read_only?`.
def read_write?
!read_only?
end
# Check whether the underlying database is in read-only mode.
#
# This runs `SELECT pg_is_in_recovery()` on the connection of `scope` and
# converts the returned value using `Gitlab::Utils.to_boolean`.
def db_read_only?
  result = scope.connection.execute('SELECT pg_is_in_recovery()')
  in_recovery = result.first.fetch('pg_is_in_recovery')

  Gitlab::Utils.to_boolean(in_recovery)
end
# Returns the negation of `db_read_only?`.
def db_read_write?
!db_read_only?
end
# Returns the version number extracted from `database_version`: the first
# whitespace-delimited token, after an optional leading "PostgreSQL ".
#
# The result is cached in @version once it has been determined.
def version
  return @version if @version

  parsed = database_version.match(/\A(?:PostgreSQL |)([^\s]+).*\z/)
  @version = parsed[1]
end
# Returns the `version` column of the first row produced by running
# `SELECT VERSION()` on the connection.
def database_version
  first_row = connection.execute("SELECT VERSION()").first

  first_row['version']
end
# Returns true when `version`, converted to a Float, is greater than or
# equal to MINIMUM_POSTGRES_VERSION.
#
# NOTE(review): MINIMUM_POSTGRES_VERSION is not defined in this class;
# presumably it is resolved from the enclosing Gitlab::Database module -
# confirm.
def postgresql_minimum_supported_version?
version.to_f >= MINIMUM_POSTGRES_VERSION
end
# Bulk inserts a number of rows into a table, optionally returning their
# IDs.
#
# table - The name of the table to insert the rows into. It is interpolated
#         into the SQL as-is.
# rows - An Array of Hash instances, each mapping the columns to their
#        values. The keys of the first row determine the columns used for
#        every row.
# return_ids - When set to true the return value will be an Array of IDs of
#              the inserted rows
# disable_quote - A key or an Array of keys to exclude from quoting (You
#                 become responsible for protection from SQL injection for
#                 these keys!)
# on_conflict - Defines an upsert. Pass :do_nothing to append
#               `ON CONFLICT DO NOTHING`; any other value (nil by default)
#               leaves the statement untouched.
#
# Returns nil without running a query when `rows` is empty. Otherwise returns
# the Array of inserted IDs when `return_ids` is true, or an empty Array.
def bulk_insert(table, rows, return_ids: false, disable_quote: [], on_conflict: nil)
  return if rows.empty?

  column_keys = rows.first.keys
  quoted_columns = column_keys.map { |key| connection.quote_column_name(key) }
  unquoted_keys = Array(disable_quote).to_set

  # One "(v1, v2, ...)" group per row, quoting every value except those whose
  # key was explicitly excluded.
  value_groups = rows.map do |row|
    values = column_keys.map do |key|
      unquoted_keys.include?(key) ? row[key] : connection.quote(row[key])
    end

    "(#{values.join(', ')})"
  end

  sql = <<~EOF
    INSERT INTO #{table} (#{quoted_columns.join(', ')})
    VALUES #{value_groups.join(', ')}
  EOF

  sql = "#{sql} ON CONFLICT DO NOTHING" if on_conflict == :do_nothing
  sql = "#{sql} RETURNING id" if return_ids

  result = connection.execute(sql)
  return [] unless return_ids

  result.values.map { |tuple| tuple[0].to_i }
end
# Establishes a connection on a new
# `ActiveRecord::ConnectionAdapters::ConnectionHandler`, using the current
# configuration with the overrides below applied, and returns the result.
#
# pool_size - The size of the DB pool.
# host - An optional host name to use instead of the default one.
# port - An optional port to connect to.
def create_connection_pool(pool_size, host = nil, port = nil)
  overrides = { pool: pool_size }
  overrides[:host] = host if host
  overrides[:port] = port if port

  pool_config = config.merge(overrides)

  ActiveRecord::ConnectionAdapters::ConnectionHandler
    .new
    .establish_connection(pool_config)
end
# Creates a connection pool of the given size using
# `create_connection_pool`, yields it to the block, and disconnects it once
# the block is done, even when the block raises.
#
# Returns the value of the block.
def with_connection_pool(pool_size)
  temporary_pool = create_connection_pool(pool_size)

  begin
    yield temporary_pool
  ensure
    temporary_pool.disconnect!
  end
end
# Returns true when the connection's schema cache lists the given column
# for the given table. The column name is converted to a String before the
# lookup.
def cached_column_exists?(table_name, column_name)
  columns = connection.schema_cache.columns_hash(table_name)

  columns.key?(column_name.to_s)
end
# Returns true when the database can be reached (see `exists?`) and the
# connection's schema cache reports the given table as an existing data
# source. The schema cache is not consulted when `exists?` is false.
def cached_table_exists?(table_name)
  return false unless exists?

  connection.schema_cache.data_source_exists?(table_name)
end
# Returns true when a connection can be obtained, and false when obtaining
# it raises a StandardError.
def exists?
  begin
    connection
  rescue StandardError
    return false
  end

  true
end
# Returns the `system_identifier` column of the first row produced by
# querying `pg_control_system()` on the connection.
def system_id
  connection
    .execute('SELECT system_identifier FROM pg_control_system()')
    .first['system_identifier']
end
# Queries the given connection for a WAL location.
#
# The USE_NEW_LOAD_BALANCER_QUERY environment variable (treated as true when
# unset) selects the query to run: the newer query picks a value depending on
# `pg_is_in_recovery()`, while the older one always reads
# `pg_current_wal_insert_lsn()`.
#
# @param [ActiveRecord::Connection] ar_connection
# @return [String, nil] the `location` column of the first row, or nil when
#   the query produced no rows
def get_write_location(ar_connection)
  new_query_enabled =
    Gitlab::Utils.to_boolean(ENV['USE_NEW_LOAD_BALANCER_QUERY'], default: true)

  sql = <<~SQL
    SELECT pg_current_wal_insert_lsn()::text AS location
  SQL

  if new_query_enabled
    sql = <<~NEWSQL
      SELECT CASE
      WHEN pg_is_in_recovery() = true AND EXISTS (SELECT 1 FROM pg_stat_get_wal_senders())
      THEN pg_last_wal_replay_lsn()::text
      WHEN pg_is_in_recovery() = false
      THEN pg_current_wal_insert_lsn()::text
      ELSE NULL
      END AS location;
    NEWSQL
  end

  location_row = ar_connection.select_all(sql).first
  return unless location_row

  location_row['location']
end
# Returns true if the caller is running within a transaction.
#
# In the test environment tests may themselves be wrapped in transactions, so
# there the number of open transactions is compared against the recorded
# baseline (@open_transactions_baseline) instead of zero.
def inside_transaction?
  baseline = 0
  baseline = @open_transactions_baseline if Rails.env.test?

  scope.connection.open_transactions > baseline
end
# These methods that access @open_transactions_baseline are not
# thread-safe. These are fine though because we only call these in
# RSpec's main thread. If we decide to run specs multi-threaded, we would
# need to use something like ThreadGroup to keep track of this value
#
# Records the number of transactions currently open on the connection of
# `scope` as the baseline that `inside_transaction?` compares against in the
# test environment.
def set_open_transactions_baseline
@open_transactions_baseline = scope.connection.open_transactions
end
# Resets the open transactions baseline back to zero.
def reset_open_transactions_baseline
@open_transactions_baseline = 0
end
# Returns the connection provided by `scope`.
def connection
scope.connection
end
end
end
end
# NOTE(review): presumably prepends the EE counterpart of this class
# (EE::Gitlab::Database::Connection) when it is available, allowing it to
# override methods such as `read_only?` - confirm against `prepend_mod_with`.
Gitlab::Database::Connection.prepend_mod_with('Gitlab::Database::Connection')
......@@ -79,7 +79,7 @@ module Gitlab
end
def self.pool_size
Gitlab::Database.config[:pool]
Gitlab::Database.main.pool_size
end
# Returns true if load balancing is to be enabled.
......
......@@ -84,10 +84,6 @@ function rspec_simple_job() {
function rspec_db_library_code() {
local db_files="spec/lib/gitlab/database/ spec/support/helpers/database/"
if [[ -d "ee/" ]]; then
db_files="${db_files} ee/spec/lib/ee/gitlab/database_spec.rb"
fi
rspec_simple_job "-- ${db_files}"
}
......
......@@ -21,37 +21,23 @@ RSpec.describe 'Database config initializer' do
let(:max_threads) { 8 }
context "no existing pool size is set" do
before do
stub_database_config(pool_size: nil)
end
it "sets it based on the max number of worker threads" do
expect { subject }.to change { Gitlab::Database.config['pool'] }.from(nil).to(18)
expect(ActiveRecord::Base.connection_db_config.pool).to eq(18)
end
end
context "the existing pool size is smaller than the max number of worker threads" do
before do
stub_database_config(pool_size: 1)
end
context 'when no custom headroom is specified' do
it 'sets the pool size based on the number of worker threads' do
old = ActiveRecord::Base.connection_db_config.pool
it "sets it based on the max number of worker threads" do
expect { subject }.to change { Gitlab::Database.config['pool'] }.from(1).to(18)
expect(old).not_to eq(18)
expect(ActiveRecord::Base.connection_db_config.pool).to eq(18)
expect { subject }
.to change { ActiveRecord::Base.connection_db_config.pool }
.from(old)
.to(18)
end
end
context "and the existing pool size is larger than the max number of worker threads" do
before do
stub_database_config(pool_size: 100)
end
it 'overwrites custom pool settings' do
config = Gitlab::Database.config.merge(pool: 42)
it "sets it based on the max number of worker threads" do
expect { subject }.to change { Gitlab::Database.config['pool'] }.from(100).to(18)
allow(Gitlab::Database.main).to receive(:config).and_return(config)
subject
expect(ActiveRecord::Base.connection_db_config.pool).to eq(18)
end
......@@ -61,25 +47,16 @@ RSpec.describe 'Database config initializer' do
let(:headroom) { 15 }
before do
stub_database_config(pool_size: 1)
stub_env("DB_POOL_HEADROOM", headroom)
end
it "adds headroom on top of the calculated size" do
expect { subject }.to change { Gitlab::Database.config['pool'] }
.from(1)
.to(max_threads + headroom)
old = ActiveRecord::Base.connection_db_config.pool
expect(ActiveRecord::Base.connection_db_config.pool).to eq(max_threads + headroom)
expect { subject }
.to change { ActiveRecord::Base.connection_db_config.pool }
.from(old)
.to(23)
end
end
def stub_database_config(pool_size:)
original_config = Gitlab::Database.config
config = original_config.dup
config['pool'] = pool_size
allow(Gitlab::Database).to receive(:config).and_return(config)
end
end
......@@ -844,7 +844,7 @@ RSpec.describe Gitlab::Auth, :use_clean_rails_memory_store_caching do
context 'when the database is read-only' do
before do
allow(Gitlab::Database).to receive(:read_only?).and_return(true)
allow(Gitlab::Database.main).to receive(:read_only?).and_return(true)
end
it 'does not increment failed_attempts when true and password is incorrect' do
......
This diff is collapsed.
......@@ -42,7 +42,7 @@ RSpec.describe Gitlab::Database::LoadBalancing do
original_db_config = Gitlab::Database.config
modified_db_config = original_db_config.merge(load_balancing: lb_config)
expect(Gitlab::Database).to receive(:config).and_return(modified_db_config)
expect(Gitlab::Database.main).to receive(:config).and_return(modified_db_config)
expect(described_class.configuration).to eq(lb_config)
end
......@@ -401,7 +401,7 @@ RSpec.describe Gitlab::Database::LoadBalancing do
original_db_config = Gitlab::Database.config
modified_db_config = original_db_config.merge(load_balancing: { hosts: hosts })
allow(Gitlab::Database).to receive(:config).and_return(modified_db_config)
allow(Gitlab::Database.main).to receive(:config).and_return(modified_db_config)
::Gitlab::Database::LoadBalancing::Session.clear_session
end
......
This diff is collapsed.
......@@ -46,7 +46,7 @@ RSpec.describe Packages::CreateEventService do
context 'on a read-only instance' do
before do
allow(Gitlab::Database).to receive(:read_only?).and_return(true)
allow(Gitlab::Database.main).to receive(:read_only?).and_return(true)
end
it 'does not create an event' do
......
......@@ -12,7 +12,7 @@ RSpec.shared_examples 'boards recent visit create service' do
end
it 'returns nil when database is read only' do
allow(Gitlab::Database).to receive(:read_only?) { true }
allow(Gitlab::Database.main).to receive(:read_only?) { true }
expect(service.execute(board)).to be_nil
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment