Commit f4177add authored by Krasimir Angelov's avatar Krasimir Angelov

Update gitlab:db:reindex to work with multiple databases

* Iterate over all configured databses / connections
* Convert used models to inherit from `SharedModel`
* Inject and use the correct connection

https://gitlab.com/gitlab-org/gitlab/-/issues/340832
parent dfa79015
......@@ -2450,8 +2450,6 @@ Database/MultipleDatabases:
- 'lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb'
- 'lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb'
- 'lib/gitlab/database.rb'
- 'lib/gitlab/database/reindexing/concurrent_reindex.rb'
- 'lib/gitlab/database/reindexing/reindex_concurrently.rb'
- 'lib/gitlab/database/schema_cache_with_renamed_table.rb'
- 'lib/gitlab/database/schema_migrations/context.rb'
- 'lib/gitlab/database/schema_version_files.rb'
......
......@@ -42,7 +42,7 @@ class PrepareCiBuildsMetadataAndCiBuildAsyncIndexes < ActiveRecord::Migration[6.
return if index_name_exists?(table_name, index_name)
async_index = Gitlab::Database::AsyncIndexes::PostgresAsyncIndex.safe_find_or_create_by!(name: index_name) do |rec|
async_index = Gitlab::Database::AsyncIndexes::PostgresAsyncIndex.find_or_create_by!(name: index_name) do |rec|
rec.table_name = table_name
rec.definition = definition
end
......
......@@ -40,7 +40,7 @@ module Gitlab
end
def connection
@connection ||= ApplicationRecord.connection
@connection ||= async_index.connection
end
def lease_timeout
......
......@@ -3,7 +3,7 @@
module Gitlab
module Database
module AsyncIndexes
class PostgresAsyncIndex < ApplicationRecord
class PostgresAsyncIndex < SharedModel
self.table_name = 'postgres_async_indexes'
MAX_IDENTIFIER_LENGTH = Gitlab::Database::MigrationHelpers::MAX_IDENTIFIER_NAME_LENGTH
......
......@@ -2,7 +2,7 @@
module Gitlab
module Database
class PostgresIndex < ActiveRecord::Base
class PostgresIndex < SharedModel
include Gitlab::Utils::StrongMemoize
self.table_name = 'postgres_indexes'
......
......@@ -6,7 +6,7 @@ module Gitlab
# for all indexes can be expensive in a large database.
#
# Best used on a per-index basis.
class PostgresIndexBloatEstimate < ActiveRecord::Base
class PostgresIndexBloatEstimate < SharedModel
self.table_name = 'postgres_index_bloat_estimates'
self.primary_key = 'identifier'
......
......@@ -27,13 +27,14 @@ module Gitlab
Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity")
retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
connection: index.connection,
timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
klass: self.class,
logger: Gitlab::AppLogger
)
retries.run(raise_on_exhaustion: false) do
ApplicationRecord.connection.tap do |conn|
index.connection.tap do |conn|
conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}")
end
end
......
......@@ -3,7 +3,7 @@
module Gitlab
module Database
module Reindexing
class ReindexAction < ActiveRecord::Base
class ReindexAction < SharedModel
self.table_name = 'postgres_reindex_actions'
belongs_to :index, foreign_key: :index_identifier, class_name: 'Gitlab::Database::PostgresIndex'
......
......@@ -99,6 +99,7 @@ module Gitlab
logger.info("Removing dangling index #{index.identifier}")
retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
connection: connection,
timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
klass: self.class,
logger: logger
......@@ -109,11 +110,6 @@ module Gitlab
end
end
def with_lock_retries(&block)
arguments = { klass: self.class, logger: logger }
Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
end
def set_statement_timeout
execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
yield
......@@ -123,7 +119,7 @@ module Gitlab
delegate :execute, :quote_table_name, to: :connection
def connection
@connection ||= ActiveRecord::Base.connection
@connection ||= index.connection
end
end
end
......
......@@ -161,31 +161,33 @@ namespace :gitlab do
end
desc 'reindex a regular index without downtime to eliminate bloat'
task :reindex, [:index_name] => :environment do |_, args|
task :reindex, [:index_name, :database] => :environment do |_, args|
unless Feature.enabled?(:database_reindexing, type: :ops)
puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
exit
end
indexes = Gitlab::Database::PostgresIndex.reindexing_support
Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name|
indexes = Gitlab::Database::PostgresIndex.reindexing_support
if identifier = args[:index_name]
raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
if (identifier = args[:index_name]) && (args.fetch(:database, 'main') == connection_name)
raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
indexes = indexes.where(identifier: identifier)
indexes = indexes.where(identifier: identifier)
raise "Index not found or not supported: #{args[:index_name]}" if indexes.empty?
end
raise "Index #{args[:index_name]} for #{connection_name} database not found or not supported" if indexes.empty?
end
ActiveRecord::Base.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
Gitlab::Database::SharedModel.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
# Cleanup leftover temporary indexes from previous, possibly aborted runs (if any)
Gitlab::Database::Reindexing.cleanup_leftovers!
# Cleanup leftover temporary indexes from previous, possibly aborted runs (if any)
Gitlab::Database::Reindexing.cleanup_leftovers!
# Hack: Before we do actual reindexing work, create async indexes
Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops)
# Hack: Before we do actual reindexing work, create async indexes
Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops)
Gitlab::Database::Reindexing.perform(indexes)
Gitlab::Database::Reindexing.perform(indexes)
end
rescue StandardError => e
Gitlab::AppLogger.error(e)
raise
......
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe Gitlab::Database::AsyncIndexes::PostgresAsyncIndex, type: :model do
it { is_expected.to be_a Gitlab::Database::SharedModel }
describe 'validations' do
let(:identifier_limit) { described_class::MAX_IDENTIFIER_LENGTH }
let(:definition_limit) { described_class::MAX_DEFINITION_LENGTH }
......
......@@ -13,6 +13,8 @@ RSpec.describe Gitlab::Database::PostgresIndexBloatEstimate do
let(:identifier) { 'public.schema_migrations_pkey' }
it { is_expected.to be_a Gitlab::Database::SharedModel }
describe '#bloat_size' do
it 'returns the bloat size in bytes' do
# We cannot reach much more about the bloat size estimate here
......
......@@ -22,6 +22,8 @@ RSpec.describe Gitlab::Database::PostgresIndex do
it_behaves_like 'a postgres model'
it { is_expected.to be_a Gitlab::Database::SharedModel }
describe '.reindexing_support' do
it 'only non partitioned indexes' do
expect(described_class.reindexing_support).to all(have_attributes(partitioned: false))
......
......@@ -11,6 +11,8 @@ RSpec.describe Gitlab::Database::Reindexing::ReindexAction do
swapout_view_for_table(:postgres_indexes)
end
it { is_expected.to be_a Gitlab::Database::SharedModel }
describe '.create_for' do
subject { described_class.create_for(index) }
......
......@@ -252,10 +252,26 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
run_rake_task('gitlab:db:reindex', '[public.foo_idx]')
end
context 'when database name is provided' do
it 'calls the index rebuilder with the proper arguments when the database name match' do
allow(indexes).to receive(:where).with(identifier: 'public.foo_idx').and_return([index])
expect(Gitlab::Database::Reindexing).to receive(:perform).with([index])
run_rake_task('gitlab:db:reindex', '[public.foo_idx,main]')
end
it 'ignores the index and uses all candidate indexes if database name does not match' do
expect(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).and_return(indexes)
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes)
run_rake_task('gitlab:db:reindex', '[public.foo_idx,ci]')
end
end
it 'raises an error if the index does not exist' do
allow(indexes).to receive(:where).with(identifier: 'public.absent_index').and_return([])
expect { run_rake_task('gitlab:db:reindex', '[public.absent_index]') }.to raise_error(/Index not found/)
expect { run_rake_task('gitlab:db:reindex', '[public.absent_index]') }.to raise_error(/Index public.absent_index for main database not found or not supported/)
end
it 'raises an error if the index is not fully qualified with a schema' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment