Commit 210552b2 authored by Patrick Bair's avatar Patrick Bair

Merge branch '340832-reindex-for-multiple-databases' into 'master'

Update gitlab:db:reindex to work with multiple databases

See merge request gitlab-org/gitlab!72745
parents 48d8be18 f4177add
...@@ -2449,8 +2449,6 @@ Database/MultipleDatabases: ...@@ -2449,8 +2449,6 @@ Database/MultipleDatabases:
- 'lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb' - 'lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb'
- 'lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb' - 'lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb'
- 'lib/gitlab/database.rb' - 'lib/gitlab/database.rb'
- 'lib/gitlab/database/reindexing/concurrent_reindex.rb'
- 'lib/gitlab/database/reindexing/reindex_concurrently.rb'
- 'lib/gitlab/database/schema_cache_with_renamed_table.rb' - 'lib/gitlab/database/schema_cache_with_renamed_table.rb'
- 'lib/gitlab/database/schema_migrations/context.rb' - 'lib/gitlab/database/schema_migrations/context.rb'
- 'lib/gitlab/database/schema_version_files.rb' - 'lib/gitlab/database/schema_version_files.rb'
......
...@@ -42,7 +42,7 @@ class PrepareCiBuildsMetadataAndCiBuildAsyncIndexes < ActiveRecord::Migration[6. ...@@ -42,7 +42,7 @@ class PrepareCiBuildsMetadataAndCiBuildAsyncIndexes < ActiveRecord::Migration[6.
return if index_name_exists?(table_name, index_name) return if index_name_exists?(table_name, index_name)
async_index = Gitlab::Database::AsyncIndexes::PostgresAsyncIndex.safe_find_or_create_by!(name: index_name) do |rec| async_index = Gitlab::Database::AsyncIndexes::PostgresAsyncIndex.find_or_create_by!(name: index_name) do |rec|
rec.table_name = table_name rec.table_name = table_name
rec.definition = definition rec.definition = definition
end end
......
...@@ -40,7 +40,7 @@ module Gitlab ...@@ -40,7 +40,7 @@ module Gitlab
end end
def connection def connection
@connection ||= ApplicationRecord.connection @connection ||= async_index.connection
end end
def lease_timeout def lease_timeout
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
module Gitlab module Gitlab
module Database module Database
module AsyncIndexes module AsyncIndexes
class PostgresAsyncIndex < ApplicationRecord class PostgresAsyncIndex < SharedModel
self.table_name = 'postgres_async_indexes' self.table_name = 'postgres_async_indexes'
MAX_IDENTIFIER_LENGTH = Gitlab::Database::MigrationHelpers::MAX_IDENTIFIER_NAME_LENGTH MAX_IDENTIFIER_LENGTH = Gitlab::Database::MigrationHelpers::MAX_IDENTIFIER_NAME_LENGTH
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
module Gitlab module Gitlab
module Database module Database
class PostgresIndex < ActiveRecord::Base class PostgresIndex < SharedModel
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
self.table_name = 'postgres_indexes' self.table_name = 'postgres_indexes'
......
...@@ -6,7 +6,7 @@ module Gitlab ...@@ -6,7 +6,7 @@ module Gitlab
# for all indexes can be expensive in a large database. # for all indexes can be expensive in a large database.
# #
# Best used on a per-index basis. # Best used on a per-index basis.
class PostgresIndexBloatEstimate < ActiveRecord::Base class PostgresIndexBloatEstimate < SharedModel
self.table_name = 'postgres_index_bloat_estimates' self.table_name = 'postgres_index_bloat_estimates'
self.primary_key = 'identifier' self.primary_key = 'identifier'
......
...@@ -27,13 +27,14 @@ module Gitlab ...@@ -27,13 +27,14 @@ module Gitlab
Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity") Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity")
retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new( retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
connection: index.connection,
timing_configuration: REMOVE_INDEX_RETRY_CONFIG, timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
klass: self.class, klass: self.class,
logger: Gitlab::AppLogger logger: Gitlab::AppLogger
) )
retries.run(raise_on_exhaustion: false) do retries.run(raise_on_exhaustion: false) do
ApplicationRecord.connection.tap do |conn| index.connection.tap do |conn|
conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}") conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}")
end end
end end
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
module Gitlab module Gitlab
module Database module Database
module Reindexing module Reindexing
class ReindexAction < ActiveRecord::Base class ReindexAction < SharedModel
self.table_name = 'postgres_reindex_actions' self.table_name = 'postgres_reindex_actions'
belongs_to :index, foreign_key: :index_identifier, class_name: 'Gitlab::Database::PostgresIndex' belongs_to :index, foreign_key: :index_identifier, class_name: 'Gitlab::Database::PostgresIndex'
......
...@@ -99,6 +99,7 @@ module Gitlab ...@@ -99,6 +99,7 @@ module Gitlab
logger.info("Removing dangling index #{index.identifier}") logger.info("Removing dangling index #{index.identifier}")
retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new( retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
connection: connection,
timing_configuration: REMOVE_INDEX_RETRY_CONFIG, timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
klass: self.class, klass: self.class,
logger: logger logger: logger
...@@ -109,11 +110,6 @@ module Gitlab ...@@ -109,11 +110,6 @@ module Gitlab
end end
end end
def with_lock_retries(&block)
arguments = { klass: self.class, logger: logger }
Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
end
def set_statement_timeout def set_statement_timeout
execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT) execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
yield yield
...@@ -123,7 +119,7 @@ module Gitlab ...@@ -123,7 +119,7 @@ module Gitlab
delegate :execute, :quote_table_name, to: :connection delegate :execute, :quote_table_name, to: :connection
def connection def connection
@connection ||= ActiveRecord::Base.connection @connection ||= index.connection
end end
end end
end end
......
...@@ -161,31 +161,33 @@ namespace :gitlab do ...@@ -161,31 +161,33 @@ namespace :gitlab do
end end
desc 'reindex a regular index without downtime to eliminate bloat' desc 'reindex a regular index without downtime to eliminate bloat'
task :reindex, [:index_name] => :environment do |_, args| task :reindex, [:index_name, :database] => :environment do |_, args|
unless Feature.enabled?(:database_reindexing, type: :ops) unless Feature.enabled?(:database_reindexing, type: :ops)
puts "This feature (database_reindexing) is currently disabled.".color(:yellow) puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
exit exit
end end
indexes = Gitlab::Database::PostgresIndex.reindexing_support Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name|
indexes = Gitlab::Database::PostgresIndex.reindexing_support
if identifier = args[:index_name] if (identifier = args[:index_name]) && (args.fetch(:database, 'main') == connection_name)
raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/ raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
indexes = indexes.where(identifier: identifier) indexes = indexes.where(identifier: identifier)
raise "Index not found or not supported: #{args[:index_name]}" if indexes.empty? raise "Index #{args[:index_name]} for #{connection_name} database not found or not supported" if indexes.empty?
end end
ActiveRecord::Base.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false) Gitlab::Database::SharedModel.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
# Cleanup leftover temporary indexes from previous, possibly aborted runs (if any) # Cleanup leftover temporary indexes from previous, possibly aborted runs (if any)
Gitlab::Database::Reindexing.cleanup_leftovers! Gitlab::Database::Reindexing.cleanup_leftovers!
# Hack: Before we do actual reindexing work, create async indexes # Hack: Before we do actual reindexing work, create async indexes
Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops) Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops)
Gitlab::Database::Reindexing.perform(indexes) Gitlab::Database::Reindexing.perform(indexes)
end
rescue StandardError => e rescue StandardError => e
Gitlab::AppLogger.error(e) Gitlab::AppLogger.error(e)
raise raise
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Database::AsyncIndexes::PostgresAsyncIndex, type: :model do RSpec.describe Gitlab::Database::AsyncIndexes::PostgresAsyncIndex, type: :model do
it { is_expected.to be_a Gitlab::Database::SharedModel }
describe 'validations' do describe 'validations' do
let(:identifier_limit) { described_class::MAX_IDENTIFIER_LENGTH } let(:identifier_limit) { described_class::MAX_IDENTIFIER_LENGTH }
let(:definition_limit) { described_class::MAX_DEFINITION_LENGTH } let(:definition_limit) { described_class::MAX_DEFINITION_LENGTH }
......
...@@ -13,6 +13,8 @@ RSpec.describe Gitlab::Database::PostgresIndexBloatEstimate do ...@@ -13,6 +13,8 @@ RSpec.describe Gitlab::Database::PostgresIndexBloatEstimate do
let(:identifier) { 'public.schema_migrations_pkey' } let(:identifier) { 'public.schema_migrations_pkey' }
it { is_expected.to be_a Gitlab::Database::SharedModel }
describe '#bloat_size' do describe '#bloat_size' do
it 'returns the bloat size in bytes' do it 'returns the bloat size in bytes' do
# We cannot reach much more about the bloat size estimate here # We cannot reach much more about the bloat size estimate here
......
...@@ -22,6 +22,8 @@ RSpec.describe Gitlab::Database::PostgresIndex do ...@@ -22,6 +22,8 @@ RSpec.describe Gitlab::Database::PostgresIndex do
it_behaves_like 'a postgres model' it_behaves_like 'a postgres model'
it { is_expected.to be_a Gitlab::Database::SharedModel }
describe '.reindexing_support' do describe '.reindexing_support' do
it 'only non partitioned indexes' do it 'only non partitioned indexes' do
expect(described_class.reindexing_support).to all(have_attributes(partitioned: false)) expect(described_class.reindexing_support).to all(have_attributes(partitioned: false))
......
...@@ -11,6 +11,8 @@ RSpec.describe Gitlab::Database::Reindexing::ReindexAction do ...@@ -11,6 +11,8 @@ RSpec.describe Gitlab::Database::Reindexing::ReindexAction do
swapout_view_for_table(:postgres_indexes) swapout_view_for_table(:postgres_indexes)
end end
it { is_expected.to be_a Gitlab::Database::SharedModel }
describe '.create_for' do describe '.create_for' do
subject { described_class.create_for(index) } subject { described_class.create_for(index) }
......
...@@ -252,10 +252,26 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do ...@@ -252,10 +252,26 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
run_rake_task('gitlab:db:reindex', '[public.foo_idx]') run_rake_task('gitlab:db:reindex', '[public.foo_idx]')
end end
context 'when database name is provided' do
it 'calls the index rebuilder with the proper arguments when the database name match' do
allow(indexes).to receive(:where).with(identifier: 'public.foo_idx').and_return([index])
expect(Gitlab::Database::Reindexing).to receive(:perform).with([index])
run_rake_task('gitlab:db:reindex', '[public.foo_idx,main]')
end
it 'ignores the index and uses all candidate indexes if database name does not match' do
expect(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).and_return(indexes)
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes)
run_rake_task('gitlab:db:reindex', '[public.foo_idx,ci]')
end
end
it 'raises an error if the index does not exist' do it 'raises an error if the index does not exist' do
allow(indexes).to receive(:where).with(identifier: 'public.absent_index').and_return([]) allow(indexes).to receive(:where).with(identifier: 'public.absent_index').and_return([])
expect { run_rake_task('gitlab:db:reindex', '[public.absent_index]') }.to raise_error(/Index not found/) expect { run_rake_task('gitlab:db:reindex', '[public.absent_index]') }.to raise_error(/Index public.absent_index for main database not found or not supported/)
end end
it 'raises an error if the index is not fully qualified with a schema' do it 'raises an error if the index is not fully qualified with a schema' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment