Commit 4d303f82 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'ab/reindex-concurrently' into 'master'

Reindexing strategy for PG12

See merge request gitlab-org/gitlab!64695
parents 58f8d735 3604cb32
---
name: database_reindexing_pg12
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/64695
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/334372
milestone: '14.1'
type: development
group: group::database
default_enabled: false
......@@ -20,16 +20,25 @@ module Gitlab
# A 'regular' index is a non-unique index,
# that does not serve an exclusion constraint and
# is defined on a table that is not partitioned.
scope :regular, -> { where(unique: false, partitioned: false, exclusion: false)}
#
# Deprecated: Switch to scope .reindexing_support
scope :regular, -> { where(unique: false, partitioned: false, exclusion: false, expression: false)}
# Indexes for reindexing with PG12
scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false) }
scope :not_match, ->(regex) { where("name !~ ?", regex)}
scope :match, ->(regex) { where("name ~* ?", regex)}
scope :not_recently_reindexed, -> do
recent_actions = Reindexing::ReindexAction.recent.where('index_identifier = identifier')
where('NOT EXISTS (?)', recent_actions)
end
alias_method :reset, :reload
def bloat_size
strong_memoize(:bloat_size) { bloat_estimate&.bloat_size || 0 }
end
......
......@@ -14,11 +14,16 @@ module Gitlab
end
def self.candidate_indexes
Gitlab::Database::PostgresIndex
.regular
.where('NOT expression')
indexes = Gitlab::Database::PostgresIndex
.not_match("^#{ConcurrentReindex::TEMPORARY_INDEX_PREFIX}")
.not_match("^#{ConcurrentReindex::REPLACED_INDEX_PREFIX}")
.not_match("#{ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$")
if Feature.enabled?(:database_reindexing_pg12, type: :development)
indexes.reindexing_support
else
indexes.regular
end
end
end
end
......
......@@ -41,7 +41,13 @@ module Gitlab
end
def perform_for(index, action)
ConcurrentReindex.new(index).perform
strategy = if Feature.enabled?(:database_reindexing_pg12, type: :development)
ReindexConcurrently
else
ConcurrentReindex
end
strategy.new(index).perform
rescue StandardError
action.state = :failed
......
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
# This is a >= PG12 reindexing strategy based on `REINDEX CONCURRENTLY`
class ReindexConcurrently
ReindexError = Class.new(StandardError)
TEMPORARY_INDEX_PATTERN = '\_ccnew[0-9]*'
STATEMENT_TIMEOUT = 9.hours
PG_MAX_INDEX_NAME_LENGTH = 63
# When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
# which only conflicts with DDL and vacuum. We therefore execute this with a rather
# high lock timeout and a long pause in between retries. This is an alternative to
# setting a high statement timeout, which would lead to a long running query with effects
# on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
attr_reader :index, :logger
def initialize(index, logger: Gitlab::AppLogger)
@index = index
@logger = logger
end
def perform
raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name =~ /#{TEMPORARY_INDEX_PATTERN}/
# Expression indexes require additional statistics in `pg_statistic`:
# select * from pg_statistic where starelid = (select oid from pg_class where relname = 'some_index');
#
# In PG12, this has been fixed in https://gitlab.com/postgres/postgres/-/commit/b17ff07aa3eb142d2cde2ea00e4a4e8f63686f96.
# Discussion happened in https://www.postgresql.org/message-id/flat/CAFcNs%2BqpFPmiHd1oTXvcPdvAHicJDA9qBUSujgAhUMJyUMb%2BSA%40mail.gmail.com
# following a GitLab.com incident that surfaced this (https://gitlab.com/gitlab-com/gl-infra/production/-/issues/2885).
#
# While this has been backpatched, we continue to disable expression indexes until further review.
raise ReindexError, 'expression indexes are currently not supported' if index.expression?
begin
with_logging do
set_statement_timeout do
execute("REINDEX INDEX CONCURRENTLY #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
end
end
ensure
cleanup_dangling_indexes
end
end
private
def with_logging
bloat_size = index.bloat_size
ondisk_size_before = index.ondisk_size_bytes
logger.info(
message: "Starting reindex of #{index}",
index: index.identifier,
table: index.tablename,
estimated_bloat_bytes: bloat_size,
index_size_before_bytes: ondisk_size_before
)
duration = Benchmark.realtime do
yield
end
index.reset
logger.info(
message: "Finished reindex of #{index}",
index: index.identifier,
table: index.tablename,
estimated_bloat_bytes: bloat_size,
index_size_before_bytes: ondisk_size_before,
index_size_after_bytes: index.ondisk_size_bytes,
duration_s: duration.round(2)
)
end
def cleanup_dangling_indexes
Gitlab::Database::PostgresIndex.match("#{TEMPORARY_INDEX_PATTERN}$").each do |lingering_index|
# Example lingering index name: some_index_ccnew1
# Example prefix: 'some_index'
prefix = lingering_index.name.gsub(/#{TEMPORARY_INDEX_PATTERN}/, '')
# Example suffix: '_ccnew1'
suffix = lingering_index.name.match(/#{TEMPORARY_INDEX_PATTERN}/)[0]
# Only remove if the lingering index name could have been chosen
# as a result of a REINDEX operation (considering that PostgreSQL
# truncates index names to 63 chars and adds a suffix).
if index.name[0...PG_MAX_INDEX_NAME_LENGTH - suffix.length] == prefix
remove_index(lingering_index)
end
end
end
def remove_index(index)
logger.info("Removing dangling index #{index.identifier}")
retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
klass: self.class,
logger: logger
)
retries.run(raise_on_exhaustion: false) do
execute("DROP INDEX CONCURRENTLY IF EXISTS #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
end
end
def with_lock_retries(&block)
arguments = { klass: self.class, logger: logger }
Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
end
def set_statement_timeout
execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
yield
ensure
execute('RESET statement_timeout')
end
delegate :execute, :quote_table_name, to: :connection
def connection
@connection ||= ActiveRecord::Base.connection
end
end
end
end
end
......@@ -264,6 +264,18 @@ RSpec.describe 'Database schema' do
end
end
context 'index names' do
it 'disallows index names with a _ccnew[0-9]* suffix' do
# During REINDEX operations, Postgres generates a temporary index with a _ccnew[0-9]* suffix
# Since indexes are being considered temporary and subject to removal if they stick around for longer. See Gitlab::Database::Reindexing.
#
# Hence we disallow adding permanent indexes with this suffix.
problematic_indexes = Gitlab::Database::PostgresIndex.match("#{Gitlab::Database::Reindexing::ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$").all
expect(problematic_indexes).to be_empty
end
end
private
def retrieve_columns_name_with_jsonb
......
......@@ -34,6 +34,24 @@ RSpec.describe Gitlab::Database::PostgresIndex do
it 'only indexes that dont serve an exclusion constraint' do
expect(described_class.regular).to all(have_attributes(exclusion: false))
end
it 'only non-expression indexes' do
expect(described_class.regular).to all(have_attributes(expression: false))
end
end
describe '.reindexing_support' do
it 'only non partitioned indexes' do
expect(described_class.reindexing_support).to all(have_attributes(partitioned: false))
end
it 'only indexes that dont serve an exclusion constraint' do
expect(described_class.reindexing_support).to all(have_attributes(exclusion: false))
end
it 'only non-expression indexes' do
expect(described_class.reindexing_support).to all(have_attributes(expression: false))
end
end
describe '.not_match' do
......
......@@ -9,13 +9,6 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
describe '.perform' do
subject { described_class.new(index, notifier).perform }
before do
swapout_view_for_table(:postgres_indexes)
allow(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).and_return(reindexer)
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:create_for).with(index).and_return(action)
end
let(:index) { create(:postgres_index) }
let(:notifier) { instance_double(Gitlab::Database::Reindexing::GrafanaNotifier, notify_start: nil, notify_end: nil) }
let(:reindexer) { instance_double(Gitlab::Database::Reindexing::ConcurrentReindex, perform: nil) }
......@@ -26,57 +19,83 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
let(:lease_timeout) { 1.day }
let(:uuid) { 'uuid' }
context 'locking' do
it 'acquires a lock while reindexing' do
expect(lease).to receive(:try_obtain).ordered.and_return(uuid)
shared_examples_for 'reindexing coordination' do
context 'locking' do
it 'acquires a lock while reindexing' do
expect(lease).to receive(:try_obtain).ordered.and_return(uuid)
expect(reindexer).to receive(:perform).ordered
expect(reindexer).to receive(:perform).ordered
expect(Gitlab::ExclusiveLease).to receive(:cancel).ordered.with(lease_key, uuid)
expect(Gitlab::ExclusiveLease).to receive(:cancel).ordered.with(lease_key, uuid)
subject
end
subject
end
it 'does not perform reindexing actions if lease is not granted' do
expect(lease).to receive(:try_obtain).ordered.and_return(false)
expect(Gitlab::Database::Reindexing::ConcurrentReindex).not_to receive(:new)
it 'does not perform reindexing actions if lease is not granted' do
expect(lease).to receive(:try_obtain).ordered.and_return(false)
expect(Gitlab::Database::Reindexing::ConcurrentReindex).not_to receive(:new)
subject
subject
end
end
end
context 'notifications' do
it 'sends #notify_start before reindexing' do
expect(notifier).to receive(:notify_start).with(action).ordered
expect(reindexer).to receive(:perform).ordered
context 'notifications' do
it 'sends #notify_start before reindexing' do
expect(notifier).to receive(:notify_start).with(action).ordered
expect(reindexer).to receive(:perform).ordered
subject
end
subject
it 'sends #notify_end after reindexing and updating the action is done' do
expect(action).to receive(:finish).ordered
expect(notifier).to receive(:notify_end).with(action).ordered
subject
end
end
it 'sends #notify_end after reindexing and updating the action is done' do
expect(action).to receive(:finish).ordered
expect(notifier).to receive(:notify_end).with(action).ordered
context 'action tracking' do
it 'calls #finish on the action' do
expect(reindexer).to receive(:perform).ordered
expect(action).to receive(:finish).ordered
subject
end
it 'upon error, it still calls finish and raises the error' do
expect(reindexer).to receive(:perform).ordered.and_raise('something went wrong')
expect(action).to receive(:finish).ordered
subject
expect { subject }.to raise_error(/something went wrong/)
expect(action).to be_failed
end
end
end
context 'action tracking' do
it 'calls #finish on the action' do
expect(reindexer).to receive(:perform).ordered
expect(action).to receive(:finish).ordered
context 'legacy reindexing method (< PG12) - to be removed' do
before do
stub_feature_flags(database_reindexing_pg12: false)
swapout_view_for_table(:postgres_indexes)
subject
allow(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).and_return(reindexer)
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:create_for).with(index).and_return(action)
end
it 'upon error, it still calls finish and raises the error' do
expect(reindexer).to receive(:perform).ordered.and_raise('something went wrong')
expect(action).to receive(:finish).ordered
it_behaves_like 'reindexing coordination'
end
expect { subject }.to raise_error(/something went wrong/)
context 'PG12 reindexing method' do
before do
stub_feature_flags(database_reindexing_pg12: true)
swapout_view_for_table(:postgres_indexes)
expect(action).to be_failed
allow(Gitlab::Database::Reindexing::ReindexConcurrently).to receive(:new).with(index).and_return(reindexer)
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:create_for).with(index).and_return(action)
end
it_behaves_like 'reindexing coordination'
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::Reindexing::ReindexConcurrently, '#perform' do
subject { described_class.new(index, logger: logger).perform }
let(:table_name) { '_test_reindex_table' }
let(:column_name) { '_test_column' }
let(:index_name) { '_test_reindex_index' }
let(:index) { Gitlab::Database::PostgresIndex.by_identifier("public.#{iname(index_name)}") }
let(:logger) { double('logger', debug: nil, info: nil, error: nil ) }
let(:connection) { ActiveRecord::Base.connection }
before do
connection.execute(<<~SQL)
CREATE TABLE #{table_name} (
id serial NOT NULL PRIMARY KEY,
#{column_name} integer NOT NULL);
CREATE INDEX #{index_name} ON #{table_name} (#{column_name});
SQL
end
context 'when the index serves an exclusion constraint' do
before do
allow(index).to receive(:exclusion?).and_return(true)
end
it 'raises an error' do
expect { subject }.to raise_error(described_class::ReindexError, /indexes serving an exclusion constraint are currently not supported/)
end
end
context 'when attempting to reindex an expression index' do
before do
allow(index).to receive(:expression?).and_return(true)
end
it 'raises an error' do
expect { subject }.to raise_error(described_class::ReindexError, /expression indexes are currently not supported/)
end
end
context 'when the index is a dangling temporary index from a previous reindexing run' do
context 'with the temporary index prefix' do
let(:index_name) { '_test_reindex_index_ccnew' }
it 'raises an error' do
expect { subject }.to raise_error(described_class::ReindexError, /left-over temporary index/)
end
end
context 'with the temporary index prefix with a counter' do
let(:index_name) { '_test_reindex_index_ccnew1' }
it 'raises an error' do
expect { subject }.to raise_error(described_class::ReindexError, /left-over temporary index/)
end
end
end
it 'recreates the index using REINDEX with a long statement timeout' do
expect_to_execute_in_order(
"SET statement_timeout TO '32400s'",
"REINDEX INDEX CONCURRENTLY \"public\".\"#{index.name}\"",
"RESET statement_timeout"
)
subject
end
context 'with dangling indexes matching TEMPORARY_INDEX_PATTERN, i.e. /some\_index\_ccnew(\d)*/' do
before do
# dangling indexes
connection.execute("CREATE INDEX #{iname(index_name, '_ccnew')} ON #{table_name} (#{column_name})")
connection.execute("CREATE INDEX #{iname(index_name, '_ccnew2')} ON #{table_name} (#{column_name})")
# Unrelated index - don't drop
connection.execute("CREATE INDEX some_other_index_ccnew ON #{table_name} (#{column_name})")
end
shared_examples_for 'dropping the dangling index' do
it 'drops the dangling indexes while controlling lock_timeout' do
expect_to_execute_in_order(
# Regular index rebuild
"SET statement_timeout TO '32400s'",
"REINDEX INDEX CONCURRENTLY \"public\".\"#{index_name}\"",
"RESET statement_timeout",
# Drop _ccnew index
"SET lock_timeout TO '60000ms'",
"DROP INDEX CONCURRENTLY IF EXISTS \"public\".\"#{iname(index_name, '_ccnew')}\"",
"RESET idle_in_transaction_session_timeout; RESET lock_timeout",
# Drop _ccnew2 index
"SET lock_timeout TO '60000ms'",
"DROP INDEX CONCURRENTLY IF EXISTS \"public\".\"#{iname(index_name, '_ccnew2')}\"",
"RESET idle_in_transaction_session_timeout; RESET lock_timeout"
)
subject
end
end
context 'with normal index names' do
it_behaves_like 'dropping the dangling index'
end
context 'with index name at 63 character limit' do
let(:index_name) { 'a' * 63 }
before do
# Another unrelated index - don't drop
extra_index = index_name[0...55]
connection.execute("CREATE INDEX #{extra_index}_ccnew ON #{table_name} (#{column_name})")
end
it_behaves_like 'dropping the dangling index'
end
end
def iname(name, suffix = '')
"#{name[0...63 - suffix.size]}#{suffix}"
end
def expect_to_execute_in_order(*queries)
# Indexes cannot be created CONCURRENTLY in a transaction. Since the tests are wrapped in transactions,
# verify the original call but pass through the non-concurrent form.
queries.each do |query|
expect(connection).to receive(:execute).with(query).ordered.and_wrap_original do |method, sql|
method.call(sql.sub(/CONCURRENTLY/, ''))
end
end
end
end
......@@ -29,11 +29,30 @@ RSpec.describe Gitlab::Database::Reindexing do
describe '.candidate_indexes' do
subject { described_class.candidate_indexes }
it 'retrieves regular indexes that are no left-overs from previous runs' do
result = double
expect(Gitlab::Database::PostgresIndex).to receive_message_chain('regular.where.not_match.not_match').with(no_args).with('NOT expression').with('^tmp_reindex_').with('^old_reindex_').and_return(result)
context 'with deprecated method for < PG12' do
before do
stub_feature_flags(database_reindexing_pg12: false)
end
it 'retrieves regular indexes that are no left-overs from previous runs' do
result = double
expect(Gitlab::Database::PostgresIndex).to receive_message_chain('not_match.not_match.not_match.regular').with('^tmp_reindex_').with('^old_reindex_').with('\_ccnew[0-9]*$').with(no_args).and_return(result)
expect(subject).to eq(result)
end
end
expect(subject).to eq(result)
context 'with deprecated method for >= PG12' do
before do
stub_feature_flags(database_reindexing_pg12: true)
end
it 'retrieves regular indexes that are no left-overs from previous runs' do
result = double
expect(Gitlab::Database::PostgresIndex).to receive_message_chain('not_match.not_match.not_match.reindexing_support').with('^tmp_reindex_').with('^old_reindex_').with('\_ccnew[0-9]*$').with(no_args).and_return(result)
expect(subject).to eq(result)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment