Commit 9a58817b authored by pbair's avatar pbair

Helper to safely add indexes to partitioned tables

Add a new set of migration helpers to concurrently create indexes
on partitioned tables. Postgres does not support concurrent index
creation on partitioned tables, so the index must be created
concurrently on each partition, and then finally added in a
non-concurrent fashion on the parent table.
parent 369065d7
......@@ -5,6 +5,7 @@ module Gitlab
module PartitioningMigrationHelpers
include ForeignKeyHelpers
include TableManagementHelpers
include IndexHelpers
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module PartitioningMigrationHelpers
module IndexHelpers
include Gitlab::Database::MigrationHelpers
include Gitlab::Database::SchemaHelpers
# Concurrently creates a new index on a partitioned table. In concept this works similarly to
# `add_concurrent_index`, and won't block reads or writes on the table while the index is being built.
#
# A special helper is required for partitioning because Postgres does not support concurrently building indexes
# on partitioned tables. This helper concurrently adds the same index to each partition, and creates the final
# index on the parent table once all of the partitions are indexed. This is the recommended safe way to add
# indexes to partitioned tables.
#
# Example:
#
# add_concurrent_partitioned_index :users, :some_column
#
# See Rails' `add_index` for more info on the available arguments.
def add_concurrent_partitioned_index(table_name, column_names, options = {})
raise ArgumentError, 'A name is required for indexes added to partitioned tables' unless options[:name]
partitioned_table = find_partitioned_table(table_name)
if index_name_exists?(table_name, options[:name])
Gitlab::AppLogger.warn "Index not created because it already exists (this may be due to an aborted" \
" migration or similar): table_name: #{table_name}, index_name: #{options[:name]}"
return
end
partitioned_table.postgres_partitions.each do |partition|
partition_index_name = generated_index_name(partition.identifier, options[:name])
partition_options = options.merge(name: partition_index_name)
add_concurrent_index(partition.identifier, column_names, partition_options)
end
with_lock_retries do
add_index(table_name, column_names, options)
end
end
# Safely removes an existing index from a partitioned table. The method name is a bit inaccurate as it does not
# drop the index concurrently, but it's named as such to maintain consistency with other similar helpers, and
# indicate that this should be safe to use in a production environment.
#
# In current versions of Postgres it's impossible to drop an index concurrently, or drop an index from an
# individual partition that exists across the entire partitioned table. As a result this helper drops the index
# from the parent table, which automatically cascades to all partitions. While this does require an exclusive
# lock, dropping an index is a fast operation that won't block the table for a significant period of time.
#
# Example:
#
# remove_concurrent_partitioned_index_by_name :users, 'index_name_goes_here'
def remove_concurrent_partitioned_index_by_name(table_name, index_name)
find_partitioned_table(table_name)
unless index_name_exists?(table_name, index_name)
Gitlab::AppLogger.warn "Index not removed because it does not exist (this may be due to an aborted " \
"migration or similar): table_name: #{table_name}, index_name: #{index_name}"
return
end
with_lock_retries do
remove_index(table_name, name: index_name)
end
end
private
def find_partitioned_table(table_name)
partitioned_table = Gitlab::Database::PostgresPartitionedTable.find_by_name_in_current_schema(table_name)
raise ArgumentError, "#{table_name} is not a partitioned table" unless partitioned_table
partitioned_table
end
def generated_index_name(partition_name, index_name)
object_name("#{partition_name}_#{index_name}", 'index')
end
end
end
end
end
......@@ -15,6 +15,10 @@ module Gitlab
find(identifier)
end
def self.find_by_name_in_current_schema(name)
find_by("identifier = concat(current_schema(), '.', ?)", name)
end
def dynamic?
DYNAMIC_PARTITION_STRATEGIES.include?(strategy)
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::IndexHelpers do
let(:migration) do
ActiveRecord::Migration.new.extend(described_class)
end
let(:table_name) { '_test_partitioned_table' }
let(:index_name) { '_test_partitioning_index_name' }
let(:partitioned_table) { double(name: 'partitioned_table') }
before do
allow(migration).to receive(:puts)
allow(Gitlab::Database::PostgresPartitionedTable).to receive(:find_by_name_in_current_schema).with(table_name)
.and_return(partitioned_table)
end
describe '#add_concurrent_partitioned_index' do
let(:column_name) { '_test_column_name' }
let(:partition_name_1) { "#{table_name}_202001" }
let(:partition_name_2) { "#{table_name}_202002" }
let(:partition_name_3) { "#{table_name}_202003" }
let(:index_name_1) { '_test_index_name_1' }
let(:index_name_2) { '_test_index_name_2' }
let(:index_name_3) { '_test_index_name_3' }
let(:partition_1) { double(identifier: partition_name_1) }
let(:partition_2) { double(identifier: partition_name_2) }
let(:partition_3) { double(identifier: partition_name_3) }
let(:current_partitions) { [partition_1, partition_2, partition_3] }
before do
allow(partitioned_table).to receive(:postgres_partitions).and_return(current_partitions)
allow(migration).to receive(:generated_index_name).and_return(index_name_1, index_name_2, index_name_3)
allow(migration).to receive(:index_name_exists?).with(table_name, index_name).and_return(false)
allow(migration).to receive(:with_lock_retries).and_yield
end
context 'when the index does not exist on the parent table' do
it 'creates the index on each partition, and the parent table', :aggregate_failures do
expect(migration).to receive(:index_name_exists?).with(table_name, index_name).and_return(false)
expect(migration).to receive(:add_concurrent_index)
.with(partition_name_1, column_name, name: index_name_1).ordered
expect(migration).to receive(:add_concurrent_index)
.with(partition_name_2, column_name, name: index_name_2).ordered
expect(migration).to receive(:add_concurrent_index)
.with(partition_name_3, column_name, name: index_name_3).ordered
expect(migration).to receive(:with_lock_retries).and_yield.ordered
expect(migration).to receive(:add_index).with(table_name, column_name, name: index_name).ordered
migration.add_concurrent_partitioned_index(table_name, column_name, name: index_name)
end
end
context 'when the index exists on the parent table' do
it 'does not attempt to create any indexes', :aggregate_failures do
expect(migration).to receive(:index_name_exists?).with(table_name, index_name).and_return(true)
expect(migration).not_to receive(:add_concurrent_index)
expect(migration).not_to receive(:with_lock_retries)
expect(migration).not_to receive(:add_index)
migration.add_concurrent_partitioned_index(table_name, column_name, name: index_name)
end
end
context 'when additional index options are given' do
let(:current_partitions) { [partition_1] }
it 'forwards them to the index helper methods', :aggregate_failures do
expect(migration).to receive(:add_concurrent_index)
.with(partition_name_1, column_name, name: index_name_1, where: 'x > 0', unique: true)
expect(migration).to receive(:add_index)
.with(table_name, column_name, name: index_name, where: 'x > 0', unique: true)
migration.add_concurrent_partitioned_index(table_name, column_name,
name: index_name, where: 'x > 0', unique: true)
end
end
context 'when a name argument for the index is not given' do
it 'raises an error', :aggregate_failures do
expect(migration).not_to receive(:add_concurrent_index)
expect(migration).not_to receive(:add_index)
expect do
migration.add_concurrent_partitioned_index(table_name, column_name)
end.to raise_error(ArgumentError, /A name is required for indexes added to partitioned tables/)
end
end
context 'when the given table is not a partitioned table' do
before do
allow(Gitlab::Database::PostgresPartitionedTable).to receive(:find_by_name_in_current_schema)
.with(table_name).and_return(nil)
end
it 'raises an error', :aggregate_failures do
expect(migration).not_to receive(:add_concurrent_index)
expect(migration).not_to receive(:add_index)
expect do
migration.add_concurrent_partitioned_index(table_name, column_name, name: index_name)
end.to raise_error(ArgumentError, /#{table_name} is not a partitioned table/)
end
end
end
describe '#remove_concurrent_partitioned_index_by_name' do
before do
allow(migration).to receive(:index_name_exists?).with(table_name, index_name).and_return(true)
allow(migration).to receive(:with_lock_retries).and_yield
end
context 'when the index exists' do
it 'drops the index on the parent table', :aggregate_failures do
expect(migration).to receive(:index_name_exists?).with(table_name, index_name).and_return(true)
expect(migration).to receive(:with_lock_retries).and_yield.ordered
expect(migration).to receive(:remove_index).with(table_name, name: index_name).ordered
migration.remove_concurrent_partitioned_index_by_name(table_name, index_name)
end
end
context 'when the index does not exist' do
it 'does not attempt to drop the index', :aggregate_failures do
expect(migration).to receive(:index_name_exists?).with(table_name, index_name).and_return(false)
expect(migration).not_to receive(:with_lock_retries)
expect(migration).not_to receive(:remove_index)
migration.remove_concurrent_partitioned_index_by_name(table_name, index_name)
end
end
context 'when the given table is not a partitioned table' do
before do
allow(Gitlab::Database::PostgresPartitionedTable).to receive(:find_by_name_in_current_schema)
.with(table_name).and_return(nil)
end
it 'raises an error', :aggregate_failures do
expect(migration).not_to receive(:remove_index)
expect do
migration.remove_concurrent_partitioned_index_by_name(table_name, index_name)
end.to raise_error(ArgumentError, /#{table_name} is not a partitioned table/)
end
end
end
end
......@@ -39,6 +39,23 @@ RSpec.describe Gitlab::Database::PostgresPartitionedTable, type: :model do
it_behaves_like 'a postgres model'
describe '.find_by_name_in_current_schema' do
it 'finds the partitioned tables in the current schema by name', :aggregate_failures do
partitioned_table = described_class.find_by_name_in_current_schema(name)
expect(partitioned_table).not_to be_nil
expect(partitioned_table.identifier).to eq(identifier)
end
it 'does not find partitioned tables in a different schema' do
ActiveRecord::Base.connection.execute(<<~SQL)
ALTER TABLE #{identifier} SET SCHEMA gitlab_partitions_dynamic
SQL
expect(described_class.find_by_name_in_current_schema(name)).to be_nil
end
end
describe '#dynamic?' do
it 'returns true for tables partitioned by range' do
expect(find('public.foo_range')).to be_dynamic
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment