Commit b011b740 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch 'pb-int-conversion-helpers-multiple-columns' into 'master'

Support multiple columns when converting int types

See merge request gitlab-org/gitlab!58409
parents 42c050db 3c22f6f1
......@@ -2,8 +2,8 @@
module Gitlab
module BackgroundMigration
# Background migration that updates the value of a
# column using the value of another column in the same table.
# Background migration that updates the value of one or more
# columns using the value of other columns in the same table.
#
# - The {start_id, end_id} arguments are at the start so that it can be used
# with `queue_batched_background_migration`
......@@ -25,17 +25,21 @@ module Gitlab
# sub_batch_size - We don't want updates to take more than ~100ms
# This allows us to run multiple smaller batches during
# the minimum 2.minute interval that we can schedule jobs
# copy_from - The column containing the data to copy.
# copy_to - The column to copy the data to.
# copy_from - List of columns containing the data to copy.
# copy_to - List of columns to copy the data to. Order must match the order in `copy_from`.
def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, copy_from, copy_to)
quoted_copy_from = connection.quote_column_name(copy_from)
quoted_copy_to = connection.quote_column_name(copy_to)
copy_from = Array.wrap(copy_from)
copy_to = Array.wrap(copy_to)
raise ArgumentError, 'number of source and destination columns must match' unless copy_from.count == copy_to.count
assignment_clauses = column_assignment_clauses(copy_from, copy_to)
parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id)
parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch|
batch_metrics.time_operation(:update_all) do
sub_batch.update_all("#{quoted_copy_to}=#{quoted_copy_from}")
sub_batch.update_all(assignment_clauses)
end
sleep(PAUSE_SECONDS)
......@@ -55,6 +59,17 @@ module Gitlab
def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id)
define_batchable_model(source_table).where(source_key_column => start_id..stop_id)
end
def column_assignment_clauses(copy_from, copy_to)
assignments = copy_from.zip(copy_to).map do |from_column, to_column|
from_column = connection.quote_column_name(from_column)
to_column = connection.quote_column_name(to_column)
"#{to_column} = #{from_column}"
end
assignments.join(', ')
end
end
end
end
......@@ -905,7 +905,7 @@ module Gitlab
end
end
# Initializes the conversion of an integer column to bigint
# Initializes the conversion of a set of integer columns to bigint
#
# It can be used for converting both a Primary Key and any Foreign Keys
# that may reference it or any other integer column that we may want to
......@@ -923,14 +923,14 @@ module Gitlab
# Note: this helper is intended to be used in a regular (pre-deployment) migration.
#
# This helper is part 1 of a multi-step migration process:
# 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers
# 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005
#
# table - The name of the database table containing the column
# column - The name of the column that we want to convert to bigint.
# columns - The name, or array of names, of the column(s) that we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id)
def initialize_conversion_of_integer_to_bigint(table, column, primary_key: :id)
def initialize_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
unless table_exists?(table)
raise "Table #{table} does not exist"
end
......@@ -939,34 +939,40 @@ module Gitlab
raise "Column #{primary_key} does not exist on #{table}"
end
unless column_exists?(table, column)
raise "Column #{column} does not exist on #{table}"
columns = Array.wrap(columns)
columns.each do |column|
next if column_exists?(table, column)
raise ArgumentError, "Column #{column} does not exist on #{table}"
end
check_trigger_permissions!(table)
old_column = column_for(table, column)
tmp_column = "#{column}_convert_to_bigint"
conversions = columns.to_h { |column| [column, "#{column}_convert_to_bigint"] }
with_lock_retries do
if (column.to_s == primary_key.to_s) || !old_column.null
# If the column to be converted is either a PK or is defined as NOT NULL,
# set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow
# That way, we skip the expensive validation step required to add
# a NOT NULL constraint at the end of the process
add_column(table, tmp_column, :bigint, default: old_column.default || 0, null: false)
else
add_column(table, tmp_column, :bigint, default: old_column.default)
conversions.each do |(source_column, temporary_name)|
column = column_for(table, source_column)
if (column.name.to_s == primary_key.to_s) || !column.null
# If the column to be converted is either a PK or is defined as NOT NULL,
# set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow
# That way, we skip the expensive validation step required to add
# a NOT NULL constraint at the end of the process
add_column(table, temporary_name, :bigint, default: column.default || 0, null: false)
else
add_column(table, temporary_name, :bigint, default: column.default)
end
end
install_rename_triggers(table, column, tmp_column)
install_rename_triggers(table, conversions.keys, conversions.values)
end
end
# Backfills the new column used in the conversion of an integer column to bigint using background migrations.
# Backfills the new columns used in an integer-to-bigint conversion using background migrations.
#
# - This helper should be called from a post-deployment migration.
# - In order for this helper to work properly, the new column must be first initialized with
# - In order for this helper to work properly, the new columns must be first initialized with
# the `initialize_conversion_of_integer_to_bigint` helper.
# - It tracks the scheduled background jobs through Gitlab::Database::BackgroundMigration::BatchedMigration,
# which allows a more thorough check that all jobs succeeded in the
......@@ -976,12 +982,12 @@ module Gitlab
# deployed (including background job changes) before we begin processing the background migration.
#
# This helper is part 2 of a multi-step migration process:
# 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers
# 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005
#
# table - The name of the database table containing the column
# column - The name of the column that we want to convert to bigint.
# columns - The name, or an array of names, of the column(s) we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id)
# batch_size - The number of rows to schedule in a single background migration
# sub_batch_size - The smaller batches that will be used by each scheduled job
......@@ -1001,7 +1007,7 @@ module Gitlab
# between the scheduled jobs
def backfill_conversion_of_integer_to_bigint(
table,
column,
columns,
primary_key: :id,
batch_size: 20_000,
sub_batch_size: 1000,
......@@ -1016,22 +1022,21 @@ module Gitlab
raise "Column #{primary_key} does not exist on #{table}"
end
unless column_exists?(table, column)
raise "Column #{column} does not exist on #{table}"
end
conversions = Array.wrap(columns).to_h do |column|
raise ArgumentError, "Column #{column} does not exist on #{table}" unless column_exists?(table, column)
tmp_column = "#{column}_convert_to_bigint"
temporary_name = "#{column}_convert_to_bigint"
raise ArgumentError, "Column #{temporary_name} does not exist on #{table}" unless column_exists?(table, temporary_name)
unless column_exists?(table, tmp_column)
raise 'The temporary column does not exist, initialize it with `initialize_conversion_of_integer_to_bigint`'
[column, temporary_name]
end
batched_migration = queue_batched_background_migration(
'CopyColumnUsingBackgroundMigrationJob',
table,
primary_key,
column,
tmp_column,
conversions.keys,
conversions.values,
job_interval: interval,
batch_size: batch_size,
sub_batch_size: sub_batch_size)
......
......@@ -65,6 +65,26 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
expect(test_table.where("name_convert_to_text = 'no name'").count).to eq(0)
end
it 'copies multiple columns when given' do
columns_to_copy_from = %w[id fk]
columns_to_copy_to = %w[id_convert_to_bigint fk_convert_to_bigint]
subject.perform(10, 15, table_name, 'id', sub_batch_size, columns_to_copy_from, columns_to_copy_to)
expect(test_table.where('id = id_convert_to_bigint AND fk = fk_convert_to_bigint').pluck(:id)).to contain_exactly(11, 12, 15)
expect(test_table.where(id_convert_to_bigint: 0).where(fk_convert_to_bigint: 0).pluck(:id)).to contain_exactly(19)
expect(test_table.all.count).to eq(4)
end
it 'raises error when number of source and target columns does not match' do
columns_to_copy_from = %w[id fk]
columns_to_copy_to = %w[id_convert_to_bigint]
expect do
subject.perform(10, 15, table_name, 'id', sub_batch_size, columns_to_copy_from, columns_to_copy_to)
end.to raise_error(ArgumentError, 'number of source and destination columns must match')
end
it 'tracks timings of queries' do
expect(subject.batch_metrics.timings).to be_empty
......
......@@ -1730,12 +1730,10 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
end
end
context 'when the column to convert does not exist' do
let(:column) { :foobar }
context 'when the column to migrate does not exist' do
it 'raises an error' do
expect { model.initialize_conversion_of_integer_to_bigint(table, column) }
.to raise_error("Column #{column} does not exist on #{table}")
expect { model.initialize_conversion_of_integer_to_bigint(table, :this_column_is_not_real) }
.to raise_error(ArgumentError, "Column this_column_is_not_real does not exist on #{table}")
end
end
......@@ -1743,7 +1741,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it 'creates a not-null bigint column and installs triggers' do
expect(model).to receive(:add_column).with(table, tmp_column, :bigint, default: 0, null: false)
expect(model).to receive(:install_rename_triggers).with(table, column, tmp_column)
expect(model).to receive(:install_rename_triggers).with(table, [column], [tmp_column])
model.initialize_conversion_of_integer_to_bigint(table, column)
end
......@@ -1755,7 +1753,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it 'creates a not-null bigint column and installs triggers' do
expect(model).to receive(:add_column).with(table, tmp_column, :bigint, default: 0, null: false)
expect(model).to receive(:install_rename_triggers).with(table, column, tmp_column)
expect(model).to receive(:install_rename_triggers).with(table, [column], [tmp_column])
model.initialize_conversion_of_integer_to_bigint(table, column)
end
......@@ -1767,11 +1765,30 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it 'creates a nullable bigint column and installs triggers' do
expect(model).to receive(:add_column).with(table, tmp_column, :bigint, default: nil)
expect(model).to receive(:install_rename_triggers).with(table, column, tmp_column)
expect(model).to receive(:install_rename_triggers).with(table, [column], [tmp_column])
model.initialize_conversion_of_integer_to_bigint(table, column)
end
end
context 'when multiple columns are given' do
it 'creates the correct columns and installs the trigger' do
columns_to_convert = %i[id non_nullable_column nullable_column]
temporary_columns = %w[
id_convert_to_bigint
non_nullable_column_convert_to_bigint
nullable_column_convert_to_bigint
]
expect(model).to receive(:add_column).with(table, temporary_columns[0], :bigint, default: 0, null: false)
expect(model).to receive(:add_column).with(table, temporary_columns[1], :bigint, default: 0, null: false)
expect(model).to receive(:add_column).with(table, temporary_columns[2], :bigint, default: nil)
expect(model).to receive(:install_rename_triggers).with(table, columns_to_convert, temporary_columns)
model.initialize_conversion_of_integer_to_bigint(table, columns_to_convert)
end
end
end
describe '#backfill_conversion_of_integer_to_bigint' do
......@@ -1783,6 +1800,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
model.create_table table, id: false do |t|
t.integer :id, primary_key: true
t.text :message, null: false
t.integer :other_id
t.timestamps
end
......@@ -1808,14 +1826,14 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it 'raises an error' do
expect { model.backfill_conversion_of_integer_to_bigint(table, column) }
.to raise_error("Column #{column} does not exist on #{table}")
.to raise_error(ArgumentError, "Column #{column} does not exist on #{table}")
end
end
context 'when the temporary column does not exist' do
it 'raises an error' do
expect { model.backfill_conversion_of_integer_to_bigint(table, column) }
.to raise_error('The temporary column does not exist, initialize it with `initialize_conversion_of_integer_to_bigint`')
.to raise_error(ArgumentError, "Column #{tmp_column} does not exist on #{table}")
end
end
......@@ -1829,33 +1847,65 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
let(:migration_relation) { Gitlab::Database::BackgroundMigration::BatchedMigration.active }
before do
model.initialize_conversion_of_integer_to_bigint(table, column)
model.initialize_conversion_of_integer_to_bigint(table, columns)
model_class.create!(message: 'hello')
model_class.create!(message: 'so long')
end
it 'creates the batched migration tracking record' do
last_record = model_class.create!(message: 'goodbye')
context 'when a single column is being converted' do
let(:columns) { column }
expect do
model.backfill_conversion_of_integer_to_bigint(table, column, batch_size: 2, sub_batch_size: 1)
end.to change { migration_relation.count }.by(1)
expect(migration_relation.last).to have_attributes(
job_class_name: 'CopyColumnUsingBackgroundMigrationJob',
table_name: table.to_s,
column_name: column.to_s,
min_value: 1,
max_value: last_record.id,
interval: 120,
batch_size: 2,
sub_batch_size: 1,
job_arguments: [column.to_s, "#{column}_convert_to_bigint"]
)
it 'creates the batched migration tracking record' do
last_record = model_class.create!(message: 'goodbye')
expect do
model.backfill_conversion_of_integer_to_bigint(table, column, batch_size: 2, sub_batch_size: 1)
end.to change { migration_relation.count }.by(1)
expect(migration_relation.last).to have_attributes(
job_class_name: 'CopyColumnUsingBackgroundMigrationJob',
table_name: table.to_s,
column_name: column.to_s,
min_value: 1,
max_value: last_record.id,
interval: 120,
batch_size: 2,
sub_batch_size: 1,
job_arguments: [[column.to_s], ["#{column}_convert_to_bigint"]]
)
end
end
context 'when multiple columns are being converted' do
let(:other_column) { :other_id }
let(:other_tmp_column) { "#{other_column}_convert_to_bigint" }
let(:columns) { [column, other_column] }
it 'creates the batched migration tracking record' do
last_record = model_class.create!(message: 'goodbye', other_id: 50)
expect do
model.backfill_conversion_of_integer_to_bigint(table, columns, batch_size: 2, sub_batch_size: 1)
end.to change { migration_relation.count }.by(1)
expect(migration_relation.last).to have_attributes(
job_class_name: 'CopyColumnUsingBackgroundMigrationJob',
table_name: table.to_s,
column_name: column.to_s,
min_value: 1,
max_value: last_record.id,
interval: 120,
batch_size: 2,
sub_batch_size: 1,
job_arguments: [[column.to_s, other_column.to_s], [tmp_column, other_tmp_column]]
)
end
end
context 'when the migration should be performed inline' do
let(:columns) { column }
it 'calls the runner to run the entire migration' do
expect(model).to receive(:perform_background_migration_inline?).and_return(true)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment