Commit b011b740 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch 'pb-int-conversion-helpers-multiple-columns' into 'master'

Support multiple columns when converting int types

See merge request gitlab-org/gitlab!58409
parents 42c050db 3c22f6f1
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
module Gitlab module Gitlab
module BackgroundMigration module BackgroundMigration
# Background migration that updates the value of a # Background migration that updates the value of one or more
# column using the value of another column in the same table. # columns using the value of other columns in the same table.
# #
# - The {start_id, end_id} arguments are at the start so that it can be used # - The {start_id, end_id} arguments are at the start so that it can be used
# with `queue_batched_background_migration` # with `queue_batched_background_migration`
...@@ -25,17 +25,21 @@ module Gitlab ...@@ -25,17 +25,21 @@ module Gitlab
# sub_batch_size - We don't want updates to take more than ~100ms # sub_batch_size - We don't want updates to take more than ~100ms
# This allows us to run multiple smaller batches during # This allows us to run multiple smaller batches during
# the minimum 2.minute interval that we can schedule jobs # the minimum 2.minute interval that we can schedule jobs
# copy_from - The column containing the data to copy. # copy_from - List of columns containing the data to copy.
# copy_to - The column to copy the data to. # copy_to - List of columns to copy the data to. Order must match the order in `copy_from`.
def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, copy_from, copy_to) def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, copy_from, copy_to)
quoted_copy_from = connection.quote_column_name(copy_from) copy_from = Array.wrap(copy_from)
quoted_copy_to = connection.quote_column_name(copy_to) copy_to = Array.wrap(copy_to)
raise ArgumentError, 'number of source and destination columns must match' unless copy_from.count == copy_to.count
assignment_clauses = column_assignment_clauses(copy_from, copy_to)
parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id) parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id)
parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch| parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch|
batch_metrics.time_operation(:update_all) do batch_metrics.time_operation(:update_all) do
sub_batch.update_all("#{quoted_copy_to}=#{quoted_copy_from}") sub_batch.update_all(assignment_clauses)
end end
sleep(PAUSE_SECONDS) sleep(PAUSE_SECONDS)
...@@ -55,6 +59,17 @@ module Gitlab ...@@ -55,6 +59,17 @@ module Gitlab
def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id) def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id)
define_batchable_model(source_table).where(source_key_column => start_id..stop_id) define_batchable_model(source_table).where(source_key_column => start_id..stop_id)
end end
def column_assignment_clauses(copy_from, copy_to)
assignments = copy_from.zip(copy_to).map do |from_column, to_column|
from_column = connection.quote_column_name(from_column)
to_column = connection.quote_column_name(to_column)
"#{to_column} = #{from_column}"
end
assignments.join(', ')
end
end end
end end
end end
...@@ -905,7 +905,7 @@ module Gitlab ...@@ -905,7 +905,7 @@ module Gitlab
end end
end end
# Initializes the conversion of an integer column to bigint # Initializes the conversion of a set of integer columns to bigint
# #
# It can be used for converting both a Primary Key and any Foreign Keys # It can be used for converting both a Primary Key and any Foreign Keys
# that may reference it or any other integer column that we may want to # that may reference it or any other integer column that we may want to
...@@ -923,14 +923,14 @@ module Gitlab ...@@ -923,14 +923,14 @@ module Gitlab
# Note: this helper is intended to be used in a regular (pre-deployment) migration. # Note: this helper is intended to be used in a regular (pre-deployment) migration.
# #
# This helper is part 1 of a multi-step migration process: # This helper is part 1 of a multi-step migration process:
# 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers # 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations # 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005 # 3. remaining steps TBD, see #288005
# #
# table - The name of the database table containing the column # table - The name of the database table containing the column
# column - The name of the column that we want to convert to bigint. # columns - The name, or array of names, of the column(s) that we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id) # primary_key - The name of the primary key column (most often :id)
def initialize_conversion_of_integer_to_bigint(table, column, primary_key: :id) def initialize_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
unless table_exists?(table) unless table_exists?(table)
raise "Table #{table} does not exist" raise "Table #{table} does not exist"
end end
...@@ -939,34 +939,40 @@ module Gitlab ...@@ -939,34 +939,40 @@ module Gitlab
raise "Column #{primary_key} does not exist on #{table}" raise "Column #{primary_key} does not exist on #{table}"
end end
unless column_exists?(table, column) columns = Array.wrap(columns)
raise "Column #{column} does not exist on #{table}" columns.each do |column|
next if column_exists?(table, column)
raise ArgumentError, "Column #{column} does not exist on #{table}"
end end
check_trigger_permissions!(table) check_trigger_permissions!(table)
old_column = column_for(table, column) conversions = columns.to_h { |column| [column, "#{column}_convert_to_bigint"] }
tmp_column = "#{column}_convert_to_bigint"
with_lock_retries do with_lock_retries do
if (column.to_s == primary_key.to_s) || !old_column.null conversions.each do |(source_column, temporary_name)|
column = column_for(table, source_column)
if (column.name.to_s == primary_key.to_s) || !column.null
# If the column to be converted is either a PK or is defined as NOT NULL, # If the column to be converted is either a PK or is defined as NOT NULL,
# set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow # set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow
# That way, we skip the expensive validation step required to add # That way, we skip the expensive validation step required to add
# a NOT NULL constraint at the end of the process # a NOT NULL constraint at the end of the process
add_column(table, tmp_column, :bigint, default: old_column.default || 0, null: false) add_column(table, temporary_name, :bigint, default: column.default || 0, null: false)
else else
add_column(table, tmp_column, :bigint, default: old_column.default) add_column(table, temporary_name, :bigint, default: column.default)
end
end end
install_rename_triggers(table, column, tmp_column) install_rename_triggers(table, conversions.keys, conversions.values)
end end
end end
# Backfills the new column used in the conversion of an integer column to bigint using background migrations. # Backfills the new columns used in an integer-to-bigint conversion using background migrations.
# #
# - This helper should be called from a post-deployment migration. # - This helper should be called from a post-deployment migration.
# - In order for this helper to work properly, the new column must be first initialized with # - In order for this helper to work properly, the new columns must be first initialized with
# the `initialize_conversion_of_integer_to_bigint` helper. # the `initialize_conversion_of_integer_to_bigint` helper.
# - It tracks the scheduled background jobs through Gitlab::Database::BackgroundMigration::BatchedMigration, # - It tracks the scheduled background jobs through Gitlab::Database::BackgroundMigration::BatchedMigration,
# which allows a more thorough check that all jobs succeeded in the # which allows a more thorough check that all jobs succeeded in the
...@@ -976,12 +982,12 @@ module Gitlab ...@@ -976,12 +982,12 @@ module Gitlab
# deployed (including background job changes) before we begin processing the background migration. # deployed (including background job changes) before we begin processing the background migration.
# #
# This helper is part 2 of a multi-step migration process: # This helper is part 2 of a multi-step migration process:
# 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers # 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations # 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005 # 3. remaining steps TBD, see #288005
# #
# table - The name of the database table containing the column # table - The name of the database table containing the column
# column - The name of the column that we want to convert to bigint. # columns - The name, or an array of names, of the column(s) we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id) # primary_key - The name of the primary key column (most often :id)
# batch_size - The number of rows to schedule in a single background migration # batch_size - The number of rows to schedule in a single background migration
# sub_batch_size - The smaller batches that will be used by each scheduled job # sub_batch_size - The smaller batches that will be used by each scheduled job
...@@ -1001,7 +1007,7 @@ module Gitlab ...@@ -1001,7 +1007,7 @@ module Gitlab
# between the scheduled jobs # between the scheduled jobs
def backfill_conversion_of_integer_to_bigint( def backfill_conversion_of_integer_to_bigint(
table, table,
column, columns,
primary_key: :id, primary_key: :id,
batch_size: 20_000, batch_size: 20_000,
sub_batch_size: 1000, sub_batch_size: 1000,
...@@ -1016,22 +1022,21 @@ module Gitlab ...@@ -1016,22 +1022,21 @@ module Gitlab
raise "Column #{primary_key} does not exist on #{table}" raise "Column #{primary_key} does not exist on #{table}"
end end
unless column_exists?(table, column) conversions = Array.wrap(columns).to_h do |column|
raise "Column #{column} does not exist on #{table}" raise ArgumentError, "Column #{column} does not exist on #{table}" unless column_exists?(table, column)
end
tmp_column = "#{column}_convert_to_bigint" temporary_name = "#{column}_convert_to_bigint"
raise ArgumentError, "Column #{temporary_name} does not exist on #{table}" unless column_exists?(table, temporary_name)
unless column_exists?(table, tmp_column) [column, temporary_name]
raise 'The temporary column does not exist, initialize it with `initialize_conversion_of_integer_to_bigint`'
end end
batched_migration = queue_batched_background_migration( batched_migration = queue_batched_background_migration(
'CopyColumnUsingBackgroundMigrationJob', 'CopyColumnUsingBackgroundMigrationJob',
table, table,
primary_key, primary_key,
column, conversions.keys,
tmp_column, conversions.values,
job_interval: interval, job_interval: interval,
batch_size: batch_size, batch_size: batch_size,
sub_batch_size: sub_batch_size) sub_batch_size: sub_batch_size)
......
...@@ -65,6 +65,26 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo ...@@ -65,6 +65,26 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
expect(test_table.where("name_convert_to_text = 'no name'").count).to eq(0) expect(test_table.where("name_convert_to_text = 'no name'").count).to eq(0)
end end
it 'copies multiple columns when given' do
columns_to_copy_from = %w[id fk]
columns_to_copy_to = %w[id_convert_to_bigint fk_convert_to_bigint]
subject.perform(10, 15, table_name, 'id', sub_batch_size, columns_to_copy_from, columns_to_copy_to)
expect(test_table.where('id = id_convert_to_bigint AND fk = fk_convert_to_bigint').pluck(:id)).to contain_exactly(11, 12, 15)
expect(test_table.where(id_convert_to_bigint: 0).where(fk_convert_to_bigint: 0).pluck(:id)).to contain_exactly(19)
expect(test_table.all.count).to eq(4)
end
it 'raises error when number of source and target columns does not match' do
columns_to_copy_from = %w[id fk]
columns_to_copy_to = %w[id_convert_to_bigint]
expect do
subject.perform(10, 15, table_name, 'id', sub_batch_size, columns_to_copy_from, columns_to_copy_to)
end.to raise_error(ArgumentError, 'number of source and destination columns must match')
end
it 'tracks timings of queries' do it 'tracks timings of queries' do
expect(subject.batch_metrics.timings).to be_empty expect(subject.batch_metrics.timings).to be_empty
......
...@@ -1730,12 +1730,10 @@ RSpec.describe Gitlab::Database::MigrationHelpers do ...@@ -1730,12 +1730,10 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
end end
end end
context 'when the column to convert does not exist' do context 'when the column to migrate does not exist' do
let(:column) { :foobar }
it 'raises an error' do it 'raises an error' do
expect { model.initialize_conversion_of_integer_to_bigint(table, column) } expect { model.initialize_conversion_of_integer_to_bigint(table, :this_column_is_not_real) }
.to raise_error("Column #{column} does not exist on #{table}") .to raise_error(ArgumentError, "Column this_column_is_not_real does not exist on #{table}")
end end
end end
...@@ -1743,7 +1741,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do ...@@ -1743,7 +1741,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it 'creates a not-null bigint column and installs triggers' do it 'creates a not-null bigint column and installs triggers' do
expect(model).to receive(:add_column).with(table, tmp_column, :bigint, default: 0, null: false) expect(model).to receive(:add_column).with(table, tmp_column, :bigint, default: 0, null: false)
expect(model).to receive(:install_rename_triggers).with(table, column, tmp_column) expect(model).to receive(:install_rename_triggers).with(table, [column], [tmp_column])
model.initialize_conversion_of_integer_to_bigint(table, column) model.initialize_conversion_of_integer_to_bigint(table, column)
end end
...@@ -1755,7 +1753,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do ...@@ -1755,7 +1753,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it 'creates a not-null bigint column and installs triggers' do it 'creates a not-null bigint column and installs triggers' do
expect(model).to receive(:add_column).with(table, tmp_column, :bigint, default: 0, null: false) expect(model).to receive(:add_column).with(table, tmp_column, :bigint, default: 0, null: false)
expect(model).to receive(:install_rename_triggers).with(table, column, tmp_column) expect(model).to receive(:install_rename_triggers).with(table, [column], [tmp_column])
model.initialize_conversion_of_integer_to_bigint(table, column) model.initialize_conversion_of_integer_to_bigint(table, column)
end end
...@@ -1767,11 +1765,30 @@ RSpec.describe Gitlab::Database::MigrationHelpers do ...@@ -1767,11 +1765,30 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it 'creates a nullable bigint column and installs triggers' do it 'creates a nullable bigint column and installs triggers' do
expect(model).to receive(:add_column).with(table, tmp_column, :bigint, default: nil) expect(model).to receive(:add_column).with(table, tmp_column, :bigint, default: nil)
expect(model).to receive(:install_rename_triggers).with(table, column, tmp_column) expect(model).to receive(:install_rename_triggers).with(table, [column], [tmp_column])
model.initialize_conversion_of_integer_to_bigint(table, column) model.initialize_conversion_of_integer_to_bigint(table, column)
end end
end end
context 'when multiple columns are given' do
it 'creates the correct columns and installs the trigger' do
columns_to_convert = %i[id non_nullable_column nullable_column]
temporary_columns = %w[
id_convert_to_bigint
non_nullable_column_convert_to_bigint
nullable_column_convert_to_bigint
]
expect(model).to receive(:add_column).with(table, temporary_columns[0], :bigint, default: 0, null: false)
expect(model).to receive(:add_column).with(table, temporary_columns[1], :bigint, default: 0, null: false)
expect(model).to receive(:add_column).with(table, temporary_columns[2], :bigint, default: nil)
expect(model).to receive(:install_rename_triggers).with(table, columns_to_convert, temporary_columns)
model.initialize_conversion_of_integer_to_bigint(table, columns_to_convert)
end
end
end end
describe '#backfill_conversion_of_integer_to_bigint' do describe '#backfill_conversion_of_integer_to_bigint' do
...@@ -1783,6 +1800,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do ...@@ -1783,6 +1800,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
model.create_table table, id: false do |t| model.create_table table, id: false do |t|
t.integer :id, primary_key: true t.integer :id, primary_key: true
t.text :message, null: false t.text :message, null: false
t.integer :other_id
t.timestamps t.timestamps
end end
...@@ -1808,14 +1826,14 @@ RSpec.describe Gitlab::Database::MigrationHelpers do ...@@ -1808,14 +1826,14 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it 'raises an error' do it 'raises an error' do
expect { model.backfill_conversion_of_integer_to_bigint(table, column) } expect { model.backfill_conversion_of_integer_to_bigint(table, column) }
.to raise_error("Column #{column} does not exist on #{table}") .to raise_error(ArgumentError, "Column #{column} does not exist on #{table}")
end end
end end
context 'when the temporary column does not exist' do context 'when the temporary column does not exist' do
it 'raises an error' do it 'raises an error' do
expect { model.backfill_conversion_of_integer_to_bigint(table, column) } expect { model.backfill_conversion_of_integer_to_bigint(table, column) }
.to raise_error('The temporary column does not exist, initialize it with `initialize_conversion_of_integer_to_bigint`') .to raise_error(ArgumentError, "Column #{tmp_column} does not exist on #{table}")
end end
end end
...@@ -1829,12 +1847,15 @@ RSpec.describe Gitlab::Database::MigrationHelpers do ...@@ -1829,12 +1847,15 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
let(:migration_relation) { Gitlab::Database::BackgroundMigration::BatchedMigration.active } let(:migration_relation) { Gitlab::Database::BackgroundMigration::BatchedMigration.active }
before do before do
model.initialize_conversion_of_integer_to_bigint(table, column) model.initialize_conversion_of_integer_to_bigint(table, columns)
model_class.create!(message: 'hello') model_class.create!(message: 'hello')
model_class.create!(message: 'so long') model_class.create!(message: 'so long')
end end
context 'when a single column is being converted' do
let(:columns) { column }
it 'creates the batched migration tracking record' do it 'creates the batched migration tracking record' do
last_record = model_class.create!(message: 'goodbye') last_record = model_class.create!(message: 'goodbye')
...@@ -1851,11 +1872,40 @@ RSpec.describe Gitlab::Database::MigrationHelpers do ...@@ -1851,11 +1872,40 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
interval: 120, interval: 120,
batch_size: 2, batch_size: 2,
sub_batch_size: 1, sub_batch_size: 1,
job_arguments: [column.to_s, "#{column}_convert_to_bigint"] job_arguments: [[column.to_s], ["#{column}_convert_to_bigint"]]
) )
end end
end
context 'when multiple columns are being converted' do
let(:other_column) { :other_id }
let(:other_tmp_column) { "#{other_column}_convert_to_bigint" }
let(:columns) { [column, other_column] }
it 'creates the batched migration tracking record' do
last_record = model_class.create!(message: 'goodbye', other_id: 50)
expect do
model.backfill_conversion_of_integer_to_bigint(table, columns, batch_size: 2, sub_batch_size: 1)
end.to change { migration_relation.count }.by(1)
expect(migration_relation.last).to have_attributes(
job_class_name: 'CopyColumnUsingBackgroundMigrationJob',
table_name: table.to_s,
column_name: column.to_s,
min_value: 1,
max_value: last_record.id,
interval: 120,
batch_size: 2,
sub_batch_size: 1,
job_arguments: [[column.to_s, other_column.to_s], [tmp_column, other_tmp_column]]
)
end
end
context 'when the migration should be performed inline' do context 'when the migration should be performed inline' do
let(:columns) { column }
it 'calls the runner to run the entire migration' do it 'calls the runner to run the entire migration' do
expect(model).to receive(:perform_background_migration_inline?).and_return(true) expect(model).to receive(:perform_background_migration_inline?).and_return(true)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment