Commit e8670990 authored by Yorick Peterse

Added helper methods for database migrations

These helpers can be used to perform migrations without taking down the
entire application.

For example, the method "add_column_with_default" can be used to add a
new column with a default value without locking the entire table.
parent 3dc64764
module Gitlab
  module Database
    # Helpers for writing migrations that can run against large, busy tables.
    #
    # The methods in this module call `transaction_open?`, `transaction`,
    # `add_index`, `add_column`, `change_column_default`, `change_column_null`,
    # `remove_column`, `exec_query`, `execute`, `quote`, `quote_table_name` and
    # `quote_column_name` on `self`, so the including object has to provide
    # them (a migration class, or anything delegating to the connection).
    module MigrationHelpers
      # Upper limit on the number of rows a single UPDATE issued by
      # `update_column_in_batches` may cover.
      MAX_BATCH_SIZE = 5000

      # Creates a new index, concurrently when supported
      #
      # On PostgreSQL this method creates an index concurrently, on other
      # databases (e.g. MySQL) this creates a regular index.
      #
      # Example:
      #
      #     add_concurrent_index :users, :some_column
      #     add_concurrent_index :users, :some_column, unique: true
      #
      # See Rails' `add_index` for more info on the available arguments.
      #
      # Raises RuntimeError when called while a transaction is open.
      def add_concurrent_index(*args)
        if transaction_open?
          raise 'add_concurrent_index can not be run inside a transaction, ' \
            'you can disable transactions by calling disable_ddl_transaction! ' \
            'in the body of your migration class'
        end

        if Database.postgresql?
          # Merge into the caller's options (if any) instead of appending a
          # second hash, which would hand `add_index` one argument too many.
          options = args.last.is_a?(Hash) ? args.pop : {}
          args << options.merge(algorithm: :concurrently)
        end

        add_index(*args)
      end

      # Updates the value of a column in batches.
      #
      # This method updates the table in batches of 5% of the total row count,
      # with an upper limit of MAX_BATCH_SIZE rows per batch. Batches are
      # delimited by `id`, so the table must have an `id` column.
      #
      # Every row counted when the method starts is overwritten, regardless of
      # the column's current value. Any data inserted while running this method
      # (or after it has finished running) is _not_ guaranteed to be updated.
      #
      # table - The name of the table.
      # column - The name of the column to update.
      # value - The value for the column.
      #
      # Returns nil.
      def update_column_in_batches(table, column, value)
        quoted_table = quote_table_name(table)
        quoted_column = quote_column_name(column)
        quoted_value = quote(value)

        total = exec_query("SELECT COUNT(*) AS count FROM #{quoted_table}").
          to_hash.
          first['count'].
          to_i

        # Update in batches of 5% with an upper limit of MAX_BATCH_SIZE rows.
        batch_size = [((total / 100.0) * 5.0).ceil, MAX_BATCH_SIZE].min
        processed = 0

        while processed < total
          start_row = id_row_at_offset(quoted_table, processed)

          # Rows can disappear between the COUNT above and this lookup; with
          # no row left at this offset there is nothing more to update.
          break unless start_row

          stop_row = id_row_at_offset(quoted_table, processed + batch_size)

          # Without a stop row this is the final batch: update to the end.
          id_range = "id >= #{start_row['id']}"
          id_range += " AND id < #{stop_row['id']}" if stop_row

          execute(
            "UPDATE #{quoted_table} " \
            "SET #{quoted_column} = #{quoted_value} " \
            "WHERE #{id_range}"
          )

          processed += batch_size
        end
      end

      # Adds a column with a default value without locking an entire table.
      #
      # This method runs the following steps:
      #
      # 1. Add the column with a default value of NULL and, in the same
      #    transaction, change the column's default to the specified value so
      #    newly inserted rows already use it.
      # 2. Update all existing rows in batches.
      # 3. Disallow NULL values for the column, unless `allow_null` is true.
      #
      # When step 2 fails the column is removed again and the error re-raised.
      #
      # These steps ensure a column can be added to a large and commonly used
      # table without locking the entire table for the duration of the table
      # modification.
      #
      # table - The name of the table to update.
      # column - The name of the column to add.
      # type - The column type (e.g. `:integer`).
      # default - The default value for the column.
      # allow_null - When set to `true` the column will allow NULL values, the
      #              default is to not allow NULL values.
      #
      # Raises RuntimeError when called while a transaction is open.
      def add_column_with_default(table, column, type, default:, allow_null: false)
        if transaction_open?
          raise 'add_column_with_default can not be run inside a transaction, ' \
            'you can disable transactions by calling disable_ddl_transaction! ' \
            'in the body of your migration class'
        end

        transaction do
          add_column(table, column, type, default: nil)

          # Changing the default before the update ensures any newly inserted
          # rows already use the proper default value.
          change_column_default(table, column, default)
        end

        begin
          # NOTE(review): this wraps every batch in one transaction; confirm
          # that is intended, since it delays the commit until the last batch.
          transaction do
            update_column_in_batches(table, column, default)
          end
        # Deliberately rescues _all_ exceptions (not just StandardError): the
        # half-populated column is cleaned up and the error is re-raised, so
        # nothing is swallowed.
        rescue Exception # rubocop:disable Lint/RescueException
          remove_column(table, column)
          raise
        end

        change_column_null(table, column, false) unless allow_null
      end

      private

      # Returns the row hash (`{ 'id' => ... }`) found `offset` rows into the
      # table when ordered by id, or nil when there is no row at that offset.
      def id_row_at_offset(quoted_table, offset)
        exec_query(
          "SELECT id FROM #{quoted_table} ORDER BY id ASC LIMIT 1 OFFSET #{offset}"
        ).to_hash.first
      end
    end
  end
end
require 'spec_helper'
# Specs for Gitlab::Database::MigrationHelpers.
#
# The helpers are exercised through an anonymous class that includes the
# module and forwards every other call to the ActiveRecord connection.
describe Gitlab::Database::MigrationHelpers, lib: true do
  let(:model) do
    Class.new do
      include Gitlab::Database::MigrationHelpers

      # Forward anything the helpers module does not define to the connection.
      def method_missing(name, *args, &block)
        ActiveRecord::Base.connection.send(name, *args, &block)
      end

      # Keep `respond_to?` in agreement with `method_missing` above.
      def respond_to_missing?(name, include_private = false)
        ActiveRecord::Base.connection.respond_to?(name, include_private) || super
      end
    end.new
  end

  describe '#add_concurrent_index' do
    context 'outside a transaction' do
      before do
        expect(model).to receive(:transaction_open?).and_return(false)
      end

      context 'using PostgreSQL' do
        it 'creates the index concurrently' do
          expect(Gitlab::Database).to receive(:postgresql?).and_return(true)

          expect(model).to receive(:add_index).
            with(:users, :foo, algorithm: :concurrently)

          model.add_concurrent_index(:users, :foo)
        end
      end

      context 'using MySQL' do
        it 'creates a regular index' do
          expect(Gitlab::Database).to receive(:postgresql?).and_return(false)

          expect(model).to receive(:add_index).
            with(:users, :foo)

          model.add_concurrent_index(:users, :foo)
        end
      end
    end

    context 'inside a transaction' do
      it 'raises RuntimeError' do
        expect(model).to receive(:transaction_open?).and_return(true)

        expect { model.add_concurrent_index(:users, :foo) }.
          to raise_error(RuntimeError)
      end
    end
  end

  describe '#update_column_in_batches' do
    before do
      create_list(:empty_project, 5)
    end

    it 'updates all the rows in a table' do
      model.update_column_in_batches(:projects, :import_error, 'foo')

      expect(Project.where(import_error: 'foo').count).to eq(5)
    end
  end

  describe '#add_column_with_default' do
    context 'outside of a transaction' do
      before do
        expect(model).to receive(:transaction_open?).and_return(false)

        # One transaction adds the column and sets its default, a second one
        # wraps the batched update.
        expect(model).to receive(:transaction).twice.and_yield

        expect(model).to receive(:add_column).
          with(:projects, :foo, :integer, default: nil)

        expect(model).to receive(:change_column_default).
          with(:projects, :foo, 10)
      end

      it 'adds the column while allowing NULL values' do
        expect(model).to receive(:update_column_in_batches).
          with(:projects, :foo, 10)

        expect(model).not_to receive(:change_column_null)

        model.add_column_with_default(:projects, :foo, :integer,
                                      default: 10,
                                      allow_null: true)
      end

      it 'adds the column while not allowing NULL values' do
        expect(model).to receive(:update_column_in_batches).
          with(:projects, :foo, 10)

        expect(model).to receive(:change_column_null).
          with(:projects, :foo, false)

        model.add_column_with_default(:projects, :foo, :integer, default: 10)
      end

      it 'removes the added column whenever updating the rows fails' do
        expect(model).to receive(:update_column_in_batches).
          with(:projects, :foo, 10).
          and_raise(RuntimeError)

        expect(model).to receive(:remove_column).
          with(:projects, :foo)

        expect {
          model.add_column_with_default(:projects, :foo, :integer, default: 10)
        }.to raise_error(RuntimeError)
      end
    end

    context 'inside a transaction' do
      it 'raises RuntimeError' do
        expect(model).to receive(:transaction_open?).and_return(true)

        expect {
          model.add_column_with_default(:projects, :foo, :integer, default: 10)
        }.to raise_error(RuntimeError)
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment