Commit e036a374 authored by Grzegorz Bizon

Add walk_table_in_batches and refactor migration helpers

parent 24a4199a
...@@ -221,17 +221,19 @@ module Gitlab ...@@ -221,17 +221,19 @@ module Gitlab
# make things _more_ complex). # make things _more_ complex).
# #
# rubocop: disable Metrics/AbcSize # rubocop: disable Metrics/AbcSize
def update_column_in_batches(table, column, value) def update_column_in_batches(table, column, value, &scope)
if transaction_open? if transaction_open?
raise 'update_column_in_batches can not be run inside a transaction, ' \ raise <<-MSG
'you can disable transactions by calling disable_ddl_transaction! ' \ update_column_in_batches helper can not be run inside a transaction.
'in the body of your migration class' You can disable transactions by calling `disable_ddl_transaction!`
method in the body of your migration class.
MSG
end end
table = Arel::Table.new(table) table_arel = Arel::Table.new(table)
count_arel = table.project(Arel.star.count.as('count')) count_arel = table_arel.project(Arel.star.count.as('count'))
count_arel = yield table, count_arel if block_given? count_arel = yield table_arel, count_arel if block_given?
total = exec_query(count_arel.to_sql).to_hash.first['count'].to_i total = exec_query(count_arel.to_sql).to_hash.first['count'].to_i
...@@ -246,37 +248,56 @@ module Gitlab ...@@ -246,37 +248,56 @@ module Gitlab
# rows for GitLab.com. # rows for GitLab.com.
batch_size = max_size if batch_size > max_size batch_size = max_size if batch_size > max_size
walk_table_in_batches(table, of: batch_size, scope: scope) do
Arel::UpdateManager.new(ActiveRecord::Base)
.table(table_arel)
.set([[table_arel[column], value]])
end
end
def walk_table_in_batches(table, of: 1000, scope: nil)
if transaction_open?
raise <<-MSG
walk_table_in_batches helper can not be run inside a transaction.
You can disable transactions by calling `disable_ddl_transaction!`
method in the body of your migration class.
MSG
end
table = Arel::Table.new(table)
start_arel = table.project(table[:id]).order(table[:id].asc).take(1) start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
start_arel = yield table, start_arel if block_given? start_arel = scope.call(table, start_arel) if scope
start_id = exec_query(start_arel.to_sql).to_hash.first['id'].to_i start_id = exec_query(start_arel.to_sql).to_hash.first.to_h['id'].to_i
loop do 1.step do |batch|
stop_arel = table.project(table[:id]) stop_arel = table.project(table[:id])
.where(table[:id].gteq(start_id)) .where(table[:id].gteq(start_id))
.order(table[:id].asc) .order(table[:id].asc)
.take(1) .take(1)
.skip(batch_size) .skip(of)
stop_arel = yield table, stop_arel if block_given?
stop_row = exec_query(stop_arel.to_sql).to_hash.first
update_arel = Arel::UpdateManager.new(ActiveRecord::Base) stop_arel = scope.call(table, stop_arel) if scope
.table(table) stop_id = exec_query(stop_arel.to_sql)
.set([[table[column], value]]) .to_hash.first.to_h['id'].to_i
.where(table[:id].gteq(start_id))
if stop_row action = yield(batch, start_id, stop_id)
stop_id = stop_row['id'].to_i
start_id = stop_id
update_arel = update_arel.where(table[:id].lt(stop_id))
end
update_arel = yield table, update_arel if block_given? if action.is_a?(Arel::TreeManager)
exec_arel = action.where(table[:id].gteq(start_id))
exec_arel = exec_arel.where(table[:id].lt(stop_id)) if stop_id.nonzero?
exec_arel = scope.call(table, exec_arel) if scope
execute(update_arel.to_sql) execute(exec_arel.to_sql)
end
# There are no more rows left to update. if stop_id.zero?
break unless stop_row # there are no more rows left to update
break
else
# next loop
start_id = stop_id
end
end end
end end
......
...@@ -2,9 +2,7 @@ require 'spec_helper' ...@@ -2,9 +2,7 @@ require 'spec_helper'
describe Gitlab::Database::MigrationHelpers, lib: true do describe Gitlab::Database::MigrationHelpers, lib: true do
let(:model) do let(:model) do
ActiveRecord::Migration.new.extend( ActiveRecord::Migration.new.extend(described_class)
Gitlab::Database::MigrationHelpers
)
end end
before do before do
...@@ -264,7 +262,7 @@ describe Gitlab::Database::MigrationHelpers, lib: true do ...@@ -264,7 +262,7 @@ describe Gitlab::Database::MigrationHelpers, lib: true do
describe '#update_column_in_batches' do describe '#update_column_in_batches' do
context 'when running outside of a transaction' do context 'when running outside of a transaction' do
before do before do
expect(model).to receive(:transaction_open?).and_return(false) expect(model).to receive(:transaction_open?).twice.and_return(false)
create_list(:empty_project, 5) create_list(:empty_project, 5)
end end
...@@ -313,6 +311,53 @@ describe Gitlab::Database::MigrationHelpers, lib: true do ...@@ -313,6 +311,53 @@ describe Gitlab::Database::MigrationHelpers, lib: true do
end end
end end
describe '#walk_table_in_batches' do
  # Specs for the batch-walking migration helper: block yielding, optional
  # scoping of the executed statement, and the open-transaction guard.
  context 'when running outside of a transaction' do
    before do
      # The helper raises when a transaction is open, so report none here.
      expect(model).to receive(:transaction_open?).and_return(false)

      create_list(:empty_project, 6)
    end

    it 'yields for each batch' do
      # Six rows walked two at a time are expected to produce three batches.
      expect { |b| model.walk_table_in_batches(:projects, of: 2, &b) }
        .to yield_control.exactly(3).times
    end

    it 'yields successive ranges' do
      # Each yield receives the batch number, the start id and the stop id;
      # the last batch has no following row, so its stop id is 0.
      expect { |b| model.walk_table_in_batches(:projects, of: 2, &b) }
        .to yield_successive_args([1, Integer, Integer],
                                  [2, Integer, Integer],
                                  [3, Integer, 0])
    end

    context 'when a scope is provided' do
      it 'limits the scope of the statement provided inside the block' do
        first_id = Project.first.id
        projects = Arel::Table.new(:projects)
        scope = ->(table, query) { query.where(table[:id].eq(first_id)) }

        # The block returns an Arel manager; the helper executes it with the
        # batch boundaries and the scope applied.
        model.walk_table_in_batches(:projects, scope: scope) do
          Arel::UpdateManager.new(ActiveRecord::Base)
            .table(projects)
            .set([[projects[:archived], true]])
        end

        expect(Project.where(archived: true).count).to eq(1)
      end
    end
  end

  context 'when running inside the transaction' do
    it 'raises RuntimeError' do
      expect(model).to receive(:transaction_open?).and_return(true)

      # Exercise the helper under test (not update_column_in_batches) and
      # check that the error comes from its own transaction guard.
      expect do
        model.walk_table_in_batches(:projects) {}
      end.to raise_error(RuntimeError, /walk_table_in_batches/)
    end
  end
end
describe '#add_column_with_default' do describe '#add_column_with_default' do
context 'outside of a transaction' do context 'outside of a transaction' do
context 'when a column limit is not set' do context 'when a column limit is not set' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment