Commit cd3e0a5c authored by Grzegorz Bizon, committed by Fabio Pitino

Add new migration helpers for data migration in batches

parent d710a04b
# frozen_string_literal: true # frozen_string_literal: true
class CleanUpPendingBuildsTable < ActiveRecord::Migration[6.0] class CleanUpPendingBuildsTable < ActiveRecord::Migration[6.0]
include ::Gitlab::Database::DynamicModelHelpers
BATCH_SIZE = 1000 BATCH_SIZE = 1000
disable_ddl_transaction! disable_ddl_transaction!
...@@ -8,7 +10,7 @@ class CleanUpPendingBuildsTable < ActiveRecord::Migration[6.0] ...@@ -8,7 +10,7 @@ class CleanUpPendingBuildsTable < ActiveRecord::Migration[6.0]
def up def up
return unless Gitlab.dev_or_test_env? || Gitlab.com? return unless Gitlab.dev_or_test_env? || Gitlab.com?
each_batch('ci_pending_builds', of: BATCH_SIZE) do |min, max| each_batch_range('ci_pending_builds', of: BATCH_SIZE) do |min, max|
execute <<~SQL execute <<~SQL
DELETE FROM ci_pending_builds DELETE FROM ci_pending_builds
USING ci_builds USING ci_builds
...@@ -23,19 +25,4 @@ class CleanUpPendingBuildsTable < ActiveRecord::Migration[6.0] ...@@ -23,19 +25,4 @@ class CleanUpPendingBuildsTable < ActiveRecord::Migration[6.0]
def down def down
# noop # noop
end end
private
# Iterates +table_name+ in batches and yields the first row returned by
# plucking 'MIN(id), MAX(id)' from each batch.
#
# table_name - name assigned to the anonymous model's table_name.
# scope      - callable that receives the anonymous model and returns the
#              relation to iterate; defaults to +table.all+.
# of         - batch size forwarded to the relation's each_batch.
def each_batch(table_name, scope: ->(table) { table.all }, of: 1000)
  # Throwaway model bound to the requested table; the inheritance column is
  # pointed at :_type_disabled, same as the original inline class body did.
  batchable = Class.new(ActiveRecord::Base)
  batchable.include(EachBatch)
  batchable.table_name = table_name
  batchable.inheritance_column = :_type_disabled

  relation = scope.call(batchable)

  relation.each_batch(of: of) do |rows|
    id_bounds = rows.pluck('MIN(id), MAX(id)')
    yield id_bounds.first
  end
end
end end
...@@ -11,6 +11,25 @@ module Gitlab ...@@ -11,6 +11,25 @@ module Gitlab
self.inheritance_column = :_type_disabled self.inheritance_column = :_type_disabled
end end
end end
# Walks +table_name+ in batches and yields each batch to the caller's block.
#
# table_name - table name handed to define_batchable_model.
# scope      - callable that receives the batchable model and returns the
#              relation to iterate; defaults to +table.all+.
# of         - batch size forwarded to the relation's each_batch.
#
# Raises RuntimeError when transaction_open? reports an open transaction.
def each_batch(table_name, scope: ->(table) { table.all }, of: 1000)
  # Refuse to start before any model is built or any query is issued.
  raise <<~MSG.squish if transaction_open?
    each_batch should not run inside a transaction, you can disable
    transactions by calling disable_ddl_transaction! in the body of
    your migration class
  MSG

  batchable_model = define_batchable_model(table_name)
  relation = scope.call(batchable_model)

  relation.each_batch(of: of) do |rows|
    yield rows
  end
end
# Same iteration as each_batch, but instead of the batch itself the block
# receives the first row returned by plucking 'MIN(id), MAX(id)' from it.
#
# table_name, scope and of are forwarded to each_batch unchanged.
def each_batch_range(table_name, scope: ->(table) { table.all }, of: 1000)
  each_batch(table_name, scope: scope, of: of) do |relation|
    id_bounds = relation.pluck('MIN(id), MAX(id)')
    yield id_bounds.first
  end
end
end end
end end
end end
...@@ -3,12 +3,12 @@ ...@@ -3,12 +3,12 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Database::DynamicModelHelpers do RSpec.describe Gitlab::Database::DynamicModelHelpers do
let(:including_class) { Class.new.include(described_class) }
let(:table_name) { 'projects' }
describe '#define_batchable_model' do describe '#define_batchable_model' do
subject { including_class.new.define_batchable_model(table_name) } subject { including_class.new.define_batchable_model(table_name) }
let(:including_class) { Class.new.include(described_class) }
let(:table_name) { 'projects' }
it 'is an ActiveRecord model' do it 'is an ActiveRecord model' do
expect(subject.ancestors).to include(ActiveRecord::Base) expect(subject.ancestors).to include(ActiveRecord::Base)
end end
...@@ -25,4 +25,86 @@ RSpec.describe Gitlab::Database::DynamicModelHelpers do ...@@ -25,4 +25,86 @@ RSpec.describe Gitlab::Database::DynamicModelHelpers do
expect(subject.inheritance_column).to eq('_type_disabled') expect(subject.inheritance_column).to eq('_type_disabled')
end end
end end
# Exercises each_batch on an instance of the including class: batches are
# yielded while no transaction is open, and a RuntimeError is raised otherwise.
describe '#each_batch' do
subject { including_class.new }
# Two project rows, so a batch size of 1 is expected to produce two batches.
before do
create_list(:project, 2)
end
context 'when no transaction is open' do
# transaction_open? is stubbed on the subject rather than supplied by a real
# migration context.
before do
allow(subject).to receive(:transaction_open?).and_return(false)
end
it 'iterates table in batches' do
# Adapter lambda: forwards the size of every yielded batch to the probe block
# so yield_successive_args can compare plain integers.
each_batch_size = ->(&block) do
subject.each_batch(table_name, of: 1) do |batch|
block.call(batch.size)
end
end
expect { |b| each_batch_size.call(&b) }
.to yield_successive_args(1, 1)
end
end
context 'when transaction is open' do
before do
allow(subject).to receive(:transaction_open?).and_return(true)
end
it 'raises an error' do
expect { subject.each_batch(table_name, of: 1) { |batch| batch.size } }
.to raise_error(RuntimeError, /each_batch should not run inside a transaction/)
end
end
end
# Exercises each_batch_range: every yielded value is expected to be a
# [min_id, max_id] pair for one batch of the table.
describe '#each_batch_range' do
subject { including_class.new }
# `let` is lazy: each project is created only when an example first
# references it, which here happens while the matcher arguments are built.
let(:first_project) { create(:project) }
let(:second_project) { create(:project) }
context 'when no transaction is open' do
# transaction_open? is stubbed on the subject rather than supplied by a real
# migration context.
before do
allow(subject).to receive(:transaction_open?).and_return(false)
end
it 'iterates table in batch ranges' do
# With a batch size of 1 each range collapses to [id, id].
expect { |b| subject.each_batch_range(table_name, of: 1, &b) }
.to yield_successive_args(
[first_project.id, first_project.id],
[second_project.id, second_project.id]
)
end
it 'yields only one batch if bigger than the table size' do
expect { |b| subject.each_batch_range(table_name, of: 2, &b) }
.to yield_successive_args([first_project.id, second_project.id])
end
it 'makes it possible to apply a scope' do
# NOTE(review): only first_project is referenced in this example, so with
# lazy `let` presumably just one row exists and the expectation may hold even
# without the limit(1) scope - confirm, and consider creating both projects.
each_batch_limited = ->(&b) do
subject.each_batch_range(table_name, scope: ->(table) { table.limit(1) }, of: 1, &b)
end
expect { |b| each_batch_limited.call(&b) }
.to yield_successive_args([first_project.id, first_project.id])
end
end
context 'when transaction is open' do
before do
allow(subject).to receive(:transaction_open?).and_return(true)
end
it 'raises an error' do
# The expected message names each_batch, not each_batch_range.
expect { subject.each_batch_range(table_name, of: 1) { 1 } }
.to raise_error(RuntimeError, /each_batch should not run inside a transaction/)
end
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment