Commit dc96eabb authored by Andreas Brandl

Merge branch 'pb-remove-old-bg-migration-method' into 'master'

Remove bulk_queue_bg_migration_jobs_by_range

See merge request gitlab-org/gitlab!75274
parents 4fe026d0 e72b6cb3
...@@ -7,54 +7,6 @@ module Gitlab ...@@ -7,54 +7,6 @@ module Gitlab
BATCH_SIZE = 1_000 # Number of rows to process per job BATCH_SIZE = 1_000 # Number of rows to process per job
JOB_BUFFER_SIZE = 1_000 # Number of jobs to bulk queue at a time JOB_BUFFER_SIZE = 1_000 # Number of jobs to bulk queue at a time
# Enqueues one background migration job per ID-range batch of a table.
# Jobs are accumulated in a buffer and pushed several at a time ("bulk")
# instead of one by one.
# When every job needs its own delay interval, use
# `queue_background_migration_jobs_by_range_at_intervals` instead.
#
# model_class - The model whose table is iterated (must respond to `each_batch`)
# job_class_name - Name of the background migration job class, as a string
# batch_size - Upper bound on the number of rows covered by one job
#
# Raises a RuntimeError when the model has no `id` column.
#
# Example:
#
# class Route < ActiveRecord::Base
# include EachBatch
# self.table_name = 'routes'
# end
#
# bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
#
# This assumes the model includes EachBatch and that the job class exists:
#
# class Gitlab::BackgroundMigration::ProcessRoutes
# def perform(start_id, end_id)
# # do something
# end
# end
def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BATCH_SIZE)
  unless model_class.column_names.include?('id')
    raise "#{model_class} does not have an ID to use for batch ranges"
  end

  quoted_table = model_class.quoted_table_name
  id_bounds = ["MIN(#{quoted_table}.id)", "MAX(#{quoted_table}.id)"]
  buffer = []

  model_class.each_batch(of: batch_size) do |batch|
    first_id, last_id = batch.pluck(*id_bounds).first

    # Once the buffer is full, push everything collected so far in a single
    # call before adding the current range. Pushing several jobs at a time
    # reduces the time spent in Sidekiq/Redis operations, and buffering
    # avoids extra queries per range. This mostly matters for tables with
    # many millions of rows.
    if buffer.size >= JOB_BUFFER_SIZE
      bulk_migrate_async(buffer)
      buffer.clear
    end

    buffer.push([job_class_name, [first_id, last_id]])
  end

  # Push whatever is left over; an empty table queues nothing.
  return if buffer.empty?

  bulk_migrate_async(buffer)
end
# Queues background migration jobs for an entire table in batches. # Queues background migration jobs for an entire table in batches.
# The default batching column used is the standard primary key `id`. # The default batching column used is the standard primary key `id`.
# Each job is scheduled with a `delay_interval` in between. # Each job is scheduled with a `delay_interval` in between.
......
...@@ -7,78 +7,6 @@ RSpec.describe Gitlab::Database::Migrations::BackgroundMigrationHelpers do ...@@ -7,78 +7,6 @@ RSpec.describe Gitlab::Database::Migrations::BackgroundMigrationHelpers do
ActiveRecord::Migration.new.extend(described_class) ActiveRecord::Migration.new.extend(described_class)
end end
# Covers the bulk (non-delayed) range-based queueing helper: how ID ranges are
# split into jobs, how jobs are grouped per bulk push, and the error raised for
# models without an `id` column.
describe '#bulk_queue_background_migration_jobs_by_range' do
  context 'when the model has an ID column' do
    let!(:first_user_id) { create(:user).id }
    let!(:second_user_id) { create(:user).id }
    let!(:third_user_id) { create(:user).id }

    before do
      User.include(EachBatch)
    end

    # Runs the helper under test against User with the 'FooJob' job class.
    def queue_foo_jobs(**options)
      model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', **options)
    end

    # Arguments of the fake-queued Sidekiq job at the given position.
    def queued_args(index)
      BackgroundMigrationWorker.jobs[index]['args']
    end

    context 'with enough rows to bulk queue jobs more than once' do
      before do
        stub_const('Gitlab::Database::Migrations::BackgroundMigrationHelpers::JOB_BUFFER_SIZE', 1)
      end

      it 'queues jobs correctly' do
        Sidekiq::Testing.fake! do
          queue_foo_jobs(batch_size: 2)

          expect(queued_args(0)).to eq(['FooJob', [first_user_id, second_user_id]])
          expect(queued_args(1)).to eq(['FooJob', [third_user_id, third_user_id]])
        end
      end

      it 'queues jobs in groups of buffer size 1' do
        first_group = [['FooJob', [first_user_id, second_user_id]]]
        second_group = [['FooJob', [third_user_id, third_user_id]]]

        expect(BackgroundMigrationWorker).to receive(:bulk_perform_async).with(first_group)
        expect(BackgroundMigrationWorker).to receive(:bulk_perform_async).with(second_group)

        queue_foo_jobs(batch_size: 2)
      end
    end

    context 'with not enough rows to bulk queue jobs more than once' do
      it 'queues jobs correctly' do
        Sidekiq::Testing.fake! do
          queue_foo_jobs(batch_size: 2)

          expect(queued_args(0)).to eq(['FooJob', [first_user_id, second_user_id]])
          expect(queued_args(1)).to eq(['FooJob', [third_user_id, third_user_id]])
        end
      end

      it 'queues jobs in bulk all at once (big buffer size)' do
        all_jobs = [
          ['FooJob', [first_user_id, second_user_id]],
          ['FooJob', [third_user_id, third_user_id]]
        ]

        expect(BackgroundMigrationWorker).to receive(:bulk_perform_async).with(all_jobs)

        queue_foo_jobs(batch_size: 2)
      end
    end

    context 'without specifying batch_size' do
      it 'queues jobs correctly' do
        Sidekiq::Testing.fake! do
          queue_foo_jobs

          expect(queued_args(0)).to eq(['FooJob', [first_user_id, third_user_id]])
        end
      end
    end
  end

  context "when the model doesn't have an ID column" do
    it 'raises error (for now)' do
      expect { model.bulk_queue_background_migration_jobs_by_range(ProjectAuthorization, 'FooJob') }
        .to raise_error(StandardError, /does not have an ID/)
    end
  end
end
describe '#queue_background_migration_jobs_by_range_at_intervals' do describe '#queue_background_migration_jobs_by_range_at_intervals' do
context 'when the model has an ID column' do context 'when the model has an ID column' do
let!(:id1) { create(:user).id } let!(:id1) { create(:user).id }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment