Commit 80a7be87 authored by Oswaldo Ferreira's avatar Oswaldo Ferreira

Schedule batches in bulks of 5 diffs

Issuing 6M writings in a N+1 manner in Redis takes time, 3 hours to be precise. This commit makes it schedule 5 jobs at a time, what should make it schedule every job in approximately 40 minutes
parent 3acf7ba9
...@@ -12,7 +12,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration ...@@ -12,7 +12,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
DOWNTIME = false DOWNTIME = false
BATCH_SIZE = 1000 BATCH_SIZE = 1000
MIGRATION = 'DeleteDiffFiles' MIGRATION = 'DeleteDiffFiles'
DELAY_INTERVAL = 8.minutes DELAY_INTERVAL = 10.minutes
TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
disable_ddl_transaction! disable_ddl_transaction!
...@@ -39,20 +39,21 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration ...@@ -39,20 +39,21 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
# Execution time: 12.430 ms # Execution time: 12.430 ms
# #
diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index| diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index|
ids = relation.pluck(:id) # We slice the batches in groups of 5 and schedule each group of 5 at
# once. This should make writings on Redis go 5x faster.
job_batches = relation.pluck(:id).in_groups_of(5, false).map do |ids|
ids.map { |id| [MIGRATION, [id]] }
end
ids.each_with_index do |diff_id, inner_index| job_batches.each_with_index do |jobs, inner_index|
# This will give some space between batches of workers. # This will give some space between batches of workers.
interval = DELAY_INTERVAL * outer_index + inner_index.minutes interval = DELAY_INTERVAL * outer_index + inner_index.minutes
# A single `merge_request_diff` can be associated with way too many # A single `merge_request_diff` can be associated with way too many
# `merge_request_diff_files`. It's better to avoid batching these and # `merge_request_diff_files`. It's better to avoid scheduling big
# schedule one at a time. # batches and go with 5 at a time.
#
# Considering roughly 6M jobs, this should take ~30 days to process all
# of them.
# #
BackgroundMigrationWorker.perform_in(interval, MIGRATION, [diff_id]) BackgroundMigrationWorker.bulk_perform_in(interval, jobs)
end end
end end
......
...@@ -8,7 +8,7 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do ...@@ -8,7 +8,7 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
let(:projects) { table(:projects) } let(:projects) { table(:projects) }
before do before do
stub_const("#{described_class.name}::BATCH_SIZE", 2) stub_const("#{described_class.name}::BATCH_SIZE", 7)
namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab') projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
...@@ -21,6 +21,10 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do ...@@ -21,6 +21,10 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected') merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty') merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty')
merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected') merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 8, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 9, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 10, merge_request_id: 1, state: 'collected')
merge_requests.update(1, latest_merge_request_diff_id: 6) merge_requests.update(1, latest_merge_request_diff_id: 6)
end end
...@@ -30,13 +34,17 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do ...@@ -30,13 +34,17 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
Timecop.freeze do Timecop.freeze do
migrate! migrate!
# 1st batch # 1st batch schedule
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(8.minutes, 1) [1, 3, 4, 6, 7].each do |id|
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(9.minutes, 3) expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id)
# 2nd batch end
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(16.minutes, 4) [8, 9].each do |id|
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(17.minutes, 6) expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id)
expect(BackgroundMigrationWorker.jobs.size).to eq(4) end
# 2nd batch schedule
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(20.minutes, 10)
expect(BackgroundMigrationWorker.jobs.size).to eq(8)
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment