Commit 659aeba4 authored by Oswaldo Ferreira's avatar Oswaldo Ferreira

Use schedulers and delete diff files upon deadtuples check

parent c15f836c
...@@ -2,7 +2,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration ...@@ -2,7 +2,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
DOWNTIME = false DOWNTIME = false
MIGRATION = 'DeleteDiffFiles'.freeze SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze
TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
disable_ddl_transaction! disable_ddl_transaction!
...@@ -12,7 +12,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration ...@@ -12,7 +12,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX) add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX)
end end
BackgroundMigrationWorker.perform_async(MIGRATION) BackgroundMigrationWorker.perform_async(SCHEDULER)
# We don't remove the index since it's going to be used on DeleteDiffFiles # We don't remove the index since it's going to be used on DeleteDiffFiles
# worker. We should remove it in an upcoming release. # worker. We should remove it in an upcoming release.
......
...@@ -15,49 +15,37 @@ module Gitlab ...@@ -15,49 +15,37 @@ module Gitlab
self.table_name = 'merge_request_diff_files' self.table_name = 'merge_request_diff_files'
end end
DIFF_ROWS_LIMIT = 5_000
DEAD_TUPLES_THRESHOLD = 50_000 DEAD_TUPLES_THRESHOLD = 50_000
VACUUM_WAIT_TIME = 5.minutes VACUUM_WAIT_TIME = 5.minutes
def perform def perform(ids)
rescheduling do @ids = ids
prune_diff_files(diffs_collection.limit(DIFF_ROWS_LIMIT))
end
end
def should_wait_deadtuple_vacuum?
return false unless Gitlab::Database.postgresql?
diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD
end
private
def rescheduling(&block)
# We should reschedule until deadtuples get in a desirable # We should reschedule until deadtuples get in a desirable
# state (e.g. < 50_000). That may take move than one reschedule. # state (e.g. < 50_000). That may take more than one reschedule.
# #
if should_wait_deadtuple_vacuum? if should_wait_deadtuple_vacuum?
reschedule reschedule
return return
end end
block.call prune_diff_files
end
def should_wait_deadtuple_vacuum?
return false unless Gitlab::Database.postgresql?
reschedule if diffs_collection.limit(1).count > 0 diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD
end end
private
def reschedule def reschedule
BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize) BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize, [@ids])
end end
def diffs_collection def diffs_collection
MergeRequestDiff MergeRequestDiff.where(id: @ids)
.joins(:merge_request)
.where("merge_requests.state = 'merged'")
.where('merge_requests.latest_merge_request_diff_id IS NOT NULL')
.where('merge_requests.latest_merge_request_diff_id != merge_request_diffs.id')
.where("merge_request_diffs.state NOT IN ('without_files', 'empty')")
end end
def diff_files_dead_tuples_count def diff_files_dead_tuples_count
...@@ -68,17 +56,13 @@ module Gitlab ...@@ -68,17 +56,13 @@ module Gitlab
dead_tuple&.fetch('n_dead_tup', 0).to_i dead_tuple&.fetch('n_dead_tup', 0).to_i
end end
def prune_diff_files(batch) def prune_diff_files
diff_ids = batch.pluck(:id)
removed = 0 removed = 0
updated = 0 updated = 0
MergeRequestDiff.transaction do MergeRequestDiff.transaction do
updated = MergeRequestDiff.where(id: diff_ids) updated = diffs_collection.update_all(state: 'without_files')
.update_all(state: 'without_files') removed = MergeRequestDiffFile.where(merge_request_diff_id: @ids).delete_all
removed = MergeRequestDiffFile.where(merge_request_diff_id: diff_ids)
.delete_all
end end
log_info("Removed #{removed} merge_request_diff_files rows, "\ log_info("Removed #{removed} merge_request_diff_files rows, "\
......
# frozen_string_literal: true
# rubocop:disable Style/Documentation
module Gitlab
module BackgroundMigration
class ScheduleDiffFilesDeletion
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
belongs_to :merge_request
include EachBatch
end
DIFF_BATCH_SIZE = 5_000
INTERVAL = 5.minutes
MIGRATION = 'DeleteDiffFiles'
def perform
diffs = MergeRequestDiff
.from("(#{diffs_collection.to_sql}) merge_request_diffs")
.where('merge_request_diffs.id != merge_request_diffs.latest_merge_request_diff_id')
.select(:id)
diffs.each_batch(of: DIFF_BATCH_SIZE) do |relation, index|
ids = relation.pluck(:id)
BackgroundMigrationWorker.perform_in(index * INTERVAL, MIGRATION, [ids])
end
end
private
def diffs_collection
MergeRequestDiff
.joins(:merge_request)
.where("merge_requests.state = 'merged'")
.where('merge_requests.latest_merge_request_diff_id IS NOT NULL')
.where("merge_request_diffs.state NOT IN ('without_files', 'empty')")
.select('merge_requests.latest_merge_request_diff_id, merge_request_diffs.id')
end
end
end
end
...@@ -9,14 +9,18 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 ...@@ -9,14 +9,18 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
merge_request.merge_request_diffs.first merge_request.merge_request_diffs.first
end end
let(:perform) do
described_class.new.perform(MergeRequestDiff.pluck(:id))
end
it 'deletes all merge request diff files' do it 'deletes all merge request diff files' do
expect { described_class.new.perform } expect { perform }
.to change { merge_request_diff.merge_request_diff_files.count } .to change { merge_request_diff.merge_request_diff_files.count }
.from(20).to(0) .from(20).to(0)
end end
it 'updates state to without_files' do it 'updates state to without_files' do
expect { described_class.new.perform } expect { perform }
.to change { merge_request_diff.reload.state } .to change { merge_request_diff.reload.state }
.from('collected').to('without_files') .from('collected').to('without_files')
end end
...@@ -25,7 +29,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 ...@@ -25,7 +29,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
expect(described_class::MergeRequestDiffFile).to receive_message_chain(:where, :delete_all) expect(described_class::MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
.and_raise .and_raise
expect { described_class.new.perform } expect { perform }
.to raise_error .to raise_error
merge_request_diff.reload merge_request_diff.reload
...@@ -35,50 +39,18 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 ...@@ -35,50 +39,18 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
end end
end end
it 'deletes no merge request diff files when MR is not merged' do it 'reschedules itself when should_wait_deadtuple_vacuum' do
merge_request = create(:merge_request, :opened)
merge_request.create_merge_request_diff
merge_request_diff = merge_request.merge_request_diffs.first
expect { described_class.new.perform }
.not_to change { merge_request_diff.merge_request_diff_files.count }
.from(20)
end
it 'deletes no merge request diff files when diff is marked as "without_files"' do
merge_request = create(:merge_request, :merged)
merge_request.create_merge_request_diff
merge_request_diff = merge_request.merge_request_diffs.first
merge_request_diff.clean!
expect { described_class.new.perform }
.not_to change { merge_request_diff.merge_request_diff_files.count }
.from(20)
end
it 'deletes no merge request diff files when diff is the latest' do
merge_request = create(:merge_request, :merged) merge_request = create(:merge_request, :merged)
merge_request_diff = merge_request.merge_request_diff first_diff = merge_request.merge_request_diff
second_diff = merge_request.create_merge_request_diff
expect { described_class.new.perform }
.not_to change { merge_request_diff.merge_request_diff_files.count }
.from(20)
end
it 'reschedules itself when should_wait_deadtuple_vacuum' do
Sidekiq::Testing.fake! do Sidekiq::Testing.fake! do
worker = described_class.new worker = described_class.new
allow(worker).to receive(:should_wait_deadtuple_vacuum?) { true } allow(worker).to receive(:should_wait_deadtuple_vacuum?) { true }
expect(BackgroundMigrationWorker) worker.perform([first_diff.id, second_diff.id])
.to receive(:perform_in)
.with(described_class::VACUUM_WAIT_TIME, 'DeleteDiffFiles')
.and_call_original
worker.perform
expect(described_class.name.demodulize).to be_scheduled_delayed_migration(5.minutes, [first_diff.id, second_diff.id])
expect(BackgroundMigrationWorker.jobs.size).to eq(1) expect(BackgroundMigrationWorker.jobs.size).to eq(1)
end end
end end
......
require 'spec_helper'
describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do
describe '#perform' do
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
before do
stub_const("#{described_class.name}::DIFF_BATCH_SIZE", 3)
namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
merge_request_diffs.create!(id: 1, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 2, merge_request_id: 1, state: 'empty')
merge_request_diffs.create!(id: 3, merge_request_id: 1, state: 'without_files')
merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected')
merge_requests.update(1, latest_merge_request_diff_id: 7)
end
it 'correctly schedules diff file deletion workers' do
Sidekiq::Testing.fake! do
Timecop.freeze do
described_class.new.perform
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(5.minutes, [1, 4, 5])
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, [6])
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
end
end
end
...@@ -2,11 +2,11 @@ require 'spec_helper' ...@@ -2,11 +2,11 @@ require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb') require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb')
describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
it 'correctly schedules diff files deletion' do it 'correctly schedules diff files deletion schedulers' do
Sidekiq::Testing.fake! do Sidekiq::Testing.fake! do
expect(BackgroundMigrationWorker) expect(BackgroundMigrationWorker)
.to receive(:perform_async) .to receive(:perform_async)
.with(described_class::MIGRATION) .with(described_class::SCHEDULER)
.and_call_original .and_call_original
migrate! migrate!
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment