Commit b9e5859a authored by Sean McGivern's avatar Sean McGivern

Merge branch 'osw-delete-non-latest-mr-diff-files-upon-merge' into 'master'

Delete non-latest merge request diff files upon MRs merge

Closes #37639

See merge request gitlab-org/gitlab-ce!19670
parents 9490c378 f5ed18e1
...@@ -378,6 +378,10 @@ class MergeRequest < ActiveRecord::Base ...@@ -378,6 +378,10 @@ class MergeRequest < ActiveRecord::Base
end end
end end
def non_latest_diffs
merge_request_diffs.where.not(id: merge_request_diff.id)
end
def diff_size def diff_size
# Calling `merge_request_diff.diffs.real_size` will also perform # Calling `merge_request_diff.diffs.real_size` will also perform
# highlighting, which we don't need here. # highlighting, which we don't need here.
...@@ -619,18 +623,7 @@ class MergeRequest < ActiveRecord::Base ...@@ -619,18 +623,7 @@ class MergeRequest < ActiveRecord::Base
def reload_diff(current_user = nil) def reload_diff(current_user = nil)
return unless open? return unless open?
old_diff_refs = self.diff_refs MergeRequests::ReloadDiffsService.new(self, current_user).execute
new_diff = create_merge_request_diff
MergeRequests::MergeRequestDiffCacheService.new.execute(self, new_diff)
new_diff_refs = self.diff_refs
update_diff_discussion_positions(
old_diff_refs: old_diff_refs,
new_diff_refs: new_diff_refs,
current_user: current_user
)
end end
def check_if_can_be_merged def check_if_can_be_merged
......
...@@ -3,6 +3,7 @@ class MergeRequestDiff < ActiveRecord::Base ...@@ -3,6 +3,7 @@ class MergeRequestDiff < ActiveRecord::Base
include Importable include Importable
include ManualInverseAssociation include ManualInverseAssociation
include IgnorableColumn include IgnorableColumn
include EachBatch
# Don't display more than 100 commits at once # Don't display more than 100 commits at once
COMMITS_SAFE_SIZE = 100 COMMITS_SAFE_SIZE = 100
...@@ -17,8 +18,14 @@ class MergeRequestDiff < ActiveRecord::Base ...@@ -17,8 +18,14 @@ class MergeRequestDiff < ActiveRecord::Base
has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) } has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }
state_machine :state, initial: :empty do state_machine :state, initial: :empty do
event :clean do
transition any => :without_files
end
state :collected state :collected
state :overflow state :overflow
# Diff files have been deleted by the system
state :without_files
# Deprecated states: these are no longer used but these values may still occur # Deprecated states: these are no longer used but these values may still occur
# in the database. # in the database.
state :timeout state :timeout
...@@ -27,6 +34,7 @@ class MergeRequestDiff < ActiveRecord::Base ...@@ -27,6 +34,7 @@ class MergeRequestDiff < ActiveRecord::Base
state :overflow_diff_lines_limit state :overflow_diff_lines_limit
end end
scope :with_files, -> { without_states(:without_files, :empty) }
scope :viewable, -> { without_state(:empty) } scope :viewable, -> { without_state(:empty) }
scope :by_commit_sha, ->(sha) do scope :by_commit_sha, ->(sha) do
joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil) joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil)
...@@ -42,6 +50,10 @@ class MergeRequestDiff < ActiveRecord::Base ...@@ -42,6 +50,10 @@ class MergeRequestDiff < ActiveRecord::Base
find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha) find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha)
end end
def viewable?
collected? || without_files? || overflow?
end
# Collect information about commits and diff from repository # Collect information about commits and diff from repository
# and save it to the database as serialized data # and save it to the database as serialized data
def save_git_content def save_git_content
...@@ -170,6 +182,21 @@ class MergeRequestDiff < ActiveRecord::Base ...@@ -170,6 +182,21 @@ class MergeRequestDiff < ActiveRecord::Base
end end
def diffs(diff_options = nil) def diffs(diff_options = nil)
if without_files? && comparison = diff_refs.compare_in(project)
# It should fetch the repository when diffs are cleaned by the system.
# We don't keep these for storage overload purposes.
# See https://gitlab.com/gitlab-org/gitlab-ce/issues/37639
comparison.diffs(diff_options)
else
diffs_collection(diff_options)
end
end
# Should always return the DB persisted diffs collection
# (e.g. Gitlab::Diff::FileCollection::MergeRequestDiff.
# It's useful when trying to invalidate old caches through
# FileCollection::MergeRequestDiff#clear_cache!
def diffs_collection(diff_options = nil)
Gitlab::Diff::FileCollection::MergeRequestDiff.new(self, diff_options: diff_options) Gitlab::Diff::FileCollection::MergeRequestDiff.new(self, diff_options: diff_options)
end end
......
module MergeRequests
class DeleteNonLatestDiffsService
BATCH_SIZE = 10
def initialize(merge_request)
@merge_request = merge_request
end
def execute
diffs = @merge_request.non_latest_diffs.with_files
diffs.each_batch(of: BATCH_SIZE) do |relation, index|
ids = relation.pluck(:id).map { |id| [id] }
DeleteDiffFilesWorker.bulk_perform_in(index * 5.minutes, ids)
end
end
end
end
module MergeRequests
class MergeRequestDiffCacheService
def execute(merge_request, new_diff)
# Executing the iteration we cache all the highlighted diff information
merge_request.diffs.diff_files.to_a
# Remove cache for all diffs on this MR. Do not use the association on the
# model, as that will interfere with other actions happening when
# reloading the diff.
MergeRequestDiff.where(merge_request: merge_request).each do |merge_request_diff|
next if merge_request_diff == new_diff
merge_request_diff.diffs.clear_cache!
end
end
end
end
...@@ -15,6 +15,7 @@ module MergeRequests ...@@ -15,6 +15,7 @@ module MergeRequests
execute_hooks(merge_request, 'merge') execute_hooks(merge_request, 'merge')
invalidate_cache_counts(merge_request, users: merge_request.assignees) invalidate_cache_counts(merge_request, users: merge_request.assignees)
merge_request.update_project_counter_caches merge_request.update_project_counter_caches
delete_non_latest_diffs(merge_request)
end end
private private
...@@ -31,6 +32,10 @@ module MergeRequests ...@@ -31,6 +32,10 @@ module MergeRequests
end end
end end
def delete_non_latest_diffs(merge_request)
DeleteNonLatestDiffsService.new(merge_request).execute
end
def create_merge_event(merge_request, current_user) def create_merge_event(merge_request, current_user)
EventCreateService.new.merge_mr(merge_request, current_user) EventCreateService.new.merge_mr(merge_request, current_user)
end end
......
module MergeRequests
class ReloadDiffsService
def initialize(merge_request, current_user)
@merge_request = merge_request
@current_user = current_user
end
def execute
old_diff_refs = merge_request.diff_refs
new_diff = merge_request.create_merge_request_diff
clear_cache(new_diff)
update_diff_discussion_positions(old_diff_refs)
end
private
attr_reader :merge_request, :current_user
def update_diff_discussion_positions(old_diff_refs)
new_diff_refs = merge_request.diff_refs
merge_request.update_diff_discussion_positions(old_diff_refs: old_diff_refs,
new_diff_refs: new_diff_refs,
current_user: current_user)
end
def clear_cache(new_diff)
# Executing the iteration we cache highlighted diffs for each diff file of
# MergeRequestDiff.
new_diff.diffs_collection.diff_files.to_a
# Remove cache for all diffs on this MR. Do not use the association on the
# model, as that will interfere with other actions happening when
# reloading the diff.
MergeRequestDiff.where(merge_request: merge_request).each do |merge_request_diff|
next if merge_request_diff == new_diff
merge_request_diff.diffs_collection.clear_cache!
end
end
end
end
...@@ -16,6 +16,6 @@ ...@@ -16,6 +16,6 @@
%span.ref-name= @merge_request.target_branch %span.ref-name= @merge_request.target_branch
.text-center= link_to 'Create commit', project_new_blob_path(@project, @merge_request.source_branch), class: 'btn btn-save' .text-center= link_to 'Create commit', project_new_blob_path(@project, @merge_request.source_branch), class: 'btn btn-save'
- else - else
- diff_viewable = @merge_request_diff ? @merge_request_diff.collected? || @merge_request_diff.overflow? : true - diff_viewable = @merge_request_diff ? @merge_request_diff.viewable? : true
- if diff_viewable - if diff_viewable
= render "projects/diffs/diffs", diffs: @diffs, environment: @environment, merge_request: true = render "projects/diffs/diffs", diffs: @diffs, environment: @environment, merge_request: true
...@@ -118,3 +118,4 @@ ...@@ -118,3 +118,4 @@
- web_hook - web_hook
- repository_update_remote_mirror - repository_update_remote_mirror
- create_note_diff_file - create_note_diff_file
- delete_diff_files
class DeleteDiffFilesWorker
include ApplicationWorker
def perform(merge_request_diff_id)
merge_request_diff = MergeRequestDiff.find(merge_request_diff_id)
return if merge_request_diff.without_files?
MergeRequestDiff.transaction do
merge_request_diff.clean!
MergeRequestDiffFile
.where(merge_request_diff_id: merge_request_diff.id)
.delete_all
end
end
end
---
title: Delete non-latest merge request diff files upon merge
merge_request:
author:
type: other
...@@ -76,4 +76,5 @@ ...@@ -76,4 +76,5 @@
- [repository_update_remote_mirror, 1] - [repository_update_remote_mirror, 1]
- [repository_remove_remote, 1] - [repository_remove_remote, 1]
- [create_note_diff_file, 1] - [create_note_diff_file, 1]
- [delete_diff_files, 1]
...@@ -47,6 +47,45 @@ describe MergeRequestDiff do ...@@ -47,6 +47,45 @@ describe MergeRequestDiff do
end end
describe '#diffs' do describe '#diffs' do
let(:merge_request) { create(:merge_request, :with_diffs) }
let!(:diff) { merge_request.merge_request_diff.reload }
context 'when it was not cleaned by the system' do
it 'returns persisted diffs' do
expect(diff).to receive(:load_diffs)
diff.diffs
end
end
context 'when diff was cleaned by the system' do
before do
diff.clean!
end
it 'returns diffs from repository if can compare with current diff refs' do
expect(diff).not_to receive(:load_diffs)
expect(Compare)
.to receive(:new)
.with(instance_of(Gitlab::Git::Compare), merge_request.target_project,
base_sha: diff.base_commit_sha, straight: false)
.and_call_original
diff.diffs
end
it 'returns persisted diffs if cannot compare with diff refs' do
expect(diff).to receive(:load_diffs)
diff.update!(head_commit_sha: 'invalid-sha')
diff.diffs
end
end
end
describe '#raw_diffs' do
context 'when the :ignore_whitespace_change option is set' do context 'when the :ignore_whitespace_change option is set' do
it 'creates a new compare object instead of loading from the DB' do it 'creates a new compare object instead of loading from the DB' do
expect(diff_with_commits).not_to receive(:load_diffs) expect(diff_with_commits).not_to receive(:load_diffs)
......
...@@ -1630,28 +1630,17 @@ describe MergeRequest do ...@@ -1630,28 +1630,17 @@ describe MergeRequest do
end end
describe "#reload_diff" do describe "#reload_diff" do
let(:discussion) { create(:diff_note_on_merge_request, project: subject.project, noteable: subject).to_discussion } it 'calls MergeRequests::ReloadDiffsService#execute with correct params' do
let(:commit) { subject.project.commit(sample_commit.id) } user = create(:user)
service = instance_double(MergeRequests::ReloadDiffsService, execute: nil)
it "does not change existing merge request diff" do
expect(subject.merge_request_diff).not_to receive(:save_git_content)
subject.reload_diff
end
it "creates new merge request diff" do expect(MergeRequests::ReloadDiffsService)
expect { subject.reload_diff }.to change { subject.merge_request_diffs.count }.by(1) .to receive(:new).with(subject, user)
end .and_return(service)
it "executes diff cache service" do
expect_any_instance_of(MergeRequests::MergeRequestDiffCacheService).to receive(:execute).with(subject, an_instance_of(MergeRequestDiff))
subject.reload_diff
end
it "calls update_diff_discussion_positions" do subject.reload_diff(user)
expect(subject).to receive(:update_diff_discussion_positions)
subject.reload_diff expect(service).to have_received(:execute)
end end
context 'when using the after_update hook to update' do context 'when using the after_update hook to update' do
......
require 'spec_helper'
describe MergeRequests::DeleteNonLatestDiffsService, :clean_gitlab_redis_shared_state do
let(:merge_request) { create(:merge_request) }
let!(:subject) { described_class.new(merge_request) }
describe '#execute' do
before do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
3.times { merge_request.create_merge_request_diff }
end
it 'schedules non-latest merge request diffs removal' do
diffs = merge_request.merge_request_diffs
expect(diffs.count).to eq(4)
Timecop.freeze do
expect(DeleteDiffFilesWorker)
.to receive(:bulk_perform_in)
.with(5.minutes, [[diffs.first.id], [diffs.second.id]])
expect(DeleteDiffFilesWorker)
.to receive(:bulk_perform_in)
.with(10.minutes, [[diffs.third.id]])
subject.execute
end
end
it 'schedules no removal if it is already cleaned' do
merge_request.merge_request_diffs.each(&:clean!)
expect(DeleteDiffFilesWorker).not_to receive(:bulk_perform_in)
subject.execute
end
it 'schedules no removal if it is empty' do
merge_request.merge_request_diffs.each { |diff| diff.update!(state: :empty) }
expect(DeleteDiffFilesWorker).not_to receive(:bulk_perform_in)
subject.execute
end
it 'schedules no removal if there is no non-latest diffs' do
merge_request
.merge_request_diffs
.where.not(id: merge_request.latest_merge_request_diff_id)
.destroy_all
expect(DeleteDiffFilesWorker).not_to receive(:bulk_perform_in)
subject.execute
end
end
end
require 'spec_helper'
describe MergeRequests::MergeRequestDiffCacheService, :use_clean_rails_memory_store_caching do
let(:subject) { described_class.new }
let(:merge_request) { create(:merge_request) }
describe '#execute' do
before do
allow_any_instance_of(Gitlab::Diff::File).to receive(:text?).and_return(true)
allow_any_instance_of(Gitlab::Diff::File).to receive(:diffable?).and_return(true)
end
it 'retrieves the diff files to cache the highlighted result' do
new_diff = merge_request.merge_request_diff
cache_key = new_diff.diffs.cache_key
expect(Rails.cache).to receive(:read).with(cache_key).and_call_original
expect(Rails.cache).to receive(:write).with(cache_key, anything, anything).and_call_original
subject.execute(merge_request, new_diff)
end
it 'clears the cache for older diffs on the merge request' do
old_diff = merge_request.merge_request_diff
old_cache_key = old_diff.diffs.cache_key
subject.execute(merge_request, old_diff)
new_diff = merge_request.create_merge_request_diff
new_cache_key = new_diff.diffs.cache_key
expect(Rails.cache).to receive(:delete).with(old_cache_key).and_call_original
expect(Rails.cache).to receive(:read).with(new_cache_key).and_call_original
expect(Rails.cache).to receive(:write).with(new_cache_key, anything, anything).and_call_original
subject.execute(merge_request, new_diff)
end
end
end
...@@ -35,5 +35,17 @@ describe MergeRequests::PostMergeService do ...@@ -35,5 +35,17 @@ describe MergeRequests::PostMergeService do
described_class.new(project, user, {}).execute(merge_request) described_class.new(project, user, {}).execute(merge_request)
end end
it 'deletes non-latest diffs' do
diff_removal_service = instance_double(MergeRequests::DeleteNonLatestDiffsService, execute: nil)
expect(MergeRequests::DeleteNonLatestDiffsService)
.to receive(:new).with(merge_request)
.and_return(diff_removal_service)
described_class.new(project, user, {}).execute(merge_request)
expect(diff_removal_service).to have_received(:execute)
end
end end
end end
require 'spec_helper'
describe MergeRequests::ReloadDiffsService, :use_clean_rails_memory_store_caching do
let(:current_user) { create(:user) }
let(:merge_request) { create(:merge_request) }
let(:subject) { described_class.new(merge_request, current_user) }
describe '#execute' do
it 'creates new merge request diff' do
expect { subject.execute }.to change { merge_request.merge_request_diffs.count }.by(1)
end
it 'calls update_diff_discussion_positions with correct params' do
old_diff_refs = merge_request.diff_refs
new_diff = merge_request.create_merge_request_diff
new_diff_refs = merge_request.diff_refs
expect(merge_request).to receive(:create_merge_request_diff).and_return(new_diff)
expect(merge_request).to receive(:update_diff_discussion_positions)
.with(old_diff_refs: old_diff_refs,
new_diff_refs: new_diff_refs,
current_user: current_user)
subject.execute
end
it 'does not change existing merge request diff' do
expect(merge_request.merge_request_diff).not_to receive(:save_git_content)
subject.execute
end
context 'cache clearing' do
before do
allow_any_instance_of(Gitlab::Diff::File).to receive(:text?).and_return(true)
allow_any_instance_of(Gitlab::Diff::File).to receive(:diffable?).and_return(true)
end
it 'retrieves the diff files to cache the highlighted result' do
new_diff = merge_request.create_merge_request_diff
cache_key = new_diff.diffs_collection.cache_key
expect(merge_request).to receive(:create_merge_request_diff).and_return(new_diff)
expect(Rails.cache).to receive(:read).with(cache_key).and_call_original
expect(Rails.cache).to receive(:write).with(cache_key, anything, anything).and_call_original
subject.execute
end
it 'clears the cache for older diffs on the merge request' do
old_diff = merge_request.merge_request_diff
old_cache_key = old_diff.diffs_collection.cache_key
new_diff = merge_request.create_merge_request_diff
new_cache_key = new_diff.diffs_collection.cache_key
expect(merge_request).to receive(:create_merge_request_diff).and_return(new_diff)
expect(Rails.cache).to receive(:delete).with(old_cache_key).and_call_original
expect(Rails.cache).to receive(:read).with(new_cache_key).and_call_original
expect(Rails.cache).to receive(:write).with(new_cache_key, anything, anything).and_call_original
subject.execute
end
end
end
end
require 'spec_helper'
describe DeleteDiffFilesWorker do
describe '#perform' do
let(:merge_request) { create(:merge_request) }
let(:merge_request_diff) { merge_request.merge_request_diff }
it 'deletes all merge request diff files' do
expect { described_class.new.perform(merge_request_diff.id) }
.to change { merge_request_diff.merge_request_diff_files.count }
.from(20).to(0)
end
it 'updates state to without_files' do
expect { described_class.new.perform(merge_request_diff.id) }
.to change { merge_request_diff.reload.state }
.from('collected').to('without_files')
end
it 'does nothing if diff was already marked as "without_files"' do
merge_request_diff.clean!
expect_any_instance_of(MergeRequestDiff).not_to receive(:clean!)
described_class.new.perform(merge_request_diff.id)
end
it 'rollsback if something goes wrong' do
expect(MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
.and_raise
expect { described_class.new.perform(merge_request_diff.id) }
.to raise_error
merge_request_diff.reload
expect(merge_request_diff.state).to eq('collected')
expect(merge_request_diff.merge_request_diff_files.count).to eq(20)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment