Commit c13d28c9 authored by Sean McGivern's avatar Sean McGivern

Use sparse checkout for squashes

For large repos, the checkout as part of creating a worktree can be very
expensive. This can be so expensive that processes get 'stuck' on
GitLab.com.

To make this cheaper, configure sparse checkout for squash worktrees so
that we only check out the files that were changed in the target
branch (new files don't need to be included).

Also add specs to cover the case where only new files are added, and the
case where the diff isn't shown in the merge request because it's too
large.
parent 4fa66ffb
...@@ -23,13 +23,15 @@ module MergeRequests ...@@ -23,13 +23,15 @@ module MergeRequests
end end
run_git_command( run_git_command(
%W(worktree add #{tree_path} #{merge_request.target_branch} --detach), %W(worktree add --no-checkout --detach #{tree_path}),
repository.path_to_repo, repository.path_to_repo,
git_env, git_env,
'add worktree for squash' 'add worktree for squash'
) )
diff = git_command(%W(diff --binary #{merge_request.diff_start_sha}...#{merge_request.diff_head_sha})) configure_sparse_checkout
diff = git_command(%W(diff --binary #{diff_range}))
apply = git_command(%w(apply --index)) apply = git_command(%w(apply --index))
run_command( run_command(
...@@ -65,10 +67,64 @@ module MergeRequests ...@@ -65,10 +67,64 @@ module MergeRequests
false false
ensure ensure
clean_dir clean_dir
clean_worktree
end end
def tree_path def tree_path
@tree_path ||= merge_request.squash_dir_path @tree_path ||= merge_request.squash_dir_path
end end
def diff_range
@diff_range ||= "#{merge_request.diff_start_sha}...#{merge_request.diff_head_sha}"
end
def worktree_path
@worktree_path ||= File.join(repository.path_to_repo, 'worktrees', merge_request.id.to_s)
end
def clean_worktree
FileUtils.rm_rf(worktree_path) if File.exist?(worktree_path)
end
# Adding a worktree means checking out the repository. For large repos, this
# can be very expensive, so set up sparse checkout for the worktree to only
# check out the files we're interested in.
#
def configure_sparse_checkout
run_git_command(
%w(config core.sparseCheckout true),
repository.path_to_repo,
git_env,
'configure sparse checkout'
)
# Get the same diff we'll apply, excluding added files. (We can't check
# out files on the target branch if they don't exist yet!)
#
diff_files = run_git_command(
%W(diff --name-only --diff-filter=a --binary #{diff_range}),
repository.path_to_repo,
git_env,
'get files in diff'
)
# If only new files are introduced by this MR, then our sparse checkout
# doesn't need to have any files at all.
#
unless diff_files.empty?
worktree_info = File.join(worktree_path, 'info')
FileUtils.mkdir_p(worktree_info) unless File.directory?(worktree_info)
File.write(File.join(worktree_info, 'sparse-checkout'), diff_files)
end
run_git_command(
%W(checkout --detach #{merge_request.target_branch}),
tree_path,
git_env,
'check out target branch'
)
end
end end
end end
...@@ -5,81 +5,102 @@ describe MergeRequests::SquashService do ...@@ -5,81 +5,102 @@ describe MergeRequests::SquashService do
let(:user) { project.owner } let(:user) { project.owner }
let(:project) { create(:project) } let(:project) { create(:project) }
let(:merge_request) do let(:merge_request_with_one_commit) do
create(:merge_request,
source_branch: 'feature.custom-highlighting', source_project: project,
target_branch: 'master', target_project: project)
end
let(:merge_request_with_only_new_files) do
create(:merge_request, create(:merge_request,
source_branch: 'video', source_project: project, source_branch: 'video', source_project: project,
target_branch: 'master', target_project: project) target_branch: 'master', target_project: project)
end end
let(:merge_request_with_one_commit) do let(:merge_request_with_large_files) do
create(:merge_request, create(:merge_request,
source_branch: 'feature.custom-highlighting', source_project: project, source_branch: 'squash-large-files', source_project: project,
target_branch: 'master', target_project: project) target_branch: 'master', target_project: project)
end end
describe '#execute' do shared_examples 'the squash succeeds' do
context 'when there is only one commit in the merge request' do it 'returns the squashed commit SHA' do
it 'returns that commit SHA' do result = service.execute(merge_request)
result = service.execute(merge_request_with_one_commit)
expect(result).to match(status: :success, squash_sha: merge_request_with_one_commit.diff_head_sha) expect(result).to match(status: :success, squash_sha: a_string_matching(/\h{40}/))
end expect(result[:squash_sha]).not_to eq(merge_request.diff_head_sha)
end
it 'does not perform any git actions' do it 'cleans up the temporary directory' do
expect(service).not_to receive(:run_git_command) expect(service).to receive(:clean_dir).and_call_original
service.execute(merge_request_with_one_commit) service.execute(merge_request)
end
end end
context 'when the squash succeeds' do it 'does not keep the branch push event' do
it 'returns the squashed commit SHA' do expect { service.execute(merge_request) }.not_to change { Event.count }
result = service.execute(merge_request) end
expect(result).to match(status: :success, squash_sha: a_string_matching(/\h{40}/)) context 'the squashed commit' do
expect(result[:squash_sha]).not_to eq(merge_request.diff_head_sha) let(:squash_sha) { service.execute(merge_request)[:squash_sha] }
end let(:squash_commit) { project.repository.commit(squash_sha) }
it 'cleans up the temporary directory' do it 'copies the author info and message from the merge request' do
expect(service).to receive(:clean_dir).and_call_original expect(squash_commit.author_name).to eq(merge_request.author.name)
expect(squash_commit.author_email).to eq(merge_request.author.email)
service.execute(merge_request) # Commit messages have a trailing newline, but titles don't.
expect(squash_commit.message.chomp).to eq(merge_request.title)
end end
it 'does not keep the branch push event' do it 'sets the current user as the committer' do
expect { service.execute(merge_request) }.not_to change { Event.count } expect(squash_commit.committer_name).to eq(user.name.chomp('.'))
expect(squash_commit.committer_email).to eq(user.email)
end end
context 'the squashed commit' do it 'has the same diff as the merge request, but a different SHA' do
let(:squash_sha) { service.execute(merge_request)[:squash_sha] } rugged = project.repository.rugged
let(:squash_commit) { project.repository.commit(squash_sha) } mr_diff = rugged.diff(merge_request.diff_base_sha, merge_request.diff_head_sha)
squash_diff = rugged.diff(merge_request.diff_start_sha, squash_sha)
it 'copies the author info and message from the merge request' do expect(squash_diff.patch.length).to eq(mr_diff.patch.length)
expect(squash_commit.author_name).to eq(merge_request.author.name) expect(squash_commit.sha).not_to eq(merge_request.diff_head_sha)
expect(squash_commit.author_email).to eq(merge_request.author.email) end
end
end
# Commit messages have a trailing newline, but titles don't. describe '#execute' do
expect(squash_commit.message.chomp).to eq(merge_request.title) context 'when there is only one commit in the merge request' do
end it 'returns that commit SHA' do
result = service.execute(merge_request_with_one_commit)
it 'sets the current user as the committer' do expect(result).to match(status: :success, squash_sha: merge_request_with_one_commit.diff_head_sha)
expect(squash_commit.committer_name).to eq(user.name.chomp('.')) end
expect(squash_commit.committer_email).to eq(user.email)
end
it 'has the same diff as the merge request, but a different SHA' do it 'does not perform any git actions' do
rugged = project.repository.rugged expect(service).not_to receive(:run_git_command)
mr_diff = rugged.diff(merge_request.diff_base_sha, merge_request.diff_head_sha)
squash_diff = rugged.diff(merge_request.diff_start_sha, squash_sha)
expect(squash_diff.patch).to eq(mr_diff.patch) service.execute(merge_request_with_one_commit)
expect(squash_commit.sha).not_to eq(merge_request.diff_head_sha)
end
end end
end end
context 'when squashing only new files' do
let(:merge_request) { merge_request_with_only_new_files }
include_examples 'the squash succeeds'
end
context 'when squashing with files too large to display' do
let(:merge_request) { merge_request_with_large_files }
include_examples 'the squash succeeds'
end
stages = { stages = {
'add worktree for squash' => 'worktree', 'add worktree for squash' => 'worktree',
'configure sparse checkout' => 'config',
'get files in diff' => 'diff --name-only',
'check out target branch' => 'checkout',
'apply patch' => 'diff --binary', 'apply patch' => 'diff --binary',
'commit squashed changes' => 'commit', 'commit squashed changes' => 'commit',
'get SHA of squashed commit' => 'rev-parse' 'get SHA of squashed commit' => 'rev-parse'
...@@ -87,13 +108,14 @@ describe MergeRequests::SquashService do ...@@ -87,13 +108,14 @@ describe MergeRequests::SquashService do
stages.each do |stage, command| stages.each do |stage, command|
context "when the #{stage} stage fails" do context "when the #{stage} stage fails" do
let(:merge_request) { merge_request_with_only_new_files }
let(:error) { 'A test error' } let(:error) { 'A test error' }
before do before do
git_command = a_collection_containing_exactly( git_command = a_collection_containing_exactly(
a_string_starting_with("#{Gitlab.config.git.bin_path} #{command}") a_string_starting_with("#{Gitlab.config.git.bin_path} #{command}")
).or( ).or(
a_collection_starting_with([Gitlab.config.git.bin_path, command]) a_collection_starting_with([Gitlab.config.git.bin_path] + command.split)
) )
allow(service).to receive(:popen).and_return(['', 0]) allow(service).to receive(:popen).and_return(['', 0])
...@@ -121,6 +143,7 @@ describe MergeRequests::SquashService do ...@@ -121,6 +143,7 @@ describe MergeRequests::SquashService do
end end
context 'when any other exception is thrown' do context 'when any other exception is thrown' do
let(:merge_request) { merge_request_with_only_new_files }
let(:error) { 'A test error' } let(:error) { 'A test error' }
before do before do
......
...@@ -38,7 +38,8 @@ module TestEnv ...@@ -38,7 +38,8 @@ module TestEnv
'conflict-too-large' => '39fa04f', 'conflict-too-large' => '39fa04f',
'deleted-image-test' => '6c17798', 'deleted-image-test' => '6c17798',
'wip' => 'b9238ee', 'wip' => 'b9238ee',
'csv' => '3dd0896' 'csv' => '3dd0896',
'squash-large-files' => '54cec52'
} }
# gitlab-test-fork is a fork of gitlab-fork, but we don't necessarily # gitlab-test-fork is a fork of gitlab-fork, but we don't necessarily
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment