Commit c13d28c9 authored by Sean McGivern's avatar Sean McGivern

Use sparse checkout for squashes

For large repos, the checkout as part of creating a worktree can be very
expensive. This can be so expensive that processes get 'stuck' on
GitLab.com.

To make this cheaper, configure sparse checkout for squash worktrees so
that we only check out the files that were changed in the target
branch (new files don't need to be included).

Also add specs to cover the case where only new files are added, and the
case where the diff isn't shown in the merge request because it's too
large.
parent 4fa66ffb
......@@ -23,13 +23,15 @@ module MergeRequests
end
run_git_command(
%W(worktree add #{tree_path} #{merge_request.target_branch} --detach),
%W(worktree add --no-checkout --detach #{tree_path}),
repository.path_to_repo,
git_env,
'add worktree for squash'
)
diff = git_command(%W(diff --binary #{merge_request.diff_start_sha}...#{merge_request.diff_head_sha}))
configure_sparse_checkout
diff = git_command(%W(diff --binary #{diff_range}))
apply = git_command(%w(apply --index))
run_command(
......@@ -65,10 +67,64 @@ module MergeRequests
false
ensure
clean_dir
clean_worktree
end
def tree_path
@tree_path ||= merge_request.squash_dir_path
end
def diff_range
@diff_range ||= "#{merge_request.diff_start_sha}...#{merge_request.diff_head_sha}"
end
def worktree_path
@worktree_path ||= File.join(repository.path_to_repo, 'worktrees', merge_request.id.to_s)
end
def clean_worktree
FileUtils.rm_rf(worktree_path) if File.exist?(worktree_path)
end
# Adding a worktree means checking out the repository. For large repos, this
# can be very expensive, so set up sparse checkout for the worktree to only
# check out the files we're interested in.
#
def configure_sparse_checkout
run_git_command(
%w(config core.sparseCheckout true),
repository.path_to_repo,
git_env,
'configure sparse checkout'
)
# Get the same diff we'll apply, excluding added files. (We can't check
# out files on the target branch if they don't exist yet!)
#
diff_files = run_git_command(
%W(diff --name-only --diff-filter=a --binary #{diff_range}),
repository.path_to_repo,
git_env,
'get files in diff'
)
# If only new files are introduced by this MR, then our sparse checkout
# doesn't need to have any files at all.
#
unless diff_files.empty?
worktree_info = File.join(worktree_path, 'info')
FileUtils.mkdir_p(worktree_info) unless File.directory?(worktree_info)
File.write(File.join(worktree_info, 'sparse-checkout'), diff_files)
end
run_git_command(
%W(checkout --detach #{merge_request.target_branch}),
tree_path,
git_env,
'check out target branch'
)
end
end
end
......@@ -5,81 +5,102 @@ describe MergeRequests::SquashService do
let(:user) { project.owner }
let(:project) { create(:project) }
let(:merge_request) do
let(:merge_request_with_one_commit) do
create(:merge_request,
source_branch: 'feature.custom-highlighting', source_project: project,
target_branch: 'master', target_project: project)
end
let(:merge_request_with_only_new_files) do
create(:merge_request,
source_branch: 'video', source_project: project,
target_branch: 'master', target_project: project)
end
let(:merge_request_with_one_commit) do
let(:merge_request_with_large_files) do
create(:merge_request,
source_branch: 'feature.custom-highlighting', source_project: project,
source_branch: 'squash-large-files', source_project: project,
target_branch: 'master', target_project: project)
end
describe '#execute' do
context 'when there is only one commit in the merge request' do
it 'returns that commit SHA' do
result = service.execute(merge_request_with_one_commit)
shared_examples 'the squash succeeds' do
it 'returns the squashed commit SHA' do
result = service.execute(merge_request)
expect(result).to match(status: :success, squash_sha: merge_request_with_one_commit.diff_head_sha)
end
expect(result).to match(status: :success, squash_sha: a_string_matching(/\h{40}/))
expect(result[:squash_sha]).not_to eq(merge_request.diff_head_sha)
end
it 'does not perform any git actions' do
expect(service).not_to receive(:run_git_command)
it 'cleans up the temporary directory' do
expect(service).to receive(:clean_dir).and_call_original
service.execute(merge_request_with_one_commit)
end
service.execute(merge_request)
end
context 'when the squash succeeds' do
it 'returns the squashed commit SHA' do
result = service.execute(merge_request)
it 'does not keep the branch push event' do
expect { service.execute(merge_request) }.not_to change { Event.count }
end
expect(result).to match(status: :success, squash_sha: a_string_matching(/\h{40}/))
expect(result[:squash_sha]).not_to eq(merge_request.diff_head_sha)
end
context 'the squashed commit' do
let(:squash_sha) { service.execute(merge_request)[:squash_sha] }
let(:squash_commit) { project.repository.commit(squash_sha) }
it 'cleans up the temporary directory' do
expect(service).to receive(:clean_dir).and_call_original
it 'copies the author info and message from the merge request' do
expect(squash_commit.author_name).to eq(merge_request.author.name)
expect(squash_commit.author_email).to eq(merge_request.author.email)
service.execute(merge_request)
# Commit messages have a trailing newline, but titles don't.
expect(squash_commit.message.chomp).to eq(merge_request.title)
end
it 'does not keep the branch push event' do
expect { service.execute(merge_request) }.not_to change { Event.count }
it 'sets the current user as the committer' do
expect(squash_commit.committer_name).to eq(user.name.chomp('.'))
expect(squash_commit.committer_email).to eq(user.email)
end
context 'the squashed commit' do
let(:squash_sha) { service.execute(merge_request)[:squash_sha] }
let(:squash_commit) { project.repository.commit(squash_sha) }
it 'has the same diff as the merge request, but a different SHA' do
rugged = project.repository.rugged
mr_diff = rugged.diff(merge_request.diff_base_sha, merge_request.diff_head_sha)
squash_diff = rugged.diff(merge_request.diff_start_sha, squash_sha)
it 'copies the author info and message from the merge request' do
expect(squash_commit.author_name).to eq(merge_request.author.name)
expect(squash_commit.author_email).to eq(merge_request.author.email)
expect(squash_diff.patch.length).to eq(mr_diff.patch.length)
expect(squash_commit.sha).not_to eq(merge_request.diff_head_sha)
end
end
end
# Commit messages have a trailing newline, but titles don't.
expect(squash_commit.message.chomp).to eq(merge_request.title)
end
describe '#execute' do
context 'when there is only one commit in the merge request' do
it 'returns that commit SHA' do
result = service.execute(merge_request_with_one_commit)
it 'sets the current user as the committer' do
expect(squash_commit.committer_name).to eq(user.name.chomp('.'))
expect(squash_commit.committer_email).to eq(user.email)
end
expect(result).to match(status: :success, squash_sha: merge_request_with_one_commit.diff_head_sha)
end
it 'has the same diff as the merge request, but a different SHA' do
rugged = project.repository.rugged
mr_diff = rugged.diff(merge_request.diff_base_sha, merge_request.diff_head_sha)
squash_diff = rugged.diff(merge_request.diff_start_sha, squash_sha)
it 'does not perform any git actions' do
expect(service).not_to receive(:run_git_command)
expect(squash_diff.patch).to eq(mr_diff.patch)
expect(squash_commit.sha).not_to eq(merge_request.diff_head_sha)
end
service.execute(merge_request_with_one_commit)
end
end
context 'when squashing only new files' do
let(:merge_request) { merge_request_with_only_new_files }
include_examples 'the squash succeeds'
end
context 'when squashing with files too large to display' do
let(:merge_request) { merge_request_with_large_files }
include_examples 'the squash succeeds'
end
stages = {
'add worktree for squash' => 'worktree',
'configure sparse checkout' => 'config',
'get files in diff' => 'diff --name-only',
'check out target branch' => 'checkout',
'apply patch' => 'diff --binary',
'commit squashed changes' => 'commit',
'get SHA of squashed commit' => 'rev-parse'
......@@ -87,13 +108,14 @@ describe MergeRequests::SquashService do
stages.each do |stage, command|
context "when the #{stage} stage fails" do
let(:merge_request) { merge_request_with_only_new_files }
let(:error) { 'A test error' }
before do
git_command = a_collection_containing_exactly(
a_string_starting_with("#{Gitlab.config.git.bin_path} #{command}")
).or(
a_collection_starting_with([Gitlab.config.git.bin_path, command])
a_collection_starting_with([Gitlab.config.git.bin_path] + command.split)
)
allow(service).to receive(:popen).and_return(['', 0])
......@@ -121,6 +143,7 @@ describe MergeRequests::SquashService do
end
context 'when any other exception is thrown' do
let(:merge_request) { merge_request_with_only_new_files }
let(:error) { 'A test error' }
before do
......
......@@ -38,7 +38,8 @@ module TestEnv
'conflict-too-large' => '39fa04f',
'deleted-image-test' => '6c17798',
'wip' => 'b9238ee',
'csv' => '3dd0896'
'csv' => '3dd0896',
'squash-large-files' => '54cec52'
}
# gitlab-test-fork is a fork of gitlab-fork, but we don't necessarily
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment