Commit 888ffdfa authored by Nick Thomas's avatar Nick Thomas

Run housekeeping after moving a repository between shards

The current inter-shard move is implemented as a `git fetch` from the
original shard to an empty repository on the new shard. The repository
on the new shard lacks bitmaps, and may be less well packed, than the
repository on the old shard. This has a measurable performance impact.

We may be changing from "FetchInternalRemote" to "ReplicateRepository"
in the future, but until then, forcing housekeeping to run is a good
performance optimisation - we trade off some short-term I/O load on the
new shard for better performance across a wide range of RPCs, including
an order-of-magnitude improvement in `IsAncestor`, which is the find
that motivated this change.
parent 516c0621
---
title: Run housekeeping after moving a repository between shards
merge_request: 20863
author:
type: performance
...@@ -33,6 +33,8 @@ module Projects ...@@ -33,6 +33,8 @@ module Projects
project.update(repository_storage: new_repository_storage_key, repository_read_only: false) project.update(repository_storage: new_repository_storage_key, repository_read_only: false)
project.leave_pool_repository project.leave_pool_repository
project.track_project_repository project.track_project_repository
enqueue_housekeeping
else else
project.update(repository_read_only: false) project.update(repository_read_only: false)
end end
...@@ -89,6 +91,18 @@ module Projects ...@@ -89,6 +91,18 @@ module Projects
"#{path}+#{project.id}+moved+#{Time.now.to_i}" "#{path}+#{project.id}+moved+#{Time.now.to_i}"
end end
# The underlying FetchInternalRemote call uses a `git fetch` to move data
# to the new repository, which leaves it in a less-well-packed state and
# lacking bitmaps. Housekeeping will boost performance significantly.
def enqueue_housekeeping
return unless Gitlab::CurrentSettings.housekeeping_enabled?
return unless Feature.enabled?(:repack_after_shard_migration, project)
Projects::HousekeepingService.new(project, :full_repack).execute
rescue Projects::HousekeepingService::LeaseTaken
# No action required
end
def wait_for_pushes(type) def wait_for_pushes(type)
reference_counter = project.reference_counter(type: type) reference_counter = project.reference_counter(type: type)
......
...@@ -73,7 +73,19 @@ describe Projects::UpdateRepositoryStorageService do ...@@ -73,7 +73,19 @@ describe Projects::UpdateRepositoryStorageService do
.and_return(repository_double) .and_return(repository_double)
end end
context 'when the move succeeds' do context 'when the move succeeds', :clean_gitlab_redis_shared_state do
before do
allow(project_repository_double)
.to receive(:fetch_repository_as_mirror)
.with(project.repository.raw)
.and_return(true)
allow(repository_double)
.to receive(:fetch_repository_as_mirror)
.with(repository.raw)
.and_return(true)
end
it "moves the project and its #{repository_type} repository to the new storage and unmarks the repository as read only" do it "moves the project and its #{repository_type} repository to the new storage and unmarks the repository as read only" do
old_project_repository_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access do old_project_repository_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access do
project.repository.path_to_repo project.repository.path_to_repo
...@@ -81,12 +93,6 @@ describe Projects::UpdateRepositoryStorageService do ...@@ -81,12 +93,6 @@ describe Projects::UpdateRepositoryStorageService do
old_repository_path = repository.full_path old_repository_path = repository.full_path
allow(project_repository_double).to receive(:fetch_repository_as_mirror)
.with(project.repository.raw).and_return(true)
allow(repository_double).to receive(:fetch_repository_as_mirror)
.with(repository.raw).and_return(true)
subject.execute('test_second_storage') subject.execute('test_second_storage')
expect(project).not_to be_repository_read_only expect(project).not_to be_repository_read_only
...@@ -94,6 +100,35 @@ describe Projects::UpdateRepositoryStorageService do ...@@ -94,6 +100,35 @@ describe Projects::UpdateRepositoryStorageService do
expect(gitlab_shell.repository_exists?('default', old_project_repository_path)).to be(false) expect(gitlab_shell.repository_exists?('default', old_project_repository_path)).to be(false)
expect(gitlab_shell.repository_exists?('default', old_repository_path)).to be(false) expect(gitlab_shell.repository_exists?('default', old_repository_path)).to be(false)
end end
context ':repack_after_shard_migration feature flag disabled' do
before do
stub_feature_flags(repack_after_shard_migration: false)
end
it 'does not enqueue a GC run' do
expect { subject.execute('test_second_storage') }
.not_to change(GitGarbageCollectWorker.jobs, :count)
end
end
context ':repack_after_shard_migration feature flag enabled' do
before do
stub_feature_flags(repack_after_shard_migration: true)
end
it 'does not enqueue a GC run if housekeeping is disabled' do
stub_application_setting(housekeeping_enabled: false)
expect { subject.execute('test_second_storage') }
.not_to change(GitGarbageCollectWorker.jobs, :count)
end
it 'enqueues a GC run' do
expect { subject.execute('test_second_storage') }
.to change(GitGarbageCollectWorker.jobs, :count).by(1)
end
end
end end
context 'when the project is already on the target storage' do context 'when the project is already on the target storage' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment