Commit 3d55512c authored by Nick Thomas

Prune loose objects when running git gc during repository cleanup

Without pruning, housekeeping may not make some unreferenced objects
unavailable immediately, and disk usage may go up instead of down. We've
marked the repository as read-only for the duration of the operation, so
it is safe to prune.
parent 6f3125e0
...@@ -40,7 +40,7 @@ module Projects ...@@ -40,7 +40,7 @@ module Projects
apply_bfg_object_map! apply_bfg_object_map!
# Remove older objects that are no longer referenced # Remove older objects that are no longer referenced
GitGarbageCollectWorker.new.perform(project.id, :gc, "project_cleanup:gc:#{project.id}") GitGarbageCollectWorker.new.perform(project.id, :prune, "project_cleanup:gc:#{project.id}")
# The cache may now be inaccurate, and holding onto it could prevent # The cache may now be inaccurate, and holding onto it could prevent
# bugs assuming the presence of some object from manifesting for some # bugs assuming the presence of some object from manifesting for some
......
...@@ -27,15 +27,15 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker ...@@ -27,15 +27,15 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
task = task.to_sym task = task.to_sym
if task == :gc if gc?(task)
::Projects::GitDeduplicationService.new(project).execute ::Projects::GitDeduplicationService.new(project).execute
cleanup_orphan_lfs_file_references(project) cleanup_orphan_lfs_file_references(project)
end end
gitaly_call(task, project.repository.raw_repository) gitaly_call(task, project)
# Refresh the branch cache in case garbage collection caused a ref lookup to fail # Refresh the branch cache in case garbage collection caused a ref lookup to fail
flush_ref_caches(project) if task == :gc flush_ref_caches(project) if gc?(task)
update_repository_statistics(project) if task != :pack_refs update_repository_statistics(project) if task != :pack_refs
...@@ -48,6 +48,10 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker ...@@ -48,6 +48,10 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
private private
# Returns true when +task+ is one of the garbage-collection tasks
# (:gc or :prune), and false for any other task.
def gc?(task)
  [:gc, :prune].include?(task)
end
def try_obtain_lease(key) def try_obtain_lease(key)
::Gitlab::ExclusiveLease.new(key, timeout: LEASE_TIMEOUT).try_obtain ::Gitlab::ExclusiveLease.new(key, timeout: LEASE_TIMEOUT).try_obtain
end end
...@@ -64,8 +68,9 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker ...@@ -64,8 +68,9 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
::Gitlab::ExclusiveLease.get_uuid(key) ::Gitlab::ExclusiveLease.get_uuid(key)
end end
## `repository` has to be a Gitlab::Git::Repository def gitaly_call(task, project)
def gitaly_call(task, repository) repository = project.repository.raw_repository
client = if task == :pack_refs client = if task == :pack_refs
Gitlab::GitalyClient::RefService.new(repository) Gitlab::GitalyClient::RefService.new(repository)
else else
...@@ -73,8 +78,8 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker ...@@ -73,8 +78,8 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
end end
case task case task
when :gc when :prune, :gc
client.garbage_collect(bitmaps_enabled?) client.garbage_collect(bitmaps_enabled?, prune: task == :prune)
when :full_repack when :full_repack
client.repack_full(bitmaps_enabled?) client.repack_full(bitmaps_enabled?)
when :incremental_repack when :incremental_repack
......
---
title: Prune loose objects during git garbage collection
merge_request: 39592
author:
type: changed
...@@ -26,8 +26,8 @@ module Gitlab ...@@ -26,8 +26,8 @@ module Gitlab
GitalyClient.call(@storage, :repository_service, :cleanup, request, timeout: GitalyClient.fast_timeout) GitalyClient.call(@storage, :repository_service, :cleanup, request, timeout: GitalyClient.fast_timeout)
end end
def garbage_collect(create_bitmap) def garbage_collect(create_bitmap, prune:)
request = Gitaly::GarbageCollectRequest.new(repository: @gitaly_repo, create_bitmap: create_bitmap) request = Gitaly::GarbageCollectRequest.new(repository: @gitaly_repo, create_bitmap: create_bitmap, prune: prune)
GitalyClient.call(@storage, :repository_service, :garbage_collect, request, timeout: GitalyClient.long_timeout) GitalyClient.call(@storage, :repository_service, :garbage_collect, request, timeout: GitalyClient.long_timeout)
end end
......
...@@ -38,7 +38,7 @@ RSpec.describe Gitlab::GitalyClient::RepositoryService do ...@@ -38,7 +38,7 @@ RSpec.describe Gitlab::GitalyClient::RepositoryService do
.with(gitaly_request_with_path(storage_name, relative_path), kind_of(Hash)) .with(gitaly_request_with_path(storage_name, relative_path), kind_of(Hash))
.and_return(double(:garbage_collect_response)) .and_return(double(:garbage_collect_response))
client.garbage_collect(true) client.garbage_collect(true, prune: true)
end end
end end
......
...@@ -89,7 +89,7 @@ RSpec.describe Projects::CleanupService do ...@@ -89,7 +89,7 @@ RSpec.describe Projects::CleanupService do
it 'runs garbage collection on the repository' do it 'runs garbage collection on the repository' do
expect_next_instance_of(GitGarbageCollectWorker) do |worker| expect_next_instance_of(GitGarbageCollectWorker) do |worker|
expect(worker).to receive(:perform).with(project.id, :gc, "project_cleanup:gc:#{project.id}") expect(worker).to receive(:perform).with(project.id, :prune, "project_cleanup:gc:#{project.id}")
end end
service.execute service.execute
......
...@@ -272,6 +272,11 @@ RSpec.describe GitGarbageCollectWorker do ...@@ -272,6 +272,11 @@ RSpec.describe GitGarbageCollectWorker do
expect(before_packs.count).to be >= 1 expect(before_packs.count).to be >= 1
expect_any_instance_of(Gitlab::GitalyClient::RepositoryService)
.to receive(:garbage_collect)
.with(bitmaps_enabled, prune: false)
.and_call_original
subject.perform(project.id, 'gc', lease_key, lease_uuid) subject.perform(project.id, 'gc', lease_key, lease_uuid)
after_packed_refs = packed_refs(project) after_packed_refs = packed_refs(project)
after_packs = packs(project) after_packs = packs(project)
...@@ -292,6 +297,15 @@ RSpec.describe GitGarbageCollectWorker do ...@@ -292,6 +297,15 @@ RSpec.describe GitGarbageCollectWorker do
subject.perform(project.id, 'gc', lease_key, lease_uuid) subject.perform(project.id, 'gc', lease_key, lease_uuid)
end end
it 'prune calls garbage_collect with the option prune: true' do
expect_any_instance_of(Gitlab::GitalyClient::RepositoryService)
.to receive(:garbage_collect)
.with(bitmaps_enabled, prune: true)
.and_return(nil)
subject.perform(project.id, 'prune', lease_key, lease_uuid)
end
end end
context 'with bitmaps enabled' do context 'with bitmaps enabled' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment