Commit d2a5caed authored by Gabriel Mazetto

Added git housekeeping for Geo sync

parent 625caf57
......@@ -60,8 +60,24 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
project.wiki_enabled? && (never_synced_wiki? || wiki_sync_needed?(scheduled_time))
end
# Reads the sync counter stored in Redis shared state for this registry.
#
# Returns the stored value coerced with `to_i`, so a missing key reads as 0.
def syncs_since_gc
  Gitlab::Redis::SharedState.with do |connection|
    stored = connection.get(syncs_since_gc_redis_shared_state_key)
    stored.to_i
  end
end
# Bumps the sync counter kept in Redis shared state by one.
#
# Returns whatever the Redis `incr` call returns.
def increment_syncs_since_gc
  Gitlab::Redis::SharedState.with do |connection|
    connection.incr(syncs_since_gc_redis_shared_state_key)
  end
end
# Clears the sync counter by deleting its key from Redis shared state.
#
# Returns whatever the Redis `del` call returns.
def reset_syncs_since_gc
  Gitlab::Redis::SharedState.with do |connection|
    connection.del(syncs_since_gc_redis_shared_state_key)
  end
end
private
# Redis shared-state key under which the sync counter is stored.
#
# The key is built from `project_id` rather than the registry's own `id`:
# the key is namespaced by project, and a registry obtained through
# `find_or_initialize_by(project_id: ...)` may not be persisted yet, in which
# case `id` is nil and every such registry would share "projects//syncs_since_gc".
def syncs_since_gc_redis_shared_state_key
  "projects/#{project_id}/syncs_since_gc"
end
# True when no repository sync timestamp has been recorded yet,
# i.e. `last_repository_synced_at` is nil.
def never_synced_repository?
  synced_at = last_repository_synced_at
  synced_at.nil?
end
......
# Geo::ProjectHousekeepingService class
#
# Used for git housekeeping on a Geo secondary node
#
# Ex.
# Geo::ProjectHousekeepingService.new(project).execute
#
module Geo
  # Decides when a project's repository needs git housekeeping based on a
  # per-registry sync counter, and enqueues GitGarbageCollectWorker under an
  # exclusive lease.
  class ProjectHousekeepingService < BaseService
    # Exclusive lease duration in seconds (24h)
    LEASE_TIMEOUT = 86_400

    attr_reader :project

    # project - the project to housekeep; only `project.id` is read here.
    def initialize(project)
      @project = project
    end

    # Tries to obtain the housekeeping lease and, on success, yields to the
    # optional block and enqueues the housekeeping job.
    #
    # Returns false when the lease cannot be obtained; otherwise the result of
    # enqueueing the worker.
    def execute
      lease_uuid = try_obtain_lease
      return false unless lease_uuid.present?

      yield if block_given?

      execute_gitlab_shell_gc(lease_uuid)
    end

    # Whether housekeeping should run now: housekeeping is enabled, at least
    # one sync has been counted, and the counter is a multiple of one of the
    # configured periods.
    #
    # The settings flag is checked first and the counter is read once, so the
    # decision is made on a single consistent value instead of several
    # separate Redis reads.
    def needed?
      return false unless housekeeping_enabled?

      count = syncs_since_gc
      count > 0 && period_match?(count)
    end

    # Increments the registry's sync counter, wrapped in a metrics measurement.
    def increment!
      Gitlab::Metrics.measure(:geo_increment_syncs_since_gc) do
        registry.increment_syncs_since_gc
      end
    end

    # Memoized registry for this project; may be a new, unsaved record.
    def registry
      @registry ||= Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id)
    end

    private

    # Enqueues the worker with the task selected for the current counter value
    # and resets the counter once it has reached the gc period.
    #
    # The counter is read once up front so the task choice and the reset
    # decision are based on the same value. The reset sits in `ensure`, so it
    # also happens when enqueueing raises.
    def execute_gitlab_shell_gc(lease_uuid)
      count = syncs_since_gc

      GitGarbageCollectWorker.perform_async(project.id, task(count), lease_key, lease_uuid)
    ensure
      # `count` is nil only if reading the counter itself raised.
      if count && count >= gc_period
        Gitlab::Metrics.measure(:geo_reset_syncs_since_gc) do
          registry.reset_syncs_since_gc
        end
      end
    end

    # Returns the lease uuid on success, or a falsey value when the lease is
    # already taken.
    def try_obtain_lease
      Gitlab::Metrics.measure(:geo_obtain_housekeeping_lease) do
        lease = ::Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
        lease.try_obtain
      end
    end

    def lease_key
      "geo_project_housekeeping:#{project.id}"
    end

    def syncs_since_gc
      registry.syncs_since_gc
    end

    # Picks the task for the given counter value. The gc period is tested
    # first, then the full repack period; anything else is an incremental
    # repack.
    #
    # count - counter value to evaluate; defaults to a fresh read.
    def task(count = syncs_since_gc)
      if count % gc_period == 0
        :gc
      elsif count % full_repack_period == 0
        :full_repack
      else
        :incremental_repack
      end
    end

    # True when the counter value is a multiple of any configured period.
    #
    # count - counter value to evaluate; defaults to a fresh read.
    def period_match?(count = syncs_since_gc)
      [gc_period, full_repack_period, repack_period].any? { |period| count % period == 0 }
    end

    def housekeeping_enabled?
      Gitlab::CurrentSettings.housekeeping_enabled
    end

    def gc_period
      Gitlab::CurrentSettings.housekeeping_gc_period
    end

    def full_repack_period
      Gitlab::CurrentSettings.housekeeping_full_repack_period
    end

    def repack_period
      Gitlab::CurrentSettings.housekeeping_incremental_repack_period
    end
  end
end
......@@ -31,6 +31,8 @@ module Geo
Geo::RepositorySyncService.new(project).execute if registry.repository_sync_due?(scheduled_time)
Geo::WikiSyncService.new(project).execute if registry.wiki_sync_due?(scheduled_time)
execute_housekeeping(project)
end
private
......@@ -53,5 +55,11 @@ module Geo
log_info("#{success ? 'Successfully marked' : 'Failed to mark'} disabled wiki as synced", registry_id: registry.id, project_id: registry.project_id)
end
end
# Counts one more sync for the project and runs housekeeping when the
# housekeeping service reports that it is needed.
#
# Returns the result of `execute` when housekeeping ran, nil otherwise.
def execute_housekeeping(project)
  service = Geo::ProjectHousekeepingService.new(project)
  service.increment!

  # The counter is always incremented before the need is evaluated.
  return unless service.needed?

  service.execute
end
end
end
......@@ -210,4 +210,44 @@ describe Geo::ProjectRegistry do
end
end
end
context 'redis shared state', :redis do
  # Clear the Redis-backed counter after each example so state does not leak
  # between examples.
  after do
    subject.reset_syncs_since_gc
  end

  describe '#syncs_since_gc' do
    context 'without any sync' do
      it 'returns 0' do
        expect(subject.syncs_since_gc).to eq(0)
      end
    end

    context 'with a number of syncs' do
      it 'returns the number of syncs' do
        2.times { Geo::ProjectHousekeepingService.new(project).increment! }

        expect(subject.syncs_since_gc).to eq(2)
      end
    end
  end

  describe '#increment_syncs_since_gc' do
    it 'increments the number of syncs since the last GC' do
      3.times { subject.increment_syncs_since_gc }

      expect(subject.syncs_since_gc).to eq(3)
    end
  end

  describe '#reset_syncs_since_gc' do
    it 'resets the number of syncs since the last GC' do
      3.times { subject.increment_syncs_since_gc }

      subject.reset_syncs_since_gc

      expect(subject.syncs_since_gc).to eq(0)
    end
  end
end
end
require 'spec_helper'
describe Geo::ProjectHousekeepingService do
  subject { described_class.new(project) }

  set(:project) { create(:project, :repository) }
  let(:registry) { subject.registry }

  # The sync counter lives in Redis, so it is cleared around every example.
  before do
    registry.reset_syncs_since_gc
  end

  after do
    registry.reset_syncs_since_gc
  end

  describe '#execute' do
    it 'enqueues a sidekiq job' do
      expect(subject).to receive(:try_obtain_lease).and_return(:the_uuid)
      expect(subject).to receive(:lease_key).and_return(:the_lease_key)
      expect(subject).to receive(:task).and_return(:incremental_repack)
      expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original

      Sidekiq::Testing.fake! do
        expect { subject.execute }.to change(GitGarbageCollectWorker.jobs, :size).by(1)
      end
    end

    it 'yields the block if given' do
      expect do |block|
        subject.execute(&block)
      end.to yield_with_no_args
    end

    # gc_period is stubbed to 1 and the counter is incremented once, so the
    # counter equals the period here.
    it 'resets counter when syncs_since_gc reaches gc_period' do
      expect(subject).to receive(:try_obtain_lease).and_return(:the_uuid)
      allow(subject).to receive(:gc_period).and_return(1)
      registry.increment_syncs_since_gc

      Sidekiq::Testing.inline! do
        expect { subject.execute }.to change { registry.syncs_since_gc }.to(0)
      end
    end

    context 'when no lease can be obtained' do
      before do
        expect(subject).to receive(:try_obtain_lease).and_return(false)
      end

      it 'does not enqueue a job' do
        expect(GitGarbageCollectWorker).not_to receive(:perform_async)

        expect(subject.execute).to be_falsey
      end

      it 'does not reset syncs_since_gc' do
        expect { subject.execute }.not_to change { registry.syncs_since_gc }
      end

      it 'does not yield' do
        expect { |block| subject.execute(&block) }.not_to yield_with_no_args
      end
    end

    context 'task type' do
      it 'goes through all three housekeeping tasks, executing only the highest task when there is overlap' do
        allow(subject).to receive(:try_obtain_lease).and_return(:the_uuid)
        allow(subject).to receive(:lease_key).and_return(:the_lease_key)

        # At sync 200
        expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid)
          .exactly(1).times
        # At sync 50, 100, 150
        expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :full_repack, :the_lease_key, :the_uuid)
          .exactly(3).times
        # At sync 10, 20, ... (except those above)
        expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :incremental_repack, :the_lease_key, :the_uuid)
          .exactly(16).times

        201.times do
          subject.increment!
          subject.execute if subject.needed?
        end

        # The counter was reset at sync 200, so only the 201st sync remains.
        expect(registry.syncs_since_gc).to eq(1)
      end
    end
  end

  describe '#needed?' do
    it 'returns false when no syncs have been counted' do
      expect(subject.needed?).to eq(false)
    end

    it 'returns true when the sync count matches a housekeeping period' do
      allow(registry).to receive(:syncs_since_gc).and_return(10)

      expect(subject.needed?).to eq(true)
    end
  end

  describe '#increment!' do
    it 'increments the syncs_since_gc counter' do
      expect { subject.increment! }.to change { registry.syncs_since_gc }.by(1)
    end
  end

  describe '#registry' do
    it 'returns a Geo::ProjectRegistry linked to current project' do
      expect(registry).to be_a(Geo::ProjectRegistry)
      expect(registry.project_id).to eq(project.id)
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment