Commit 1d39190d authored by Stan Hu's avatar Stan Hu

Merge branch '5717-geo-housekeeping-git' into 'master'

Resolve "Geo: enable housekeeping functionality when syncing repositories"

Closes #5717

See merge request gitlab-org/gitlab-ee!5461
parents 823ecdec 20fb4b74
......@@ -60,8 +60,24 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
project.wiki_enabled? && (never_synced_wiki? || wiki_sync_needed?(scheduled_time))
end
def syncs_since_gc
Gitlab::Redis::SharedState.with { |redis| redis.get(fetches_since_gc_redis_key).to_i }
end
def increment_syncs_since_gc!
Gitlab::Redis::SharedState.with { |redis| redis.incr(fetches_since_gc_redis_key) }
end
def reset_syncs_since_gc!
Gitlab::Redis::SharedState.with { |redis| redis.del(fetches_since_gc_redis_key) }
end
private
def fetches_since_gc_redis_key
"projects/#{project.id}/fetches_since_gc"
end
def never_synced_repository?
last_repository_synced_at.nil?
end
......
# Geo::ProjectHousekeepingService class
#
# Used for git housekeeping in Geo Secondary node
#
# Ex.
# Geo::ProjectHousekeepingService.new(project).execute
#
module Geo
class ProjectHousekeepingService < BaseService
LEASE_TIMEOUT = 24.hours
attr_reader :project
def initialize(project)
@project = project
end
def execute
increment!
do_housekeeping if needed?
end
def needed?
syncs_since_gc > 0 && period_match? && housekeeping_enabled?
end
def registry
@registry ||= Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id)
end
def increment!
Gitlab::Metrics.measure(:geo_increment_syncs_since_gc) do
registry.increment_syncs_since_gc!
end
end
private
def do_housekeeping
lease_uuid = try_obtain_lease
return false unless lease_uuid.present?
execute_gitlab_shell_gc(lease_uuid)
end
def execute_gitlab_shell_gc(lease_uuid)
GitGarbageCollectWorker.perform_async(project.id, task, lease_key, lease_uuid)
ensure
if should_reset?
Gitlab::Metrics.measure(:geo_reset_syncs_since_gc) do
registry.reset_syncs_since_gc!
end
end
end
def try_obtain_lease
Gitlab::Metrics.measure(:geo_obtain_housekeeping_lease) do
lease = ::Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
lease.try_obtain
end
end
def should_reset?
syncs_since_gc >= gc_period
end
def lease_key
"geo_project_housekeeping:#{project.id}"
end
def syncs_since_gc
registry.syncs_since_gc
end
def task
if syncs_since_gc % gc_period == 0
:gc
elsif syncs_since_gc % full_repack_period == 0
:full_repack
elsif syncs_since_gc % repack_period == 0
:incremental_repack
end
end
def period_match?
task.present?
end
def housekeeping_enabled?
Gitlab::CurrentSettings.housekeeping_enabled
end
def gc_period
Gitlab::CurrentSettings.housekeeping_gc_period
end
def full_repack_period
Gitlab::CurrentSettings.housekeeping_full_repack_period
end
def repack_period
Gitlab::CurrentSettings.housekeeping_incremental_repack_period
end
end
end
......@@ -29,6 +29,7 @@ module Geo
ensure
clean_up_temporary_repository if redownload
expire_repository_caches
execute_housekeeping
end
def mark_sync_as_successful
......@@ -66,5 +67,9 @@ module Geo
def schedule_repack
GitGarbageCollectWorker.perform_async(@project.id, :full_repack, lease_key)
end
def execute_housekeeping
Geo::ProjectHousekeepingService.new(project).execute
end
end
end
---
title: 'Geo: enable housekeeping functionality when syncing repositories'
merge_request: 5461
author:
type: added
......@@ -210,4 +210,44 @@ describe Geo::ProjectRegistry do
end
end
end
context 'redis shared state', :redis do
after do
subject.reset_syncs_since_gc!
end
describe '#syncs_since_gc' do
context 'without any sync' do
it 'returns 0' do
expect(subject.syncs_since_gc).to eq(0)
end
end
context 'with a number of syncs' do
it 'returns the number of syncs' do
2.times { Geo::ProjectHousekeepingService.new(project).increment! }
expect(subject.syncs_since_gc).to eq(2)
end
end
end
describe '#increment_syncs_since_gc' do
it 'increments the number of syncs since the last GC' do
3.times { subject.increment_syncs_since_gc! }
expect(subject.syncs_since_gc).to eq(3)
end
end
describe '#reset_syncs_since_gc' do
it 'resets the number of syncs since the last GC' do
3.times { subject.increment_syncs_since_gc! }
subject.reset_syncs_since_gc!
expect(subject.syncs_since_gc).to eq(0)
end
end
end
end
require 'spec_helper'
describe Geo::ProjectHousekeepingService do
subject(:service) { described_class.new(project) }
set(:project) { create(:project, :repository) }
let(:registry) { service.registry }
before do
registry.reset_syncs_since_gc!
end
after do
registry.reset_syncs_since_gc!
end
describe '#execute' do
it 'executes housekeeping when conditions are fulfilled' do
allow(service).to receive(:needed?) { true }
expect(service).to receive(:do_housekeeping)
service.execute
end
it 'does not execute housekeeping when conditions are not fulfilled' do
allow(service).to receive(:needed?) { false }
expect(service).not_to receive(:do_housekeeping)
service.execute
end
it 'resets counter when syncs_since_gc > gc_period' do
allow(service).to receive(:gc_period).and_return(1)
allow(service).to receive(:try_obtain_lease).and_return(:the_uuid)
service.increment!
Sidekiq::Testing.inline! do
expect { service.execute }.to change { registry.syncs_since_gc }.to(0)
end
end
context 'task type' do
it 'goes through all three housekeeping tasks, executing only the highest task when there is overlap' do
allow(service).to receive(:lease_key).and_return(:the_lease_key)
allow(service).to receive(:try_obtain_lease).and_return(:the_uuid)
# At fetch 200
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid)
.exactly(1).times
# At fetch 50, 100, 150
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :full_repack, :the_lease_key, :the_uuid)
.exactly(3).times
# At fetch 10, 20, ... (except those above)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :incremental_repack, :the_lease_key, :the_uuid)
.exactly(16).times
201.times do
service.execute
end
expect(registry.syncs_since_gc).to eq(1)
end
end
end
describe 'do_housekeeping' do
context 'when no lease can be obtained' do
before do
expect(service).to receive(:try_obtain_lease).and_return(false)
end
it 'does not enqueue a job' do
expect(GitGarbageCollectWorker).not_to receive(:perform_async)
expect(service.send(:do_housekeeping)).to be_falsey
end
it 'does not reset syncs_since_gc' do
allow(service).to receive(:try_obtain_lease).and_return(false)
allow(service).to receive(:increment!)
expect { service.send(:do_housekeeping) }.not_to change { registry.syncs_since_gc }
end
end
it 'enqueues a sidekiq job' do
expect(service).to receive(:try_obtain_lease).and_return(:the_uuid)
expect(service).to receive(:lease_key).and_return(:the_lease_key)
expect(service).to receive(:task).and_return(:incremental_repack)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original
Sidekiq::Testing.fake! do
expect { service.send(:do_housekeeping) }.to change(GitGarbageCollectWorker.jobs, :size).by(1)
end
end
end
describe '#needed?' do
it 'when the count is low enough' do
expect(service.needed?).to eq(false)
end
it 'when the count is high enough' do
allow(registry).to receive(:syncs_since_gc).and_return(10)
expect(service.needed?).to eq(true)
end
end
describe '#increment!' do
it 'increments the syncs_since_gc counter' do
expect { service.increment! }.to change { registry.syncs_since_gc }.by(1)
end
end
describe '#registry' do
it 'returns a Geo::ProjectRegistry linked to current project' do
expect(registry).to be_a(Geo::ProjectRegistry)
expect(registry.project_id).to eq(project.id)
end
end
end
......@@ -7,7 +7,7 @@ describe Geo::RepositorySyncService do
set(:secondary) { create(:geo_node) }
let(:lease) { double(try_obtain: true) }
let(:project) { create(:project_empty_repo) }
set(:project) { create(:project_empty_repo) }
let(:repository) { project.repository }
subject { described_class.new(project) }
......@@ -354,4 +354,18 @@ describe Geo::RepositorySyncService do
end
end
end
context 'repository housekeeping' do
let(:registry) { Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id) }
it 'increases sync count after execution' do
expect { subject.execute }.to change { registry.syncs_since_gc }.by(1)
end
it 'initiate housekeeping at end of execution' do
expect_any_instance_of(Geo::ProjectHousekeepingService).to receive(:execute)
subject.execute
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment