Commit 1a51afbb authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'jc-geo-dupe' into 'master'

Create pool repositories on Geo secondaries

See merge request gitlab-org/gitlab-ee!9428
parents 377f21ab 2c5c32c4
......@@ -116,3 +116,5 @@ class PoolRepository < ActiveRecord::Base
.new(self, prefix: Storage::HashedProject::POOL_PATH_PREFIX)
end
end
PoolRepository.prepend(EE::PoolRepository)
# frozen_string_literal: true
module EE
# PoolRepository EE mixin
#
# This module is intended to encapsulate EE-specific model logic
# and be prepended in the `PoolRepository` model
module PoolRepository
extend ActiveSupport::Concern
prepended do
delegate :repository, to: :source_project, prefix: true, allow_nil: true
end
end
end
......@@ -198,6 +198,15 @@ module EE
super && !shared_runners_limit_namespace.shared_runners_minutes_used?
end
def link_pool_repository
super
repository.log_geo_updated_event
end
def object_pool_missing?
has_pool_repository? && !pool_repository.object_pool.exists?
end
def shared_runners_minutes_limit_enabled?
!public? && shared_runners_enabled? &&
shared_runners_limit_namespace.shared_runners_minutes_limit_enabled?
......
......@@ -21,6 +21,7 @@ module Geo
def initialize(project)
@project = project
@new_repository = false
end
def execute
......@@ -53,13 +54,13 @@ module Geo
if redownload?
redownload_repository
schedule_repack
@new_repository = true
elsif repository.exists?
fetch_geo_mirror(repository)
else
ensure_repository
fetch_geo_mirror(repository)
schedule_repack
@new_repository = true
end
end
......@@ -67,10 +68,6 @@ module Geo
registry.should_be_redownloaded?(type)
end
def schedule_repack
raise NotImplementedError
end
def redownload_repository
log_info("Redownloading #{type}")
......@@ -263,5 +260,9 @@ module Geo
File.dirname(disk_path)
)
end
def new_repository?
@new_repository
end
end
end
# frozen_string_literal: true
module Geo
class CreateObjectPoolService
include ExclusiveLeaseGuard
include Gitlab::Geo::LogHelpers
LEASE_TIMEOUT = 1.hour.freeze
LEASE_KEY_PREFIX = 'object_pool:create'.freeze
attr_reader :pool_repository
def initialize(pool_repository)
@pool_repository = pool_repository
end
def execute
try_obtain_lease do
log_info("Creating object pool for pool_#{pool_repository.id}")
pool_repository.create_object_pool
end
end
def lease_key
@lease_key ||= "#{LEASE_KEY_PREFIX}:#{pool_repository.id}"
end
def lease_timeout
LEASE_TIMEOUT
end
end
end
......@@ -11,9 +11,12 @@ module Geo
class ProjectHousekeepingService < BaseService
LEASE_TIMEOUT = 24.hours
attr_reader :project
attr_reader :pool_repository
def initialize(project)
def initialize(project, new_repository: false)
@project = project
@pool_repository = project.pool_repository
@new_repository = new_repository
end
def execute
......@@ -22,7 +25,7 @@ module Geo
end
def needed?
syncs_since_gc > 0 && period_match? && housekeeping_enabled?
new_repository? || (syncs_since_gc > 0 && period_match? && housekeeping_enabled?)
end
# rubocop: disable CodeReuse/ActiveRecord
......@@ -43,6 +46,8 @@ module Geo
lease_uuid = try_obtain_lease
return false unless lease_uuid.present?
create_object_pool_on_secondary if create_object_pool_on_secondary?
execute_gitlab_shell_gc(lease_uuid)
end
......@@ -75,18 +80,30 @@ module Geo
registry.syncs_since_gc
end
def new_repository?
@new_repository
end
def should_repack?
syncs_since_gc % full_repack_period == 0
end
def should_gc?
syncs_since_gc % gc_period == 0
end
def should_incremental_repack?
syncs_since_gc % repack_period == 0
end
def task
if syncs_since_gc % gc_period == 0
:gc
elsif syncs_since_gc % full_repack_period == 0
:full_repack
elsif syncs_since_gc % repack_period == 0
:incremental_repack
end
return :gc if new_repository? || should_gc?
return :full_repack if should_repack?
return :incremental_repack if should_incremental_repack?
end
def period_match?
task.present?
should_incremental_repack? || should_repack? || should_gc?
end
def housekeeping_enabled?
......@@ -104,5 +121,17 @@ module Geo
def repack_period
Gitlab::CurrentSettings.housekeeping_incremental_repack_period
end
def create_object_pool_on_secondary
Geo::CreateObjectPoolService.new(pool_repository).execute
end
def create_object_pool_on_secondary?
return unless ::Gitlab::Geo.secondary?
return unless project.object_pool_missing?
return unless pool_repository.source_project_repository.exists?
true
end
end
end
......@@ -54,12 +54,8 @@ module Geo
end
end
def schedule_repack
GitGarbageCollectWorker.perform_async(@project.id, :full_repack, lease_key)
end
def execute_housekeeping
Geo::ProjectHousekeepingService.new(project).execute
Geo::ProjectHousekeepingService.new(project, new_repository: new_repository?).execute
end
end
end
......@@ -42,10 +42,5 @@ module Geo
log_info('Expiring caches')
repository.after_sync
end
def schedule_repack
# No-op: we currently don't schedule wiki repository to repack
# TODO: https://gitlab.com/gitlab-org/gitlab-ce/issues/45523
end
end
end
---
title: Create pool repositories on Geo secondaries
merge_request: 9428
author:
type: added
......@@ -1648,6 +1648,54 @@ describe Project do
end
end
describe '#has_pool_repository?' do
it 'returns false when there is no pool repository' do
project = create(:project)
expect(project.has_pool_repository?).to be false
end
it 'returns true when there is a pool repository' do
pool = create(:pool_repository, :ready)
project = create(:project, pool_repository: pool)
expect(project.has_pool_repository?).to be true
end
end
describe '#link_pool_repository' do
let(:project) { create(:project, :repository) }
subject { project.link_pool_repository }
it 'logs geo event' do
expect(project.repository).to receive(:log_geo_updated_event)
subject
end
end
describe '#object_pool_missing?' do
let(:pool) { create(:pool_repository, :ready) }
subject { create(:project, :repository, pool_repository: pool) }
it 'returns true when object pool is missing' do
allow(pool.object_pool).to receive(:exists?).and_return(false)
expect(subject.object_pool_missing?).to be true
end
it "returns false when pool repository doesnt't exist" do
allow(subject).to receive(:has_pool_repository?).and_return(false)
expect(subject.object_pool_missing?).to be false
end
it 'returns false when object pool exists' do
expect(subject.object_pool_missing?).to be false
end
end
# Despite stubbing the current node as the primary or secondary, the
# behaviour for EE::Project#lfs_http_url_to_repo() is to call
# Project#lfs_http_url_to_repo() which does not have a Geo context.
......
require 'spec_helper'
describe Geo::ProjectHousekeepingService do
include ExclusiveLeaseHelpers
include ::EE::GeoHelpers
subject(:service) { described_class.new(project) }
set(:project) { create(:project, :repository) }
let(:registry) { service.registry }
......@@ -43,7 +46,7 @@ describe Geo::ProjectHousekeepingService do
context 'task type' do
it 'goes through all three housekeeping tasks, executing only the highest task when there is overlap' do
allow(service).to receive(:lease_key).and_return(:the_lease_key)
allow(service).to receive(:try_obtain_lease).and_return(:the_uuid)
stub_exclusive_lease(:the_lease_key, :the_uuid)
# At fetch 200
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid)
......@@ -62,6 +65,31 @@ describe Geo::ProjectHousekeepingService do
expect(registry.syncs_since_gc).to eq(1)
end
end
context 'new repository' do
subject(:service) { described_class.new(project, new_repository: true) }
it 'runs gc for a new repository' do
allow(service).to receive(:lease_key).and_return(:the_lease_key)
stub_exclusive_lease(:the_lease_key, :the_uuid)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid).once
service.execute
end
end
context 'non-new repository' do
subject(:service) { described_class.new(project, new_repository: false) }
it 'does not run gc for a non-new repository' do
stub_exclusive_lease(:the_lease_key, :the_uuid)
expect(GitGarbageCollectWorker).not_to receive(:perform_async)
service.execute
end
end
end
describe 'do_housekeeping' do
......@@ -94,6 +122,12 @@ describe Geo::ProjectHousekeepingService do
expect { service.send(:do_housekeeping) }.to change(GitGarbageCollectWorker.jobs, :size).by(1)
end
end
it 'does not create object pool' do
expect(project).not_to receive(:create_object_pool_on_secondary)
service.send(:do_housekeeping)
end
end
describe '#needed?' do
......@@ -105,6 +139,12 @@ describe Geo::ProjectHousekeepingService do
allow(registry).to receive(:syncs_since_gc).and_return(10)
expect(service.needed?).to eq(true)
end
it 'when its a new repository' do
service = described_class.new(project, new_repository: true)
expect(service.needed?).to eq(true)
end
end
describe '#increment!' do
......@@ -119,4 +159,43 @@ describe Geo::ProjectHousekeepingService do
expect(registry.project_id).to eq(project.id)
end
end
describe '#create_object_pool_on_secondary' do
let(:pool) { create(:pool_repository, :ready) }
let(:project) { create(:project, pool_repository: pool) }
let!(:secondary) { create(:geo_node) }
before do
allow(subject).to receive(:needed?) { true }
allow(subject).to receive(:task) { :gc }
allow(subject).to receive(:try_obtain_lease).and_return(:the_uuid)
allow(subject).to receive(:execute_gitlab_shell_gc).and_return(nil)
stub_current_geo_node(secondary)
end
it 'creates the object pool when source project repository exists' do
allow(project.pool_repository.source_project_repository).to receive(:exists?).and_return(true)
allow(project).to receive(:object_pool_missing?).and_return(true)
expect_any_instance_of(Geo::CreateObjectPoolService).to receive(:execute)
subject.execute
end
it "doesn't create the object pool when the object pool exists" do
allow(project).to receive(:object_pool_missing?).and_return(false)
expect_any_instance_of(Geo::CreateObjectPoolService).not_to receive(:execute)
subject.execute
end
it "doesn't create the object pool when the source project repository doesn't exist" do
allow(project.pool_repository.source_project_repository).to receive(:exists?).and_return(false)
expect_any_instance_of(Geo::CreateObjectPoolService).not_to receive(:execute)
subject.execute
end
end
end
......@@ -19,7 +19,7 @@ describe Geo::RepositorySyncService do
end
it_behaves_like 'geo base sync execution'
it_behaves_like 'geo base sync fetch and repack'
it_behaves_like 'geo base sync fetch'
it_behaves_like 'reschedules sync due to race condition instead of waiting for backfill'
describe '#execute' do
......@@ -36,6 +36,9 @@ describe Geo::RepositorySyncService do
.to receive(:find_remote_root_ref)
.with('geo')
.and_return('master')
allow_any_instance_of(Geo::ProjectHousekeepingService).to receive(:execute)
.and_return(nil)
end
it 'fetches project repository with JWT credentials' do
......@@ -415,14 +418,6 @@ describe Geo::RepositorySyncService do
end
end
describe '#schedule_repack' do
it 'schedule GitGarbageCollectWorker for full repack' do
Sidekiq::Testing.fake! do
expect { subject.send(:schedule_repack) }.to change { GitGarbageCollectWorker.jobs.count }.by(1)
end
end
end
context 'repository housekeeping' do
let(:registry) { Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id) }
......@@ -436,4 +431,38 @@ describe Geo::RepositorySyncService do
subject.execute
end
end
context 'when the repository is redownloaded' do
before do
allow(subject).to receive(:redownload?).and_return(true)
allow(subject).to receive(:redownload_repository).and_return(nil)
end
it "indicates the repository is new" do
expect(Geo::ProjectHousekeepingService).to receive(:new).with(project, new_repository: true).and_call_original
subject.execute
end
end
context 'when repository did not exist' do
before do
allow(repository).to receive(:exists?).and_return(false)
allow(subject).to receive(:fetch_geo_mirror).and_return(nil)
end
it "indicates the repository is new" do
expect(Geo::ProjectHousekeepingService).to receive(:new).with(project, new_repository: true).and_call_original
subject.execute
end
end
context 'when repository already existed' do
it "indicates the repository is not new" do
expect(Geo::ProjectHousekeepingService).to receive(:new).with(project, new_repository: false).and_call_original
subject.execute
end
end
end
......@@ -19,7 +19,7 @@ RSpec.describe Geo::WikiSyncService do
end
it_behaves_like 'geo base sync execution'
it_behaves_like 'geo base sync fetch and repack'
it_behaves_like 'geo base sync fetch'
it_behaves_like 'reschedules sync due to race condition instead of waiting for backfill'
describe '#execute' do
......
......@@ -48,7 +48,7 @@ shared_examples 'cleans temporary repositories' do
end
end
shared_examples 'geo base sync fetch and repack' do
shared_examples 'geo base sync fetch' do
describe '#fetch_repository' do
let(:fetch_repository) { subject.send(:fetch_repository) }
......@@ -85,12 +85,6 @@ shared_examples 'geo base sync fetch and repack' do
fetch_repository
end
it 'schedule git repack' do
is_expected.to receive(:schedule_repack)
fetch_repository
end
end
end
end
......
......@@ -225,6 +225,7 @@ describe Geo::RepositoryShardSyncWorker, :geo, :delete, :clean_gitlab_redis_cach
allow_any_instance_of(Project).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo'))
allow_any_instance_of(Geo::ProjectRegistry).to receive(:wiki_sync_due?).and_return(false)
allow_any_instance_of(Geo::RepositorySyncService).to receive(:expire_repository_caches)
allow_any_instance_of(Geo::ProjectHousekeepingService).to receive(:do_housekeeping)
end
it 'tries to sync every project' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment