Commit 2c5c32c4 authored by John Cai's avatar John Cai

Create object pools on geo secondaries

When a project with a pool repository is synced, we want the object pool
to be created, and the project to be linked to the object pool.
parent 4d0dce5d
...@@ -116,3 +116,5 @@ class PoolRepository < ActiveRecord::Base ...@@ -116,3 +116,5 @@ class PoolRepository < ActiveRecord::Base
.new(self, prefix: Storage::HashedProject::POOL_PATH_PREFIX) .new(self, prefix: Storage::HashedProject::POOL_PATH_PREFIX)
end end
end end
PoolRepository.prepend(EE::PoolRepository)
# frozen_string_literal: true
module EE
# PoolRepository EE mixin
#
# This module is intended to encapsulate EE-specific model logic
# and be prepended in the `PoolRepository` model
module PoolRepository
extend ActiveSupport::Concern
prepended do
delegate :repository, to: :source_project, prefix: true, allow_nil: true
end
end
end
...@@ -197,6 +197,15 @@ module EE ...@@ -197,6 +197,15 @@ module EE
super && !shared_runners_limit_namespace.shared_runners_minutes_used? super && !shared_runners_limit_namespace.shared_runners_minutes_used?
end end
def link_pool_repository
super
repository.log_geo_updated_event
end
def object_pool_missing?
has_pool_repository? && !pool_repository.object_pool.exists?
end
def shared_runners_minutes_limit_enabled? def shared_runners_minutes_limit_enabled?
!public? && shared_runners_enabled? && !public? && shared_runners_enabled? &&
shared_runners_limit_namespace.shared_runners_minutes_limit_enabled? shared_runners_limit_namespace.shared_runners_minutes_limit_enabled?
......
...@@ -21,6 +21,7 @@ module Geo ...@@ -21,6 +21,7 @@ module Geo
def initialize(project) def initialize(project)
@project = project @project = project
@new_repository = false
end end
def execute def execute
...@@ -53,13 +54,13 @@ module Geo ...@@ -53,13 +54,13 @@ module Geo
if redownload? if redownload?
redownload_repository redownload_repository
schedule_repack @new_repository = true
elsif repository.exists? elsif repository.exists?
fetch_geo_mirror(repository) fetch_geo_mirror(repository)
else else
ensure_repository ensure_repository
fetch_geo_mirror(repository) fetch_geo_mirror(repository)
schedule_repack @new_repository = true
end end
end end
...@@ -67,10 +68,6 @@ module Geo ...@@ -67,10 +68,6 @@ module Geo
registry.should_be_redownloaded?(type) registry.should_be_redownloaded?(type)
end end
def schedule_repack
raise NotImplementedError
end
def redownload_repository def redownload_repository
log_info("Redownloading #{type}") log_info("Redownloading #{type}")
...@@ -263,5 +260,9 @@ module Geo ...@@ -263,5 +260,9 @@ module Geo
File.dirname(disk_path) File.dirname(disk_path)
) )
end end
def new_repository?
@new_repository
end
end end
end end
# frozen_string_literal: true
module Geo
class CreateObjectPoolService
include ExclusiveLeaseGuard
include Gitlab::Geo::LogHelpers
LEASE_TIMEOUT = 1.hour.freeze
LEASE_KEY_PREFIX = 'object_pool:create'.freeze
attr_reader :pool_repository
def initialize(pool_repository)
@pool_repository = pool_repository
end
def execute
try_obtain_lease do
log_info("Creating object pool for pool_#{pool_repository.id}")
pool_repository.create_object_pool
end
end
def lease_key
@lease_key ||= "#{LEASE_KEY_PREFIX}:#{pool_repository.id}"
end
def lease_timeout
LEASE_TIMEOUT
end
end
end
...@@ -11,9 +11,12 @@ module Geo ...@@ -11,9 +11,12 @@ module Geo
class ProjectHousekeepingService < BaseService class ProjectHousekeepingService < BaseService
LEASE_TIMEOUT = 24.hours LEASE_TIMEOUT = 24.hours
attr_reader :project attr_reader :project
attr_reader :pool_repository
def initialize(project) def initialize(project, new_repository: false)
@project = project @project = project
@pool_repository = project.pool_repository
@new_repository = new_repository
end end
def execute def execute
...@@ -22,7 +25,7 @@ module Geo ...@@ -22,7 +25,7 @@ module Geo
end end
def needed? def needed?
syncs_since_gc > 0 && period_match? && housekeeping_enabled? new_repository? || (syncs_since_gc > 0 && period_match? && housekeeping_enabled?)
end end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
...@@ -43,6 +46,8 @@ module Geo ...@@ -43,6 +46,8 @@ module Geo
lease_uuid = try_obtain_lease lease_uuid = try_obtain_lease
return false unless lease_uuid.present? return false unless lease_uuid.present?
create_object_pool_on_secondary if create_object_pool_on_secondary?
execute_gitlab_shell_gc(lease_uuid) execute_gitlab_shell_gc(lease_uuid)
end end
...@@ -75,18 +80,30 @@ module Geo ...@@ -75,18 +80,30 @@ module Geo
registry.syncs_since_gc registry.syncs_since_gc
end end
def new_repository?
@new_repository
end
def should_repack?
syncs_since_gc % full_repack_period == 0
end
def should_gc?
syncs_since_gc % gc_period == 0
end
def should_incremental_repack?
syncs_since_gc % repack_period == 0
end
def task def task
if syncs_since_gc % gc_period == 0 return :gc if new_repository? || should_gc?
:gc return :full_repack if should_repack?
elsif syncs_since_gc % full_repack_period == 0 return :incremental_repack if should_incremental_repack?
:full_repack
elsif syncs_since_gc % repack_period == 0
:incremental_repack
end
end end
def period_match? def period_match?
task.present? should_incremental_repack? || should_repack? || should_gc?
end end
def housekeeping_enabled? def housekeeping_enabled?
...@@ -104,5 +121,17 @@ module Geo ...@@ -104,5 +121,17 @@ module Geo
def repack_period def repack_period
Gitlab::CurrentSettings.housekeeping_incremental_repack_period Gitlab::CurrentSettings.housekeeping_incremental_repack_period
end end
def create_object_pool_on_secondary
Geo::CreateObjectPoolService.new(pool_repository).execute
end
def create_object_pool_on_secondary?
return unless ::Gitlab::Geo.secondary?
return unless project.object_pool_missing?
return unless pool_repository.source_project_repository.exists?
true
end
end end
end end
...@@ -54,12 +54,8 @@ module Geo ...@@ -54,12 +54,8 @@ module Geo
end end
end end
def schedule_repack
GitGarbageCollectWorker.perform_async(@project.id, :full_repack, lease_key)
end
def execute_housekeeping def execute_housekeeping
Geo::ProjectHousekeepingService.new(project).execute Geo::ProjectHousekeepingService.new(project, new_repository: new_repository?).execute
end end
end end
end end
...@@ -42,10 +42,5 @@ module Geo ...@@ -42,10 +42,5 @@ module Geo
log_info('Expiring caches') log_info('Expiring caches')
repository.after_sync repository.after_sync
end end
def schedule_repack
# No-op: we currently don't schedule wiki repository to repack
# TODO: https://gitlab.com/gitlab-org/gitlab-ce/issues/45523
end
end end
end end
---
title: Create pool repositories on Geo secondaries
merge_request: 9428
author:
type: added
...@@ -1648,6 +1648,54 @@ describe Project do ...@@ -1648,6 +1648,54 @@ describe Project do
end end
end end
describe '#has_pool_repository?' do
it 'returns false when there is no pool repository' do
project = create(:project)
expect(project.has_pool_repository?).to be false
end
it 'returns true when there is a pool repository' do
pool = create(:pool_repository, :ready)
project = create(:project, pool_repository: pool)
expect(project.has_pool_repository?).to be true
end
end
describe '#link_pool_repository' do
let(:project) { create(:project, :repository) }
subject { project.link_pool_repository }
it 'logs geo event' do
expect(project.repository).to receive(:log_geo_updated_event)
subject
end
end
describe '#object_pool_missing?' do
let(:pool) { create(:pool_repository, :ready) }
subject { create(:project, :repository, pool_repository: pool) }
it 'returns true when object pool is missing' do
allow(pool.object_pool).to receive(:exists?).and_return(false)
expect(subject.object_pool_missing?).to be true
end
it "returns false when pool repository doesnt't exist" do
allow(subject).to receive(:has_pool_repository?).and_return(false)
expect(subject.object_pool_missing?).to be false
end
it 'returns false when object pool exists' do
expect(subject.object_pool_missing?).to be false
end
end
# Despite stubbing the current node as the primary or secondary, the # Despite stubbing the current node as the primary or secondary, the
# behaviour for EE::Project#lfs_http_url_to_repo() is to call # behaviour for EE::Project#lfs_http_url_to_repo() is to call
# Project#lfs_http_url_to_repo() which does not have a Geo context. # Project#lfs_http_url_to_repo() which does not have a Geo context.
......
require 'spec_helper' require 'spec_helper'
describe Geo::ProjectHousekeepingService do describe Geo::ProjectHousekeepingService do
include ExclusiveLeaseHelpers
include ::EE::GeoHelpers
subject(:service) { described_class.new(project) } subject(:service) { described_class.new(project) }
set(:project) { create(:project, :repository) } set(:project) { create(:project, :repository) }
let(:registry) { service.registry } let(:registry) { service.registry }
...@@ -43,7 +46,7 @@ describe Geo::ProjectHousekeepingService do ...@@ -43,7 +46,7 @@ describe Geo::ProjectHousekeepingService do
context 'task type' do context 'task type' do
it 'goes through all three housekeeping tasks, executing only the highest task when there is overlap' do it 'goes through all three housekeeping tasks, executing only the highest task when there is overlap' do
allow(service).to receive(:lease_key).and_return(:the_lease_key) allow(service).to receive(:lease_key).and_return(:the_lease_key)
allow(service).to receive(:try_obtain_lease).and_return(:the_uuid) stub_exclusive_lease(:the_lease_key, :the_uuid)
# At fetch 200 # At fetch 200
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid) expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid)
...@@ -62,6 +65,31 @@ describe Geo::ProjectHousekeepingService do ...@@ -62,6 +65,31 @@ describe Geo::ProjectHousekeepingService do
expect(registry.syncs_since_gc).to eq(1) expect(registry.syncs_since_gc).to eq(1)
end end
end end
context 'new repository' do
subject(:service) { described_class.new(project, new_repository: true) }
it 'runs gc for a new repository' do
allow(service).to receive(:lease_key).and_return(:the_lease_key)
stub_exclusive_lease(:the_lease_key, :the_uuid)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid).once
service.execute
end
end
context 'non-new repository' do
subject(:service) { described_class.new(project, new_repository: false) }
it 'does not run gc for a non-new repository' do
stub_exclusive_lease(:the_lease_key, :the_uuid)
expect(GitGarbageCollectWorker).not_to receive(:perform_async)
service.execute
end
end
end end
describe 'do_housekeeping' do describe 'do_housekeeping' do
...@@ -94,6 +122,12 @@ describe Geo::ProjectHousekeepingService do ...@@ -94,6 +122,12 @@ describe Geo::ProjectHousekeepingService do
expect { service.send(:do_housekeeping) }.to change(GitGarbageCollectWorker.jobs, :size).by(1) expect { service.send(:do_housekeeping) }.to change(GitGarbageCollectWorker.jobs, :size).by(1)
end end
end end
it 'does not create object pool' do
expect(project).not_to receive(:create_object_pool_on_secondary)
service.send(:do_housekeeping)
end
end end
describe '#needed?' do describe '#needed?' do
...@@ -105,6 +139,12 @@ describe Geo::ProjectHousekeepingService do ...@@ -105,6 +139,12 @@ describe Geo::ProjectHousekeepingService do
allow(registry).to receive(:syncs_since_gc).and_return(10) allow(registry).to receive(:syncs_since_gc).and_return(10)
expect(service.needed?).to eq(true) expect(service.needed?).to eq(true)
end end
it 'when its a new repository' do
service = described_class.new(project, new_repository: true)
expect(service.needed?).to eq(true)
end
end end
describe '#increment!' do describe '#increment!' do
...@@ -119,4 +159,43 @@ describe Geo::ProjectHousekeepingService do ...@@ -119,4 +159,43 @@ describe Geo::ProjectHousekeepingService do
expect(registry.project_id).to eq(project.id) expect(registry.project_id).to eq(project.id)
end end
end end
describe '#create_object_pool_on_secondary' do
let(:pool) { create(:pool_repository, :ready) }
let(:project) { create(:project, pool_repository: pool) }
let!(:secondary) { create(:geo_node) }
before do
allow(subject).to receive(:needed?) { true }
allow(subject).to receive(:task) { :gc }
allow(subject).to receive(:try_obtain_lease).and_return(:the_uuid)
allow(subject).to receive(:execute_gitlab_shell_gc).and_return(nil)
stub_current_geo_node(secondary)
end
it 'creates the object pool when source project repository exists' do
allow(project.pool_repository.source_project_repository).to receive(:exists?).and_return(true)
allow(project).to receive(:object_pool_missing?).and_return(true)
expect_any_instance_of(Geo::CreateObjectPoolService).to receive(:execute)
subject.execute
end
it "doesn't create the object pool when the object pool exists" do
allow(project).to receive(:object_pool_missing?).and_return(false)
expect_any_instance_of(Geo::CreateObjectPoolService).not_to receive(:execute)
subject.execute
end
it "doesn't create the object pool when the source project repository doesn't exist" do
allow(project.pool_repository.source_project_repository).to receive(:exists?).and_return(false)
expect_any_instance_of(Geo::CreateObjectPoolService).not_to receive(:execute)
subject.execute
end
end
end end
...@@ -19,7 +19,7 @@ describe Geo::RepositorySyncService do ...@@ -19,7 +19,7 @@ describe Geo::RepositorySyncService do
end end
it_behaves_like 'geo base sync execution' it_behaves_like 'geo base sync execution'
it_behaves_like 'geo base sync fetch and repack' it_behaves_like 'geo base sync fetch'
it_behaves_like 'reschedules sync due to race condition instead of waiting for backfill' it_behaves_like 'reschedules sync due to race condition instead of waiting for backfill'
describe '#execute' do describe '#execute' do
...@@ -36,6 +36,9 @@ describe Geo::RepositorySyncService do ...@@ -36,6 +36,9 @@ describe Geo::RepositorySyncService do
.to receive(:find_remote_root_ref) .to receive(:find_remote_root_ref)
.with('geo') .with('geo')
.and_return('master') .and_return('master')
allow_any_instance_of(Geo::ProjectHousekeepingService).to receive(:execute)
.and_return(nil)
end end
it 'fetches project repository with JWT credentials' do it 'fetches project repository with JWT credentials' do
...@@ -415,14 +418,6 @@ describe Geo::RepositorySyncService do ...@@ -415,14 +418,6 @@ describe Geo::RepositorySyncService do
end end
end end
describe '#schedule_repack' do
it 'schedule GitGarbageCollectWorker for full repack' do
Sidekiq::Testing.fake! do
expect { subject.send(:schedule_repack) }.to change { GitGarbageCollectWorker.jobs.count }.by(1)
end
end
end
context 'repository housekeeping' do context 'repository housekeeping' do
let(:registry) { Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id) } let(:registry) { Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id) }
...@@ -436,4 +431,38 @@ describe Geo::RepositorySyncService do ...@@ -436,4 +431,38 @@ describe Geo::RepositorySyncService do
subject.execute subject.execute
end end
end end
context 'when the repository is redownloaded' do
before do
allow(subject).to receive(:redownload?).and_return(true)
allow(subject).to receive(:redownload_repository).and_return(nil)
end
it "indicates the repository is new" do
expect(Geo::ProjectHousekeepingService).to receive(:new).with(project, new_repository: true).and_call_original
subject.execute
end
end
context 'when repository did not exist' do
before do
allow(repository).to receive(:exists?).and_return(false)
allow(subject).to receive(:fetch_geo_mirror).and_return(nil)
end
it "indicates the repository is new" do
expect(Geo::ProjectHousekeepingService).to receive(:new).with(project, new_repository: true).and_call_original
subject.execute
end
end
context 'when repository already existed' do
it "indicates the repository is not new" do
expect(Geo::ProjectHousekeepingService).to receive(:new).with(project, new_repository: false).and_call_original
subject.execute
end
end
end end
...@@ -19,7 +19,7 @@ RSpec.describe Geo::WikiSyncService do ...@@ -19,7 +19,7 @@ RSpec.describe Geo::WikiSyncService do
end end
it_behaves_like 'geo base sync execution' it_behaves_like 'geo base sync execution'
it_behaves_like 'geo base sync fetch and repack' it_behaves_like 'geo base sync fetch'
it_behaves_like 'reschedules sync due to race condition instead of waiting for backfill' it_behaves_like 'reschedules sync due to race condition instead of waiting for backfill'
describe '#execute' do describe '#execute' do
......
...@@ -48,7 +48,7 @@ shared_examples 'cleans temporary repositories' do ...@@ -48,7 +48,7 @@ shared_examples 'cleans temporary repositories' do
end end
end end
shared_examples 'geo base sync fetch and repack' do shared_examples 'geo base sync fetch' do
describe '#fetch_repository' do describe '#fetch_repository' do
let(:fetch_repository) { subject.send(:fetch_repository) } let(:fetch_repository) { subject.send(:fetch_repository) }
...@@ -85,12 +85,6 @@ shared_examples 'geo base sync fetch and repack' do ...@@ -85,12 +85,6 @@ shared_examples 'geo base sync fetch and repack' do
fetch_repository fetch_repository
end end
it 'schedule git repack' do
is_expected.to receive(:schedule_repack)
fetch_repository
end
end end
end end
end end
......
...@@ -221,6 +221,7 @@ describe Geo::RepositoryShardSyncWorker, :geo, :delete, :clean_gitlab_redis_cach ...@@ -221,6 +221,7 @@ describe Geo::RepositoryShardSyncWorker, :geo, :delete, :clean_gitlab_redis_cach
allow_any_instance_of(Project).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo')) allow_any_instance_of(Project).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo'))
allow_any_instance_of(Geo::ProjectRegistry).to receive(:wiki_sync_due?).and_return(false) allow_any_instance_of(Geo::ProjectRegistry).to receive(:wiki_sync_due?).and_return(false)
allow_any_instance_of(Geo::RepositorySyncService).to receive(:expire_repository_caches) allow_any_instance_of(Geo::RepositorySyncService).to receive(:expire_repository_caches)
allow_any_instance_of(Geo::ProjectHousekeepingService).to receive(:do_housekeeping)
end end
it 'tries to sync every project' do it 'tries to sync every project' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment