Extract a finder that finds unsynced projects using cross-database joins

These changes use the new finder when FDW is enabled without selective
sync to avoid code duplication.
parent c6ed91c4
# frozen_string_literal: true
# Finder for retrieving unsynced projects that belong to a specific
# shard using cross-database joins.
#
# Basic usage:
#
# Geo::LegacyProjectUnsyncedFinder
# .new(current_node: Gitlab::Geo.current_node, shard_name: 'default', batch_size: 1000)
# .execute
module Geo
  # Looks up projects of one repository shard for which
  # Geo::ProjectRegistry holds no project key.
  class LegacyProjectUnsyncedFinder < RegistryFinder
    # current_node - forwarded unchanged to the parent finder (defaults to nil here).
    # shard_name   - shard name handed to `within_shards` when scoping projects.
    # batch_size   - upper bound applied with `limit` to the returned relation.
    def initialize(current_node: nil, shard_name:, batch_size:)
      super(current_node: current_node)

      @batch_size = batch_size
      @shard_name = shard_name
    end

    # Builds the batch of unsynced projects.
    #
    # The node's projects are narrowed to the requested shard, the project
    # keys are plucked from Geo::ProjectRegistry, and both are passed to
    # `legacy_left_outer_join_registry_ids` together with the Project model.
    # That helper is not defined in this class (presumably inherited from
    # RegistryFinder - confirm). Its result is capped at `batch_size`.
    #
    # rubocop:disable CodeReuse/ActiveRecord
    def execute
      projects_in_shard = current_node.projects.within_shards(shard_name)
      registered_project_ids = Geo::ProjectRegistry.pluck_project_key

      unsynced_projects = legacy_left_outer_join_registry_ids(
        projects_in_shard,
        registered_project_ids,
        Project
      )

      unsynced_projects.limit(batch_size)
    end
    # rubocop:enable CodeReuse/ActiveRecord

    private

    attr_reader :batch_size, :shard_name
  end
end
......@@ -35,6 +35,10 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where(project_id: ids)
end
# Plucks the project_id column from the model's relation and returns
# the resulting values.
def self.pluck_project_key
  all.pluck(:project_id)
end
def self.failed
repository_sync_failed = arel_table[:repository_retry_count].gt(0)
wiki_sync_failed = arel_table[:wiki_retry_count].gt(0)
......
# frozen_string_literal: true
require 'spec_helper'
describe Geo::LegacyProjectUnsyncedFinder, :geo do
  # Disable transactions via :delete method because a foreign table
  # can't see changes inside a transaction of a different connection.
  # NOTE(review): only the :geo tag is set on this group, not :delete;
  # confirm whether the note above still applies to this finder.
  describe '#execute' do
    let(:node) { create(:geo_node) }
    let(:group_1) { create(:group) }
    let(:group_2) { create(:group) }
    let(:nested_group_1) { create(:group, parent: group_1) }

    # Shard requested from the finder; contexts override it instead of
    # re-instantiating the subject inside an example.
    let(:shard_name) { 'default' }

    let!(:project_1) { create(:project, group: group_1) }
    let!(:project_2) { create(:project, group: nested_group_1) }
    let!(:project_3) { create(:project, group: group_2) }
    let!(:project_4) { create(:project, group: group_1) }

    subject { described_class.new(current_node: node, shard_name: shard_name, batch_size: 100) }

    before do
      # project_4 is the only fixture stored on the 'foo' shard.
      project_4.update_column(:repository_storage, 'foo')
    end

    context 'without selective sync' do
      it 'returns projects without an entry on the tracking database' do
        create(:geo_project_registry, :synced, project: project_2)

        expect(subject.execute).to match_ids(project_1, project_3)
      end
    end

    context 'with selective sync by namespace' do
      it 'returns projects that belong to the namespaces without an entry on the tracking database' do
        create(:geo_project_registry, :synced, project: project_4)

        node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1])

        expect(subject.execute).to match_ids(project_1, project_2)
      end
    end

    context 'with selective sync by shard' do
      before do
        node.update!(selective_sync_type: 'shards', selective_sync_shards: ['foo'])
      end

      context 'when the selected shards to sync do not include the shard_name' do
        it 'does not return projects' do
          expect(subject.execute).to be_empty
        end
      end

      context 'when the selected shards to sync include the shard_name' do
        let(:shard_name) { 'foo' }

        it 'returns projects that belong to the shards without an entry on the tracking database' do
          project_5 = create(:project, group: group_1)
          project_5.update_column(:repository_storage, 'foo')
          create(:geo_project_registry, :synced, project: project_4)

          expect(subject.execute).to match_ids(project_5)
        end
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment