Commit 33eff283 authored by Michael Kozono

Merge branch...

Merge branch '8798-geo-implement-selective-sync-support-for-fdw-queries-to-find-unsynced-projects' into 'master'

Geo - Add selective sync support for the FDW queries to find unsynced projects

See merge request gitlab-org/gitlab-ee!10522
parents feed78d6 76eefff9
# frozen_string_literal: true
# Finder for retrieving unsynced projects that belong to a specific
# shard using cross-database joins.
#
# Basic usage:
#
# Geo::LegacyProjectUnsyncedFinder
# .new(current_node: Gitlab::Geo.current_node, shard_name: 'default', batch_size: 1000)
# .execute
module Geo
  # Legacy (non-FDW) finder for projects of one shard that have no entry in
  # the tracking database. The registry project ids are plucked first and then
  # handed to the cross-database join helper inherited from RegistryFinder.
  class LegacyProjectUnsyncedFinder < RegistryFinder
    # @param current_node [GeoNode, nil] forwarded unchanged to RegistryFinder
    # @param shard_name [String] repository storage the projects must live on
    # @param batch_size [Integer] maximum number of projects to return
    def initialize(current_node: nil, shard_name:, batch_size:)
      super(current_node: current_node)

      @shard_name = shard_name
      @batch_size = batch_size
    end

    # Builds the relation of unsynced projects, capped at +batch_size+.
    #
    # rubocop:disable CodeReuse/ActiveRecord
    def execute
      projects_in_shard = current_node.projects.within_shards(shard_name)
      tracked_project_ids = Geo::ProjectRegistry.pluck_project_key

      legacy_left_outer_join_registry_ids(projects_in_shard, tracked_project_ids, Project)
        .limit(batch_size)
    end
    # rubocop:enable CodeReuse/ActiveRecord

    private

    attr_reader :batch_size, :shard_name
  end
end
# frozen_string_literal: true
# Finder for retrieving projects updated recently that
# belong to a specific shard using cross-database joins.
#
# Basic usage:
#
# Geo::LegacyProjectUpdatedRecentlyFinder
# .new(current_node: Gitlab::Geo.current_node, shard_name: 'default', batch_size: 1000)
# .execute
module Geo
  # Legacy (non-FDW) finder for projects of one shard whose registry entry is
  # dirty and due for a retry. The registry rows are plucked from the tracking
  # database and joined to the projects relation as an inline VALUES list.
  class LegacyProjectUpdatedRecentlyFinder < RegistryFinder
    # @param current_node [GeoNode, nil] forwarded unchanged to RegistryFinder
    # @param shard_name [String] repository storage the projects must live on
    # @param batch_size [Integer] maximum number of projects to return
    def initialize(current_node: nil, shard_name:, batch_size:)
      super(current_node: current_node)

      @shard_name = shard_name
      @batch_size = batch_size
    end

    # Returns Project.none when no registry needs a resync; otherwise the
    # projects of the shard joined against the plucked registry rows and
    # capped at +batch_size+.
    #
    # rubocop:disable CodeReuse/ActiveRecord
    def execute
      registry_rows = find_registries_to_resync
      return Project.none if registry_rows.empty?

      # One "(project_id, quoted_timestamp)" tuple per registry row.
      value_tuples = registry_rows.map do |project_id, synced_at|
        "(#{project_id}, #{quote_value(synced_at)})"
      end

      shard_projects = current_node.projects.within_shards(shard_name)

      # The alias project_registry lets callers order by
      # project_registry.last_repository_synced_at.
      values_join = <<~SQL
        INNER JOIN
        (VALUES #{value_tuples.join(',')})
        project_registry(id, last_repository_synced_at)
        ON #{Project.table_name}.id = project_registry.id
      SQL

      shard_projects.joins(values_join).limit(batch_size)
    end
    # rubocop:enable CodeReuse/ActiveRecord

    private

    attr_reader :batch_size, :shard_name

    # Pairs of [project_id, last_repository_synced_at] for registries that are
    # both dirty and due for a retry.
    #
    # rubocop:disable CodeReuse/ActiveRecord
    def find_registries_to_resync
      dirty_and_due = Geo::ProjectRegistry.dirty.retry_due

      dirty_and_due.pluck(:project_id, :last_repository_synced_at)
    end
    # rubocop:enable CodeReuse/ActiveRecord
  end
end
......@@ -72,93 +72,16 @@ module Geo
.execute
end
# rubocop: disable CodeReuse/ActiveRecord
def find_unsynced_projects(batch_size:)
relation =
if use_legacy_queries?
legacy_find_unsynced_projects
else
fdw_find_unsynced_projects
end
relation.limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def find_projects_updated_recently(batch_size:)
relation =
if use_legacy_queries?
legacy_find_projects_updated_recently
else
fdw_find_projects_updated_recently
end
relation.limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
protected
#
# FDW accessors
#
# @return [ActiveRecord::Relation<Geo::Fdw::Project>]
# rubocop: disable CodeReuse/ActiveRecord
def fdw_find_unsynced_projects
Geo::Fdw::Project.joins("LEFT OUTER JOIN project_registry ON project_registry.project_id = #{fdw_project_table.name}.id")
.where(project_registry: { project_id: nil })
end
# rubocop: enable CodeReuse/ActiveRecord
# @return [ActiveRecord::Relation<Geo::Fdw::Project>]
# rubocop: disable CodeReuse/ActiveRecord
def fdw_find_projects_updated_recently
Geo::Fdw::Project.joins("INNER JOIN project_registry ON project_registry.project_id = #{fdw_project_table.name}.id")
.merge(Geo::ProjectRegistry.dirty)
.merge(Geo::ProjectRegistry.retry_due)
end
# rubocop: enable CodeReuse/ActiveRecord
#
# Legacy accessors (non FDW)
#
# @return [ActiveRecord::Relation<Project>] list of unsynced projects
# rubocop: disable CodeReuse/ActiveRecord
def legacy_find_unsynced_projects
legacy_left_outer_join_registry_ids(
current_node.projects,
Geo::ProjectRegistry.pluck(:project_id),
Project
)
end
# rubocop: enable CodeReuse/ActiveRecord
# @return [ActiveRecord::Relation<Project>] list of projects updated recently
# rubocop: disable CodeReuse/ActiveRecord
def legacy_find_projects_updated_recently
registries = Geo::ProjectRegistry.dirty.retry_due.pluck(:project_id, :last_repository_synced_at)
return Project.none if registries.empty?
id_and_last_sync_values = registries.map do |id, last_repository_synced_at|
"(#{id}, #{quote_value(last_repository_synced_at)})"
end
joined_relation = current_node.projects.joins(<<~SQL)
INNER JOIN
(VALUES #{id_and_last_sync_values.join(',')})
project_registry(id, last_repository_synced_at)
ON #{Project.table_name}.id = project_registry.id
SQL
joined_relation
# Runs the unsynced-projects finder selected by
# finder_klass_for_unsynced_projects for the given shard and batch size and
# returns whatever its #execute returns.
def find_unsynced_projects(shard_name:, batch_size:)
  finder = finder_klass_for_unsynced_projects.new(
    current_node: current_node,
    shard_name: shard_name,
    batch_size: batch_size
  )

  finder.execute
end
# rubocop: enable CodeReuse/ActiveRecord
def fdw_project_table
Geo::Fdw::Project.arel_table
# Runs the updated-recently finder selected by
# finder_klass_for_projects_updated_recently for the given shard and batch
# size and returns whatever its #execute returns.
def find_projects_updated_recently(shard_name:, batch_size:)
  finder = finder_klass_for_projects_updated_recently.new(
    current_node: current_node,
    shard_name: shard_name,
    batch_size: batch_size
  )

  finder.execute
end
private
......@@ -171,6 +94,22 @@ module Geo
fdw_disabled? || selective_sync? && !Gitlab::Geo::Fdw.enabled_for_selective_sync?
end
# Picks the finder class for unsynced projects: the legacy implementation when
# use_legacy_queries_for_selective_sync? is true, the FDW one otherwise.
def finder_klass_for_unsynced_projects
  return Geo::LegacyProjectUnsyncedFinder if use_legacy_queries_for_selective_sync?

  Geo::ProjectUnsyncedFinder
end
# Picks the finder class for recently updated projects: the legacy
# implementation when use_legacy_queries_for_selective_sync? is true, the FDW
# one otherwise.
def finder_klass_for_projects_updated_recently
  return Geo::LegacyProjectUpdatedRecentlyFinder if use_legacy_queries_for_selective_sync?

  Geo::ProjectUpdatedRecentlyFinder
end
def finder_klass_for_synced_registries
if use_legacy_queries_for_selective_sync?
Geo::LegacyProjectRegistrySyncedFinder
......
# frozen_string_literal: true
# Finder for retrieving unsynced projects that belong to a specific
# shard using FDW queries.
#
# Basic usage:
#
# Geo::ProjectUnsyncedFinder
# .new(current_node: Gitlab::Geo.current_node, shard_name: 'default', batch_size: 1000)
# .execute
module Geo
  # FDW-based finder for projects of one shard that have no project registry
  # entry, honouring the node's selective sync settings.
  class ProjectUnsyncedFinder
    # @param current_node [#id] node whose FDW counterpart is looked up by id
    # @param shard_name [String] repository storage the projects must live on
    # @param batch_size [Integer] maximum number of projects to return
    def initialize(current_node:, shard_name:, batch_size:)
      @current_node = Geo::Fdw::GeoNode.find(current_node.id)
      @shard_name = shard_name
      @batch_size = batch_size
    end

    # Returns an empty relation when the node syncs selectively by shards and
    # the requested shard is not one of them; otherwise the unsynced projects
    # of the shard, capped at +batch_size+.
    #
    # rubocop:disable CodeReuse/ActiveRecord
    def execute
      if valid_shard?
        projects
          .missing_project_registry
          .within_shards(shard_name)
          .limit(batch_size)
      else
        Geo::Fdw::Project.none
      end
    end
    # rubocop:enable CodeReuse/ActiveRecord

    private

    attr_reader :current_node, :shard_name, :batch_size

    # With selective sync by shards the base scope is every FDW project;
    # #execute narrows it to +shard_name+ afterwards. Any other configuration
    # defers to the node's own projects relation.
    def projects
      current_node.selective_sync_by_shards? ? Geo::Fdw::Project.all : current_node.projects
    end

    # A shard is only rejected when the node syncs selectively by shards and
    # the shard is not in its list.
    def valid_shard?
      !current_node.selective_sync_by_shards? ||
        current_node.selective_sync_shards.include?(shard_name)
    end
  end
end
# frozen_string_literal: true
# Finder for retrieving projects updated recently that belong to a specific
# shard using FDW queries.
#
# Basic usage:
#
# Geo::ProjectUpdatedRecentlyFinder
# .new(current_node: Gitlab::Geo.current_node, shard_name: 'default', batch_size: 1000)
# .execute
module Geo
  # FDW-based finder for recently updated projects of one shard, honouring the
  # node's selective sync settings.
  class ProjectUpdatedRecentlyFinder
    # @param current_node [#id] node whose FDW counterpart is looked up by id
    # @param shard_name [String] repository storage the projects must live on
    # @param batch_size [Integer] maximum number of projects to return
    def initialize(current_node:, shard_name:, batch_size:)
      @current_node = Geo::Fdw::GeoNode.find(current_node.id)
      @shard_name = shard_name
      @batch_size = batch_size
    end

    # Returns an empty relation when the node syncs selectively by shards and
    # the requested shard is not one of them; otherwise the recently updated
    # projects of the shard, capped at +batch_size+.
    #
    # rubocop:disable CodeReuse/ActiveRecord
    def execute
      if valid_shard?
        projects
          .recently_updated
          .within_shards(shard_name)
          .limit(batch_size)
      else
        Geo::Fdw::Project.none
      end
    end
    # rubocop:enable CodeReuse/ActiveRecord

    private

    attr_reader :current_node, :shard_name, :batch_size

    # With selective sync by shards the base scope is every FDW project;
    # #execute narrows it to +shard_name+ afterwards. Any other configuration
    # defers to the node's own projects relation.
    def projects
      current_node.selective_sync_by_shards? ? Geo::Fdw::Project.all : current_node.projects
    end

    # A shard is only rejected when the node syncs selectively by shards and
    # the shard is not in its list.
    def valid_shard?
      !current_node.selective_sync_by_shards? ||
        current_node.selective_sync_shards.include?(shard_name)
    end
  end
end
......@@ -14,6 +14,18 @@ module Geo
has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
has_many :namespaces, class_name: 'Geo::Fdw::Namespace', through: :geo_node_namespace_links
# FDW projects this node is expected to replicate: every project without
# selective sync, the namespace- or shard-scoped relation when one of those
# modes is active, and an empty relation for any other selective sync type.
def projects
  return Geo::Fdw::Project.all unless selective_sync?
  return projects_for_selected_namespaces if selective_sync_by_namespaces?
  return projects_for_selected_shards if selective_sync_by_shards?

  Geo::Fdw::Project.none
end
def project_registries
return Geo::ProjectRegistry.all unless selective_sync?
......@@ -28,11 +40,25 @@ module Geo
private
# FDW projects scoped through within_namespaces to the ids of the selected
# namespaces and their descendants (passed as a subquery, not loaded ids).
def projects_for_selected_namespaces
  namespace_ids = selected_namespaces_and_descendants.select(:id)

  Geo::Fdw::Project.within_namespaces(namespace_ids)
end
# FDW projects stored on any of the shards selected for this node.
def projects_for_selected_shards
  selected_shards = selective_sync_shards

  Geo::Fdw::Project.within_shards(selected_shards)
end
# Registry query scoped through the FDW query builder to the ids of the
# selected namespaces and their descendants.
def registries_for_selected_namespaces
  query_builder = Gitlab::Geo::Fdw::ProjectRegistryQueryBuilder.new

  query_builder.within_namespaces(selected_namespaces_and_descendants.select(:id))
end
# Registry query scoped through the FDW query builder to the shards selected
# for this node.
def registries_for_selected_shards
  query_builder = Gitlab::Geo::Fdw::ProjectRegistryQueryBuilder.new

  query_builder.within_shards(selective_sync_shards)
end
def selected_namespaces_and_descendants
relation = selected_namespaces_and_descendants_cte.apply_to(Geo::Fdw::Namespace.all)
relation.extend(Gitlab::Database::ReadOnlyRelation)
......@@ -56,11 +82,6 @@ module Geo
cte
end
def registries_for_selected_shards
Gitlab::Geo::Fdw::ProjectRegistryQueryBuilder.new
.within_shards(selective_sync_shards)
end
# Arel table of Geo::Fdw::Namespace, for building Arel expressions against
# the namespaces foreign table.
def fdw_namespaces_table
Geo::Fdw::Namespace.arel_table
end
......
......@@ -8,6 +8,17 @@ module Geo
self.table_name = Gitlab::Geo::Fdw.foreign_table_name('projects')
class << self
# Projects with no matching project registry row: LEFT OUTER JOIN on the
# registry and keep the rows whose registry project_id is NULL.
def missing_project_registry
  registry_project_id = Geo::ProjectRegistry.arel_table[:project_id]

  left_outer_join_project_registry.where(registry_project_id.eq(nil))
end
# Projects joined to their registry row, restricted by merging the registry's
# dirty scope and then its retry_due scope.
def recently_updated
  with_registry = inner_join_project_registry
  dirty_only = with_registry.merge(Geo::ProjectRegistry.dirty)

  dirty_only.merge(Geo::ProjectRegistry.retry_due)
end
# Searches for a list of projects based on the query given in `query`.
#
# On PostgreSQL this method uses "ILIKE" to perform a case-insensitive
......@@ -25,6 +36,26 @@ module Geo
# Restricts the relation to projects stored on the given shard(s).
#
# @param shard_names [String, Array<String>] wrapped with Array(), so a single
#   name and a list are treated alike
def within_shards(shard_names)
  storages = Array(shard_names)

  where(repository_storage: storages)
end
private
# INNER JOINs the project registry table on projects.id = registry.project_id.
# The join is built with Arel because the registry is joined by table rather
# than through an association.
def inner_join_project_registry
  registry_table = Geo::ProjectRegistry.arel_table
  same_project = arel_table[:id].eq(registry_table[:project_id])
  join_node = arel_table.join(registry_table, Arel::Nodes::InnerJoin).on(same_project)

  joins(join_node.join_sources)
end
# LEFT OUTER JOINs the project registry table on
# projects.id = registry.project_id, so projects without a registry row are
# kept in the result.
def left_outer_join_project_registry
  registry_table = Geo::ProjectRegistry.arel_table
  same_project = arel_table[:id].eq(registry_table[:project_id])
  join_node = arel_table.join(registry_table, Arel::Nodes::OuterJoin).on(same_project)

  joins(join_node.join_sources)
end
end
end
end
......
......@@ -35,6 +35,10 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where(project_id: ids)
end
# Plucks the project_id column of the relation this is called on.
# where(nil) adds no condition; it only yields a relation to pluck from.
def self.pluck_project_key
  relation = where(nil)

  relation.pluck(:project_id)
end
def self.failed
repository_sync_failed = arel_table[:repository_retry_count].gt(0)
wiki_sync_failed = arel_table[:wiki_retry_count].gt(0)
......
......@@ -199,9 +199,9 @@ class GeoNode < ApplicationRecord
if selective_sync_by_namespaces?
query = Gitlab::ObjectHierarchy.new(namespaces).base_and_descendants
Project.where(namespace_id: query.select(:id))
Project.in_namespace(query.select(:id))
elsif selective_sync_by_shards?
Project.where(repository_storage: selective_sync_shards)
Project.within_shards(selective_sync_shards)
else
Project.none
end
......
......@@ -77,25 +77,19 @@ module Geo
# rubocop: disable CodeReuse/ActiveRecord
def find_project_ids_not_synced(batch_size:)
shard_restriction(finder.find_unsynced_projects(batch_size: batch_size))
.where.not(id: scheduled_project_ids)
finder.find_unsynced_projects(shard_name: shard_name, batch_size: batch_size)
.id_not_in(scheduled_project_ids)
.reorder(last_repository_updated_at: :desc)
.pluck(:id)
.pluck_primary_key
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def find_project_ids_updated_recently(batch_size:)
shard_restriction(finder.find_projects_updated_recently(batch_size: batch_size))
.where.not(id: scheduled_project_ids)
finder.find_projects_updated_recently(shard_name: shard_name, batch_size: batch_size)
.id_not_in(scheduled_project_ids)
.order('project_registry.last_repository_synced_at ASC NULLS FIRST, projects.last_repository_updated_at ASC')
.pluck(:id)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def shard_restriction(relation)
relation.where(repository_storage: shard_name)
.pluck_primary_key
end
# rubocop: enable CodeReuse/ActiveRecord
end
......
---
title: 'Geo: Add selective sync support for the FDW queries to find unsynced projects'
merge_request: 10522
author:
type: changed
# frozen_string_literal: true
require 'spec_helper'
describe Geo::LegacyProjectUnsyncedFinder, :geo do
  # NOTE(review): this group is not tagged with :delete. The legacy finder
  # appears to work from plucked registry ids rather than foreign tables, so
  # the transaction workaround used by the FDW finder specs looks unnecessary
  # here - confirm before adding the tag.
  describe '#execute' do
    let(:node) { create(:geo_node) }
    let(:group_1) { create(:group) }
    let(:group_2) { create(:group) }
    let(:nested_group_1) { create(:group, parent: group_1) }
    let!(:project_1) { create(:project, group: group_1) }
    let!(:project_2) { create(:project, group: nested_group_1) }
    let!(:project_3) { create(:project, group: group_2) }
    let!(:project_4) { create(:project, group: group_1) }

    before do
      # project_4 is the only project stored outside the 'default' shard.
      project_4.update_column(:repository_storage, 'foo')
    end

    subject { described_class.new(current_node: node, shard_name: 'default', batch_size: 100) }

    context 'without selective sync' do
      it 'returns projects without an entry on the tracking database' do
        create(:geo_project_registry, :synced, project: project_2)

        expect(subject.execute).to match_ids(project_1, project_3)
      end
    end

    context 'with selective sync by namespace' do
      it 'returns projects that belong to the namespaces without an entry on the tracking database' do
        create(:geo_project_registry, :synced, project: project_4)
        node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1])

        expect(subject.execute).to match_ids(project_1, project_2)
      end
    end

    context 'with selective sync by shard' do
      before do
        node.update!(selective_sync_type: 'shards', selective_sync_shards: ['foo'])
      end

      it 'does not return projects when the selected shards to sync do not include the shard_name' do
        # The shared subject already asks for the unselected 'default' shard.
        expect(subject.execute).to be_empty
      end

      it 'returns projects that belong to the shards without an entry on the tracking database' do
        project_5 = create(:project, group: group_1)
        project_5.update_column(:repository_storage, 'foo')
        create(:geo_project_registry, :synced, project: project_4)

        finder = described_class.new(current_node: node, shard_name: 'foo', batch_size: 100)

        expect(finder.execute).to match_ids(project_5)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
describe Geo::LegacyProjectUpdatedRecentlyFinder, :geo do
  # NOTE(review): this group is not tagged with :delete. The legacy finder
  # appears to work from plucked registry rows rather than foreign tables, so
  # the transaction workaround used by the FDW finder specs looks unnecessary
  # here - confirm before adding the tag.
  describe '#execute' do
    let(:node) { create(:geo_node) }
    let(:group_1) { create(:group) }
    let(:group_2) { create(:group) }
    let(:nested_group_1) { create(:group, parent: group_1) }
    let!(:project_1) { create(:project, group: group_1) }
    let!(:project_2) { create(:project, group: nested_group_1) }
    let!(:project_3) { create(:project, group: group_2) }
    let!(:project_4) { create(:project, group: group_1) }

    before do
      # project_4 is the only project stored outside the 'default' shard.
      project_4.update_column(:repository_storage, 'foo')

      create(:geo_project_registry, :synced, :repository_dirty, project: project_1)
      create(:geo_project_registry, :synced, :repository_dirty, project: project_2)
      create(:geo_project_registry, :synced, project: project_3)
      create(:geo_project_registry, :synced, :wiki_dirty, project: project_4)
    end

    subject { described_class.new(current_node: node, shard_name: 'default', batch_size: 100) }

    context 'without selective sync' do
      it 'returns projects with a dirty entry on the tracking database' do
        expect(subject.execute).to match_ids(project_1, project_2)
      end
    end

    context 'with selective sync by namespace' do
      it 'returns projects that belong to the namespaces with a dirty entry on the tracking database' do
        node.update!(selective_sync_type: 'namespaces', namespaces: [group_1])

        expect(subject.execute).to match_ids(project_1, project_2)
      end
    end

    context 'with selective sync by shard' do
      before do
        node.update!(selective_sync_type: 'shards', selective_sync_shards: ['foo'])
      end

      it 'does not return projects when the selected shards to sync do not include the shard_name' do
        # The shared subject already asks for the unselected 'default' shard.
        expect(subject.execute).to be_empty
      end

      it 'returns projects that belong to the shards with a dirty entry on the tracking database' do
        project_5 = create(:project, group: group_1)
        project_5.update_column(:repository_storage, 'foo')
        create(:geo_project_registry, :synced, project: project_5)

        finder = described_class.new(current_node: node, shard_name: 'foo', batch_size: 100)

        expect(finder.execute).to match_ids(project_4)
      end
    end
  end
end
......@@ -379,85 +379,6 @@ describe Geo::ProjectRegistryFinder, :geo do
end
end
shared_examples 'finds all the things' do |method_prefix|
describe '#find_unsynced_projects' do
it 'delegates to the correct method' do
expect(subject).to receive("#{method_prefix}_find_unsynced_projects".to_sym).and_call_original
subject.find_unsynced_projects(batch_size: 10)
end
it 'returns projects without an entry on the tracking database' do
project_not_synced = create(:project)
create(:geo_project_registry, :synced, :repository_dirty, project: project_1_in_synced_group)
projects = subject.find_unsynced_projects(batch_size: 10)
expect(projects).to match_ids(project_not_synced)
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it 'delegates to #legacy_find_unsynced_projects' do
expect(subject).to receive(:legacy_find_unsynced_projects).and_call_original
subject.find_unsynced_projects(batch_size: 10)
end
it 'returns untracked projects in the synced group' do
create(:geo_project_registry, :sync_failed, project: project_1_in_synced_group)
projects = subject.find_unsynced_projects(batch_size: 10)
expect(projects).to match_ids(project_2_in_synced_group)
end
end
end
describe '#find_projects_updated_recently' do
it 'delegates to the correct method' do
expect(subject).to receive("#{method_prefix}_find_projects_updated_recently".to_sym).and_call_original
subject.find_projects_updated_recently(batch_size: 10)
end
it 'returns projects with a dirty entry on the tracking database' do
create(:geo_project_registry, :synced, :repository_dirty, project: project_1_in_synced_group)
create(:geo_project_registry, :synced, :wiki_dirty, project: project_2_in_synced_group)
projects = subject.find_projects_updated_recently(batch_size: 10)
expect(projects).to match_ids([project_1_in_synced_group, project_2_in_synced_group])
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it 'delegates to #legacy_find_projects_updated_recently' do
expect(subject).to receive(:legacy_find_projects_updated_recently).and_call_original
subject.find_projects_updated_recently(batch_size: 10)
end
it 'returns dirty projects in the synced group' do
create(:project, group: synced_group)
create(:geo_project_registry, :synced, :repository_dirty, project: project_1_in_synced_group)
create(:geo_project_registry, :synced, :wiki_dirty, project: project_2_in_synced_group)
create(:geo_project_registry, :synced, project: project_3_in_synced_group)
projects = subject.find_projects_updated_recently(batch_size: 10)
expect(projects).to match_ids(project_1_in_synced_group, project_2_in_synced_group)
end
end
end
end
shared_examples 'delegates to the proper finder' do |legacy_finder_klass, finder_klass, method, args|
where(:selective_sync, :fdw_enabled, :fdw_for_selective_sync, :finder) do
false | false | false | legacy_finder_klass
......@@ -500,7 +421,6 @@ describe Geo::ProjectRegistryFinder, :geo do
end
include_examples 'counts all the things', 'fdw'
include_examples 'finds all the things', 'fdw'
end
context 'with use_fdw_queries_for_selective_sync enabled' do
......@@ -509,7 +429,6 @@ describe Geo::ProjectRegistryFinder, :geo do
end
include_examples 'counts all the things', 'fdw'
include_examples 'finds all the things', 'fdw'
end
end
......@@ -519,7 +438,20 @@ describe Geo::ProjectRegistryFinder, :geo do
end
include_examples 'counts all the things', 'legacy'
include_examples 'finds all the things', 'legacy'
end
# Checks that #find_unsynced_projects hands off to the legacy or the FDW
# finder via the shared 'delegates to the proper finder' examples.
describe '#find_unsynced_projects', :delete do
  include_examples(
    'delegates to the proper finder',
    Geo::LegacyProjectUnsyncedFinder,
    Geo::ProjectUnsyncedFinder,
    :find_unsynced_projects,
    [{ shard_name: 'default', batch_size: 100 }]
  )
end
# Checks that #find_projects_updated_recently hands off to the legacy or the
# FDW finder via the shared 'delegates to the proper finder' examples.
describe '#find_projects_updated_recently', :delete do
  include_examples(
    'delegates to the proper finder',
    Geo::LegacyProjectUpdatedRecentlyFinder,
    Geo::ProjectUpdatedRecentlyFinder,
    :find_projects_updated_recently,
    [{ shard_name: 'default', batch_size: 100 }]
  )
end
describe '#find_failed_project_registries', :delete do
......
# frozen_string_literal: true
require 'spec_helper'
describe Geo::ProjectUnsyncedFinder, :geo do
  # Disable transactions via :delete method because a foreign table
  # can't see changes inside a transaction of a different connection.
  describe '#execute', :delete do
    let(:node) { create(:geo_node) }
    let(:group_1) { create(:group) }
    let(:group_2) { create(:group) }
    let(:nested_group_1) { create(:group, parent: group_1) }
    let!(:project_1) { create(:project, group: group_1) }
    let!(:project_2) { create(:project, group: nested_group_1) }
    let!(:project_3) { create(:project, group: group_2) }
    let!(:project_4) { create(:project, group: group_1) }

    before do
      skip('FDW is not configured') unless Gitlab::Geo::Fdw.enabled?

      # project_4 is the only project stored outside the 'default' shard.
      project_4.update_column(:repository_storage, 'foo')
    end

    subject { described_class.new(current_node: node, shard_name: 'default', batch_size: 100) }

    context 'without selective sync' do
      it 'returns projects without an entry on the tracking database' do
        create(:geo_project_registry, :synced, project: project_2)

        expect(subject.execute).to match_ids(project_1, project_3)
      end
    end

    context 'with selective sync by namespace' do
      it 'returns projects that belong to the namespaces without an entry on the tracking database' do
        create(:geo_project_registry, :synced, project: project_4)
        node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1])

        expect(subject.execute).to match_ids(project_1, project_2)
      end
    end

    context 'with selective sync by shard' do
      before do
        node.update!(selective_sync_type: 'shards', selective_sync_shards: ['foo'])
      end

      it 'does not return projects when the selected shards to sync do not include the shard_name' do
        # The shared subject already asks for the unselected 'default' shard.
        expect(subject.execute).to be_empty
      end

      it 'returns projects that belong to the shards without an entry on the tracking database' do
        project_5 = create(:project, group: group_1)
        project_5.update_column(:repository_storage, 'foo')
        create(:geo_project_registry, :synced, project: project_4)

        finder = described_class.new(current_node: node, shard_name: 'foo', batch_size: 100)

        expect(finder.execute).to match_ids(project_5)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
describe Geo::ProjectUpdatedRecentlyFinder, :geo do
  # Disable transactions via :delete method because a foreign table
  # can't see changes inside a transaction of a different connection.
  describe '#execute', :delete do
    let(:node) { create(:geo_node) }
    let(:group_1) { create(:group) }
    let(:group_2) { create(:group) }
    let(:nested_group_1) { create(:group, parent: group_1) }
    let!(:project_1) { create(:project, group: group_1) }
    let!(:project_2) { create(:project, group: nested_group_1) }
    let!(:project_3) { create(:project, group: group_2) }
    let!(:project_4) { create(:project, group: group_1) }

    before do
      skip('FDW is not configured') unless Gitlab::Geo::Fdw.enabled?

      # project_4 is the only project stored outside the 'default' shard.
      project_4.update_column(:repository_storage, 'foo')

      create(:geo_project_registry, :synced, :repository_dirty, project: project_1)
      create(:geo_project_registry, :synced, :repository_dirty, project: project_2)
      create(:geo_project_registry, :synced, project: project_3)
      create(:geo_project_registry, :synced, :wiki_dirty, project: project_4)
    end

    subject { described_class.new(current_node: node, shard_name: 'default', batch_size: 100) }

    context 'without selective sync' do
      it 'returns projects with a dirty entry on the tracking database' do
        expect(subject.execute).to match_ids(project_1, project_2)
      end
    end

    context 'with selective sync by namespace' do
      it 'returns projects that belong to the namespaces with a dirty entry on the tracking database' do
        node.update!(selective_sync_type: 'namespaces', namespaces: [group_1])

        expect(subject.execute).to match_ids(project_1, project_2)
      end
    end

    context 'with selective sync by shard' do
      before do
        node.update!(selective_sync_type: 'shards', selective_sync_shards: ['foo'])
      end

      it 'does not return projects when the selected shards to sync do not include the shard_name' do
        # The shared subject already asks for the unselected 'default' shard.
        expect(subject.execute).to be_empty
      end

      it 'returns projects that belong to the shards with a dirty entry on the tracking database' do
        project_5 = create(:project, group: group_1)
        project_5.update_column(:repository_storage, 'foo')
        create(:geo_project_registry, :synced, project: project_5)

        finder = described_class.new(current_node: node, shard_name: 'foo', batch_size: 100)

        expect(finder.execute).to match_ids(project_4)
      end
    end
  end
end
......@@ -8,6 +8,46 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
it { is_expected.to have_many(:namespaces).class_name('Geo::Fdw::Namespace').through(:geo_node_namespace_links) }
end
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
describe '#projects', :delete do
  before do
    skip('FDW is not configured') unless Gitlab::Geo::Fdw.enabled?
  end

  let(:node) { create(:geo_node) }
  let(:group_1) { create(:group) }
  let(:group_2) { create(:group) }
  let(:nested_group_1) { create(:group, parent: group_1) }

  # Created eagerly so every example runs against the same three projects;
  # with lazy `let` the "unrecognised type" example would pass on an empty
  # projects table.
  let!(:project_1) { create(:project, group: group_1) }
  let!(:project_2) { create(:project, group: nested_group_1) }
  let!(:project_3) { create(:project, :broken_storage, group: group_2) }

  subject { described_class.find(node.id) }

  it 'returns all projects without selective sync' do
    expect(subject.projects).to match_ids(project_1, project_2, project_3)
  end

  it 'returns projects that belong to the namespaces with selective sync by namespace' do
    node.update!(selective_sync_type: 'namespaces', namespaces: [group_1])

    expect(subject.projects).to match_ids(project_1, project_2)
  end

  it 'returns projects that belong to the shards with selective sync by shard' do
    node.update!(selective_sync_type: 'shards', selective_sync_shards: %w[broken])

    expect(subject.projects).to match_ids(project_3)
  end

  it 'returns nothing if an unrecognised selective sync type is used' do
    node.update_attribute(:selective_sync_type, 'unknown')

    expect(subject.projects).to be_empty
  end
end
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
describe '#project_registries', :delete do
......@@ -33,15 +73,15 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
end
it 'returns registries where projects belong to the namespaces with selective sync by namespace' do
node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1])
node.update!(selective_sync_type: 'namespaces', namespaces: [group_1])
expect(subject.project_registries).to match_array([registry_1, registry_2])
end
it 'returns registries where projects belong to the shards with selective sync by shard' do
node.update!(selective_sync_type: 'shards', selective_sync_shards: %w[default bar])
node.update!(selective_sync_type: 'shards', selective_sync_shards: %w[broken])
expect(subject.project_registries).to match_array([registry_1, registry_2])
expect(subject.project_registries).to match_array([registry_3])
end
it 'returns nothing if an unrecognised selective sync type is used' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment