Commit 3bf460ca authored by Nick Thomas's avatar Nick Thomas

Merge branch 'da-repository-sync-should-not-sync-repos-unhealthy-shards' into 'master'

Geo - Does not sync repos on unhealthy shards in non-backfill conditions

Closes #3690

See merge request gitlab-org/gitlab-ee!3540
parents 74a72198 9f08d025
module Geo
module Fdw
class Project < ::Geo::BaseFdw
self.table_name = Gitlab::Geo.fdw_table('projects')
end
end
end
......@@ -150,27 +150,6 @@ class GeoNode < ActiveRecord::Base
end
end
# These are projects that meet the project restriction but haven't yet been
# synced (i.e., do not yet have a project registry entry).
#
# This query requires data from two different databases, and unavoidably
# plucks a list of project IDs from one into the other. This will not scale
# well with the number of synchronized projects - the query will increase
# linearly in size - so this should be replaced with postgres_fdw ASAP.
def unsynced_projects
registry_project_ids = project_registries.pluck(:project_id)
return projects if registry_project_ids.empty?
joined_relation = projects.joins(<<~SQL)
LEFT OUTER JOIN
(VALUES #{registry_project_ids.map { |id| "(#{id}, 't')" }.join(',')})
project_registry(project_id, registry_present)
ON projects.id = project_registry.project_id
SQL
joined_relation.where(project_registry: { registry_present: [nil, false] })
end
def uploads
if restricted_project_ids
uploads_table = Upload.arel_table
......
......@@ -13,7 +13,7 @@ module Geo
end
def finder
@finder ||= RegistryFinder.new(current_node: current_node)
@finder ||= FileRegistryFinder.new(current_node: current_node)
end
# Pools for new resources to be transferred
......
......@@ -12,6 +12,10 @@ module Geo
{ id: project_id, job_id: job_id } if job_id
end
def finder
@finder ||= ProjectRegistryFinder.new(current_node: current_node)
end
def load_pending_resources
resources = find_project_ids_not_synced(batch_size: db_retrieve_batch_size)
remaining_capacity = db_retrieve_batch_size - resources.size
......@@ -24,19 +28,15 @@ module Geo
end
def find_project_ids_not_synced(batch_size:)
healthy_shards_restriction(current_node.unsynced_projects)
healthy_shards_restriction(finder.find_unsynced_projects(batch_size: batch_size))
.reorder(last_repository_updated_at: :desc)
.limit(batch_size)
.pluck(:id)
end
def find_project_ids_updated_recently(batch_size:)
current_node.project_registries
.dirty
.retry_due
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at, :desc))
.limit(batch_size)
.pluck(:project_id)
healthy_shards_restriction(finder.find_projects_updated_recently(batch_size: batch_size))
.order(Gitlab::Database.nulls_first_order(:last_repository_updated_at, :desc))
.pluck(:id)
end
def healthy_shards_restriction(relation)
......
---
title: Geo - Does not sync repositories on unhealthy shards in non-backfill conditions
merge_request:
author:
type: fixed
module Geo
class RegistryFinder
attr_reader :current_node
def initialize(current_node: nil)
@current_node = current_node
end
class FileRegistryFinder < RegistryFinder
def find_failed_objects(batch_size:)
Geo::FileRegistry
.failed
......@@ -28,7 +22,7 @@ module Geo
# Selective project replication adds a wrinkle to FDW queries, so
# we fallback to the legacy version for now.
relation =
if Gitlab::Geo.fdw? && !selective_sync
if fdw?
fdw_find_nonreplicated_lfs_objects
else
legacy_find_nonreplicated_lfs_objects(except_registry_ids: except_registry_ids)
......@@ -54,7 +48,7 @@ module Geo
# Selective project replication adds a wrinkle to FDW queries, so
# we fallback to the legacy version for now.
relation =
if Gitlab::Geo.fdw? && !selective_sync
if fdw?
fdw_find_nonreplicated_uploads
else
legacy_find_nonreplicated_uploads(except_registry_ids: except_registry_ids)
......@@ -68,10 +62,6 @@ module Geo
protected
def selective_sync
current_node.restricted_project_ids
end
#
# FDW accessors
#
......
module Geo
class ProjectRegistryFinder < RegistryFinder
def find_unsynced_projects(batch_size:)
relation =
if fdw?
fdw_find_unsynced_projects
else
legacy_find_unsynced_projects
end
relation.limit(batch_size)
end
def find_projects_updated_recently(batch_size:)
relation =
if fdw?
fdw_find_projects_updated_recently
else
legacy_find_projects_updated_recently
end
relation.limit(batch_size)
end
protected
def fdw_table
Geo::Fdw::Project.table_name
end
#
# FDW accessors
#
# @return [ActiveRecord::Relation<Geo::Fdw::Project>]
def fdw_find_unsynced_projects
Geo::Fdw::Project.joins("LEFT OUTER JOIN project_registry ON project_registry.project_id = #{fdw_table}.id")
.where('project_registry.project_id IS NULL')
end
# @return [ActiveRecord::Relation<Geo::Fdw::Project>]
def fdw_find_projects_updated_recently
Geo::Fdw::Project.joins("INNER JOIN project_registry ON project_registry.project_id = #{fdw_table}.id")
.merge(Geo::ProjectRegistry.dirty)
.merge(Geo::ProjectRegistry.retry_due)
end
#
# Legacy accessors (non FDW)
#
# @return [ActiveRecord::Relation<Project>] list of unsynced projects
def legacy_find_unsynced_projects
registry_project_ids = current_node.project_registries.pluck(:project_id)
return current_node.projects if registry_project_ids.empty?
joined_relation = current_node.projects.joins(<<~SQL)
LEFT OUTER JOIN
(VALUES #{registry_project_ids.map { |id| "(#{id}, 't')" }.join(',')})
project_registry(project_id, registry_present)
ON projects.id = project_registry.project_id
SQL
joined_relation.where(project_registry: { registry_present: [nil, false] })
end
# @return [ActiveRecord::Relation<Project>] list of projects updated recently
def legacy_find_projects_updated_recently
registry_project_ids = current_node.project_registries.dirty.retry_due.pluck(:project_id)
return Project.none if registry_project_ids.empty?
joined_relation = current_node.projects.joins(<<~SQL)
INNER JOIN
(VALUES #{registry_project_ids.map { |id| "(#{id})" }.join(',')})
project_registry(project_id)
ON projects.id = project_registry.project_id
SQL
joined_relation
end
end
end
module Geo
class RegistryFinder
attr_reader :current_node
def initialize(current_node: nil)
@current_node = current_node
end
protected
def fdw?
# Selective project replication adds a wrinkle to FDW
# queries, so we fallback to the legacy version for now.
Gitlab::Geo.fdw? && !selective_sync
end
def selective_sync
current_node.restricted_project_ids
end
end
end
require 'spec_helper'
# Disable transactions via :truncate method because a foreign table
# can't see changes inside a transaction of a different connection.
describe Geo::ProjectRegistryFinder, :geo, :truncate do
include ::EE::GeoHelpers
let(:secondary) { create(:geo_node) }
let(:synced_group) { create(:group) }
let!(:project_not_synced) { create(:project) }
let(:project_repository_dirty) { create(:project) }
let(:project_wiki_dirty) { create(:project) }
subject { described_class.new(current_node: secondary) }
before do
stub_current_geo_node(secondary)
end
context 'FDW' do
before do
skip('FDW is not configured') if Gitlab::Database.postgresql? && !Gitlab::Geo.fdw?
end
describe '#find_unsynced_projects' do
it 'delegates to #fdw_find_unsynced_projects' do
expect(subject).to receive(:fdw_find_unsynced_projects).and_call_original
subject.find_unsynced_projects(batch_size: 10)
end
it 'delegates to #legacy_find_unsynced_projects when node has selective sync' do
secondary.update_attribute(:namespaces, [synced_group])
expect(subject).to receive(:legacy_find_unsynced_projects).and_call_original
subject.find_unsynced_projects(batch_size: 10)
end
it 'returns projects without an entry on the tracking database' do
create(:geo_project_registry, :synced, :repository_dirty, project: project_repository_dirty)
projects = subject.find_unsynced_projects(batch_size: 10)
expect(projects.count).to eq(1)
expect(projects.first.id).to eq(project_not_synced.id)
end
end
describe '#find_projects_updated_recently' do
it 'delegates to #fdw_find_projects_updated_recently' do
expect(subject).to receive(:fdw_find_projects_updated_recently).and_call_original
subject.find_projects_updated_recently(batch_size: 10)
end
it 'delegates to #legacy_find_projects_updated_recently when node has selective sync' do
secondary.update_attribute(:namespaces, [synced_group])
expect(subject).to receive(:legacy_find_projects_updated_recently).and_call_original
subject.find_projects_updated_recently(batch_size: 10)
end
it 'returns projects with a dirty entry on the tracking database' do
project_repository_dirty = create(:project)
project_wiki_dirty = create(:project)
create(:geo_project_registry, :synced, :repository_dirty, project: project_repository_dirty)
create(:geo_project_registry, :synced, :wiki_dirty, project: project_wiki_dirty)
projects = subject.find_projects_updated_recently(batch_size: 10)
expect(projects.pluck(:id)).to match_array([project_repository_dirty.id, project_wiki_dirty.id])
end
end
end
context 'Legacy' do
before do
allow(Gitlab::Geo).to receive(:fdw?).and_return(false)
end
describe '#find_unsynced_projects' do
it 'delegates to #legacy_find_unsynced_projects' do
expect(subject).to receive(:legacy_find_unsynced_projects).and_call_original
subject.find_unsynced_projects(batch_size: 10)
end
it 'returns projects without an entry on the tracking database' do
create(:geo_project_registry, :synced, :repository_dirty, project: project_repository_dirty)
projects = subject.find_unsynced_projects(batch_size: 10)
expect(projects).to match_array([project_not_synced])
end
end
describe '#find_projects_updated_recently' do
it 'delegates to #legacy_find_projects_updated_recently' do
expect(subject).to receive(:legacy_find_projects_updated_recently).and_call_original
subject.find_projects_updated_recently(batch_size: 10)
end
it 'returns projects with a dirty entry on the tracking database' do
create(:geo_project_registry, :synced, :repository_dirty, project: project_repository_dirty)
create(:geo_project_registry, :synced, :wiki_dirty, project: project_wiki_dirty)
projects = subject.find_projects_updated_recently(batch_size: 10)
expect(projects.pluck(:id)).to match_array([project_repository_dirty.id, project_wiki_dirty.id])
end
end
end
end
require 'spec_helper'
describe Geo::RepositorySyncWorker, :postgresql do
# Disable transactions via :truncate method because a foreign table
# can't see changes inside a transaction of a different connection.
describe Geo::RepositorySyncWorker, :geo, :truncate do
include ::EE::GeoHelpers
set(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
set(:secondary) { create(:geo_node) }
set(:synced_group) { create(:group) }
set(:project_in_synced_group) { create(:project, group: synced_group) }
set(:unsynced_project) { create(:project) }
let(:secondary) { create(:geo_node) }
let(:synced_group) { create(:group) }
let!(:project_in_synced_group) { create(:project, group: synced_group) }
let!(:unsynced_project) { create(:project) }
subject { described_class.new }
......@@ -15,7 +16,11 @@ describe Geo::RepositorySyncWorker, :postgresql do
stub_current_geo_node(secondary)
end
describe '#perform' do
shared_examples '#perform' do |skip_tests|
before do
skip('FDW is not configured') if skip_tests
end
before do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { true }
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:renew) { true }
......@@ -52,6 +57,10 @@ describe Geo::RepositorySyncWorker, :postgresql do
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async)
subject.perform
# We need to unstub here or the DatabaseCleaner will have issues since it
# will appear as though the tracking DB were not available
allow(Gitlab::Geo).to receive(:geo_database_configured?).and_call_original
end
it 'does not perform Geo::ProjectSyncWorker when not running on a secondary' do
......@@ -129,29 +138,51 @@ describe Geo::RepositorySyncWorker, :postgresql do
context 'unhealthy shards' do
it 'skips backfill for repositories on unhealthy shards' do
unhealthy = create(:project, group: synced_group, repository_storage: 'broken')
unhealthy_not_synced = create(:project, group: synced_group, repository_storage: 'broken')
unhealthy_dirty = create(:project, group: synced_group, repository_storage: 'broken')
create(:geo_project_registry, :synced, :repository_dirty, project: unhealthy_dirty)
# Make the shard unhealthy
FileUtils.rm_rf(unhealthy.repository_storage_path)
FileUtils.rm_rf(unhealthy_not_synced.repository_storage_path)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(project_in_synced_group.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(unhealthy.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(unhealthy_not_synced.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(unhealthy_dirty.id, anything)
Sidekiq::Testing.inline! { subject.perform }
end
it 'skips backfill for projects on missing shards' do
missing = create(:project, group: synced_group)
missing.update_column(:repository_storage, 'unknown')
missing_not_synced = create(:project, group: synced_group)
missing_not_synced.update_column(:repository_storage, 'unknown')
missing_dirty = create(:project, group: synced_group)
missing_dirty.update_column(:repository_storage, 'unknown')
create(:geo_project_registry, :synced, :repository_dirty, project: missing_dirty)
# hide the 'broken' storage for this spec
stub_storage_settings({})
expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(project_in_synced_group.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(missing.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(missing_not_synced.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(missing_dirty.id, anything)
Sidekiq::Testing.inline! { subject.perform }
end
end
end
describe 'when PostgreSQL FDW is available', :geo do
# Skip if FDW isn't activated on this database
it_behaves_like '#perform', Gitlab::Database.postgresql? && !Gitlab::Geo.fdw?
end
describe 'when PostgreSQL FDW is not enabled', :geo do
before do
allow(Gitlab::Geo).to receive(:fdw?).and_return(false)
end
it_behaves_like '#perform', false
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment