Commit 06a27dc3 authored by Michael Kozono's avatar Michael Kozono

Merge branch...

Merge branch '10999-geo-implement-selective-sync-support-for-the-attachments-fdw-queries' into 'master'

Geo - Implement selective sync support for the FDW queries to count the number of attachments to sync

See merge request gitlab-org/gitlab-ee!11107
parents 33ef4a85 a1c88bdf
......@@ -2,14 +2,20 @@
module Geo
class AttachmentRegistryFinder < FileRegistryFinder
def syncable
all.geo_syncable
end
def count_syncable
syncable.count
end
def syncable
if use_legacy_queries_for_selective_sync?
legacy_finder.syncable
elsif selective_sync?
fdw_all.geo_syncable
else
Upload.geo_syncable
end
end
def count_synced
if aggregate_pushdown_supported?
find_synced.count
......@@ -78,7 +84,7 @@ module Geo
def find_retryable_failed_registries(batch_size:, except_file_ids: [])
find_failed_registries
.retry_due
.where.not(file_id: except_file_ids)
.file_id_not_in(except_file_ids)
.limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -87,30 +93,40 @@ module Geo
def find_retryable_synced_missing_on_primary_registries(batch_size:, except_file_ids: [])
find_synced_missing_on_primary_registries
.retry_due
.where.not(file_id: except_file_ids)
.file_id_not_in(except_file_ids)
.limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
private
# rubocop:disable CodeReuse/Finder
def legacy_finder
@legacy_finder ||= Geo::LegacyAttachmentRegistryFinder.new(current_node: current_node)
end
# rubocop:enable CodeReuse/Finder
def fdw_geo_node
@fdw_geo_node ||= Geo::Fdw::GeoNode.find(current_node.id)
end
# rubocop: disable CodeReuse/ActiveRecord
def all
def fdw_all
if selective_sync?
Upload.where(group_uploads.or(project_uploads).or(other_uploads))
Geo::Fdw::Upload.where(fdw_group_uploads.or(fdw_project_uploads).or(fdw_other_uploads))
else
Upload.all
Geo::Fdw::Upload.all
end
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def group_uploads
def fdw_group_uploads
namespace_ids =
if current_node.selective_sync_by_namespaces?
Gitlab::ObjectHierarchy.new(current_node.namespaces).base_and_descendants.select(:id)
Gitlab::ObjectHierarchy.new(fdw_geo_node.namespaces).base_and_descendants.select(:id)
elsif current_node.selective_sync_by_shards?
leaf_groups = Namespace.where(id: current_node.projects.select(:namespace_id))
leaf_groups = Geo::Fdw::Namespace.where(id: fdw_geo_node.projects.select(:namespace_id))
Gitlab::ObjectHierarchy.new(leaf_groups).base_and_ancestors.select(:id)
else
Namespace.none
......@@ -119,29 +135,29 @@ module Geo
# This query was intentionally converted to a raw one to get it work in Rails 5.0.
# In Rails 5.0 and 5.1 there's a bug: https://github.com/rails/arel/issues/531
# Please convert it back when on rails 5.2 as it works again as expected since 5.2.
namespace_ids_in_sql = Arel::Nodes::SqlLiteral.new("uploads.model_id IN (#{namespace_ids.to_sql})")
namespace_ids_in_sql = Arel::Nodes::SqlLiteral.new("#{fdw_upload_table.name}.#{fdw_upload_table[:model_id].name} IN (#{namespace_ids.to_sql})")
upload_table[:model_type].eq('Namespace').and(namespace_ids_in_sql)
fdw_upload_table[:model_type].eq('Namespace').and(namespace_ids_in_sql)
end
# rubocop: enable CodeReuse/ActiveRecord
def project_uploads
project_ids = current_node.projects.select(:id)
def fdw_project_uploads
project_ids = fdw_geo_node.projects.select(:id)
# This query was intentionally converted to a raw one to get it work in Rails 5.0.
# In Rails 5.0 and 5.1 there's a bug: https://github.com/rails/arel/issues/531
# Please convert it back when on rails 5.2 as it works again as expected since 5.2.
project_ids_in_sql = Arel::Nodes::SqlLiteral.new("uploads.model_id IN (#{project_ids.to_sql})")
project_ids_in_sql = Arel::Nodes::SqlLiteral.new("#{fdw_upload_table.name}.#{fdw_upload_table[:model_id].name} IN (#{project_ids.to_sql})")
upload_table[:model_type].eq('Project').and(project_ids_in_sql)
fdw_upload_table[:model_type].eq('Project').and(project_ids_in_sql)
end
def other_uploads
upload_table[:model_type].not_in(%w[Namespace Project])
def fdw_other_uploads
fdw_upload_table[:model_type].not_in(%w[Namespace Project])
end
def upload_table
Upload.arel_table
def fdw_upload_table
Geo::Fdw::Upload.arel_table
end
def find_synced
......@@ -172,10 +188,6 @@ module Geo
find_synced_registries.missing_on_primary
end
#
# FDW accessors
#
def fdw_find_synced
fdw_find_syncable.merge(Geo::FileRegistry.synced)
end
......@@ -184,81 +196,50 @@ module Geo
fdw_find_syncable.merge(Geo::FileRegistry.failed)
end
# rubocop: disable CodeReuse/ActiveRecord
def fdw_find_syncable
fdw_all.joins("INNER JOIN file_registry ON file_registry.file_id = #{fdw_table}.id")
fdw_all
.inner_join_file_registry
.geo_syncable
.merge(Geo::FileRegistry.attachments)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def fdw_find_unsynced(except_file_ids:)
upload_types = Geo::FileService::DEFAULT_OBJECT_TYPES.map { |val| "'#{val}'" }.join(',')
fdw_all.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_table}.id
AND file_registry.file_type IN (#{upload_types})")
fdw_all
.missing_file_registry
.geo_syncable
.where(file_registry: { id: nil })
.where.not(id: except_file_ids)
.id_not_in(except_file_ids)
end
# rubocop: enable CodeReuse/ActiveRecord
def fdw_find_synced_missing_on_primary
fdw_find_synced.merge(Geo::FileRegistry.missing_on_primary)
end
# rubocop: disable CodeReuse/ActiveRecord
def fdw_all
if selective_sync?
Geo::Fdw::Upload.where(group_uploads.or(project_uploads).or(other_uploads))
else
Geo::Fdw::Upload.all
end
end
# rubocop: enable CodeReuse/ActiveRecord
def fdw_table
Geo::Fdw::Upload.table_name
end
# rubocop: disable CodeReuse/ActiveRecord
def fdw_find_migrated_local(except_file_ids:)
fdw_all.joins("INNER JOIN file_registry ON file_registry.file_id = #{fdw_table}.id")
fdw_all
.inner_join_file_registry
.with_files_stored_remotely
.merge(Geo::FileRegistry.attachments)
.where.not(id: except_file_ids)
.id_not_in(except_file_ids)
end
# rubocop: enable CodeReuse/ActiveRecord
#
# Legacy accessors (non FDW)
#
# rubocop: disable CodeReuse/ActiveRecord
def legacy_find_synced
legacy_inner_join_registry_ids(
syncable,
find_synced_registries.pluck(:file_id),
find_synced_registries.pluck_file_key,
Upload
)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def legacy_find_failed
legacy_inner_join_registry_ids(
syncable,
find_failed_registries.pluck(:file_id),
find_failed_registries.pluck_file_key,
Upload
)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def legacy_find_unsynced(except_file_ids:)
registry_file_ids = Geo::FileRegistry.attachments.pluck(:file_id) | except_file_ids
registry_file_ids = Geo::FileRegistry.attachments.pluck_file_key | except_file_ids
legacy_left_outer_join_registry_ids(
syncable,
......@@ -266,28 +247,23 @@ module Geo
Upload
)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def legacy_find_migrated_local(except_file_ids:)
registry_file_ids = Geo::FileRegistry.attachments.pluck(:file_id) - except_file_ids
registry_file_ids = Geo::FileRegistry.attachments.pluck_file_key - except_file_ids
legacy_inner_join_registry_ids(
all.with_files_stored_remotely,
legacy_finder.attachments.with_files_stored_remotely,
registry_file_ids,
Upload
)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def legacy_find_synced_missing_on_primary
legacy_inner_join_registry_ids(
syncable,
find_synced_missing_on_primary_registries.pluck(:file_id),
find_synced_missing_on_primary_registries.pluck_file_key,
Upload
)
end
# rubocop: enable CodeReuse/ActiveRecord
end
end
# frozen_string_literal: true
module Geo
class LegacyAttachmentRegistryFinder < RegistryFinder
def syncable
attachments.geo_syncable
end
# rubocop:disable CodeReuse/ActiveRecord
def attachments
if selective_sync?
Upload.where(group_uploads.or(project_uploads).or(other_uploads))
else
Upload.all
end
end
# rubocop:enable CodeReuse/ActiveRecord
private
# rubocop:disable CodeReuse/ActiveRecord
def group_uploads
namespace_ids =
if current_node.selective_sync_by_namespaces?
Gitlab::ObjectHierarchy.new(current_node.namespaces).base_and_descendants.select(:id)
elsif current_node.selective_sync_by_shards?
leaf_groups = Namespace.where(id: current_node.projects.select(:namespace_id))
Gitlab::ObjectHierarchy.new(leaf_groups).base_and_ancestors.select(:id)
else
Namespace.none
end
# This query was intentionally converted to a raw one to get it work in Rails 5.0.
# In Rails 5.0 and 5.1 there's a bug: https://github.com/rails/arel/issues/531
# Please convert it back when on rails 5.2 as it works again as expected since 5.2.
namespace_ids_in_sql = Arel::Nodes::SqlLiteral.new("#{upload_table.name}.#{upload_table[:model_id].name} IN (#{namespace_ids.to_sql})")
upload_table[:model_type].eq('Namespace').and(namespace_ids_in_sql)
end
# rubocop:enable CodeReuse/ActiveRecord
def project_uploads
project_ids = current_node.projects.select(:id)
# This query was intentionally converted to a raw one to get it work in Rails 5.0.
# In Rails 5.0 and 5.1 there's a bug: https://github.com/rails/arel/issues/531
# Please convert it back when on rails 5.2 as it works again as expected since 5.2.
project_ids_in_sql = Arel::Nodes::SqlLiteral.new("#{upload_table.name}.#{upload_table[:model_id].name} IN (#{project_ids.to_sql})")
upload_table[:model_type].eq('Project').and(project_ids_in_sql)
end
def other_uploads
upload_table[:model_type].not_in(%w[Namespace Project])
end
def upload_table
Upload.arel_table
end
end
end
......@@ -19,6 +19,20 @@ module Geo
.merge(Geo::FileRegistry.with_file_type(type))
end
def inner_join_file_registry
join_statement =
arel_table
.join(file_registry_table, Arel::Nodes::InnerJoin)
.on(arel_table[:id].eq(file_registry_table[:file_id]))
joins(join_statement.join_sources)
end
def missing_file_registry
left_outer_join_file_registry
.where(file_registry_table[:id].eq(nil))
end
# Searches for a list of uploads based on the query given in `query`.
#
# On PostgreSQL this method uses "ILIKE" to perform a case-insensitive
......@@ -31,18 +45,18 @@ module Geo
private
def inner_join_file_registry
def file_registry_table
Geo::FileRegistry.arel_table
end
def left_outer_join_file_registry
join_statement =
arel_table
.join(file_registry_table, Arel::Nodes::InnerJoin)
.on(arel_table[:id].eq(file_registry_table[:file_id]))
.join(file_registry_table, Arel::Nodes::OuterJoin)
.on(arel_table[:id].eq(file_registry_table[:file_id]).and(file_registry_table[:file_type].in(Geo::FileService::DEFAULT_OBJECT_TYPES)))
joins(join_statement.join_sources)
end
def file_registry_table
Geo::FileRegistry.arel_table
end
end
end
end
......
---
title: 'Geo: Implement selective sync support for the FDW queries to count the number
of attachments to sync'
merge_request: 11107
author:
type: changed
......@@ -13,6 +13,7 @@ describe Geo::AttachmentRegistryFinder, :geo do
let(:synced_subgroup) { create(:group, parent: synced_group) }
let(:unsynced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:synced_project_in_nested_group) { create(:project, group: synced_subgroup) }
let(:unsynced_project) { create(:project, :broken_storage, group: unsynced_group) }
let(:upload_1) { create(:upload, model: synced_group) }
......@@ -24,7 +25,6 @@ describe Geo::AttachmentRegistryFinder, :geo do
let(:upload_7) { create(:upload, model: synced_subgroup) }
let(:upload_8) { create(:upload, :object_storage, model: unsynced_project) }
let(:upload_9) { create(:upload, :object_storage, model: unsynced_group) }
let(:lfs_object) { create(:lfs_object) }
let(:upload_remote_1) { create(:upload, :object_storage, model: synced_project) }
subject { described_class.new(current_node: secondary) }
......@@ -34,41 +34,6 @@ describe Geo::AttachmentRegistryFinder, :geo do
end
shared_examples 'counts all the things' do
describe '#count_syncable' do
before do
upload_1
upload_2
upload_3
upload_4
end
it 'counts attachments' do
expect(subject.count_syncable).to eq 4
end
it 'ignores remote attachments' do
upload_1.update!(store: ObjectStorage::Store::REMOTE)
expect(subject.count_syncable).to eq 3
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it 'counts attachments' do
expect(subject.count_syncable).to eq 2
end
it 'ignores remote attachments' do
upload_1.update!(store: ObjectStorage::Store::REMOTE)
expect(subject.count_syncable).to eq 1
end
end
end
describe '#count_synced' do
it 'delegates to #legacy_find_synced' do
allow(subject).to receive(:aggregate_pushdown_supported?).and_return(false)
......@@ -408,4 +373,96 @@ describe Geo::AttachmentRegistryFinder, :geo do
end
it_behaves_like 'a file registry finder'
shared_examples 'counts all the things with selective sync coverage' do
describe '#count_syncable' do
let!(:upload_1) { create(:upload, model: synced_group) }
let!(:upload_2) { create(:upload, model: unsynced_group) }
let!(:upload_3) { create(:upload, :issuable_upload, model: synced_project_in_nested_group) }
let!(:upload_4) { create(:upload, model: unsynced_project) }
let!(:upload_5) { create(:upload, :personal_snippet_upload) }
it 'counts attachments' do
expect(subject.count_syncable).to eq 5
end
it 'ignores remote attachments' do
upload_1.update!(store: ObjectStorage::Store::REMOTE)
expect(subject.count_syncable).to eq 4
end
context 'with selective sync by namespace' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it 'counts attachments' do
expect(subject.count_syncable).to eq 3
end
it 'ignores remote attachments' do
upload_1.update!(store: ObjectStorage::Store::REMOTE)
expect(subject.count_syncable).to eq 2
end
end
context 'with selective sync by shard' do
before do
secondary.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken'])
end
it 'counts attachments' do
expect(subject.count_syncable).to eq 3
end
it 'ignores remote attachments' do
upload_4.update!(store: ObjectStorage::Store::REMOTE)
expect(subject.count_syncable).to eq 2
end
end
end
end
context 'FDW', :geo_fdw do
context 'with a PostgreSQL version that does not support aggregate pushdown' do
before do
allow(Gitlab::Database).to receive(:version).and_return(9.6)
end
include_examples 'counts all the things with selective sync coverage'
end
context 'with a PostgreSQL version that supports aggregate pushdown' do
before do
allow(Gitlab::Database).to receive(:version).and_return(10.0)
end
context 'with use_fdw_queries_for_selective_sync disabled' do
before do
stub_feature_flags(use_fdw_queries_for_selective_sync: false)
end
include_examples 'counts all the things with selective sync coverage'
end
context 'with use_fdw_queries_for_selective_sync enabled' do
before do
stub_feature_flags(use_fdw_queries_for_selective_sync: true)
end
include_examples 'counts all the things with selective sync coverage'
end
end
end
context 'Legacy' do
before do
stub_fdw_disabled
end
include_examples 'counts all the things with selective sync coverage'
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment