Commit 5188dece authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'mk/refactor-geo-job-artifacts-scope' into 'master'

Geo: Refactor job artifacts scope

See merge request gitlab-org/gitlab!37874
parents 71880bf5 2a7b73c5
......@@ -54,8 +54,7 @@ module Geo
#
# @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused
def find_registry_differences(range)
source = local_storage_only? ? replicables.with_files_stored_locally : replicables
source_ids = source.id_in(range).pluck(replicable_primary_key) # rubocop:disable CodeReuse/ActiveRecord
source_ids = replicables.id_in(range).pluck(replicable_primary_key) # rubocop:disable CodeReuse/ActiveRecord
tracked_ids = syncable.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
......
......@@ -3,7 +3,7 @@
module Geo
class JobArtifactRegistryFinder < FileRegistryFinder
def replicables
current_node(fdw: false).job_artifacts.not_expired
::Ci::JobArtifact.replicables_for_geo_node
end
def syncable
......
......@@ -3,7 +3,9 @@
module Geo
class LfsObjectRegistryFinder < FileRegistryFinder
def replicables
current_node(fdw: false).lfs_objects
lfs_objects = current_node(fdw: false).lfs_objects
local_storage_only? ? lfs_objects.with_files_stored_locally : lfs_objects
end
def syncable
......
......@@ -73,6 +73,23 @@ module EE
super
end
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
not_expired.merge(selective_sync_scope(node))
.merge(object_storage_scope(node))
end
def object_storage_scope(node)
return all if node.sync_object_storage?
with_files_stored_locally
end
def selective_sync_scope(node)
return all unless node.selective_sync?
project_id_in(node.projects)
end
end
def log_geo_deleted_event
......
......@@ -237,25 +237,6 @@ class GeoNode < ApplicationRecord
end
end
def job_artifacts
return Ci::JobArtifact.all unless selective_sync?
query = Ci::JobArtifact.project_id_in(projects).select(:id)
cte = Gitlab::SQL::CTE.new(:restricted_job_artifacts, query)
job_artifact_table = Ci::JobArtifact.arel_table
inner_join_restricted_job_artifacts =
cte.table
.join(job_artifact_table, Arel::Nodes::InnerJoin)
.on(cte.table[:id].eq(job_artifact_table[:id]))
.join_sources
Ci::JobArtifact
.with(cte.to_arel)
.from(cte.table)
.joins(inner_join_restricted_job_artifacts)
end
def container_repositories
return ContainerRepository.none unless Geo::ContainerRepositoryRegistry.replication_enabled?
return ContainerRepository.all unless selective_sync?
......
......@@ -23,4 +23,45 @@ FactoryBot.define do
sync_object_storage { false }
end
end
factory :geo_node_with_selective_sync_for, parent: :geo_node do
transient do
model { nil }
namespaces { nil }
shards { nil }
end
after :build do |node, options|
namespaces = options.namespaces
shards = options.shards
model = options.model
if namespaces
node.selective_sync_type = 'namespaces'
elsif shards
node.selective_sync_type = 'shards'
end
case namespaces
when :model
node.namespaces = [model]
when :model_parent
node.namespaces = [model.parent]
when :model_parent_parent
node.namespaces = [model.parent.parent]
when :other
node.namespaces = [create(:group)]
end
case shards
when :model
node.selective_sync_shards = [model.repository_storage]
when :model_project
project = create(:project, namespace: model)
node.selective_sync_shards = [project.repository_storage]
when :other
node.selective_sync_shards = ['other_shard_name']
end
end
end
end
......@@ -2,7 +2,8 @@
require 'spec_helper'
RSpec.describe EE::Ci::JobArtifact do
RSpec.describe Ci::JobArtifact do
using RSpec::Parameterized::TableSyntax
include EE::GeoHelpers
describe '#destroy' do
......@@ -102,4 +103,84 @@ RSpec.describe EE::Ci::JobArtifact do
it { is_expected.to eq result }
end
end
describe '#replicables_for_geo_node' do
# Selective sync is configured relative to the job artifact's project.
#
# Permutations of sync_object_storage combined with object-stored-artifacts
# are tested in code, because the logic is simple, and to do it in the table
# would quadruple its size and have too much duplication.
where(:selective_sync_namespaces, :selective_sync_shards, :factory, :project_factory, :include_expectation) do
nil | nil | [:ci_job_artifact] | [:project] | true
# selective sync by shard
nil | :model | [:ci_job_artifact] | [:project] | true
nil | :other | [:ci_job_artifact] | [:project] | false
# selective sync by namespace
:model_parent | nil | [:ci_job_artifact] | [:project] | true
:model_parent_parent | nil | [:ci_job_artifact] | [:project, :in_subgroup] | true
:other | nil | [:ci_job_artifact] | [:project] | false
:other | nil | [:ci_job_artifact] | [:project, :in_subgroup] | false
# expired
nil | nil | [:ci_job_artifact, :expired] | [:project] | false
# selective sync by shard
nil | :model | [:ci_job_artifact, :expired] | [:project] | false
nil | :other | [:ci_job_artifact, :expired] | [:project] | false
# selective sync by namespace
:model_parent | nil | [:ci_job_artifact, :expired] | [:project] | false
:model_parent_parent | nil | [:ci_job_artifact, :expired] | [:project, :in_subgroup] | false
:other | nil | [:ci_job_artifact, :expired] | [:project] | false
:other | nil | [:ci_job_artifact, :expired] | [:project, :in_subgroup] | false
end
with_them do
subject(:job_artifact_included) { described_class.replicables_for_geo_node.include?(ci_job_artifact) }
let(:project) { create(*project_factory) }
let(:ci_build) { create(:ci_build, project: project) }
let(:node) do
create(:geo_node_with_selective_sync_for,
model: project,
namespaces: selective_sync_namespaces,
shards: selective_sync_shards,
sync_object_storage: sync_object_storage)
end
before do
stub_artifacts_object_storage
stub_current_geo_node(node)
end
context 'when sync object storage is enabled' do
let(:sync_object_storage) { true }
context 'when the job artifact is locally stored' do
let(:ci_job_artifact) { create(*factory, job: ci_build) }
it { is_expected.to eq(include_expectation) }
end
context 'when the job artifact is object stored' do
let(:ci_job_artifact) { create(*factory, :remote_store, job: ci_build) }
it { is_expected.to eq(include_expectation) }
end
end
context 'when sync object storage is disabled' do
let(:sync_object_storage) { false }
context 'when the job artifact is locally stored' do
let(:ci_job_artifact) { create(*factory, job: ci_build) }
it { is_expected.to eq(include_expectation) }
end
context 'when the job artifact is object stored' do
let(:ci_job_artifact) { create(*factory, :remote_store, job: ci_build) }
it { is_expected.to be_falsey }
end
end
end
end
end
......@@ -813,24 +813,6 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do
end
end
describe '#job_artifacts' do
context 'when selective sync is enabled' do
it 'applies a CTE statement' do
node.update!(selective_sync_type: 'namespaces')
expect(node.job_artifacts.to_sql).to match(/WITH .+restricted_job_artifacts/)
end
end
context 'when selective sync is disabled' do
it 'doest not apply a CTE statement' do
node.update!(selective_sync_type: nil)
expect(node.job_artifacts.to_sql).not_to match(/WITH .+restricted_job_artifacts/)
end
end
end
describe '#lfs_objects' do
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
......
......@@ -40,7 +40,13 @@ RSpec.describe Upload do
subject(:upload_included) { described_class.replicables_for_geo_node.include?(upload) }
let(:model) { create(*model_factory) }
let(:node) { create_geo_node(model, sync_object_storage: sync_object_storage) }
let(:node) do
create(:geo_node_with_selective_sync_for,
model: model,
namespaces: selective_sync_namespaces,
shards: selective_sync_shards,
sync_object_storage: sync_object_storage)
end
before do
stub_current_geo_node(node)
......@@ -78,42 +84,6 @@ RSpec.describe Upload do
end
end
end
def create_geo_node(model, sync_object_storage:)
node = build(:geo_node)
if selective_sync_namespaces
node.selective_sync_type = 'namespaces'
elsif selective_sync_shards
node.selective_sync_type = 'shards'
end
case selective_sync_namespaces
when :model
node.namespaces = [model]
when :model_parent
node.namespaces = [model.parent]
when :model_parent_parent
node.namespaces = [model.parent.parent]
when :other
node.namespaces = [create(:group)]
end
case selective_sync_shards
when :model
node.selective_sync_shards = [model.repository_storage]
when :model_project
project = create(:project, namespace: model)
node.selective_sync_shards = [project.repository_storage]
when :other
node.selective_sync_shards = ['other_shard_name']
end
node.sync_object_storage = sync_object_storage
node.save!
node
end
end
describe '#destroy' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment