Default FDW to false on Geo::JobArtifactRegistryFinder

The feature flag that disables the FDW queries
has been enabled by default and will be removed.
parent 2675f3e0
......@@ -2,10 +2,8 @@
module Geo
class JobArtifactRegistryFinder < FileRegistryFinder
# Counts all existing registries independent
# of any change on filters / selective sync
#
# NOTE(review): this listing is a diff whose +/- markers were stripped, so the
# two statements in the body are presumably the removed and the added version
# of one line rather than sequential code -- confirm against the real file.
# Read literally, the first count is evaluated and discarded and the value of
# `syncable.count` is returned.
def count_registry
Geo::JobArtifactRegistry.count
syncable.count
end
def count_syncable
......@@ -13,22 +11,19 @@ module Geo
end
# Counts the records selected by the `synced` scope.
#
# NOTE(review): the two statements look like the before/after pair of a diff
# (markers stripped) -- confirm which one is current. Read literally, the
# `registries_for_job_artifacts` count is evaluated and discarded, and
# `syncable.synced.count` is the return value.
def count_synced
registries_for_job_artifacts.merge(Geo::JobArtifactRegistry.synced).count
syncable.synced.count
end
# Counts the records selected by the `failed` scope.
#
# NOTE(review): the two statements look like the before/after pair of a diff
# (markers stripped) -- confirm which one is current. Read literally, the
# `registries_for_job_artifacts` count is evaluated and discarded, and
# `syncable.failed.count` is the return value.
def count_failed
registries_for_job_artifacts.merge(Geo::JobArtifactRegistry.failed).count
syncable.failed.count
end
# Counts the records selected by chaining the `synced` and
# `missing_on_primary` scopes.
#
# NOTE(review): the two statements look like the before/after pair of a diff
# (markers stripped) -- confirm which one is current. Read literally, the
# `registries_for_job_artifacts` count is evaluated and discarded, and
# `syncable.synced.missing_on_primary.count` is the return value.
def count_synced_missing_on_primary
registries_for_job_artifacts.merge(Geo::JobArtifactRegistry.synced.missing_on_primary).count
syncable.synced.missing_on_primary.count
end
# Base relation that the count_* and find_* methods in this class chain
# their scopes onto.
#
# NOTE(review): presumably the first three body lines are the removed
# implementation and the final `Geo::JobArtifactRegistry` line is its
# replacement (diff markers stripped) -- confirm against the real file.
# Read literally, the two guard clauses return Ci::JobArtifact relations
# early, the bare `Ci::JobArtifact.not_expired` expression is discarded, and
# `Geo::JobArtifactRegistry` is returned otherwise.
def syncable
return job_artifacts.not_expired if selective_sync?
return Ci::JobArtifact.not_expired.with_files_stored_locally if local_storage_only?
Ci::JobArtifact.not_expired
Geo::JobArtifactRegistry
end
# Returns untracked IDs as well as tracked IDs that are unused.
......@@ -49,16 +44,8 @@ module Geo
#
# @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused
def find_registry_differences(range)
# rubocop:disable CodeReuse/ActiveRecord
source_ids =
job_artifacts(fdw: false)
.id_in(range)
.pluck(::Ci::JobArtifact.arel_table[:id])
# rubocop:enable CodeReuse/ActiveRecord
tracked_ids =
Geo::JobArtifactRegistry
.pluck_model_ids_in_range(range)
source_ids = job_artifacts.id_in(range).pluck(::Ci::JobArtifact.arel_table[:id]) # rubocop:disable CodeReuse/ActiveRecord
tracked_ids = syncable.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
......@@ -84,49 +71,27 @@ module Geo
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop:disable CodeReuse/ActiveRecord
# Returns the relation built from `syncable` with the `never` scope applied,
# excluding the model ids listed in `except_ids` and limited to `batch_size`
# rows.
#
# NOTE(review): the bare `Geo::JobArtifactRegistry` line is presumably the
# removed receiver of the chain (diff markers stripped); as written it is a
# no-op expression and the chain starts at `syncable` -- confirm.
def find_never_synced_registries(batch_size:, except_ids: [])
Geo::JobArtifactRegistry
syncable
.never
.model_id_not_in(except_ids)
.limit(batch_size)
end
alias_method :find_unsynced, :find_never_synced_registries
# rubocop:enable CodeReuse/ActiveRecord
# Deprecated in favor of the process using
# #find_registry_differences and #find_never_synced_registries
#
# Finds a limited number of non-replicated job artifacts.
#
# You can pass a list with `except_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# TODO: An alternative here is to use some sort of window function with a cursor instead
# of simply limiting the query and passing a list of items we don't want
#
# NOTE(review): `find_unsynced` is also declared through `alias_method`
# earlier in this listing; a `def` with the same name that comes later
# replaces that alias. Presumably this method is the removed side of the diff
# (markers stripped) -- confirm against the real file.
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop: disable CodeReuse/ActiveRecord
def find_unsynced(batch_size:, except_ids: [])
job_artifacts
.not_expired
.missing_job_artifact_registry
.id_not_in(except_ids)
.limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
# Returns job artifacts joined to their registry rows that are stored
# remotely, excluding the ids listed in `except_ids` and limited to
# `batch_size` rows.
#
# NOTE(review): the commented-out chain at the end of the body duplicates the
# live chain above it; presumably one is the removed and the other the added
# side of the diff (markers stripped). If the commented-out version is the
# current one, the method body would be empty and return nil -- confirm.
def find_migrated_local(batch_size:, except_ids: [])
all_job_artifacts
.inner_join_job_artifact_registry
.with_files_stored_remotely
.id_not_in(except_ids)
.limit(batch_size)
# all_job_artifacts
# .inner_join_job_artifact_registry
# .with_files_stored_remotely
# .id_not_in(except_ids)
# .limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def find_retryable_failed_registries(batch_size:, except_ids: [])
Geo::JobArtifactRegistry
syncable
.failed
.retry_due
.model_id_not_in(except_ids)
......@@ -136,7 +101,7 @@ module Geo
# rubocop: disable CodeReuse/ActiveRecord
def find_retryable_synced_missing_on_primary_registries(batch_size:, except_ids: [])
Geo::JobArtifactRegistry
syncable
.synced
.missing_on_primary
.retry_due
......@@ -147,18 +112,12 @@ module Geo
private
def job_artifacts(fdw: true)
local_storage_only?(fdw: fdw) ? all_job_artifacts(fdw: fdw).with_files_stored_locally : all_job_artifacts(fdw: fdw)
end
def all_job_artifacts(fdw: true)
current_node(fdw: fdw).job_artifacts
def job_artifacts
local_storage_only?(fdw: false) ? all_job_artifacts.with_files_stored_locally : all_job_artifacts
end
def registries_for_job_artifacts
job_artifacts
.inner_join_job_artifact_registry
.not_expired
def all_job_artifacts
current_node(fdw: false).job_artifacts
end
end
end
......@@ -2,287 +2,160 @@
require 'spec_helper'
RSpec.describe Geo::JobArtifactRegistryFinder, :geo_fdw do
RSpec.describe Geo::JobArtifactRegistryFinder, :geo do
include ::EE::GeoHelpers
# Using let() instead of set() because set() does not work properly
# when using the :delete DatabaseCleaner strategy, which is required for FDW
# tests because a foreign table can't see changes inside a transaction of a
# different connection.
let(:secondary) { create(:geo_node) }
let(:synced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project) }
let(:project_broken_storage) { create(:project, :broken_storage) }
subject { described_class.new(current_node_id: secondary.id) }
let_it_be(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(secondary)
stub_artifacts_object_storage
end
let!(:job_artifact_synced_project) { create(:ci_job_artifact, project: synced_project) }
let!(:job_artifact_unsynced_project) { create(:ci_job_artifact, project: unsynced_project) }
let!(:job_artifact_broken_storage_1) { create(:ci_job_artifact, project: project_broken_storage) }
let!(:job_artifact_broken_storage_2) { create(:ci_job_artifact, project: project_broken_storage) }
let!(:job_artifact_expired_synced_project) { create(:ci_job_artifact, :expired, project: synced_project) }
let!(:job_artifact_expired_broken_storage) { create(:ci_job_artifact, :expired, project: project_broken_storage) }
let!(:job_artifact_remote_synced_project) { create(:ci_job_artifact, :remote_store, project: synced_project) }
let!(:job_artifact_remote_unsynced_project) { create(:ci_job_artifact, :remote_store, project: unsynced_project) }
let!(:job_artifact_remote_broken_storage) { create(:ci_job_artifact, :expired, :remote_store, project: project_broken_storage) }
context 'counts all the things' do
describe '#count_syncable' do
it 'counts non-expired job artifacts' do
expect(subject.count_syncable).to eq 6
end
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
let!(:ci_job_artifact_1) { create(:ci_job_artifact, project: synced_project) }
let!(:ci_job_artifact_2) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let!(:ci_job_artifact_3) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let!(:ci_job_artifact_4) { create(:ci_job_artifact, project: unsynced_project) }
let!(:ci_job_artifact_5) { create(:ci_job_artifact, project: project_broken_storage) }
let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store) }
it 'counts non-expired job artifacts' do
expect(subject.count_syncable).to eq 2
end
end
subject { described_class.new(current_node_id: secondary.id) }
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
describe '#count_syncable' do
it 'counts registries for job artifacts' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'counts non-expired job artifacts' do
expect(subject.count_syncable).to eq 2
expect(subject.count_syncable).to eq 8
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
describe '#count_registry' do
it 'counts registries for job artifacts' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'counts non-expired job artifacts' do
expect(subject.count_syncable).to eq 4
end
expect(subject.count_registry).to eq 8
end
end
describe '#count_synced' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_2.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id)
end
it 'counts registries that has been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
context 'without selective sync' do
it 'counts job artifacts that have been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 3
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts job artifacts that has been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 1
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'counts job artifacts that has been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 1
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts job artifacts that has been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 2
end
end
end
describe '#count_failed' do
before do
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_broken_storage_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_expired_synced_project.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_expired_broken_storage.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_remote_synced_project.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_remote_broken_storage.id)
end
it 'counts registries that sync has failed' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
context 'without selective sync' do
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 3
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 2
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 1
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 2
end
end
end
describe '#count_synced_missing_on_primary' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false, missing_on_primary: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_2.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_synced_project.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id, missing_on_primary: false)
end
context 'without selective sync' do
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 2
end
end
it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 1
expect(subject.count_synced_missing_on_primary).to eq 3
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 1
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 1
end
end
end
describe '#count_registry' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_2.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id)
end
it 'counts file registries for job artifacts' do
expect(subject.count_registry).to eq 4
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'does not apply the selective sync restriction' do
expect(subject.count_registry).to eq 4
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'does not apply the selective sync restriction' do
expect(subject.count_registry).to eq 4
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts file registries for job artifacts ignoring remote artifacts' do
expect(subject.count_registry).to eq 4
end
end
end
end
context 'finds all the things' do
describe '#find_registry_differences' do
context 'untracked IDs' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_broken_storage_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
end
it 'includes Job Artifact IDs without an entry on the tracking database' do
it 'includes job artifact IDs without an entry on the tracking database' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array(
[job_artifact_unsynced_project.id, job_artifact_remote_synced_project.id,
job_artifact_broken_storage_2.id, job_artifact_expired_synced_project.id,
job_artifact_remote_broken_storage.id])
[ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id, ci_job_artifact_remote_3.id])
end
it 'excludes Job Artifacts outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(job_artifact_unsynced_project.id..job_artifact_broken_storage_2.id)
it 'excludes job artifacts outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(ci_job_artifact_3.id..ci_job_artifact_remote_2.id)
expect(untracked_ids).to match_array(
[job_artifact_unsynced_project.id, job_artifact_broken_storage_2.id])
[ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes Job Artifacts that are not in selectively synced projects' do
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([job_artifact_expired_synced_project.id, job_artifact_remote_synced_project.id])
expect(untracked_ids).to match_array([ci_job_artifact_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes Job Artifacts that are not in selectively synced projects' do
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([job_artifact_broken_storage_2.id, job_artifact_remote_broken_storage.id])
expect(untracked_ids).to match_array([ci_job_artifact_5.id])
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'excludes Job Artifacts in object storage' do
it 'excludes job artifacts in object storage' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array(
[job_artifact_unsynced_project.id, job_artifact_broken_storage_2.id,
job_artifact_expired_synced_project.id])
expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_5.id])
end
end
end
......@@ -292,13 +165,17 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo_fdw do
let!(:orphaned) { create(:geo_job_artifact_registry, artifact_id: non_existing_record_id) }
it 'includes tracked IDs that do not exist in the model table' do
_, unused_tracked_ids = subject.find_registry_differences(non_existing_record_id..non_existing_record_id)
range = non_existing_record_id..non_existing_record_id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
it 'excludes IDs outside the ID range' do
_, unused_tracked_ids = subject.find_registry_differences(1..1000)
range = 1..1000
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
......@@ -307,68 +184,76 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo_fdw do
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked Job Artifact' do
it 'includes tracked Job Artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id)
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_4.id }
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_unsynced_project.id)
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([job_artifact_unsynced_project.id])
expect(unused_tracked_ids).to match_array([ci_job_artifact_4.id])
end
end
context 'without a tracked Job Artifact' do
it 'returns empty' do
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_unsynced_project.id)
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced projects' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked Job Artifact' do
it 'includes tracked Job Artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id)
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_5.id }
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_broken_storage_1.id)
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced shard' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([job_artifact_synced_project.id])
expect(unused_tracked_ids).to match_array([ci_job_artifact_1.id])
end
end
context 'without a tracked Job Artifact' do
it 'returns empty' do
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_broken_storage_1.id)
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced shards' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked Job Artifact' do
context 'with a tracked job artifact' do
context 'in object storage' do
it 'includes tracked Job Artifact IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id)
range = job_artifact_remote_synced_project.id..job_artifact_remote_synced_project.id
it 'includes tracked job artifact IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_1.id)
range = ci_job_artifact_remote_1.id..ci_job_artifact_remote_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([job_artifact_remote_synced_project.id])
expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_1.id])
end
end
context 'not in object storage' do
it 'excludes tracked Job Artifact IDs that are not in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: job_artifact_synced_project.id)
range = job_artifact_synced_project.id..job_artifact_synced_project.id
it 'excludes tracked job artifact IDs that are not in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
range = ci_job_artifact_1.id..ci_job_artifact_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
......@@ -381,116 +266,182 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo_fdw do
end
describe '#find_never_synced_registries' do
let!(:registry_job_artifact_1) { create(:geo_job_artifact_registry, :never_synced, artifact_id: job_artifact_synced_project.id) }
let!(:registry_job_artifact_2) { create(:geo_job_artifact_registry, :never_synced, artifact_id: job_artifact_unsynced_project.id) }
let!(:registry_job_artifact_3) { create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id) }
let!(:registry_job_artifact_4) { create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_broken_storage_2.id) }
let!(:registry_job_artifact_remote_1) { create(:geo_job_artifact_registry, :never_synced, artifact_id: job_artifact_remote_synced_project.id) }
it 'returns registries for job artifacts that have never been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
registry_ci_job_artifact_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns registries for Job Artifacts that have never been synced' do
registries = subject.find_never_synced_registries(batch_size: 10)
expect(registries).to match_ids(registry_job_artifact_1, registry_job_artifact_2, registry_job_artifact_remote_1)
end
expect(registries).to match_ids(registry_ci_job_artifact_3, registry_ci_job_artifact_remote_3)
end
describe '#find_unsynced' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id, success: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id, success: true)
end
it 'excludes except_ids' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
context 'without selective sync' do
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones' do
job_artifacts = subject.find_unsynced(batch_size: 10, except_ids: [job_artifact_unsynced_project.id])
registries = subject.find_never_synced_registries(batch_size: 10, except_ids: [ci_job_artifact_3.id])
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project, job_artifact_remote_unsynced_project,
job_artifact_broken_storage_2)
expect(registries).to match_ids(registry_ci_job_artifact_remote_3)
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
describe '#find_unsynced' do
it 'returns registries for job artifacts that have never been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
registry_ci_job_artifact_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones' do
job_artifacts = subject.find_unsynced(batch_size: 10)
registries = subject.find_unsynced(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project)
end
expect(registries).to match_ids(registry_ci_job_artifact_3, registry_ci_job_artifact_remote_3)
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes except_ids' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones' do
job_artifacts = subject.find_unsynced(batch_size: 10)
registries = subject.find_unsynced(batch_size: 10, except_ids: [ci_job_artifact_3.id])
expect(job_artifacts).to match_ids(job_artifact_broken_storage_2)
expect(registries).to match_ids(registry_ci_job_artifact_remote_3)
end
end
# Runs find_unsynced against a secondary node created with the
# :local_storage_only factory trait.
# NOTE(review): the commented-out `#find_migrated_local` lines interleaved in
# this context look like remnants of a disabled copy of that spec — confirm
# before removing them.
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
# describe '#find_migrated_local' do
# before do
# create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id)
# create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id)
# create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id)
# create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_broken_storage.id)
# end
# Only job_artifact_unsynced_project and job_artifact_broken_storage_2 are
# expected back from the finder in this configuration.
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones and remotes' do
job_artifacts = subject.find_unsynced(batch_size: 10)
# it 'returns job artifacts excluding ones from the exception list' do
# job_artifacts = subject.find_migrated_local(batch_size: 10, except_ids: [job_artifact_remote_synced_project.id])
expect(job_artifacts).to match_ids(job_artifact_unsynced_project, job_artifact_broken_storage_2)
end
end
end
# expect(job_artifacts).to match_ids(job_artifact_remote_unsynced_project, job_artifact_remote_broken_storage)
# end
describe '#find_migrated_local' do
before do
  # Seed one registry row, using the factory defaults, for each listed job artifact.
  [
    job_artifact_synced_project,
    job_artifact_remote_synced_project,
    job_artifact_remote_unsynced_project,
    job_artifact_remote_broken_storage
  ].each do |job_artifact|
    create(:geo_job_artifact_registry, artifact_id: job_artifact.id)
  end
end
# it 'includes synced job artifacts that are expired, exclude stored locally' do
# job_artifacts = subject.find_migrated_local(batch_size: 10)
# Passing job_artifact_remote_synced_project's id in except_ids leaves the
# other two job_artifact_remote_* records, which also receive a registry row
# in the `before` block, as the expected result.
# NOTE(review): the commented-out lines inside this example appear to belong
# to an older, disabled version of the spec — confirm before removing them.
it 'returns job artifacts excluding ones from the exception list' do
job_artifacts = subject.find_migrated_local(batch_size: 10, except_ids: [job_artifact_remote_synced_project.id])
# expect(job_artifacts).to match_ids(job_artifact_remote_synced_project, job_artifact_remote_unsynced_project,
# job_artifact_remote_broken_storage)
# end
expect(job_artifacts).to match_ids(job_artifact_remote_unsynced_project, job_artifact_remote_broken_storage)
end
# context 'with selective sync by namespace' do
# let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
# With no except_ids, all three job_artifact_remote_* records that receive a
# registry row in the `before` block are expected; job_artifact_synced_project,
# which is registered there as well, is not part of the expected result.
it 'includes synced job artifacts that are expired, exclude stored locally' do
job_artifacts = subject.find_migrated_local(batch_size: 10)
# it 'returns job artifacts remotely and successfully synced locally' do
# job_artifacts = subject.find_migrated_local(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project, job_artifact_remote_unsynced_project,
job_artifact_remote_broken_storage)
end
# expect(job_artifacts).to match_ids(job_artifact_remote_synced_project)
# end
# end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
# context 'with selective sync by shard' do
# let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
# Runs under the enclosing context, whose secondary uses selective sync by
# namespace restricted to synced_group; only job_artifact_remote_synced_project
# is expected from find_migrated_local here.
it 'returns job artifacts remotely and successfully synced locally' do
job_artifacts = subject.find_migrated_local(batch_size: 10)
# it 'returns job artifacts remotely and successfully synced locally' do
# job_artifacts = subject.find_migrated_local(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project)
end
# expect(job_artifacts).to match_ids(job_artifact_remote_broken_storage)
# end
# end
# context 'with object storage sync disabled' do
# let(:secondary) { create(:geo_node, :local_storage_only) }
# it 'returns job artifacts excluding ones from the exception list' do
# job_artifacts = subject.find_migrated_local(batch_size: 10, except_ids: [job_artifact_remote_synced_project.id])
# expect(job_artifacts).to match_ids(job_artifact_remote_unsynced_project, job_artifact_remote_broken_storage)
# end
# end
# end
describe '#find_retryable_failed_registries' do
it 'returns registries for job artifacts that have failed to sync' do
  # Rows built with the :failed trait are captured; they make up the expected result.
  failed_registry_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
  create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
  create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
  failed_registry_4 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
  create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
  failed_registry_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
  create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
  create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)

  expected_registries = [failed_registry_1, failed_registry_4, failed_registry_remote_1]

  found = subject.find_retryable_failed_registries(batch_size: 10)

  expect(found).to match_ids(*expected_registries)
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes except_ids' do
registry_ci_job_artifact_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_ci_job_artifact_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts remotely and successfully synced locally' do
job_artifacts = subject.find_migrated_local(batch_size: 10)
registries = subject.find_retryable_failed_registries(batch_size: 10, except_ids: [ci_job_artifact_4.id])
expect(job_artifacts).to match_ids(job_artifact_remote_broken_storage)
expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_remote_1)
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
describe '#find_retryable_synced_missing_on_primary_registries' do
it 'returns registries for job artifacts that have been synced and are missing on the primary' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
registry_ci_job_artifact_2 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
registry_ci_job_artifact_5 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts excluding ones from the exception list' do
job_artifacts = subject.find_migrated_local(batch_size: 10, except_ids: [job_artifact_remote_synced_project.id])
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_unsynced_project, job_artifact_remote_broken_storage)
end
expect(registries).to match_ids(registry_ci_job_artifact_2, registry_ci_job_artifact_5)
end
it 'excludes except_ids' do
  create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
  kept_registry = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
  create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
  create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
  # The artifact id of this row is the one passed in except_ids below.
  create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
  create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
  create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
  create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)

  found = subject.find_retryable_synced_missing_on_primary_registries(
    batch_size: 10,
    except_ids: [ci_job_artifact_5.id]
  )

  expect(found).to match_ids(kept_registry)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment