Commit d236471c authored by Nick Thomas's avatar Nick Thomas

Merge branch 'mk/geo/exclude-expired-artifacts' into 'master'

Geo: Exclude expired job artifacts from syncing and counts

Closes #5357

See merge request gitlab-org/gitlab-ee!5380
parents 0be466e5 75638b38
......@@ -8,12 +8,12 @@ module Geo
end
end
def local_attachments
attachments.with_files_stored_locally
def syncable_attachments
attachments.geo_syncable
end
def count_local_attachments
local_attachments.count
def count_syncable_attachments
syncable_attachments.count
end
def count_synced_attachments
......@@ -152,16 +152,16 @@ module Geo
#
def fdw_find_synced_attachments
fdw_find_local_attachments.merge(Geo::FileRegistry.synced)
fdw_find_syncable_attachments.merge(Geo::FileRegistry.synced)
end
def fdw_find_failed_attachments
fdw_find_local_attachments.merge(Geo::FileRegistry.failed)
fdw_find_syncable_attachments.merge(Geo::FileRegistry.failed)
end
def fdw_find_local_attachments
def fdw_find_syncable_attachments
fdw_attachments.joins("INNER JOIN file_registry ON file_registry.file_id = #{fdw_attachments_table}.id")
.with_files_stored_locally
.geo_syncable
.merge(Geo::FileRegistry.attachments)
end
......@@ -171,7 +171,7 @@ module Geo
fdw_attachments.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_attachments_table}.id
AND file_registry.file_type IN (#{upload_types})")
.with_files_stored_locally
.geo_syncable
.where(file_registry: { id: nil })
.where.not(id: except_file_ids)
end
......@@ -205,7 +205,7 @@ module Geo
def legacy_find_synced_attachments
legacy_inner_join_registry_ids(
local_attachments,
syncable_attachments,
Geo::FileRegistry.attachments.synced.pluck(:file_id),
Upload
)
......@@ -213,7 +213,7 @@ module Geo
def legacy_find_failed_attachments
legacy_inner_join_registry_ids(
local_attachments,
syncable_attachments,
find_failed_attachments_registries.pluck(:file_id),
Upload
)
......@@ -223,7 +223,7 @@ module Geo
registry_file_ids = legacy_pluck_registry_file_ids(file_types: Geo::FileService::DEFAULT_OBJECT_TYPES) | except_file_ids
legacy_left_outer_join_registry_ids(
local_attachments,
syncable_attachments,
registry_file_ids,
Upload
)
......@@ -241,7 +241,7 @@ module Geo
def legacy_find_synced_missing_on_primary_attachments
legacy_inner_join_registry_ids(
local_attachments,
syncable_attachments,
Geo::FileRegistry.attachments.synced.missing_on_primary.pluck(:file_id),
Upload
)
......
module Geo
class JobArtifactRegistryFinder < RegistryFinder
def count_local_job_artifacts
local_job_artifacts.count
def count_syncable_job_artifacts
syncable_job_artifacts.count
end
def count_synced_job_artifacts
......@@ -72,8 +72,8 @@ module Geo
end
end
def local_job_artifacts
job_artifacts.with_files_stored_locally
def syncable_job_artifacts
job_artifacts.geo_syncable
end
def find_retryable_failed_job_artifacts_registries(batch_size:, except_artifact_ids: [])
......@@ -134,13 +134,13 @@ module Geo
def fdw_find_job_artifacts
fdw_job_artifacts.joins("INNER JOIN job_artifact_registry ON job_artifact_registry.artifact_id = #{fdw_job_artifacts_table}.id")
.with_files_stored_locally
.geo_syncable
end
def fdw_find_unsynced_job_artifacts(except_artifact_ids:)
fdw_job_artifacts.joins("LEFT OUTER JOIN job_artifact_registry
ON job_artifact_registry.artifact_id = #{fdw_job_artifacts_table}.id")
.with_files_stored_locally
.geo_syncable
.where(job_artifact_registry: { artifact_id: nil })
.where.not(id: except_artifact_ids)
end
......@@ -170,7 +170,7 @@ module Geo
def legacy_find_synced_job_artifacts
legacy_inner_join_registry_ids(
local_job_artifacts,
syncable_job_artifacts,
find_synced_job_artifacts_registries.pluck(:artifact_id),
Ci::JobArtifact
)
......@@ -178,7 +178,7 @@ module Geo
def legacy_find_failed_job_artifacts
legacy_inner_join_registry_ids(
local_job_artifacts,
syncable_job_artifacts,
find_failed_job_artifacts_registries.pluck(:artifact_id),
Ci::JobArtifact
)
......@@ -188,7 +188,7 @@ module Geo
registry_artifact_ids = legacy_pluck_artifact_ids(include_registry_ids: except_artifact_ids)
legacy_left_outer_join_registry_ids(
local_job_artifacts,
syncable_job_artifacts,
registry_artifact_ids,
Ci::JobArtifact
)
......@@ -211,7 +211,7 @@ module Geo
def legacy_find_synced_missing_on_primary_job_artifacts
legacy_inner_join_registry_ids(
local_job_artifacts,
syncable_job_artifacts,
find_synced_missing_on_primary_job_artifacts_registries.pluck(:artifact_id),
Ci::JobArtifact
)
......
module Geo
class LfsObjectRegistryFinder < FileRegistryFinder
def count_local_lfs_objects
local_lfs_objects.count
def count_syncable_lfs_objects
syncable_lfs_objects.count
end
def count_synced_lfs_objects
......@@ -72,8 +72,8 @@ module Geo
end
end
def local_lfs_objects
lfs_objects.with_files_stored_locally
def syncable_lfs_objects
lfs_objects.geo_syncable
end
def find_retryable_failed_lfs_objects_registries(batch_size:, except_file_ids: [])
......@@ -122,7 +122,7 @@ module Geo
def fdw_find_lfs_objects
fdw_lfs_objects.joins("INNER JOIN file_registry ON file_registry.file_id = #{fdw_lfs_objects_table}.id")
.with_files_stored_locally
.geo_syncable
.merge(Geo::FileRegistry.lfs_objects)
end
......@@ -130,7 +130,7 @@ module Geo
fdw_lfs_objects.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_lfs_objects_table}.id
AND file_registry.file_type = 'lfs'")
.with_files_stored_locally
.geo_syncable
.where(file_registry: { id: nil })
.where.not(id: except_file_ids)
end
......@@ -172,7 +172,7 @@ module Geo
def legacy_find_synced_lfs_objects
legacy_inner_join_registry_ids(
local_lfs_objects,
syncable_lfs_objects,
Geo::FileRegistry.lfs_objects.synced.pluck(:file_id),
LfsObject
)
......@@ -180,7 +180,7 @@ module Geo
def legacy_find_failed_lfs_objects
legacy_inner_join_registry_ids(
local_lfs_objects,
syncable_lfs_objects,
find_failed_lfs_objects_registries.pluck(:file_id),
LfsObject
)
......@@ -190,7 +190,7 @@ module Geo
registry_file_ids = legacy_pluck_registry_file_ids(file_types: :lfs) | except_file_ids
legacy_left_outer_join_registry_ids(
local_lfs_objects,
syncable_lfs_objects,
registry_file_ids,
LfsObject
)
......@@ -208,7 +208,7 @@ module Geo
def legacy_find_synced_missing_on_primary_lfs_objects
legacy_inner_join_registry_ids(
local_lfs_objects,
syncable_lfs_objects,
Geo::FileRegistry.lfs_objects.synced.missing_on_primary.pluck(:file_id),
LfsObject
)
......
module ObjectStorable
extend ActiveSupport::Concern
included do
scope :with_files_stored_locally, -> { where(self::STORE_COLUMN => [nil, ObjectStorage::Store::LOCAL]) }
scope :with_files_stored_remotely, -> { where(self::STORE_COLUMN => ObjectStorage::Store::REMOTE) }
end
end
......@@ -8,6 +8,9 @@ module EE
prepended do
after_destroy :log_geo_event
scope :not_expired, -> { where('expire_at IS NULL OR expire_at > ?', Time.current) }
scope :geo_syncable, -> { with_files_stored_locally.not_expired }
end
private
......
......@@ -8,6 +8,8 @@ module EE
prepended do
after_destroy :log_geo_event
scope :geo_syncable, -> { with_files_stored_locally }
end
private
......
......@@ -8,6 +8,8 @@ module EE
prepended do
after_destroy :log_geo_event
scope :geo_syncable, -> { with_files_stored_locally }
end
private
......
......@@ -2,10 +2,14 @@ module Geo
module Fdw
module Ci
class JobArtifact < ::Geo::BaseFdw
include ObjectStorable
STORE_COLUMN = :file_store
self.table_name = Gitlab::Geo::Fdw.table('ci_job_artifacts')
scope :with_files_stored_locally, -> { where(file_store: [nil, JobArtifactUploader::Store::LOCAL]) }
scope :with_files_stored_remotely, -> { where(file_store: JobArtifactUploader::Store::REMOTE) }
scope :not_expired, -> { where('expire_at IS NULL OR expire_at > ?', Time.current) }
scope :geo_syncable, -> { with_files_stored_locally.not_expired }
end
end
end
......
module Geo
module Fdw
class LfsObject < ::Geo::BaseFdw
include ObjectStorable
STORE_COLUMN = :file_store
self.table_name = Gitlab::Geo::Fdw.table('lfs_objects')
scope :with_files_stored_locally, -> { where(file_store: [nil, LfsObjectUploader::Store::LOCAL]) }
scope :with_files_stored_remotely, -> { where(file_store: LfsObjectUploader::Store::REMOTE) }
scope :geo_syncable, -> { with_files_stored_locally }
end
end
end
module Geo
module Fdw
class Upload < ::Geo::BaseFdw
include ObjectStorable
STORE_COLUMN = :store
self.table_name = Gitlab::Geo::Fdw.table('uploads')
scope :with_files_stored_locally, -> { where(store: [nil, ObjectStorage::Store::LOCAL]) }
scope :with_files_stored_remotely, -> { where(store: ObjectStorage::Store::REMOTE) }
scope :geo_syncable, -> { with_files_stored_locally }
end
end
end
......@@ -39,19 +39,19 @@ class GeoNodeStatus < ActiveRecord::Base
wikis_verified_count: 'Number of wikis verified on secondary',
wikis_verification_failed_count: 'Number of wikis failed to verify on secondary',
wikis_checksum_mismatch_count: 'Number of wikis that checksum mismatch on secondary',
lfs_objects_count: 'Total number of local LFS objects available on primary',
lfs_objects_synced_count: 'Number of local LFS objects synced on secondary',
lfs_objects_failed_count: 'Number of local LFS objects failed to sync on secondary',
lfs_objects_count: 'Total number of syncable LFS objects available on primary',
lfs_objects_synced_count: 'Number of syncable LFS objects synced on secondary',
lfs_objects_failed_count: 'Number of syncable LFS objects failed to sync on secondary',
lfs_objects_registry_count: 'Number of LFS objects in the registry',
lfs_objects_synced_missing_on_primary_count: 'Number of LFS objects marked as synced due to the file missing on the primary',
job_artifacts_count: 'Total number of local job artifacts available on primary',
job_artifacts_synced_count: 'Number of local job artifacts synced on secondary',
job_artifacts_failed_count: 'Number of local job artifacts failed to sync on secondary',
job_artifacts_count: 'Total number of syncable job artifacts available on primary',
job_artifacts_synced_count: 'Number of syncable job artifacts synced on secondary',
job_artifacts_failed_count: 'Number of syncable job artifacts failed to sync on secondary',
job_artifacts_registry_count: 'Number of job artifacts in the registry',
job_artifacts_synced_missing_on_primary_count: 'Number of job artifacts marked as synced due to the file missing on the primary',
attachments_count: 'Total number of local file attachments available on primary',
attachments_synced_count: 'Number of local file attachments synced on secondary',
attachments_failed_count: 'Number of local file attachments failed to sync on secondary',
attachments_count: 'Total number of syncable file attachments available on primary',
attachments_synced_count: 'Number of syncable file attachments synced on secondary',
attachments_failed_count: 'Number of syncable file attachments failed to sync on secondary',
attachments_registry_count: 'Number of attachments in the registry',
attachments_synced_missing_on_primary_count: 'Number of attachments marked as synced due to the file missing on the primary',
replication_slots_count: 'Total number of replication slots on the primary',
......@@ -150,9 +150,9 @@ class GeoNodeStatus < ActiveRecord::Base
self.last_event_date = latest_event&.created_at
self.repositories_count = projects_finder.count_repositories
self.wikis_count = projects_finder.count_wikis
self.lfs_objects_count = lfs_objects_finder.count_local_lfs_objects
self.job_artifacts_count = job_artifacts_finder.count_local_job_artifacts
self.attachments_count = attachments_finder.count_local_attachments
self.lfs_objects_count = lfs_objects_finder.count_syncable_lfs_objects
self.job_artifacts_count = job_artifacts_finder.count_syncable_job_artifacts
self.attachments_count = attachments_finder.count_syncable_attachments
self.last_successful_status_check_at = Time.now
self.storage_shards = StorageShard.all
......
---
title: 'Geo: Exclude expired job artifacts from syncing and counts'
merge_request: 5380
author:
type: fixed
......@@ -30,7 +30,7 @@ describe Geo::AttachmentRegistryFinder, :geo do
end
shared_examples 'counts all the things' do
describe '#count_local_attachments' do
describe '#count_syncable_attachments' do
before do
upload_1
upload_2
......@@ -39,13 +39,13 @@ describe Geo::AttachmentRegistryFinder, :geo do
end
it 'counts attachments' do
expect(subject.count_local_attachments).to eq 4
expect(subject.count_syncable_attachments).to eq 4
end
it 'ignores remote attachments' do
upload_1.update!(store: ObjectStorage::Store::REMOTE)
expect(subject.count_local_attachments).to eq 3
expect(subject.count_syncable_attachments).to eq 3
end
context 'with selective sync' do
......@@ -54,13 +54,13 @@ describe Geo::AttachmentRegistryFinder, :geo do
end
it 'counts attachments' do
expect(subject.count_local_attachments).to eq 2
expect(subject.count_syncable_attachments).to eq 2
end
it 'ignores remote attachments' do
upload_1.update!(store: ObjectStorage::Store::REMOTE)
expect(subject.count_local_attachments).to eq 1
expect(subject.count_syncable_attachments).to eq 1
end
end
end
......@@ -286,7 +286,7 @@ describe Geo::AttachmentRegistryFinder, :geo do
expect(synced_attachments).to match_ids(upload_1, upload_2, upload_6, upload_7)
end
it 'only finds local attachments' do
it 'only finds syncable attachments' do
create(:geo_file_registry, :avatar, file_id: upload_1.id)
create(:geo_file_registry, :avatar, file_id: upload_2.id)
upload_1.update!(store: ObjectStorage::Store::REMOTE)
......
......@@ -23,7 +23,7 @@ describe Geo::JobArtifactRegistryFinder, :geo do
end
shared_examples 'counts all the things' do
describe '#count_local_job_artifacts' do
describe '#count_syncable_job_artifacts' do
before do
job_artifact_1
job_artifact_2
......@@ -32,13 +32,19 @@ describe Geo::JobArtifactRegistryFinder, :geo do
end
it 'counts job artifacts' do
expect(subject.count_local_job_artifacts).to eq 4
expect(subject.count_syncable_job_artifacts).to eq 4
end
it 'ignores remote job artifacts' do
job_artifact_1.update_column(:file_store, ObjectStorage::Store::REMOTE)
expect(subject.count_local_job_artifacts).to eq 3
expect(subject.count_syncable_job_artifacts).to eq 3
end
it 'ignores expired job artifacts' do
job_artifact_1.update_column(:expire_at, Date.yesterday)
expect(subject.count_syncable_job_artifacts).to eq 3
end
context 'with selective sync' do
......@@ -47,13 +53,19 @@ describe Geo::JobArtifactRegistryFinder, :geo do
end
it 'counts job artifacts' do
expect(subject.count_local_job_artifacts).to eq 2
expect(subject.count_syncable_job_artifacts).to eq 2
end
it 'ignores remote job artifacts' do
job_artifact_1.update_column(:file_store, ObjectStorage::Store::REMOTE)
expect(subject.count_local_job_artifacts).to eq 1
expect(subject.count_syncable_job_artifacts).to eq 1
end
it 'ignores expired job artifacts' do
job_artifact_1.update_column(:expire_at, Date.yesterday)
expect(subject.count_syncable_job_artifacts).to eq 1
end
end
end
......@@ -91,6 +103,15 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(subject.count_synced_job_artifacts).to eq 2
end
it 'ignores expired job artifacts' do
job_artifact_1.update_column(:expire_at, Date.yesterday)
create(:geo_job_artifact_registry, artifact_id: job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_2.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_3.id)
expect(subject.count_synced_job_artifacts).to eq 2
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
......@@ -117,6 +138,15 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(subject.count_synced_job_artifacts).to eq 1
end
it 'ignores expired job artifacts' do
job_artifact_1.update_column(:expire_at, Date.yesterday)
create(:geo_job_artifact_registry, artifact_id: job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_2.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_3.id)
expect(subject.count_synced_job_artifacts).to eq 1
end
end
end
......@@ -153,6 +183,15 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(subject.count_failed_job_artifacts).to eq 2
end
it 'ignores expired job artifacts' do
job_artifact_1.update_column(:expire_at, Date.yesterday)
create(:geo_job_artifact_registry, artifact_id: job_artifact_1.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_2.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_3.id, success: false)
expect(subject.count_failed_job_artifacts).to eq 2
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
......@@ -178,10 +217,19 @@ describe Geo::JobArtifactRegistryFinder, :geo do
end
it 'ignores remote job artifacts' do
job_artifact_1.update_column(:file_store, ObjectStorage::Store::REMOTE)
create(:geo_job_artifact_registry, artifact_id: job_artifact_1.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_2.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_3.id, success: false)
expect(subject.count_failed_job_artifacts).to eq 1
end
it 'ignores expired job artifacts' do
job_artifact_1.update_column(:expire_at, Date.yesterday)
create(:geo_job_artifact_registry, artifact_id: job_artifact_1.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_2.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_3.id, success: false)
job_artifact_1.update_column(:file_store, ObjectStorage::Store::REMOTE)
expect(subject.count_failed_job_artifacts).to eq 1
end
......@@ -229,6 +277,13 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(subject.count_synced_missing_on_primary_job_artifacts).to eq 0
end
it 'ignores expired job artifacts' do
job_artifact_1.update_column(:expire_at, Date.yesterday)
create(:geo_job_artifact_registry, artifact_id: job_artifact_1.id, missing_on_primary: true)
expect(subject.count_synced_missing_on_primary_job_artifacts).to eq 0
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
......@@ -253,6 +308,13 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(subject.count_synced_missing_on_primary_job_artifacts).to eq 0
end
it 'ignores expired job artifacts' do
job_artifact_1.update_column(:expire_at, Date.yesterday)
create(:geo_job_artifact_registry, artifact_id: job_artifact_1.id, missing_on_primary: true)
expect(subject.count_synced_missing_on_primary_job_artifacts).to eq 0
end
end
end
end
......@@ -282,6 +344,22 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(job_artifacts).to match_ids(job_artifact_4)
end
it 'ignores remote job artifacts' do
job_artifact_2.update_column(:file_store, ObjectStorage::Store::REMOTE)
job_artifacts = subject.find_unsynced_job_artifacts(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_4)
end
it 'ignores expired job artifacts' do
job_artifact_2.update_column(:expire_at, Date.yesterday)
job_artifacts = subject.find_unsynced_job_artifacts(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_4)
end
end
describe '#find_migrated_local_job_artifacts' do
......@@ -324,6 +402,15 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(job_artifacts).to match_ids(job_artifact_remote_2)
end
it 'includes synced job artifacts that are expired' do
job_artifact = create(:ci_job_artifact, :remote_store, project: synced_project, expire_at: Date.yesterday)
create(:geo_job_artifact_registry, artifact_id: job_artifact.id)
job_artifacts = subject.find_migrated_local_job_artifacts(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact)
end
end
end
......
......@@ -23,7 +23,7 @@ describe Geo::LfsObjectRegistryFinder, :geo do
end
shared_examples 'counts all the things' do
describe '#count_local_lfs_objects' do
describe '#count_syncable_lfs_objects' do
before do
lfs_object_1
lfs_object_2
......@@ -32,13 +32,13 @@ describe Geo::LfsObjectRegistryFinder, :geo do
end
it 'counts LFS objects' do
expect(subject.count_local_lfs_objects).to eq 4
expect(subject.count_syncable_lfs_objects).to eq 4
end
it 'ignores remote LFS objects' do
lfs_object_1.update_column(:file_store, ObjectStorage::Store::REMOTE)
expect(subject.count_local_lfs_objects).to eq 3
expect(subject.count_syncable_lfs_objects).to eq 3
end
context 'with selective sync' do
......@@ -53,13 +53,13 @@ describe Geo::LfsObjectRegistryFinder, :geo do
end
it 'counts LFS objects' do
expect(subject.count_local_lfs_objects).to eq 2
expect(subject.count_syncable_lfs_objects).to eq 2
end
it 'ignores remote LFS objects' do
lfs_object_1.update_column(:file_store, ObjectStorage::Store::REMOTE)
expect(subject.count_local_lfs_objects).to eq 1
expect(subject.count_syncable_lfs_objects).to eq 1
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment