Commit 67ed817f authored by Michael Kozono

Merge branch '205128-geo-continue-to-make-registry-table-ssot-for-lfs-objects' into 'master'

Geo - Make registry table SSOT for LFS objects

See merge request gitlab-org/gitlab!33432
parents 81f0e592 c3e0176f
...@@ -2,10 +2,8 @@ ...@@ -2,10 +2,8 @@
module Geo module Geo
class LfsObjectRegistryFinder < FileRegistryFinder class LfsObjectRegistryFinder < FileRegistryFinder
# Counts all existing registries independent
# of any change on filters / selective sync
def count_registry def count_registry
Geo::LfsObjectRegistry.count syncable.count
end end
def count_syncable def count_syncable
...@@ -13,22 +11,19 @@ module Geo ...@@ -13,22 +11,19 @@ module Geo
end end
def count_synced def count_synced
lfs_objects.synced.count syncable.synced.count
end end
def count_failed def count_failed
lfs_objects.failed.count syncable.failed.count
end end
def count_synced_missing_on_primary def count_synced_missing_on_primary
lfs_objects.synced.missing_on_primary.count syncable.synced.missing_on_primary.count
end end
def syncable def syncable
return lfs_objects if selective_sync? Geo::LfsObjectRegistry
return LfsObject.with_files_stored_locally if local_storage_only?
LfsObject
end end
# Returns untracked IDs as well as tracked IDs that are unused. # Returns untracked IDs as well as tracked IDs that are unused.
...@@ -49,14 +44,8 @@ module Geo ...@@ -49,14 +44,8 @@ module Geo
# #
# @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused # @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused
def find_registry_differences(range) def find_registry_differences(range)
source_ids = source_ids = lfs_objects.id_in(range).pluck_primary_key
lfs_objects(fdw: false) tracked_ids = syncable.pluck_model_ids_in_range(range)
.id_in(range)
.pluck_primary_key
tracked_ids =
Geo::LfsObjectRegistry
.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids unused_tracked_ids = tracked_ids - source_ids
...@@ -82,47 +71,19 @@ module Geo ...@@ -82,47 +71,19 @@ module Geo
# @param [Array<Integer>] except_ids ids that will be ignored from the query # @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop:disable CodeReuse/ActiveRecord # rubocop:disable CodeReuse/ActiveRecord
def find_never_synced_registries(batch_size:, except_ids: []) def find_never_synced_registries(batch_size:, except_ids: [])
Geo::LfsObjectRegistry syncable
.never .never
.model_id_not_in(except_ids) .model_id_not_in(except_ids)
.limit(batch_size) .limit(batch_size)
end end
# rubocop:enable CodeReuse/ActiveRecord alias_method :find_unsynced, :find_never_synced_registries
# Deprecated in favor of the process using
# #find_registry_differences and #find_never_synced_registries
#
# Find limited amount of non replicated lfs objects.
#
# You can pass a list with `except_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop:disable CodeReuse/ActiveRecord
def find_unsynced(batch_size:, except_ids: [])
lfs_objects
.missing_registry
.id_not_in(except_ids)
.limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
# rubocop:disable CodeReuse/ActiveRecord
def find_migrated_local(batch_size:, except_ids: [])
all_lfs_objects
.inner_join_registry
.with_files_stored_remotely
.id_not_in(except_ids)
.limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord # rubocop:enable CodeReuse/ActiveRecord
# rubocop:disable CodeReuse/ActiveRecord # rubocop:disable CodeReuse/ActiveRecord
def find_retryable_failed_registries(batch_size:, except_ids: []) def find_retryable_failed_registries(batch_size:, except_ids: [])
registries_for_lfs_objects syncable
.merge(Geo::LfsObjectRegistry.failed) .failed
.merge(Geo::LfsObjectRegistry.retry_due) .retry_due
.model_id_not_in(except_ids) .model_id_not_in(except_ids)
.limit(batch_size) .limit(batch_size)
end end
...@@ -130,7 +91,7 @@ module Geo ...@@ -130,7 +91,7 @@ module Geo
# rubocop:disable CodeReuse/ActiveRecord # rubocop:disable CodeReuse/ActiveRecord
def find_retryable_synced_missing_on_primary_registries(batch_size:, except_ids: []) def find_retryable_synced_missing_on_primary_registries(batch_size:, except_ids: [])
registries_for_lfs_objects syncable
.synced .synced
.missing_on_primary .missing_on_primary
.retry_due .retry_due
...@@ -141,16 +102,12 @@ module Geo ...@@ -141,16 +102,12 @@ module Geo
private private
def lfs_objects(fdw: true) def lfs_objects
local_storage_only?(fdw: fdw) ? all_lfs_objects(fdw: fdw).with_files_stored_locally : all_lfs_objects(fdw: fdw) local_storage_only?(fdw: false) ? all_lfs_objects.with_files_stored_locally : all_lfs_objects
end
def all_lfs_objects(fdw: true)
current_node(fdw: fdw).lfs_objects
end end
def registries_for_lfs_objects def all_lfs_objects
current_node.lfs_object_registries current_node(fdw: false).lfs_objects
end end
end end
end end
...@@ -37,23 +37,12 @@ module Geo ...@@ -37,23 +37,12 @@ module Geo
end end
def find_migrated_local_objects(batch_size:) def find_migrated_local_objects(batch_size:)
lfs_object_ids = find_migrated_local_lfs_objects_ids(batch_size: batch_size)
attachment_ids = find_migrated_local_attachments_ids(batch_size: batch_size) attachment_ids = find_migrated_local_attachments_ids(batch_size: batch_size)
job_artifact_ids = find_migrated_local_job_artifacts_ids(batch_size: batch_size) job_artifact_ids = find_migrated_local_job_artifacts_ids(batch_size: batch_size)
take_batch(lfs_object_ids, attachment_ids, job_artifact_ids) take_batch(attachment_ids, job_artifact_ids)
end end
# rubocop: disable CodeReuse/ActiveRecord
def find_migrated_local_lfs_objects_ids(batch_size:)
return [] unless lfs_objects_object_store_enabled?
lfs_objects_finder.find_migrated_local(batch_size: batch_size, except_ids: scheduled_file_ids(:lfs))
.pluck(Geo::Fdw::LfsObject.arel_table[:id])
.map { |id| ['lfs', id] }
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def find_migrated_local_attachments_ids(batch_size:) def find_migrated_local_attachments_ids(batch_size:)
return [] unless attachments_object_store_enabled? return [] unless attachments_object_store_enabled?
...@@ -85,17 +74,12 @@ module Geo ...@@ -85,17 +74,12 @@ module Geo
FileUploader.object_store_enabled? FileUploader.object_store_enabled?
end end
def lfs_objects_object_store_enabled?
LfsObjectUploader.object_store_enabled?
end
def job_artifacts_object_store_enabled? def job_artifacts_object_store_enabled?
JobArtifactUploader.object_store_enabled? JobArtifactUploader.object_store_enabled?
end end
def object_store_enabled? def object_store_enabled?
attachments_object_store_enabled? || attachments_object_store_enabled? ||
lfs_objects_object_store_enabled? ||
job_artifacts_object_store_enabled? job_artifacts_object_store_enabled?
end end
...@@ -107,10 +91,6 @@ module Geo ...@@ -107,10 +91,6 @@ module Geo
@attachments_finder ||= AttachmentRegistryFinder.new(current_node_id: current_node.id) @attachments_finder ||= AttachmentRegistryFinder.new(current_node_id: current_node.id)
end end
def lfs_objects_finder
@lfs_objects_finder ||= LfsObjectRegistryFinder.new(current_node_id: current_node.id)
end
def job_artifacts_finder def job_artifacts_finder
@job_artifacts_finder ||= JobArtifactRegistryFinder.new(current_node_id: current_node.id) @job_artifacts_finder ||= JobArtifactRegistryFinder.new(current_node_id: current_node.id)
end end
......
---
title: Geo - Make registry table SSOT for LFS objects
merge_request: 33432
author:
type: performance
...@@ -244,9 +244,9 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do ...@@ -244,9 +244,9 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do
create(:geo_upload_registry, :failed) create(:geo_upload_registry, :failed)
create(:geo_upload_registry, :avatar) create(:geo_upload_registry, :avatar)
create(:geo_upload_registry, file_type: :attachment) create(:geo_upload_registry, file_type: :attachment)
create(:geo_lfs_object_registry, :with_lfs_object, :failed) create(:geo_lfs_object_registry, :failed)
create(:geo_lfs_object_registry, :with_lfs_object) create(:geo_lfs_object_registry)
expect(subject.lfs_objects_synced_count).to eq(1) expect(subject.lfs_objects_synced_count).to eq(1)
end end
...@@ -258,9 +258,9 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do ...@@ -258,9 +258,9 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do
create(:geo_upload_registry, :failed) create(:geo_upload_registry, :failed)
create(:geo_upload_registry, :avatar, missing_on_primary: true) create(:geo_upload_registry, :avatar, missing_on_primary: true)
create(:geo_upload_registry, file_type: :attachment, missing_on_primary: true) create(:geo_upload_registry, file_type: :attachment, missing_on_primary: true)
create(:geo_lfs_object_registry, :with_lfs_object, :failed) create(:geo_lfs_object_registry, :failed)
create(:geo_lfs_object_registry, :with_lfs_object, missing_on_primary: true) create(:geo_lfs_object_registry, missing_on_primary: true)
expect(subject.lfs_objects_synced_missing_on_primary_count).to eq(1) expect(subject.lfs_objects_synced_missing_on_primary_count).to eq(1)
end end
...@@ -272,40 +272,27 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do ...@@ -272,40 +272,27 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do
create(:geo_upload_registry, :failed) create(:geo_upload_registry, :failed)
create(:geo_upload_registry, :avatar, :failed) create(:geo_upload_registry, :avatar, :failed)
create(:geo_upload_registry, :failed, file_type: :attachment) create(:geo_upload_registry, :failed, file_type: :attachment)
create(:geo_lfs_object_registry, :with_lfs_object) create(:geo_lfs_object_registry)
create(:geo_lfs_object_registry, :with_lfs_object, :failed) create(:geo_lfs_object_registry, :failed)
expect(subject.lfs_objects_failed_count).to eq(1) expect(subject.lfs_objects_failed_count).to eq(1)
end end
end end
describe '#lfs_objects_synced_in_percentage' do describe '#lfs_objects_synced_in_percentage' do
let(:lfs_object_project) { create(:lfs_objects_project, project: project_1) } it 'returns 0 when there are no registries' do
before do
allow(ProjectCacheWorker).to receive(:perform_async).and_return(true)
create(:lfs_objects_project, project: project_1)
create_list(:lfs_objects_project, 2, project: project_3)
end
it 'returns 0 when no objects are available' do
expect(subject.lfs_objects_synced_in_percentage).to eq(0) expect(subject.lfs_objects_synced_in_percentage).to eq(0)
end end
it 'returns the right percentage with no group restrictions' do it 'returns the right percentage' do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_project.lfs_object_id) create(:geo_lfs_object_registry)
create(:geo_lfs_object_registry, :failed)
create(:geo_lfs_object_registry, :never_synced)
create(:geo_lfs_object_registry, :never_synced)
expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(25) expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(25)
end end
it 'returns the right percentage with group restrictions' do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [group])
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_project.lfs_object_id)
expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(50)
end
end end
describe '#job_artifacts_synced_count' do describe '#job_artifacts_synced_count' do
......
...@@ -35,62 +35,6 @@ RSpec.describe Geo::MigratedLocalFilesCleanUpWorker, :geo, :geo_fdw, :use_sql_qu ...@@ -35,62 +35,6 @@ RSpec.describe Geo::MigratedLocalFilesCleanUpWorker, :geo, :geo_fdw, :use_sql_qu
subject.perform subject.perform
end end
context 'with LFS objects' do
let(:lfs_object_local) { create(:lfs_object) }
let(:lfs_object_remote_1) { create(:lfs_object, :object_storage) }
let(:lfs_object_remote_2) { create(:lfs_object, :object_storage) }
before do
stub_lfs_object_storage
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_local.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_1.id)
end
it 'schedules worker for file stored remotely and synced locally' do
expect(Geo::FileRegistryRemovalWorker).to receive(:perform_async).with('lfs', lfs_object_remote_1.id)
expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, lfs_object_local.id)
subject.perform
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, :local_storage_only, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
before do
create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_local)
create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_remote_1)
create(:lfs_objects_project, project: unsynced_project, lfs_object: lfs_object_remote_2)
end
it 'schedules worker for file stored remotely and synced locally' do
expect(Geo::FileRegistryRemovalWorker).to receive(:perform_async).with('lfs', lfs_object_remote_1.id)
expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, lfs_object_remote_2.id)
expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, lfs_object_local.id)
subject.perform
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, :local_storage_only, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
before do
create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_local)
create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_remote_1)
create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_remote_2)
end
it 'schedules worker for file stored remotely and synced locally' do
expect(Geo::FileRegistryRemovalWorker).to receive(:perform_async).with('lfs', lfs_object_remote_1.id)
expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, lfs_object_remote_2.id)
expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, lfs_object_local.id)
subject.perform
end
end
end
context 'with attachments' do context 'with attachments' do
let(:avatar_upload) { create(:upload) } let(:avatar_upload) { create(:upload) }
let(:personal_snippet_upload) { create(:upload, :personal_snippet_upload) } let(:personal_snippet_upload) { create(:upload, :personal_snippet_upload) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment