Commit fe46d403 authored by Nick Thomas

Geo: Don't retry repositories or files until everything has been backfilled

parent c2809c54
......@@ -13,23 +13,27 @@ module Geo
end
# Builds the list of pending file resources for one scheduling pass.
#
# NOTE(review): this is a rendered diff whose +/- markers were lost, so two
# versions of the body appear back to back. Presumably the first three
# statements (ending in `interleave`) are the removed implementation and the
# statements from `resources = ...` onwards are its replacement — confirm
# against the commit.
def load_pending_resources
# Earlier form: fetch both lists without a batch size and mix them.
unsynced = find_unsynced_objects
failed = find_failed_objects
interleave(unsynced, failed)
# Later form: unsynced objects are requested first, with the whole batch size.
resources = find_unsynced_objects(batch_size: db_retrieve_batch_size)
# Part of the batch that the unsynced objects did not use up.
remaining_capacity = db_retrieve_batch_size - resources.count
# Failed objects are only fetched when the batch is not exactly full.
# NOTE(review): find_unsynced_objects combines two lists that are each limited
# to batch_size. If interleave does not cap its result, remaining_capacity can
# be negative here and would be handed to find_failed_objects as its limit —
# confirm against Geo::BaseSchedulerWorker#interleave.
if remaining_capacity.zero?
resources
else
resources + find_failed_objects(batch_size: remaining_capacity)
end
end
# Fetches LFS object IDs and upload object IDs, passing the same batch_size to
# each lookup, and returns the two lists combined through interleave.
#
# NOTE(review): two `def` lines appear because this is a diff rendering with
# its +/- markers lost. Presumably the signature without `batch_size:` and the
# two argument-less calls below it are the removed lines — confirm against the
# commit.
# NOTE(review): both lookups receive the full batch_size, so up to twice that
# many entries reach interleave; whether interleave trims its result is not
# visible here.
def find_unsynced_objects
lfs_object_ids = find_lfs_object_ids
upload_objects_ids = find_upload_object_ids
def find_unsynced_objects(batch_size:)
lfs_object_ids = find_lfs_object_ids(batch_size: batch_size)
upload_objects_ids = find_upload_object_ids(batch_size: batch_size)
interleave(lfs_object_ids, upload_objects_ids)
end
# Plucks [file_id, file_type] pairs from the Geo::FileRegistry rows in the
# `failed` scope, limited by batch_size.
#
# NOTE(review): the duplicate `def` line and the two consecutive `.limit`
# calls are artefacts of a diff rendered without +/- markers. Presumably the
# argument-less signature and `.limit(db_retrieve_batch_size)` are the removed
# lines — confirm against the commit.
def find_failed_objects
def find_failed_objects(batch_size:)
Geo::FileRegistry
.failed
.limit(db_retrieve_batch_size)
.limit(batch_size)
.pluck(:file_id, :file_type)
end
......@@ -37,7 +41,7 @@ module Geo
current_node.restricted_project_ids
end
# Plucks the `id` column from `relation`, limited by batch_size, and returns
# each id paired with the :lfs type marker, i.e. [id, :lfs].
#
# NOTE(review): this is a diff rendering without +/- markers. The assignment
# to `relation` is cut off by a hunk boundary, so how the relation is chosen
# is not visible here. The duplicate `def` line and the double `.limit` are
# presumably the old line followed by the new one — confirm against the commit.
def find_lfs_object_ids
def find_lfs_object_ids(batch_size:)
# Selective project replication adds a wrinkle to FDW queries, so
# we fall back to the legacy version for now.
relation =
......@@ -48,12 +52,12 @@ module Geo
end
relation
.limit(db_retrieve_batch_size)
.limit(batch_size)
.pluck(:id)
.map { |id| [id, :lfs] }
end
# Plucks (id, uploader) from `relation`, limited by batch_size, and returns
# [id, type] pairs where the type is the uploader name with a trailing
# "Uploader" removed and then underscored.
#
# NOTE(review): this is a diff rendering without +/- markers. The assignment
# to `relation` is cut off by a hunk boundary, so how the relation is chosen
# is not visible here. The duplicate `def` line and the double `.limit` are
# presumably the old line followed by the new one — confirm against the commit.
def find_upload_object_ids
def find_upload_object_ids(batch_size:)
# Selective project replication adds a wrinkle to FDW queries, so
# we fall back to the legacy version for now.
relation =
......@@ -64,7 +68,7 @@ module Geo
end
relation
.limit(db_retrieve_batch_size)
.limit(batch_size)
.pluck(:id, :uploader)
.map { |id, uploader| [id, uploader.sub(/Uploader\z/, '').underscore] }
end
......@@ -84,7 +88,6 @@ module Geo
Geo::Fdw::Upload.joins("LEFT OUTER JOIN file_registry ON file_registry.file_id = #{fdw_table}.id AND file_registry.file_type IN (#{obj_types})")
.where('file_registry.file_id IS NULL')
.order(created_at: :desc)
end
def legacy_find_upload_object_ids
......
......@@ -13,24 +13,28 @@ module Geo
end
# Builds the list of pending project IDs for one scheduling pass.
#
# NOTE(review): this is a rendered diff whose +/- markers were lost, so two
# versions of the body appear back to back. Presumably the first three
# statements (ending in `interleave`) are the removed implementation and the
# statements from `resources = ...` onwards are its replacement — confirm
# against the commit.
def load_pending_resources
# Earlier form: fetch both lists without a batch size and mix them.
project_ids_not_synced = find_project_ids_not_synced
project_ids_updated_recently = find_project_ids_updated_recently
interleave(project_ids_not_synced, project_ids_updated_recently)
# Later form: never-synced projects are requested first, with the whole batch
# size.
resources = find_project_ids_not_synced(batch_size: db_retrieve_batch_size)
# Part of the batch that the never-synced projects did not use up.
remaining_capacity = db_retrieve_batch_size - resources.size
# Recently updated projects only fill whatever capacity is left over.
if remaining_capacity.zero?
resources
else
resources + find_project_ids_updated_recently(batch_size: remaining_capacity)
end
end
# Plucks the IDs of projects from current_node.unsynced_projects after passing
# them through healthy_shards_restriction, re-ordered by
# last_repository_updated_at descending and limited by batch_size.
#
# NOTE(review): the duplicate `def` line and the two consecutive `.limit`
# calls are artefacts of a diff rendered without +/- markers. Presumably the
# argument-less signature and `.limit(db_retrieve_batch_size)` are the removed
# lines — confirm against the commit.
def find_project_ids_not_synced
def find_project_ids_not_synced(batch_size:)
healthy_shards_restriction(current_node.unsynced_projects)
.reorder(last_repository_updated_at: :desc)
.limit(db_retrieve_batch_size)
.limit(batch_size)
.pluck(:id)
end
# Plucks project_id from the current node's project registries in the `dirty`
# scope, ordered through
# Gitlab::Database.nulls_first_order(:last_repository_synced_at, :desc) and
# limited by batch_size.
#
# NOTE(review): the duplicate `def` line and the two consecutive `.limit`
# calls are artefacts of a diff rendered without +/- markers. Presumably the
# argument-less signature and `.limit(db_retrieve_batch_size)` are the removed
# lines — confirm against the commit.
def find_project_ids_updated_recently
def find_project_ids_updated_recently(batch_size:)
current_node.project_registries
.dirty
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at, :desc))
.limit(db_retrieve_batch_size)
.limit(batch_size)
.pluck(:project_id)
end
......
---
title: 'Geo: Don''t retry repositories or files until everything has been backfilled'
merge_request: 3182
author:
type: changed
......@@ -101,10 +101,14 @@ describe Geo::RepositorySyncWorker, :postgresql do
let!(:project_list) { create_list(:project, 4, :random_last_repository_updated_at) }
# Shared setup for this context: removes two unneeded fixtures, shrinks the
# worker's DB batch size to 2 with a node capacity of 3, makes repository
# creation raise, and stubs out wiki syncing and cache expiry.
before do
# Neither of these is needed for this spec
unsynced_project.destroy
project_in_synced_group.destroy
allow_any_instance_of(described_class).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave
secondary.update!(repos_max_capacity: 3) # Must be more than db_retrieve_batch_size
# Every call to Project#ensure_repository raises a Gitlab::Shell::Error.
allow_any_instance_of(Project).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo'))
# Wiki syncing is reported as neither wanted nor due.
allow_any_instance_of(Geo::ProjectSyncWorker).to receive(:sync_wiki?).and_return(false)
allow_any_instance_of(Geo::ProjectRegistry).to receive(:wiki_sync_due?).and_return(false)
allow_any_instance_of(Geo::RepositorySyncService).to receive(:expire_repository_caches)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment