Commit d12004f1 authored by Michael Kozono's avatar Michael Kozono Committed by Andy Soiron

Geo: Forward pulls when repo is out of date

A secondary site will now forward project repo pulls to the primary site
if the latest repo change is not yet replicated.

Current limitations:

We assume last_repository_updated_at is a timestamp of the latest
change. So we compare the timestamp of the latest sync to
last_repository_updated_at.

- Unfortunately, project wiki updates also touch
  last_repository_updated_at. So wiki updates will also bump the repo to
  out-of-date.
- Unfortunately, successive last_repository_updated_at touches are
  throttled within Event::REPOSITORY_UPDATED_AT_INTERVAL minutes. This
  is currently set to 5 minutes. So for example, secondary sites will
  think highly active repos are up-to-date after the first sync of each
  5 minute interval.

This is a rough improvement over the current code.

Next, we can iteratively improve the accuracy of
`.repository_out_of_date?` to fully address the underlying issue. It may
be rather involved, so this MR helps by reducing the scope of that
future change.
parent f1ec7d36
......@@ -70,13 +70,13 @@ module EE
!!CONTROLLER_AND_ACTIONS_TO_REDIRECT[controller_name]&.include?(action_name) ||
git_receive_pack_request? ||
redirect_to_avoid_enumeration? ||
not_yet_replicated_redirect?
out_of_date_redirect?
end
def not_yet_replicated_redirect?
def out_of_date_redirect?
return false unless project
git_upload_pack_request? && !::Geo::ProjectRegistry.repository_replicated_for?(project.id)
git_upload_pack_request? && ::Geo::ProjectRegistry.repository_out_of_date?(project.id)
end
private
......@@ -150,7 +150,7 @@ module EE
def redirect?
return true if batch_upload?
return true if not_yet_replicated_redirect?
return true if out_of_date_redirect?
false
end
......@@ -186,10 +186,10 @@ module EE
geo_route_helper.match?('lfs_storage', 'download')
end
def not_yet_replicated_redirect?
def out_of_date_redirect?
return false unless project
(batch_download? || transfer_download?) && !::Geo::ProjectRegistry.repository_replicated_for?(project.id)
(batch_download? || transfer_download?) && ::Geo::ProjectRegistry.repository_out_of_date?(project.id)
end
def wanted_version
......
......@@ -228,10 +228,29 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where(id: start..finish)
end
def self.repository_replicated_for?(project_id)
return true unless ::Gitlab::Geo.secondary_with_primary?
# @return [Boolean] whether the project repository is out-of-date on this site
def self.repository_out_of_date?(project_id)
return false unless ::Gitlab::Geo.secondary_with_primary?
where(project_id: project_id).where.not(last_repository_successful_sync_at: nil).exists?
registry = find_by(project_id: project_id)
# Out-of-date if registry or project don't exist
return true if registry.nil? || registry.project.nil?
# Up-to-date if there is no timestamp for the latest change to the repo
return false unless registry.project.last_repository_updated_at
# Out-of-date if the repo has never been synced
return true unless registry.last_repository_successful_sync_at
# Return whether the latest change is replicated
#
# Current limitations:
#
# - We assume last_repository_updated_at is a timestamp of the latest change
# - last_repository_updated_at is also touched when a project wiki is updated
# - last_repository_updated_at touches are throttled within Event::REPOSITORY_UPDATED_AT_INTERVAL minutes
registry.last_repository_successful_sync_at <= registry.project.last_repository_updated_at
end
# Must be run before fetching the repository to avoid a race condition
......
......@@ -28,13 +28,13 @@ module EE
return unless ::Gitlab::Database.read_only?
return unless ::Gitlab::Geo.secondary_with_primary?
receive_pack? || upload_pack_and_not_replicated?
receive_pack? || upload_pack_and_out_of_date?
end
def upload_pack_and_not_replicated?
def upload_pack_and_out_of_date?
return false unless project
upload_pack? && !::Geo::ProjectRegistry.repository_replicated_for?(project.id)
upload_pack? && ::Geo::ProjectRegistry.repository_out_of_date?(project.id)
end
def messages
......
......@@ -370,12 +370,12 @@ RSpec.describe Geo::ProjectRegistry, :geo do
end
end
describe '.repository_replicated_for?' do
describe '.repository_out_of_date?' do
let_it_be(:project) { create(:project) }
context 'for a non-Geo setup' do
it 'returns true' do
expect(described_class.repository_replicated_for?(project.id)).to be_truthy
it 'returns false' do
expect(described_class.repository_out_of_date?(project.id)).to be_falsey
end
end
......@@ -387,8 +387,8 @@ RSpec.describe Geo::ProjectRegistry, :geo do
context 'for a Geo Primary' do
let(:current_node) { create(:geo_node, :primary) }
it 'returns true' do
expect(described_class.repository_replicated_for?(project.id)).to be_truthy
it 'returns false' do
expect(described_class.repository_out_of_date?(project.id)).to be_falsey
end
end
......@@ -396,8 +396,8 @@ RSpec.describe Geo::ProjectRegistry, :geo do
let(:current_node) { create(:geo_node) }
context 'where Primary node is not configured' do
it 'returns true' do
expect(described_class.repository_replicated_for?(project.id)).to be_truthy
it 'returns false' do
expect(described_class.repository_out_of_date?(project.id)).to be_falsey
end
end
......@@ -407,29 +407,46 @@ RSpec.describe Geo::ProjectRegistry, :geo do
end
context 'where project_registry entry does not exist' do
it 'returns false' do
project_without_registry = create(:project)
expect(described_class.repository_replicated_for?(project_without_registry.id)).to be_falsey
it 'returns true' do
expect(described_class.repository_out_of_date?(project.id)).to be_truthy
end
end
context 'where project_registry entry does exist' do
context 'where last_repository_successful_sync_at is not set' do
context 'where last_repository_updated_at is not set' do
it 'returns false' do
project_with_failed_registry = create(:project)
create(:geo_project_registry, :repository_sync_failed, project: project_with_failed_registry)
registry = create(:geo_project_registry, :synced)
registry.project.update!(last_repository_updated_at: nil)
expect(described_class.repository_replicated_for?(project_with_failed_registry.id)).to be_falsey
expect(described_class.repository_out_of_date?(registry.project_id)).to be_falsey
end
end
context 'where last_repository_successful_sync_at is set' do
it 'returns true' do
project_with_synced_registry = create(:project)
create(:geo_project_registry, :synced, project: project_with_synced_registry)
context 'where last_repository_updated_at is set' do
context 'where last_repository_successful_sync_at is not set' do
it 'returns true' do
registry = create(:geo_project_registry, :repository_sync_failed)
expect(described_class.repository_out_of_date?(registry.project_id)).to be_truthy
end
end
context 'where last_repository_successful_sync_at is set' do
context 'where the latest repo change has probably not been synced yet' do
it 'returns true' do
create(:geo_project_registry, :synced, project: project, last_repository_successful_sync_at: project.last_repository_updated_at - 1.minute)
expect(described_class.repository_out_of_date?(project.id)).to be_truthy
end
end
context 'where the latest repo change has probably been synced' do
it 'returns false' do
create(:geo_project_registry, :synced, project: project, last_repository_successful_sync_at: project.last_repository_updated_at + 1.minute)
expect(described_class.repository_replicated_for?(project_with_synced_registry.id)).to be_truthy
expect(described_class.repository_out_of_date?(project.id)).to be_falsey
end
end
end
end
end
......
......@@ -11,6 +11,7 @@ RSpec.describe "Git HTTP requests (Geo)", :geo do
let_it_be(:project_with_repo) { create(:project, :repository, :private) }
let_it_be(:project_no_repo) { create(:project, :private) }
let_it_be(:registry_with_repo) { create(:geo_project_registry, :synced, project: project_with_repo, last_repository_successful_sync_at: project_with_repo.last_repository_updated_at + 10.seconds) }
let_it_be(:primary_url) { 'http://primary.example.com' }
let_it_be(:primary_internal_url) { 'http://primary-internal.example.com' }
......@@ -31,8 +32,6 @@ RSpec.describe "Git HTTP requests (Geo)", :geo do
before do
project_with_repo.add_maintainer(user)
project_with_repo.add_guest(user_without_push_access)
create(:geo_project_registry, :synced, project: project_with_repo)
project_no_repo.add_maintainer(user)
project_no_repo.add_guest(user_without_push_access)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment