Commit a479fd9d authored by Valery Sizov's avatar Valery Sizov

Merge branch '6508-geo-project-sync-failures-usually-double-increment-_retry_count' into 'master'

Geo: Project sync failures usually double-increment "*_retry_count"

Closes #6508

See merge request gitlab-org/gitlab-ee!11381
parents 61c2f575 574bb252
......@@ -216,12 +216,9 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
def start_sync!(type)
ensure_valid_type!(type)
new_count = retry_count(type) + 1
update!(
"last_#{type}_synced_at" => Time.now,
"#{type}_retry_count" => new_count,
"#{type}_retry_at" => next_retry_time(new_count))
"#{type}_retry_count" => retry_count(type))
end
# Is called when synchronization finishes without any issue
......@@ -257,9 +254,12 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
def fail_sync!(type, message, error, attrs = {})
ensure_valid_type!(type)
new_retry_count = retry_count(type) + 1
attrs["resync_#{type}"] = true
attrs["last_#{type}_sync_failure"] = "#{message}: #{error.message}"
attrs["#{type}_retry_count"] = retry_count(type) + 1
attrs["#{type}_retry_count"] = new_retry_count
attrs["#{type}_retry_at"] = next_retry_time(new_retry_count)
update!(attrs)
end
......@@ -463,7 +463,7 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
# @param [String] type must be one of the values in TYPES
# @see REGISTRY_TYPES
def retry_count(type)
public_send("#{type}_retry_count") || -1 # rubocop:disable GitlabSecurity/PublicSend
public_send("#{type}_retry_count") || 0 # rubocop:disable GitlabSecurity/PublicSend
end
# Mark repository as synced using atomic conditions
......
......@@ -48,10 +48,6 @@ module Geo
log_info("Trying to fetch #{type}")
clean_up_temporary_repository
log_info("Marking #{type} sync as started")
registry.start_sync!(type)
if redownload?
redownload_repository
@new_repository = true
......@@ -161,7 +157,12 @@ module Geo
)
end
def fail_registry!(message, error, attrs = {})
def start_registry_sync!
log_info("Marking #{type} sync as started")
registry.start_sync!(type)
end
def fail_registry_sync!(message, error, attrs = {})
log_error(message, error)
registry.fail_sync!(type, message, error, attrs)
......
......@@ -7,6 +7,7 @@ module Geo
private
def sync_repository
start_registry_sync!
fetch_repository
update_root_ref
mark_sync_as_successful
......@@ -15,17 +16,17 @@ module Geo
if e.message.include? Gitlab::GitAccess::ERROR_MESSAGES[:no_repo]
if repository_presumably_exists_on_primary?
log_info('Repository is not found, but it seems to exist on the primary')
fail_registry!('Repository is not found', e)
fail_registry_sync!('Repository is not found', e)
else
log_info('Repository is not found, marking it as successfully synced')
mark_sync_as_successful(missing_on_primary: true)
end
else
fail_registry!('Error syncing repository', e)
fail_registry_sync!('Error syncing repository', e)
end
rescue Gitlab::Git::Repository::NoRepository => e
log_info('Setting force_to_redownload flag')
fail_registry!('Invalid repository', e, force_to_redownload_repository: true)
fail_registry_sync!('Invalid repository', e, force_to_redownload_repository: true)
log_info('Expiring caches')
project.repository.after_create
......
......@@ -7,8 +7,8 @@ module Geo
private
def sync_repository
start_registry_sync!
fetch_repository
mark_sync_as_successful
rescue Gitlab::Shell::Error, Gitlab::Git::BaseError, ProjectWiki::CouldNotCreateWikiError => e
# In some cases repository does not exist, the only way to know about this is to parse the error text.
......@@ -16,17 +16,17 @@ module Geo
if e.message.include? Gitlab::GitAccess::ERROR_MESSAGES[:no_repo]
if repository_presumably_exists_on_primary?
log_info('Wiki is not found, but it seems to exist on the primary')
fail_registry!('Wiki is not found', e)
fail_registry_sync!('Wiki is not found', e)
else
log_info('Wiki is not found, marking it as successfully synced')
mark_sync_as_successful(missing_on_primary: true)
end
else
fail_registry!('Error syncing wiki repository', e)
fail_registry_sync!('Error syncing wiki repository', e)
end
rescue Gitlab::Git::Repository::NoRepository => e
log_info('Setting force_to_redownload flag')
fail_registry!('Invalid wiki', e, force_to_redownload_wiki: true)
fail_registry_sync!('Invalid wiki', e, force_to_redownload_wiki: true)
ensure
expire_repository_caches
end
......
---
title: 'Geo: Fix: Project sync failures usually double-increment *_retry_count'
merge_request: 11381
author:
type: fixed
......@@ -334,61 +334,12 @@ describe Geo::ProjectRegistry do
expect(subject.last_repository_synced_at).to be_like_time(Time.now)
end
it 'ensures repository_retry_at is capped at one hour' do
subject.update(repository_retry_count: 31)
subject.start_sync!(type)
expect(subject).to have_attributes(
repository_retry_at: be_within(100.seconds).of(1.hour.from_now),
repository_retry_count: 32
)
end
shared_examples_for 'sets repository_retry_at to a future time' do
it 'sets repository_retry_at to a future time' do
subject.start_sync!(type)
expect(subject.repository_retry_at > Time.now).to be(true)
end
end
context 'when repository_retry_count is nil' do
it 'sets repository_retry_count to 0' do
expect do
subject.start_sync!(type)
end.to change { subject.repository_retry_count }.from(nil).to(0)
end
it_behaves_like 'sets repository_retry_at to a future time'
end
context 'when repository_retry_count is 0' do
before do
subject.update!(repository_retry_count: 0)
end
it 'increments repository_retry_count' do
expect do
subject.start_sync!(type)
end.to change { subject.repository_retry_count }.by(1)
end
it_behaves_like 'sets repository_retry_at to a future time'
end
context 'when repository_retry_count is 1' do
before do
subject.update!(repository_retry_count: 1)
end
it 'increments repository_retry_count' do
expect do
subject.start_sync!(type)
end.to change { subject.repository_retry_count }.by(1)
end
it_behaves_like 'sets repository_retry_at to a future time'
end
end
......@@ -401,61 +352,12 @@ describe Geo::ProjectRegistry do
expect(subject.last_wiki_synced_at).to be_like_time(Time.now)
end
it 'ensures wiki_retry_at is capped at one hour' do
subject.update(wiki_retry_count: 31)
subject.start_sync!(type)
expect(subject).to have_attributes(
wiki_retry_at: be_within(100.seconds).of(1.hour.from_now),
wiki_retry_count: 32
)
end
shared_examples_for 'sets wiki_retry_at to a future time' do
it 'sets wiki_retry_at to a future time' do
subject.start_sync!(type)
expect(subject.wiki_retry_at > Time.now).to be(true)
end
end
context 'when wiki_retry_count is nil' do
it 'sets wiki_retry_count to 0' do
expect do
subject.start_sync!(type)
end.to change { subject.wiki_retry_count }.from(nil).to(0)
end
it_behaves_like 'sets wiki_retry_at to a future time'
end
context 'when wiki_retry_count is 0' do
before do
subject.update!(wiki_retry_count: 0)
end
it 'increments wiki_retry_count' do
expect do
subject.start_sync!(type)
end.to change { subject.wiki_retry_count }.by(1)
end
it_behaves_like 'sets wiki_retry_at to a future time'
end
context 'when wiki_retry_count is 1' do
before do
subject.update!(wiki_retry_count: 1)
end
it 'increments wiki_retry_count' do
expect do
subject.start_sync!(type)
end.to change { subject.wiki_retry_count }.by(1)
end
it_behaves_like 'sets wiki_retry_at to a future time'
end
end
end
......@@ -664,6 +566,25 @@ describe Geo::ProjectRegistry do
last_repository_sync_failure: 'foo')
end
it 'sets repository_retry_at to a future time' do
subject.update(repository_retry_count: 0)
subject.fail_sync!(type, message, error)
expect(subject.repository_retry_at > Time.now).to be(true)
end
it 'ensures repository_retry_at is capped at one hour' do
subject.update(repository_retry_count: 31)
subject.fail_sync!(type, message, error)
expect(subject).to have_attributes(
repository_retry_at: be_within(100.seconds).of(1.hour.from_now),
repository_retry_count: 32
)
end
it 'sets resync_repository to true' do
subject.fail_sync!(type, message, error)
......@@ -693,6 +614,30 @@ describe Geo::ProjectRegistry do
expect(subject.reload.force_to_redownload_repository).to be true
end
context 'when repository_retry_count is 0' do
before do
subject.update!(repository_retry_count: 0)
end
it 'increments repository_retry_count' do
expect do
subject.fail_sync!(type, message, error)
end.to change { subject.repository_retry_count }.by(1)
end
end
context 'when repository_retry_count is 1' do
before do
subject.update!(repository_retry_count: 1)
end
it 'increments repository_retry_count' do
expect do
subject.fail_sync!(type, message, error)
end.to change { subject.repository_retry_count }.by(1)
end
end
end
context 'for a wiki' do
......@@ -706,6 +651,25 @@ describe Geo::ProjectRegistry do
last_wiki_sync_failure: 'foo')
end
it 'sets wiki_retry_at to a future time' do
subject.update(wiki_retry_count: 0)
subject.fail_sync!(type, message, error)
expect(subject.wiki_retry_at > Time.now).to be(true)
end
it 'ensures wiki_retry_at is capped at one hour' do
subject.update(wiki_retry_count: 31)
subject.fail_sync!(type, message, error)
expect(subject).to have_attributes(
wiki_retry_at: be_within(100.seconds).of(1.hour.from_now),
wiki_retry_count: 32
)
end
it 'sets resync_wiki to true' do
subject.fail_sync!(type, message, error)
......@@ -735,6 +699,30 @@ describe Geo::ProjectRegistry do
expect(subject.reload.force_to_redownload_wiki).to be true
end
context 'when wiki_retry_count is 0' do
before do
subject.update!(wiki_retry_count: 0)
end
it 'increments wiki_retry_count' do
expect do
subject.fail_sync!(type, message, error)
end.to change { subject.wiki_retry_count }.by(1)
end
end
context 'when wiki_retry_count is 1' do
before do
subject.update!(wiki_retry_count: 1)
end
it 'increments wiki_retry_count' do
expect do
subject.fail_sync!(type, message, error)
end.to change { subject.wiki_retry_count }.by(1)
end
end
end
end
......
......@@ -424,7 +424,7 @@ describe Geo::RepositorySyncService do
)
expect(project.repository).to receive(:expire_exists_cache).twice.and_call_original
expect(subject).not_to receive(:fail_registry!)
expect(subject).not_to receive(:fail_registry_sync!)
subject.execute
end
......
......@@ -230,7 +230,7 @@ RSpec.describe Geo::WikiSyncService do
)
expect(project.wiki.repository).to receive(:expire_exists_cache).twice.and_call_original
expect(subject).not_to receive(:fail_registry!)
expect(subject).not_to receive(:fail_registry_sync!)
subject.execute
end
......
......@@ -49,6 +49,15 @@ shared_examples 'cleans temporary repositories' do
end
shared_examples 'geo base sync fetch' do
describe '#sync_repository' do
it 'tells registry that sync will start now' do
registry = subject.send(:registry)
expect(registry).to receive(:start_sync!)
subject.send(:sync_repository)
end
end
describe '#fetch_repository' do
let(:fetch_repository) { subject.send(:fetch_repository) }
......@@ -62,13 +71,6 @@ shared_examples 'geo base sync fetch' do
fetch_repository
end
it 'tells registry that sync will start now' do
registry = subject.send(:registry)
expect(registry).to receive(:start_sync!)
fetch_repository
end
it 'fetches repository from geo node' do
is_expected.to receive(:fetch_geo_mirror).with(subject.send(:repository))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment