Commit 3b8d943b authored by Vasilii Iakliushin's avatar Vasilii Iakliushin

Disable mirroring for hard failures

Contributes to https://gitlab.com/gitlab-org/gitlab/-/issues/281835

**Problem**

Pull mirroring jobs keep retrying mirror updates even for hard
failures until they reach the retry limit. When a big external git
hosting provider has a persistent problem, the requests generated by
update mirror jobs can violate the acceptable error ratio and trigger
an incident.

**Solution**

Identify unrecoverable failures and set a hard failure state for update
mirror jobs. Trigger an email to the customer notifying them about the
problem.
parent f43ee18b
...@@ -8,6 +8,7 @@ module EE ...@@ -8,6 +8,7 @@ module EE
prepended do prepended do
BACKOFF_PERIOD = 24.seconds BACKOFF_PERIOD = 24.seconds
JITTER = 6.seconds JITTER = 6.seconds
SSL_CERTIFICATE_PROBLEM = /SSL certificate problem/.freeze
delegate :mirror?, :mirror_with_content?, :archived, :pending_delete, to: :project delegate :mirror?, :mirror_with_content?, :archived, :pending_delete, to: :project
...@@ -42,8 +43,13 @@ module EE ...@@ -42,8 +43,13 @@ module EE
# On a started -> failed transition of a mirror, decide whether the
# failure is worth retrying.
#
# Unrecoverable failures (e.g. an SSL certificate problem on the remote)
# are hard-failed immediately by pushing retry_count past the limit, so
# no further sync is scheduled. Recoverable failures keep the normal
# retry/backoff behavior.
#
# NOTE(review): reconstructed from a side-by-side diff rendering that
# duplicated tokens on the unchanged lines; logic matches the visible
# added lines (L30-L36).
before_transition started: :failed do |state, _|
  if state.mirror?
    state.last_update_at = Time.current

    if state.unrecoverable_failure?
      state.set_max_retry_count
    else
      state.increment_retry_count
      state.set_next_execution_timestamp
    end
  end
end
...@@ -136,6 +142,20 @@ module EE ...@@ -136,6 +142,20 @@ module EE
self.retry_count += 1 self.retry_count += 1
end end
# Pushes retry_count just past the configured retry limit so the
# mirror is treated as hard-failed and no further update is scheduled.
# Used for unrecoverable failures (see #unrecoverable_failure?).
def set_max_retry_count
  self.retry_count = ::Gitlab::Mirror::MAX_RETRY + 1
end
# Whether the most recent mirror update failed in a way that retrying
# cannot fix (e.g. an SSL certificate problem on the remote host).
def unrecoverable_failure?
  return false unless last_update_failed?

  unrecoverable_error_message?
end
# Whether last_error matches a known-unrecoverable error pattern.
# A blank error message is never considered unrecoverable.
def unrecoverable_error_message?
  message = last_error
  return false if message.blank?

  message.match?(SSL_CERTIFICATE_PROBLEM)
end
# We schedule the next sync time based on the duration of the # We schedule the next sync time based on the duration of the
# last mirroring period and add it a fixed backoff period with a random jitter # last mirroring period and add it a fixed backoff period with a random jitter
def set_next_execution_timestamp def set_next_execution_timestamp
......
---
title: Disable repository mirroring for hard failures
merge_request: 54964
author:
type: added
...@@ -191,6 +191,26 @@ RSpec.describe ProjectImportState, type: :model do ...@@ -191,6 +191,26 @@ RSpec.describe ProjectImportState, type: :model do
end end
end end
# Covers the started -> failed transition when last_error matches the
# unrecoverable SSL-certificate pattern: the state machine should
# hard-fail the mirror instead of scheduling another retry.
describe 'mirror has an unrecoverable failure' do
  let(:import_state) { create(:import_state, :mirror, :started, last_error: 'SSL certificate problem: certificate has expired') }

  # Hard failure notifies the project owner by email.
  it 'sends a notification' do
    expect_any_instance_of(EE::NotificationService).to receive(:mirror_was_hard_failed).with(import_state.project)

    import_state.fail_op
  end

  # retry_count is pushed past MAX_RETRY, which hard_failed? reflects.
  it 'marks import state as hard_failed' do
    import_state.fail_op

    expect(import_state.hard_failed?).to be_truthy
  end

  # Unlike recoverable failures, no further sync is scheduled.
  it 'does not set next execution timestamp' do
    expect { import_state.fail_op }.not_to change { import_state.next_execution_timestamp }
  end
end
describe '#mirror_waiting_duration' do describe '#mirror_waiting_duration' do
it 'returns nil if not mirror' do it 'returns nil if not mirror' do
import_state = create(:import_state, :scheduled) import_state = create(:import_state, :scheduled)
...@@ -569,4 +589,39 @@ RSpec.describe ProjectImportState, type: :model do ...@@ -569,4 +589,39 @@ RSpec.describe ProjectImportState, type: :model do
expect { import_state.increment_retry_count }.to change { import_state.retry_count }.from(0).to(1) expect { import_state.increment_retry_count }.to change { import_state.retry_count }.from(0).to(1)
end end
end end
# #set_max_retry_count should leave retry_count strictly above the
# retry limit so the mirror counts as hard-failed.
describe '#set_max_retry_count' do
  let(:import_state) { create(:import_state, :mirror, :failed) }

  it 'sets retry_count to max' do
    expect { import_state.set_max_retry_count }
      .to change(import_state, :retry_count)
      .from(0)
      .to(Gitlab::Mirror::MAX_RETRY + 1)
  end
end
# #unrecoverable_failure? is true only for a failed mirror whose
# last_error matches the SSL-certificate-problem pattern.
describe '#unrecoverable_failure?' do
  let(:import_state) { create(:import_state, :mirror, :failed, last_error: last_error) }
  let(:last_error) { 'fetch remote: "fatal: unable to access \'https://expired_cert.host\': SSL certificate problem: certificate has expired\n": exit status 128' }

  it 'is truthy for an unrecoverable error on a failed mirror' do
    expect(import_state.unrecoverable_failure?).to be_truthy
  end

  context 'when error is recoverable' do
    let(:last_error) { 'fetch remote: "fatal: unable to access \'host\': Failed to connect to host port 80: Connection timed out\n": exit status 128' }

    it 'is falsey' do
      expect(import_state.unrecoverable_failure?).to be_falsey
    end
  end

  context 'when error is missing' do
    let(:last_error) { nil }

    it 'is falsey' do
      expect(import_state.unrecoverable_failure?).to be_falsey
    end
  end

  context 'when import_state is not failed' do
    let(:import_state) { create(:import_state, :mirror, :finished, last_error: last_error) }

    it 'is falsey' do
      expect(import_state.unrecoverable_failure?).to be_falsey
    end
  end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment