Commit 3c558b16 authored by Furkan Ayhan's avatar Furkan Ayhan Committed by Bob Van Landuyt

Allow ci minutes reset service to continue in case of failure

We achieve this via handling error on Ci::Minutes::BatchResetService

Before this change, when an error occurs, the service is failed
completely. With this, the errors is caught and logged. The service
will continue to its work.
parent 8e0fc530
......@@ -7,12 +7,21 @@ module Ci
BATCH_SIZE = 1000.freeze
def initialize
@errors = []
end
def execute!(ids_range: nil, batch_size: BATCH_SIZE)
relation = Namespace
relation = relation.id_in(ids_range) if ids_range
relation.each_batch(of: batch_size) do |namespaces|
reset_ci_minutes!(namespaces)
end
if @errors.any?
exception = BatchNotResetError.new('Some namespace shared runner minutes were not reset.')
Gitlab::ErrorTracking.track_and_raise_exception(exception, namespace_ranges: @errors)
end
end
private
......@@ -27,13 +36,7 @@ module Ci
reset_ci_minutes_notifications!(namespaces)
end
rescue ActiveRecord::ActiveRecordError
# We don't need to print a thousand of namespace_ids
# in the message if all batches failed.
# A small batch would be sufficient for investigation.
failed_namespace_ids = namespaces.limit(10).ids # rubocop: disable CodeReuse/ActiveRecord
raise BatchNotResetError.new(
"#{namespaces.size} namespace shared runner minutes were not reset and the transaction was rolled back. Namespace Ids: #{failed_namespace_ids}")
@errors << { count: namespaces.size, first_id: namespaces.first.id, last_id: namespaces.last.id }
end
def recalculate_extra_shared_runners_minutes_limits!(namespaces)
......
---
title: Allow ci minutes reset service to continue in case of failure
merge_request: 32867
author:
type: other
......@@ -95,14 +95,29 @@ RSpec.describe Ci::Minutes::BatchResetService do
let(:ids_range) { nil }
before do
allow(Namespace).to receive(:transaction).and_raise(ActiveRecord::ActiveRecordError)
expect(Namespace).to receive(:transaction).once.ordered.and_raise(ActiveRecord::ActiveRecordError)
expect(Namespace).to receive(:transaction).once.ordered.and_call_original
end
it 'decorates the error with more information' do
expect { subject }
.to raise_error(
Ci::Minutes::BatchResetService::BatchNotResetError,
'3 namespace shared runner minutes were not reset and the transaction was rolled back. Namespace Ids: [1, 2, 3]')
it 'continues its progress' do
expect(service).to receive(:reset_ci_minutes!).with([namespace_1, namespace_2, namespace_3]).and_call_original
expect(service).to receive(:reset_ci_minutes!).with([namespace_4, namespace_5, namespace_6]).and_call_original
expect(Gitlab::ErrorTracking).to receive(:track_and_raise_exception)
subject
end
it 'raises exception with namespace details' do
expect(Gitlab::ErrorTracking).to receive(
:track_and_raise_exception
).with(
Ci::Minutes::BatchResetService::BatchNotResetError.new(
'Some namespace shared runner minutes were not reset.'
),
{ namespace_ranges: [{ count: 3, first_id: 1, last_id: 3 }] }
).once.and_call_original
expect { subject }.to raise_error(Ci::Minutes::BatchResetService::BatchNotResetError)
end
end
end
......
......@@ -50,7 +50,8 @@ RSpec.describe ClearSharedRunnersMinutesWorker do
it 'raises an exception' do
expect { worker.perform }.to raise_error(
Ci::Minutes::BatchResetService::BatchNotResetError,
"#{namespace_ids.count} namespace shared runner minutes were not reset and the transaction was rolled back. Namespace Ids: #{namespace_ids}")
'Some namespace shared runner minutes were not reset.'
)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment