Commit 3c558b16 authored by Furkan Ayhan's avatar Furkan Ayhan Committed by Bob Van Landuyt

Allow ci minutes reset service to continue in case of failure

We achieve this via handling error on Ci::Minutes::BatchResetService

Before this change, when an error occurs, the service is failed
completely. With this, the errors is caught and logged. The service
will continue to its work.
parent 8e0fc530
...@@ -7,12 +7,21 @@ module Ci ...@@ -7,12 +7,21 @@ module Ci
BATCH_SIZE = 1000.freeze BATCH_SIZE = 1000.freeze
def initialize
@errors = []
end
def execute!(ids_range: nil, batch_size: BATCH_SIZE) def execute!(ids_range: nil, batch_size: BATCH_SIZE)
relation = Namespace relation = Namespace
relation = relation.id_in(ids_range) if ids_range relation = relation.id_in(ids_range) if ids_range
relation.each_batch(of: batch_size) do |namespaces| relation.each_batch(of: batch_size) do |namespaces|
reset_ci_minutes!(namespaces) reset_ci_minutes!(namespaces)
end end
if @errors.any?
exception = BatchNotResetError.new('Some namespace shared runner minutes were not reset.')
Gitlab::ErrorTracking.track_and_raise_exception(exception, namespace_ranges: @errors)
end
end end
private private
...@@ -27,13 +36,7 @@ module Ci ...@@ -27,13 +36,7 @@ module Ci
reset_ci_minutes_notifications!(namespaces) reset_ci_minutes_notifications!(namespaces)
end end
rescue ActiveRecord::ActiveRecordError rescue ActiveRecord::ActiveRecordError
# We don't need to print a thousand of namespace_ids @errors << { count: namespaces.size, first_id: namespaces.first.id, last_id: namespaces.last.id }
# in the message if all batches failed.
# A small batch would be sufficient for investigation.
failed_namespace_ids = namespaces.limit(10).ids # rubocop: disable CodeReuse/ActiveRecord
raise BatchNotResetError.new(
"#{namespaces.size} namespace shared runner minutes were not reset and the transaction was rolled back. Namespace Ids: #{failed_namespace_ids}")
end end
def recalculate_extra_shared_runners_minutes_limits!(namespaces) def recalculate_extra_shared_runners_minutes_limits!(namespaces)
......
---
title: Allow ci minutes reset service to continue in case of failure
merge_request: 32867
author:
type: other
...@@ -95,14 +95,29 @@ RSpec.describe Ci::Minutes::BatchResetService do ...@@ -95,14 +95,29 @@ RSpec.describe Ci::Minutes::BatchResetService do
let(:ids_range) { nil } let(:ids_range) { nil }
before do before do
allow(Namespace).to receive(:transaction).and_raise(ActiveRecord::ActiveRecordError) expect(Namespace).to receive(:transaction).once.ordered.and_raise(ActiveRecord::ActiveRecordError)
expect(Namespace).to receive(:transaction).once.ordered.and_call_original
end end
it 'decorates the error with more information' do it 'continues its progress' do
expect { subject } expect(service).to receive(:reset_ci_minutes!).with([namespace_1, namespace_2, namespace_3]).and_call_original
.to raise_error( expect(service).to receive(:reset_ci_minutes!).with([namespace_4, namespace_5, namespace_6]).and_call_original
Ci::Minutes::BatchResetService::BatchNotResetError,
'3 namespace shared runner minutes were not reset and the transaction was rolled back. Namespace Ids: [1, 2, 3]') expect(Gitlab::ErrorTracking).to receive(:track_and_raise_exception)
subject
end
it 'raises exception with namespace details' do
expect(Gitlab::ErrorTracking).to receive(
:track_and_raise_exception
).with(
Ci::Minutes::BatchResetService::BatchNotResetError.new(
'Some namespace shared runner minutes were not reset.'
),
{ namespace_ranges: [{ count: 3, first_id: 1, last_id: 3 }] }
).once.and_call_original
expect { subject }.to raise_error(Ci::Minutes::BatchResetService::BatchNotResetError)
end end
end end
end end
......
...@@ -50,7 +50,8 @@ RSpec.describe ClearSharedRunnersMinutesWorker do ...@@ -50,7 +50,8 @@ RSpec.describe ClearSharedRunnersMinutesWorker do
it 'raises an exception' do it 'raises an exception' do
expect { worker.perform }.to raise_error( expect { worker.perform }.to raise_error(
Ci::Minutes::BatchResetService::BatchNotResetError, Ci::Minutes::BatchResetService::BatchNotResetError,
"#{namespace_ids.count} namespace shared runner minutes were not reset and the transaction was rolled back. Namespace Ids: #{namespace_ids}") 'Some namespace shared runner minutes were not reset.'
)
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment