Commit 3fbd48e1 authored by Shinya Maeda

Squashed commit of the following:

commit 10456b1e9240886432f565dd17689080bbb133b9
Merge: 312c1a9bdf8 a5f46278
Author: Shinya Maeda <shinya@gitlab.com>
Date:   Thu Nov 29 14:33:21 2018 +0900

    Merge branch 'master-ce' into add-counter-for-trace-chunks

commit 312c1a9bdf8efc45c3fed5ff50f05cc589bbb4ed
Author: Shinya Maeda <shinya@gitlab.com>
Date:   Wed Nov 28 20:06:18 2018 +0900

    Fix coding offence

commit e397cc2ccc1b2cf7f8b3558b8fa81fe2aa0ab366
Author: Shinya Maeda <shinya@gitlab.com>
Date:   Wed Nov 28 14:40:24 2018 +0900

    Fix tracking archive failure
parent a5f46278
# frozen_string_literal: true
module Ci
class ArchiveTraceService
def execute(job)
job.trace.archive!
rescue ::Gitlab::Ci::Trace::AlreadyArchivedError
# It's already archived, thus we can safely ignore this exception.
rescue => e
archive_error(e, job)
end
private
def failed_archive_counter
@failed_archive_counter ||= Gitlab::Metrics.counter(:job_trace_archive_failed_total, "Counter of failed attempts of trace archiving")
end
def archive_error(error, job)
failed_archive_counter.increment
Gitlab::Sentry.track_exception(error, issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502', extra: { job_id: job.id })
Rails.logger.error "Failed to archive trace. id: #{job.id} message: #{error.message}"
end
end
end
...@@ -7,7 +7,7 @@ class ArchiveTraceWorker ...@@ -7,7 +7,7 @@ class ArchiveTraceWorker
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def perform(job_id) def perform(job_id)
Ci::Build.without_archived_trace.find_by(id: job_id).try do |job| Ci::Build.without_archived_trace.find_by(id: job_id).try do |job|
job.trace.archive! Ci::ArchiveTraceService.new.execute(job)
end end
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
......
...@@ -11,21 +11,9 @@ module Ci ...@@ -11,21 +11,9 @@ module Ci
# This could happen when ArchiveTraceWorker sidekiq jobs were lost by receiving SIGKILL # This could happen when ArchiveTraceWorker sidekiq jobs were lost by receiving SIGKILL
# More details in https://gitlab.com/gitlab-org/gitlab-ce/issues/36791 # More details in https://gitlab.com/gitlab-org/gitlab-ce/issues/36791
Ci::Build.finished.with_live_trace.find_each(batch_size: 100) do |build| Ci::Build.finished.with_live_trace.find_each(batch_size: 100) do |build|
begin Ci::ArchiveTraceService.new.execute(build)
build.trace.archive!
rescue ::Gitlab::Ci::Trace::AlreadyArchivedError
rescue => e
failed_archive_counter.increment
Rails.logger.error "Failed to archive stale live trace. id: #{build.id} message: #{e.message}"
end
end end
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
private
def failed_archive_counter
@failed_archive_counter ||= Gitlab::Metrics.counter(:job_trace_archive_failed_total, "Counter of failed attempts of traces archiving")
end
end end
end end
require 'spec_helper'
describe Ci::ArchiveTraceService, '#execute' do
subject { described_class.new.execute(job) }
context 'when job is finished' do
let(:job) { create(:ci_build, :success, :trace_live) }
it 'creates an archived trace' do
expect { subject }.not_to raise_error
expect(job.reload.job_artifacts_trace).to be_exist
end
end
context 'when job is running' do
let(:job) { create(:ci_build, :running, :trace_live) }
it 'increments Prometheus counter, sends crash report to Sentry and ignore an error for continuing to archive' do
expect(Gitlab::Sentry)
.to receive(:track_exception)
.with(::Gitlab::Ci::Trace::ArchiveError,
issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
extra: { job_id: job.id } ).once
expect(Rails.logger)
.to receive(:error)
.with("Failed to archive trace. id: #{job.id} message: Job is not finished yet")
.and_call_original
expect(Gitlab::Metrics)
.to receive(:counter)
.with(:job_trace_archive_failed_total, "Counter of failed attempts of trace archiving")
.and_call_original
expect { subject }.not_to raise_error
end
end
end
...@@ -23,5 +23,33 @@ describe ArchiveTraceWorker do ...@@ -23,5 +23,33 @@ describe ArchiveTraceWorker do
subject subject
end end
end end
context 'when an unexpected exception happened during archiving' do
let!(:job) { create(:ci_build, :success, :trace_live) }
before do
allow_any_instance_of(Gitlab::Ci::Trace).to receive(:archive_stream!).and_raise('Unexpected error')
end
it 'increments Prometheus counter, sends crash report to Sentry and ignore an error for continuing to archive' do
expect(Gitlab::Sentry)
.to receive(:track_exception)
.with(RuntimeError,
issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
extra: { job_id: job.id } ).once
expect(Rails.logger)
.to receive(:error)
.with("Failed to archive trace. id: #{job.id} message: Unexpected error")
.and_call_original
expect(Gitlab::Metrics)
.to receive(:counter)
.with(:job_trace_archive_failed_total, "Counter of failed attempts of trace archiving")
.and_call_original
expect { subject }.not_to raise_error
end
end
end end
end end
...@@ -46,13 +46,27 @@ describe Ci::ArchiveTracesCronWorker do ...@@ -46,13 +46,27 @@ describe Ci::ArchiveTracesCronWorker do
let!(:build) { create(:ci_build, :success, :trace_live) } let!(:build) { create(:ci_build, :success, :trace_live) }
before do before do
allow_any_instance_of(Gitlab::Ci::Trace).to receive(:archive!).and_raise('Unexpected error') allow_any_instance_of(Gitlab::Ci::Trace).to receive(:archive_stream!).and_raise('Unexpected error')
end end
it 'puts a log' do it 'increments Prometheus counter, sends crash report to Sentry and ignore an error for continuing to archive' do
expect(Rails.logger).to receive(:error).with("Failed to archive stale live trace. id: #{build.id} message: Unexpected error") expect(Gitlab::Sentry)
.to receive(:track_exception)
.with(RuntimeError,
issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
extra: { job_id: build.id } ).once
subject expect(Rails.logger)
.to receive(:error)
.with("Failed to archive trace. id: #{build.id} message: Unexpected error")
.and_call_original
expect(Gitlab::Metrics)
.to receive(:counter)
.with(:job_trace_archive_failed_total, "Counter of failed attempts of trace archiving")
.and_call_original
expect { subject }.not_to raise_error
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment