Commit 197b1c3e authored by Fabio Pitino's avatar Fabio Pitino

Merge branch '259619-add-metric-for-tracking-invalid-traces' into 'master'

Add metrics to track invalid job traces

See merge request gitlab-org/gitlab!72001
parents dee08e12 3bda0e58
......@@ -73,9 +73,11 @@ module Ci
::Gitlab::Ci::Trace::Checksum.new(build).then do |checksum|
unless checksum.valid?
metrics.increment_trace_operation(operation: :invalid)
metrics.increment_error_counter(type: :chunks_invalid_checksum)
if checksum.corrupted?
metrics.increment_trace_operation(operation: :corrupted)
metrics.increment_error_counter(type: :chunks_invalid_size)
end
next unless log_invalid_chunks?
......
......@@ -140,6 +140,7 @@ The following metrics are available:
| `gitlab_snowplow_events_total` | Counter | 14.1 | Total number of GitLab Snowplow product intelligence events emitted | |
| `gitlab_snowplow_failed_events_total` | Counter | 14.1 | Total number of GitLab Snowplow product intelligence events emission failures | |
| `gitlab_snowplow_successful_events_total` | Counter | 14.1 | Total number of GitLab Snowplow product intelligence events emission successes | |
| `gitlab_ci_build_trace_errors_total` | Counter | 14.4 | Total amount of different error types on a build trace | `type` |
## Metrics controlled by a feature flag
......
......@@ -21,6 +21,12 @@ module Gitlab
:corrupted # malformed trace found after comparing CRC32 and size
].freeze
TRACE_ERROR_TYPES = [
:chunks_invalid_size, # used to be :corrupted
:chunks_invalid_checksum, # used to be :invalid
:archive_invalid_checksum # malformed trace found into object store after comparing MD5
].freeze
def increment_trace_operation(operation: :unknown)
unless OPERATIONS.include?(operation)
raise ArgumentError, "unknown trace operation: #{operation}"
......@@ -33,6 +39,14 @@ module Gitlab
self.class.trace_bytes.increment({}, size.to_i)
end
def increment_error_counter(type: :unknown)
unless TRACE_ERROR_TYPES.include?(type)
raise ArgumentError, "unknown error type: #{type}"
end
self.class.trace_errors_counter.increment(type: type)
end
def observe_migration_duration(seconds)
self.class.finalize_histogram.observe({}, seconds.to_f)
end
......@@ -65,6 +79,15 @@ module Gitlab
::Gitlab::Metrics.histogram(name, comment, labels, buckets)
end
end
def self.trace_errors_counter
strong_memoize(:trace_errors_counter) do
name = :gitlab_ci_build_trace_errors_total
comment = 'Total amount of different error types on a build trace'
Gitlab::Metrics.counter(name, comment)
end
end
end
end
end
......
......@@ -15,4 +15,27 @@ RSpec.describe Gitlab::Ci::Trace::Metrics, :prometheus do
end
end
end
describe '#increment_error_counter' do
context 'when the operation type is known' do
it 'increments the counter' do
subject.increment_error_counter(type: :chunks_invalid_size)
subject.increment_error_counter(type: :chunks_invalid_checksum)
subject.increment_error_counter(type: :archive_invalid_checksum)
expect(described_class.trace_errors_counter.get(type: :chunks_invalid_size)).to eq 1
expect(described_class.trace_errors_counter.get(type: :chunks_invalid_checksum)).to eq 1
expect(described_class.trace_errors_counter.get(type: :archive_invalid_checksum)).to eq 1
expect(described_class.trace_errors_counter.values.count).to eq 3
end
end
context 'when the operation type is known' do
it 'raises an exception' do
expect { subject.increment_error_counter(type: :invalid_type) }
.to raise_error(ArgumentError)
end
end
end
end
......@@ -112,6 +112,14 @@ RSpec.describe Ci::UpdateBuildStateService do
.not_to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
it 'does not increment chunks_invalid_checksum trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :chunks_invalid_checksum)
end
end
context 'when build trace has been migrated' do
......@@ -174,6 +182,14 @@ RSpec.describe Ci::UpdateBuildStateService do
.to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
it 'increments chunks_invalid_checksum trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_error_counter)
.with(type: :chunks_invalid_checksum)
end
end
context 'when trace checksum is valid' do
......@@ -191,6 +207,14 @@ RSpec.describe Ci::UpdateBuildStateService do
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :corrupted)
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :chunks_invalid_checksum)
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :chunks_invalid_size)
end
context 'when using deprecated parameters' do
......@@ -208,6 +232,14 @@ RSpec.describe Ci::UpdateBuildStateService do
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :corrupted)
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :chunks_invalid_checksum)
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :chunks_invalid_size)
end
end
end
......@@ -227,6 +259,14 @@ RSpec.describe Ci::UpdateBuildStateService do
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :corrupted)
expect(metrics)
.to have_received(:increment_error_counter)
.with(type: :chunks_invalid_checksum)
expect(metrics)
.to have_received(:increment_error_counter)
.with(type: :chunks_invalid_size)
end
end
......@@ -242,9 +282,17 @@ RSpec.describe Ci::UpdateBuildStateService do
.to have_received(:increment_trace_operation)
.with(operation: :invalid)
expect(metrics)
.to have_received(:increment_error_counter)
.with(type: :chunks_invalid_checksum)
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :corrupted)
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :chunks_invalid_size)
end
end
......@@ -325,6 +373,10 @@ RSpec.describe Ci::UpdateBuildStateService do
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :invalid)
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :chunks_invalid_checksum)
end
context 'when build pending state is outdated' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment