Commit 1482b1f6 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'eb-track-failures-on-pipeline-complete' into 'master'

Track test failures on pipeline completion

See merge request gitlab-org/gitlab!48695
parents e75161fb 22ce4dff
...@@ -269,6 +269,12 @@ module Ci ...@@ -269,6 +269,12 @@ module Ci
end end
end end
after_transition any => ::Ci::Pipeline.completed_statuses do |pipeline|
pipeline.run_after_commit do
::Ci::TestFailureHistoryService.new(pipeline).async.perform_if_needed # rubocop: disable CodeReuse/ServiceClass
end
end
after_transition any => [:success, :failed] do |pipeline| after_transition any => [:success, :failed] do |pipeline|
ref_status = pipeline.ci_ref&.update_status_by!(pipeline) ref_status = pipeline.ci_ref&.update_status_by!(pipeline)
...@@ -938,10 +944,18 @@ module Ci ...@@ -938,10 +944,18 @@ module Ci
builds.latest.with_reports(reports_scope) builds.latest.with_reports(reports_scope)
end end
def latest_test_report_builds
latest_report_builds(Ci::JobArtifact.test_reports).preload(:project)
end
def builds_with_coverage def builds_with_coverage
builds.latest.with_coverage builds.latest.with_coverage
end end
def builds_with_failed_tests(limit: nil)
latest_test_report_builds.failed.limit(limit)
end
def has_reports?(reports_scope) def has_reports?(reports_scope)
complete? && latest_report_builds(reports_scope).exists? complete? && latest_report_builds(reports_scope).exists?
end end
...@@ -962,7 +976,7 @@ module Ci ...@@ -962,7 +976,7 @@ module Ci
def test_reports def test_reports
Gitlab::Ci::Reports::TestReports.new.tap do |test_reports| Gitlab::Ci::Reports::TestReports.new.tap do |test_reports|
latest_report_builds(Ci::JobArtifact.test_reports).preload(:project).find_each do |build| latest_test_report_builds.find_each do |build|
build.collect_test_reports!(test_reports) build.collect_test_reports!(test_reports)
end end
end end
......
# frozen_string_literal: true
module Ci
class TestCasesService
MAX_TRACKABLE_FAILURES = 200
def execute(build)
return unless Feature.enabled?(:test_failure_history, build.project)
return unless build.has_test_reports?
return unless build.project.default_branch_or_master == build.ref
test_suite = generate_test_suite_report(build)
track_failures(build, test_suite)
end
private
def generate_test_suite_report(build)
build.collect_test_reports!(Gitlab::Ci::Reports::TestReports.new)
end
def track_failures(build, test_suite)
return if test_suite.failed_count > MAX_TRACKABLE_FAILURES
test_suite.failed.keys.each_slice(100) do |keys|
Ci::TestCase.transaction do
test_cases = Ci::TestCase.find_or_create_by_batch(build.project, keys)
Ci::TestCaseFailure.insert_all(test_case_failures(test_cases, build))
end
end
end
def test_case_failures(test_cases, build)
test_cases.map do |test_case|
{
test_case_id: test_case.id,
build_id: build.id,
failed_at: build.finished_at
}
end
end
end
end
# frozen_string_literal: true
module Ci
class TestFailureHistoryService
class Async
attr_reader :service
def initialize(service)
@service = service
end
def perform_if_needed
TestFailureHistoryWorker.perform_async(service.pipeline.id) if service.should_track_failures?
end
end
MAX_TRACKABLE_FAILURES = 200
attr_reader :pipeline
delegate :project, to: :pipeline
def initialize(pipeline)
@pipeline = pipeline
end
def execute
return unless should_track_failures?
track_failures
end
def should_track_failures?
return false unless Feature.enabled?(:test_failure_history, project)
return false unless project.default_branch_or_master == pipeline.ref
# We fetch for up to MAX_TRACKABLE_FAILURES + 1 builds. So if ever we get
# 201 total number of builds with the assumption that each job has at least
# 1 failed test case, then we have at least 201 failed test cases which exceeds
# the MAX_TRACKABLE_FAILURES of 200. If this is the case, let's early exit so we
# don't have to parse each JUnit report of each of the 201 builds.
failed_builds.length <= MAX_TRACKABLE_FAILURES
end
def async
Async.new(self)
end
private
def failed_builds
@failed_builds ||= pipeline.builds_with_failed_tests(limit: MAX_TRACKABLE_FAILURES + 1)
end
def track_failures
failed_test_cases = gather_failed_test_cases(failed_builds)
return if failed_test_cases.size > MAX_TRACKABLE_FAILURES
failed_test_cases.keys.each_slice(100) do |key_hashes|
Ci::TestCase.transaction do
ci_test_cases = Ci::TestCase.find_or_create_by_batch(project, key_hashes)
failures = test_case_failures(ci_test_cases, failed_test_cases)
Ci::TestCaseFailure.insert_all(failures)
end
end
end
def gather_failed_test_cases(failed_builds)
failed_builds.each_with_object({}) do |build, failed_test_cases|
test_suite = generate_test_suite!(build)
test_suite.failed.keys.each do |key|
failed_test_cases[key] = build
end
end
end
def generate_test_suite!(build)
# Returns an instance of Gitlab::Ci::Reports::TestSuite
build.collect_test_reports!(Gitlab::Ci::Reports::TestReports.new)
end
def test_case_failures(ci_test_cases, failed_test_cases)
ci_test_cases.map do |test_case|
build = failed_test_cases[test_case.key_hash]
{
test_case_id: test_case.id,
build_id: build.id,
failed_at: build.finished_at
}
end
end
end
end
...@@ -1101,6 +1101,14 @@ ...@@ -1101,6 +1101,14 @@
:weight: 1 :weight: 1
:idempotent: true :idempotent: true
:tags: [] :tags: []
- :name: pipeline_background:ci_test_failure_history
:feature_category: :continuous_integration
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: pipeline_cache:expire_job_cache - :name: pipeline_cache:expire_job_cache
:feature_category: :continuous_integration :feature_category: :continuous_integration
:has_external_dependencies: :has_external_dependencies:
......
...@@ -33,11 +33,6 @@ class BuildFinishedWorker # rubocop:disable Scalability/IdempotentWorker ...@@ -33,11 +33,6 @@ class BuildFinishedWorker # rubocop:disable Scalability/IdempotentWorker
BuildCoverageWorker.new.perform(build.id) BuildCoverageWorker.new.perform(build.id)
Ci::BuildReportResultWorker.new.perform(build.id) Ci::BuildReportResultWorker.new.perform(build.id)
# TODO: As per https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/194, it may be
# best to avoid creating more workers that we have no intention of calling async.
# Change the previous worker calls on top to also just call the service directly.
Ci::TestCasesService.new.execute(build)
# We execute these async as these are independent operations. # We execute these async as these are independent operations.
BuildHooksWorker.perform_async(build.id) BuildHooksWorker.perform_async(build.id)
ExpirePipelineCacheWorker.perform_async(build.pipeline_id) if build.pipeline.cacheable? ExpirePipelineCacheWorker.perform_async(build.pipeline_id) if build.pipeline.cacheable?
......
# frozen_string_literal: true
module Ci
class TestFailureHistoryWorker
include ApplicationWorker
include PipelineBackgroundQueue
idempotent!
def perform(pipeline_id)
Ci::Pipeline.find_by_id(pipeline_id).try do |pipeline|
Ci::TestFailureHistoryService.new(pipeline).execute
end
end
end
end
---
title: Track test failures on pipeline completion
merge_request: 48695
author:
type: changed
...@@ -126,10 +126,10 @@ FactoryBot.define do ...@@ -126,10 +126,10 @@ FactoryBot.define do
end end
trait :with_test_reports_with_three_failures do trait :with_test_reports_with_three_failures do
status { :success } status { :failed }
after(:build) do |pipeline, _evaluator| after(:build) do |pipeline, _evaluator|
pipeline.builds << build(:ci_build, :test_reports_with_three_failures, pipeline: pipeline, project: pipeline.project) pipeline.builds << build(:ci_build, :failed, :test_reports_with_three_failures, pipeline: pipeline, project: pipeline.project)
end end
end end
......
...@@ -4030,4 +4030,70 @@ RSpec.describe Ci::Pipeline, :mailer, factory_default: :keep do ...@@ -4030,4 +4030,70 @@ RSpec.describe Ci::Pipeline, :mailer, factory_default: :keep do
bridge bridge
end end
end end
describe 'test failure history processing' do
it 'performs the service asynchronously when the pipeline is completed' do
service = double
expect(Ci::TestFailureHistoryService).to receive(:new).with(pipeline).and_return(service)
expect(service).to receive_message_chain(:async, :perform_if_needed)
pipeline.succeed!
end
end
describe '#latest_test_report_builds' do
it 'returns pipeline builds with test report artifacts' do
test_build = create(:ci_build, :test_reports, pipeline: pipeline, project: project)
create(:ci_build, :artifacts, pipeline: pipeline, project: project)
expect(pipeline.latest_test_report_builds).to contain_exactly(test_build)
end
it 'preloads project on each build to avoid N+1 queries' do
create(:ci_build, :test_reports, pipeline: pipeline, project: project)
control_count = ActiveRecord::QueryRecorder.new do
pipeline.latest_test_report_builds.map(&:project).map(&:full_path)
end
multi_build_pipeline = create(:ci_empty_pipeline, status: :created, project: project)
create(:ci_build, :test_reports, pipeline: multi_build_pipeline, project: project)
create(:ci_build, :test_reports, pipeline: multi_build_pipeline, project: project)
expect { multi_build_pipeline.latest_test_report_builds.map(&:project).map(&:full_path) }
.not_to exceed_query_limit(control_count)
end
end
describe '#builds_with_failed_tests' do
it 'returns pipeline builds with test report artifacts' do
failed_build = create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
create(:ci_build, :success, :test_reports, pipeline: pipeline, project: project)
expect(pipeline.builds_with_failed_tests).to contain_exactly(failed_build)
end
it 'supports limiting the number of builds to fetch' do
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
expect(pipeline.builds_with_failed_tests(limit: 1).count).to eq(1)
end
it 'preloads project on each build to avoid N+1 queries' do
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
control_count = ActiveRecord::QueryRecorder.new do
pipeline.builds_with_failed_tests.map(&:project).map(&:full_path)
end
multi_build_pipeline = create(:ci_empty_pipeline, status: :created, project: project)
create(:ci_build, :failed, :test_reports, pipeline: multi_build_pipeline, project: project)
create(:ci_build, :failed, :test_reports, pipeline: multi_build_pipeline, project: project)
expect { multi_build_pipeline.builds_with_failed_tests.map(&:project).map(&:full_path) }
.not_to exceed_query_limit(control_count)
end
end
end end
...@@ -67,7 +67,7 @@ RSpec.describe Ci::CompareTestReportsService do ...@@ -67,7 +67,7 @@ RSpec.describe Ci::CompareTestReportsService do
# The JUnit fixture for the given build has 3 failures. # The JUnit fixture for the given build has 3 failures.
# This service will create 1 test case failure record for each. # This service will create 1 test case failure record for each.
Ci::TestCasesService.new.execute(build) Ci::TestFailureHistoryService.new(head_pipeline).execute
end end
it 'loads recent failures on limited test cases to avoid building up a huge DB query', :aggregate_failures do it 'loads recent failures on limited test cases to avoid building up a huge DB query', :aggregate_failures do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Ci::TestCasesService, :aggregate_failures do
describe '#execute' do
subject(:execute_service) { described_class.new.execute(build) }
context 'when build has test reports' do
let(:build) { create(:ci_build, :success, :test_reports) } # The test report has 2 test case failures
it 'creates test case failures records' do
execute_service
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
context 'when feature flag for test failure history is disabled' do
before do
stub_feature_flags(test_failure_history: false)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when build is not for the default branch' do
before do
build.update_column(:ref, 'new-feature')
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when test failure data have already been persisted with the same exact attributes' do
before do
execute_service
end
it 'does not fail but does not persist new data' do
expect { described_class.new.execute(build) }.not_to raise_error
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
end
context 'when test failure data have duplicates within the same payload (happens when the JUnit report has duplicate test case names but have different failures)' do
let(:build) { create(:ci_build, :success, :test_reports_with_duplicate_failed_test_names) } # The test report has 2 test case failures but with the same test case keys
it 'does not fail but does not persist duplicate data' do
expect { described_class.new.execute(build) }.not_to raise_error
expect(Ci::TestCase.count).to eq(1)
expect(Ci::TestCaseFailure.count).to eq(1)
end
end
context 'when number of failed test cases exceed the limit' do
before do
stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 1)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
end
context 'when build has no test reports' do
let(:build) { create(:ci_build, :running) }
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Ci::TestFailureHistoryService, :aggregate_failures do
describe '#execute' do
let(:project) { create(:project) }
let(:pipeline) { create(:ci_empty_pipeline, status: :created, project: project) }
subject(:execute_service) { described_class.new(pipeline).execute }
context 'when pipeline has failed builds with test reports' do
before do
# The test report has 2 test case failures
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
end
it 'creates test case failures records' do
execute_service
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
context 'when feature flag for test failure history is disabled' do
before do
stub_feature_flags(test_failure_history: false)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when pipeline is not for the default branch' do
before do
pipeline.update_column(:ref, 'new-feature')
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when test failure data have already been persisted with the same exact attributes' do
before do
execute_service
end
it 'does not fail but does not persist new data' do
expect { described_class.new(pipeline).execute }.not_to raise_error
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
end
context 'when number of failed test cases exceed the limit' do
before do
stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 1)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when number of failed test cases across multiple builds exceed the limit' do
before do
stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 2)
# This other test report has 1 unique test case failure which brings us to 3 total failures across all builds
# thus exceeding the limit of 2 for MAX_TRACKABLE_FAILURES
create(:ci_build, :failed, :test_reports_with_duplicate_failed_test_names, pipeline: pipeline, project: project)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
end
context 'when test failure data have duplicates within the same payload (happens when the JUnit report has duplicate test case names but have different failures)' do
before do
# The test report has 2 test case failures but with the same test case keys
create(:ci_build, :failed, :test_reports_with_duplicate_failed_test_names, pipeline: pipeline, project: project)
end
it 'does not fail but does not persist duplicate data' do
expect { execute_service }.not_to raise_error
expect(Ci::TestCase.count).to eq(1)
expect(Ci::TestCaseFailure.count).to eq(1)
end
end
context 'when pipeline has no failed builds with test reports' do
before do
create(:ci_build, :test_reports, pipeline: pipeline, project: project)
create(:ci_build, :failed, pipeline: pipeline, project: project)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
end
describe '#should_track_failures?' do
let(:project) { create(:project, :repository) }
let(:pipeline) { create(:ci_empty_pipeline, status: :created, project: project, ref: project.default_branch) }
subject { described_class.new(pipeline).should_track_failures? }
before do
create(:ci_build, :test_reports, :failed, pipeline: pipeline, project: project)
create(:ci_build, :test_reports, :failed, pipeline: pipeline, project: project)
end
context 'when feature flag is enabled and pipeline ref is the default branch' do
it { is_expected.to eq(true) }
end
context 'when feature flag is disabled' do
before do
stub_feature_flags(test_failure_history: false)
end
it { is_expected.to eq(false) }
end
context 'when pipeline is not equal to the project default branch' do
before do
pipeline.update_column(:ref, 'some-other-branch')
end
it { is_expected.to eq(false) }
end
context 'when total number of builds with failed tests exceeds the max number of trackable failures' do
before do
stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 1)
end
it { is_expected.to eq(false) }
end
end
describe '#async' do
let(:pipeline) { double(id: 1) }
let(:service) { described_class.new(pipeline) }
context 'when service should track failures' do
before do
allow(service).to receive(:should_track_failures?).and_return(true)
end
it 'enqueues the worker when #perform_if_needed is called' do
expect(Ci::TestFailureHistoryWorker).to receive(:perform_async).with(pipeline.id)
service.async.perform_if_needed
end
end
context 'when service should not track failures' do
before do
allow(service).to receive(:should_track_failures?).and_return(false)
end
it 'does not enqueue the worker when #perform_if_needed is called' do
expect(Ci::TestFailureHistoryWorker).not_to receive(:perform_async)
service.async.perform_if_needed
end
end
end
end
...@@ -26,9 +26,6 @@ RSpec.describe BuildFinishedWorker do ...@@ -26,9 +26,6 @@ RSpec.describe BuildFinishedWorker do
expect_next_instance_of(Ci::BuildReportResultWorker) do |instance| expect_next_instance_of(Ci::BuildReportResultWorker) do |instance|
expect(instance).to receive(:perform) expect(instance).to receive(:perform)
end end
expect_next_instance_of(Ci::TestCasesService) do |instance|
expect(instance).to receive(:execute)
end
expect(BuildHooksWorker).to receive(:perform_async) expect(BuildHooksWorker).to receive(:perform_async)
expect(ExpirePipelineCacheWorker).to receive(:perform_async) expect(ExpirePipelineCacheWorker).to receive(:perform_async)
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Ci::TestFailureHistoryWorker do
describe '#perform' do
subject(:perform) { described_class.new.perform(pipeline_id) }
context 'when pipeline exists' do
let(:pipeline) { create(:ci_pipeline) }
let(:pipeline_id) { pipeline.id }
it 'executes test failure history service' do
expect_next_instance_of(::Ci::TestFailureHistoryService) do |service|
expect(service).to receive(:execute)
end
perform
end
end
context 'when pipeline does not exist' do
let(:pipeline_id) { non_existing_record_id }
it 'does not execute test failure history service' do
expect(Ci::TestFailureHistoryService).not_to receive(:new)
perform
end
end
end
include_examples 'an idempotent worker' do
let(:pipeline) { create(:ci_pipeline) }
let(:job_args) { [pipeline.id] }
it 'tracks test failures' do
# The test report has 2 test case failures
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: pipeline.project)
subject
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment