Merge branch 'eb-track-failures-on-pipeline-complete' into 'master'

Track test failures on pipeline completion See merge request gitlab-org/gitlab!48695

Merge branch 'eb-track-failures-on-pipeline-complete' into 'master'
Track test failures on pipeline completion See merge request gitlab-org/gitlab!48695
1482b1f6 · Bob Van Landuyt · e75161fb · 22ce4dff · 1482b1f6 · e75161fb
Commit 1482b1f6 authored Dec 10, 2020 by Bob Van Landuyt
14 changed files
--- a/app/models/ci/pipeline.rb
+++ b/app/models/ci/pipeline.rb
@@ -269,6 +269,12 @@ module Ci
        end
      end

+      after_transition any => ::Ci::Pipeline.completed_statuses do |pipeline|
+        pipeline.run_after_commit do
+          ::Ci::TestFailureHistoryService.new(pipeline).async.perform_if_needed # rubocop: disable CodeReuse/ServiceClass
+        end
+      end
+
      after_transition any => [:success, :failed] do |pipeline|
        ref_status = pipeline.ci_ref&.update_status_by!(pipeline)

@@ -938,10 +944,18 @@ module Ci
      builds.latest.with_reports(reports_scope)
    end

+    def latest_test_report_builds
+      latest_report_builds(Ci::JobArtifact.test_reports).preload(:project)
+    end
+
    def builds_with_coverage
      builds.latest.with_coverage
    end

+    def builds_with_failed_tests(limit: nil)
+      latest_test_report_builds.failed.limit(limit)
+    end
+
    def has_reports?(reports_scope)
      complete? && latest_report_builds(reports_scope).exists?
    end
@@ -962,7 +976,7 @@ module Ci

    def test_reports
      Gitlab::Ci::Reports::TestReports.new.tap do |test_reports|
-        latest_report_builds(Ci::JobArtifact.test_reports).preload(:project).find_each do |build|
+        latest_test_report_builds.find_each do |build|
          build.collect_test_reports!(test_reports)
        end
      end

--- a/app/services/ci/test_cases_service.rb
+++ b/app/services/ci/test_cases_service.rb
-# frozen_string_literal: true
-
-module Ci
-  class TestCasesService
-    MAX_TRACKABLE_FAILURES = 200
-
-    def execute(build)
-      return unless Feature.enabled?(:test_failure_history, build.project)
-      return unless build.has_test_reports?
-      return unless build.project.default_branch_or_master == build.ref
-
-      test_suite = generate_test_suite_report(build)
-
-      track_failures(build, test_suite)
-    end
-
-    private
-
-    def generate_test_suite_report(build)
-      build.collect_test_reports!(Gitlab::Ci::Reports::TestReports.new)
-    end
-
-    def track_failures(build, test_suite)
-      return if test_suite.failed_count > MAX_TRACKABLE_FAILURES
-
-      test_suite.failed.keys.each_slice(100) do |keys|
-        Ci::TestCase.transaction do
-          test_cases = Ci::TestCase.find_or_create_by_batch(build.project, keys)
-          Ci::TestCaseFailure.insert_all(test_case_failures(test_cases, build))
-        end
-      end
-    end
-
-    def test_case_failures(test_cases, build)
-      test_cases.map do |test_case|
-        {
-          test_case_id: test_case.id,
-          build_id: build.id,
-          failed_at: build.finished_at
-        }
-      end
-    end
-  end
-end
--- a/app/services/ci/test_failure_history_service.rb
+++ b/app/services/ci/test_failure_history_service.rb
+# frozen_string_literal: true
+
+module Ci
+  class TestFailureHistoryService
+    class Async
+      attr_reader :service
+
+      def initialize(service)
+        @service = service
+      end
+
+      def perform_if_needed
+        TestFailureHistoryWorker.perform_async(service.pipeline.id) if service.should_track_failures?
+      end
+    end
+
+    MAX_TRACKABLE_FAILURES = 200
+
+    attr_reader :pipeline
+    delegate :project, to: :pipeline
+
+    def initialize(pipeline)
+      @pipeline = pipeline
+    end
+
+    def execute
+      return unless should_track_failures?
+
+      track_failures
+    end
+
+    def should_track_failures?
+      return false unless Feature.enabled?(:test_failure_history, project)
+      return false unless project.default_branch_or_master == pipeline.ref
+
+      # We fetch for up to MAX_TRACKABLE_FAILURES + 1 builds. So if ever we get
+      # 201 total number of builds with the assumption that each job has at least
+      # 1 failed test case, then we have at least 201 failed test cases which exceeds
+      # the MAX_TRACKABLE_FAILURES of 200. If this is the case, let's early exit so we
+      # don't have to parse each JUnit report of each of the 201 builds.
+      failed_builds.length <= MAX_TRACKABLE_FAILURES
+    end
+
+    def async
+      Async.new(self)
+    end
+
+    private
+
+    def failed_builds
+      @failed_builds ||= pipeline.builds_with_failed_tests(limit: MAX_TRACKABLE_FAILURES + 1)
+    end
+
+    def track_failures
+      failed_test_cases = gather_failed_test_cases(failed_builds)
+
+      return if failed_test_cases.size > MAX_TRACKABLE_FAILURES
+
+      failed_test_cases.keys.each_slice(100) do |key_hashes|
+        Ci::TestCase.transaction do
+          ci_test_cases = Ci::TestCase.find_or_create_by_batch(project, key_hashes)
+          failures = test_case_failures(ci_test_cases, failed_test_cases)
+
+          Ci::TestCaseFailure.insert_all(failures)
+        end
+      end
+    end
+
+    def gather_failed_test_cases(failed_builds)
+      failed_builds.each_with_object({}) do |build, failed_test_cases|
+        test_suite = generate_test_suite!(build)
+        test_suite.failed.keys.each do |key|
+          failed_test_cases[key] = build
+        end
+      end
+    end
+
+    def generate_test_suite!(build)
+      # Returns an instance of Gitlab::Ci::Reports::TestSuite
+      build.collect_test_reports!(Gitlab::Ci::Reports::TestReports.new)
+    end
+
+    def test_case_failures(ci_test_cases, failed_test_cases)
+      ci_test_cases.map do |test_case|
+        build = failed_test_cases[test_case.key_hash]
+
+        {
+          test_case_id: test_case.id,
+          build_id: build.id,
+          failed_at: build.finished_at
+        }
+      end
+    end
+  end
+end
--- a/app/workers/all_queues.yml
+++ b/app/workers/all_queues.yml
@@ -1101,6 +1101,14 @@
  :weight: 1
  :idempotent: true
  :tags: []
+- :name: pipeline_background:ci_test_failure_history
+  :feature_category: :continuous_integration
+  :has_external_dependencies:
+  :urgency: :low
+  :resource_boundary: :unknown
+  :weight: 1
+  :idempotent: true
+  :tags: []
 - :name: pipeline_cache:expire_job_cache
  :feature_category: :continuous_integration
  :has_external_dependencies:

--- a/app/workers/build_finished_worker.rb
+++ b/app/workers/build_finished_worker.rb
@@ -33,11 +33,6 @@ class BuildFinishedWorker # rubocop:disable Scalability/IdempotentWorker
    BuildCoverageWorker.new.perform(build.id)
    Ci::BuildReportResultWorker.new.perform(build.id)

-    # TODO: As per https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/194, it may be
-    # best to avoid creating more workers that we have no intention of calling async.
-    # Change the previous worker calls on top to also just call the service directly.
-    Ci::TestCasesService.new.execute(build)
-
    # We execute these async as these are independent operations.
    BuildHooksWorker.perform_async(build.id)
    ExpirePipelineCacheWorker.perform_async(build.pipeline_id) if build.pipeline.cacheable?

--- a/app/workers/ci/test_failure_history_worker.rb
+++ b/app/workers/ci/test_failure_history_worker.rb
+# frozen_string_literal: true
+
+module Ci
+  class TestFailureHistoryWorker
+    include ApplicationWorker
+    include PipelineBackgroundQueue
+
+    idempotent!
+
+    def perform(pipeline_id)
+      Ci::Pipeline.find_by_id(pipeline_id).try do |pipeline|
+        Ci::TestFailureHistoryService.new(pipeline).execute
+      end
+    end
+  end
+end
--- a/changelogs/unreleased/eb-track-failures-on-pipeline-complete.yml
+++ b/changelogs/unreleased/eb-track-failures-on-pipeline-complete.yml
+---
+title: Track test failures on pipeline completion
+merge_request: 48695
+author:
+type: changed
--- a/spec/factories/ci/pipelines.rb
+++ b/spec/factories/ci/pipelines.rb
@@ -126,10 +126,10 @@ FactoryBot.define do
      end

      trait :with_test_reports_with_three_failures do
-        status { :success }
+        status { :failed }

        after(:build) do |pipeline, _evaluator|
-          pipeline.builds << build(:ci_build, :test_reports_with_three_failures, pipeline: pipeline, project: pipeline.project)
+          pipeline.builds << build(:ci_build, :failed, :test_reports_with_three_failures, pipeline: pipeline, project: pipeline.project)
        end
      end


--- a/spec/models/ci/pipeline_spec.rb
+++ b/spec/models/ci/pipeline_spec.rb
@@ -4030,4 +4030,70 @@ RSpec.describe Ci::Pipeline, :mailer, factory_default: :keep do
      bridge
    end
  end
+
+  describe 'test failure history processing' do
+    it 'performs the service asynchronously when the pipeline is completed' do
+      service = double
+
+      expect(Ci::TestFailureHistoryService).to receive(:new).with(pipeline).and_return(service)
+      expect(service).to receive_message_chain(:async, :perform_if_needed)
+
+      pipeline.succeed!
+    end
+  end
+
+  describe '#latest_test_report_builds' do
+    it 'returns pipeline builds with test report artifacts' do
+      test_build = create(:ci_build, :test_reports, pipeline: pipeline, project: project)
+      create(:ci_build, :artifacts, pipeline: pipeline, project: project)
+
+      expect(pipeline.latest_test_report_builds).to contain_exactly(test_build)
+    end
+
+    it 'preloads project on each build to avoid N+1 queries' do
+      create(:ci_build, :test_reports, pipeline: pipeline, project: project)
+
+      control_count = ActiveRecord::QueryRecorder.new do
+        pipeline.latest_test_report_builds.map(&:project).map(&:full_path)
+      end
+
+      multi_build_pipeline = create(:ci_empty_pipeline, status: :created, project: project)
+      create(:ci_build, :test_reports, pipeline: multi_build_pipeline, project: project)
+      create(:ci_build, :test_reports, pipeline: multi_build_pipeline, project: project)
+
+      expect { multi_build_pipeline.latest_test_report_builds.map(&:project).map(&:full_path) }
+        .not_to exceed_query_limit(control_count)
+    end
+  end
+
+  describe '#builds_with_failed_tests' do
+    it 'returns pipeline builds with test report artifacts' do
+      failed_build = create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
+      create(:ci_build, :success, :test_reports, pipeline: pipeline, project: project)
+
+      expect(pipeline.builds_with_failed_tests).to contain_exactly(failed_build)
+    end
+
+    it 'supports limiting the number of builds to fetch' do
+      create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
+      create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
+
+      expect(pipeline.builds_with_failed_tests(limit: 1).count).to eq(1)
+    end
+
+    it 'preloads project on each build to avoid N+1 queries' do
+      create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
+
+      control_count = ActiveRecord::QueryRecorder.new do
+        pipeline.builds_with_failed_tests.map(&:project).map(&:full_path)
+      end
+
+      multi_build_pipeline = create(:ci_empty_pipeline, status: :created, project: project)
+      create(:ci_build, :failed, :test_reports, pipeline: multi_build_pipeline, project: project)
+      create(:ci_build, :failed, :test_reports, pipeline: multi_build_pipeline, project: project)
+
+      expect { multi_build_pipeline.builds_with_failed_tests.map(&:project).map(&:full_path) }
+        .not_to exceed_query_limit(control_count)
+    end
+  end
 end
--- a/spec/services/ci/compare_test_reports_service_spec.rb
+++ b/spec/services/ci/compare_test_reports_service_spec.rb
@@ -67,7 +67,7 @@ RSpec.describe Ci::CompareTestReportsService do

        # The JUnit fixture for the given build has 3 failures.
        # This service will create 1 test case failure record for each.
-        Ci::TestCasesService.new.execute(build)
+        Ci::TestFailureHistoryService.new(head_pipeline).execute
      end

      it 'loads recent failures on limited test cases to avoid building up a huge DB query', :aggregate_failures do

--- a/spec/services/ci/test_cases_service_spec.rb
+++ b/spec/services/ci/test_cases_service_spec.rb
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-RSpec.describe Ci::TestCasesService, :aggregate_failures do
-  describe '#execute' do
-    subject(:execute_service) { described_class.new.execute(build) }
-
-    context 'when build has test reports' do
-      let(:build) { create(:ci_build, :success, :test_reports) } # The test report has 2 test case failures
-
-      it 'creates test case failures records' do
-        execute_service
-
-        expect(Ci::TestCase.count).to eq(2)
-        expect(Ci::TestCaseFailure.count).to eq(2)
-      end
-
-      context 'when feature flag for test failure history is disabled' do
-        before do
-          stub_feature_flags(test_failure_history: false)
-        end
-
-        it 'does not persist data' do
-          execute_service
-
-          expect(Ci::TestCase.count).to eq(0)
-          expect(Ci::TestCaseFailure.count).to eq(0)
-        end
-      end
-
-      context 'when build is not for the default branch' do
-        before do
-          build.update_column(:ref, 'new-feature')
-        end
-
-        it 'does not persist data' do
-          execute_service
-
-          expect(Ci::TestCase.count).to eq(0)
-          expect(Ci::TestCaseFailure.count).to eq(0)
-        end
-      end
-
-      context 'when test failure data have already been persisted with the same exact attributes' do
-        before do
-          execute_service
-        end
-
-        it 'does not fail but does not persist new data' do
-          expect { described_class.new.execute(build) }.not_to raise_error
-
-          expect(Ci::TestCase.count).to eq(2)
-          expect(Ci::TestCaseFailure.count).to eq(2)
-        end
-      end
-
-      context 'when test failure data have duplicates within the same payload (happens when the JUnit report has duplicate test case names but have different failures)' do
-        let(:build) { create(:ci_build, :success, :test_reports_with_duplicate_failed_test_names) } # The test report has 2 test case failures but with the same test case keys
-
-        it 'does not fail but does not persist duplicate data' do
-          expect { described_class.new.execute(build) }.not_to raise_error
-
-          expect(Ci::TestCase.count).to eq(1)
-          expect(Ci::TestCaseFailure.count).to eq(1)
-        end
-      end
-
-      context 'when number of failed test cases exceed the limit' do
-        before do
-          stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 1)
-        end
-
-        it 'does not persist data' do
-          execute_service
-
-          expect(Ci::TestCase.count).to eq(0)
-          expect(Ci::TestCaseFailure.count).to eq(0)
-        end
-      end
-    end
-
-    context 'when build has no test reports' do
-      let(:build) { create(:ci_build, :running) }
-
-      it 'does not persist data' do
-        execute_service
-
-        expect(Ci::TestCase.count).to eq(0)
-        expect(Ci::TestCaseFailure.count).to eq(0)
-      end
-    end
-  end
-end
--- a/spec/services/ci/test_failure_history_service_spec.rb
+++ b/spec/services/ci/test_failure_history_service_spec.rb
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Ci::TestFailureHistoryService, :aggregate_failures do
+  describe '#execute' do
+    let(:project) { create(:project) }
+    let(:pipeline) { create(:ci_empty_pipeline, status: :created, project: project) }
+
+    subject(:execute_service) { described_class.new(pipeline).execute }
+
+    context 'when pipeline has failed builds with test reports' do
+      before do
+        # The test report has 2 test case failures
+        create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
+      end
+
+      it 'creates test case failures records' do
+        execute_service
+
+        expect(Ci::TestCase.count).to eq(2)
+        expect(Ci::TestCaseFailure.count).to eq(2)
+      end
+
+      context 'when feature flag for test failure history is disabled' do
+        before do
+          stub_feature_flags(test_failure_history: false)
+        end
+
+        it 'does not persist data' do
+          execute_service
+
+          expect(Ci::TestCase.count).to eq(0)
+          expect(Ci::TestCaseFailure.count).to eq(0)
+        end
+      end
+
+      context 'when pipeline is not for the default branch' do
+        before do
+          pipeline.update_column(:ref, 'new-feature')
+        end
+
+        it 'does not persist data' do
+          execute_service
+
+          expect(Ci::TestCase.count).to eq(0)
+          expect(Ci::TestCaseFailure.count).to eq(0)
+        end
+      end
+
+      context 'when test failure data have already been persisted with the same exact attributes' do
+        before do
+          execute_service
+        end
+
+        it 'does not fail but does not persist new data' do
+          expect { described_class.new(pipeline).execute }.not_to raise_error
+
+          expect(Ci::TestCase.count).to eq(2)
+          expect(Ci::TestCaseFailure.count).to eq(2)
+        end
+      end
+
+      context 'when number of failed test cases exceed the limit' do
+        before do
+          stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 1)
+        end
+
+        it 'does not persist data' do
+          execute_service
+
+          expect(Ci::TestCase.count).to eq(0)
+          expect(Ci::TestCaseFailure.count).to eq(0)
+        end
+      end
+
+      context 'when number of failed test cases across multiple builds exceed the limit' do
+        before do
+          stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 2)
+
+          # This other test report has 1 unique test case failure which brings us to 3 total failures across all builds
+          # thus exceeding the limit of 2 for MAX_TRACKABLE_FAILURES
+          create(:ci_build, :failed, :test_reports_with_duplicate_failed_test_names, pipeline: pipeline, project: project)
+        end
+
+        it 'does not persist data' do
+          execute_service
+
+          expect(Ci::TestCase.count).to eq(0)
+          expect(Ci::TestCaseFailure.count).to eq(0)
+        end
+      end
+    end
+
+    context 'when test failure data have duplicates within the same payload (happens when the JUnit report has duplicate test case names but have different failures)' do
+      before do
+        # The test report has 2 test case failures but with the same test case keys
+        create(:ci_build, :failed, :test_reports_with_duplicate_failed_test_names, pipeline: pipeline, project: project)
+      end
+
+      it 'does not fail but does not persist duplicate data' do
+        expect { execute_service }.not_to raise_error
+
+        expect(Ci::TestCase.count).to eq(1)
+        expect(Ci::TestCaseFailure.count).to eq(1)
+      end
+    end
+
+    context 'when pipeline has no failed builds with test reports' do
+      before do
+        create(:ci_build, :test_reports, pipeline: pipeline, project: project)
+        create(:ci_build, :failed, pipeline: pipeline, project: project)
+      end
+
+      it 'does not persist data' do
+        execute_service
+
+        expect(Ci::TestCase.count).to eq(0)
+        expect(Ci::TestCaseFailure.count).to eq(0)
+      end
+    end
+  end
+
+  describe '#should_track_failures?' do
+    let(:project) { create(:project, :repository) }
+    let(:pipeline) { create(:ci_empty_pipeline, status: :created, project: project, ref: project.default_branch) }
+
+    subject { described_class.new(pipeline).should_track_failures? }
+
+    before do
+      create(:ci_build, :test_reports, :failed, pipeline: pipeline, project: project)
+      create(:ci_build, :test_reports, :failed, pipeline: pipeline, project: project)
+    end
+
+    context 'when feature flag is enabled and pipeline ref is the default branch' do
+      it { is_expected.to eq(true) }
+    end
+
+    context 'when feature flag is disabled' do
+      before do
+        stub_feature_flags(test_failure_history: false)
+      end
+
+      it { is_expected.to eq(false) }
+    end
+
+    context 'when pipeline is not equal to the project default branch' do
+      before do
+        pipeline.update_column(:ref, 'some-other-branch')
+      end
+
+      it { is_expected.to eq(false) }
+    end
+
+    context 'when total number of builds with failed tests exceeds the max number of trackable failures' do
+      before do
+        stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 1)
+      end
+
+      it { is_expected.to eq(false) }
+    end
+  end
+
+  describe '#async' do
+    let(:pipeline) { double(id: 1) }
+    let(:service) { described_class.new(pipeline) }
+
+    context 'when service should track failures' do
+      before do
+        allow(service).to receive(:should_track_failures?).and_return(true)
+      end
+
+      it 'enqueues the worker when #perform_if_needed is called' do
+        expect(Ci::TestFailureHistoryWorker).to receive(:perform_async).with(pipeline.id)
+
+        service.async.perform_if_needed
+      end
+    end
+
+    context 'when service should not track failures' do
+      before do
+        allow(service).to receive(:should_track_failures?).and_return(false)
+      end
+
+      it 'does not enqueue the worker when #perform_if_needed is called' do
+        expect(Ci::TestFailureHistoryWorker).not_to receive(:perform_async)
+
+        service.async.perform_if_needed
+      end
+    end
+  end
+end
--- a/spec/workers/build_finished_worker_spec.rb
+++ b/spec/workers/build_finished_worker_spec.rb
@@ -26,9 +26,6 @@ RSpec.describe BuildFinishedWorker do
        expect_next_instance_of(Ci::BuildReportResultWorker) do |instance|
          expect(instance).to receive(:perform)
        end
-        expect_next_instance_of(Ci::TestCasesService) do |instance|
-          expect(instance).to receive(:execute)
-        end

        expect(BuildHooksWorker).to receive(:perform_async)
        expect(ExpirePipelineCacheWorker).to receive(:perform_async)

--- a/spec/workers/ci/test_failure_history_worker_spec.rb
+++ b/spec/workers/ci/test_failure_history_worker_spec.rb
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe ::Ci::TestFailureHistoryWorker do
+  describe '#perform' do
+    subject(:perform) { described_class.new.perform(pipeline_id) }
+
+    context 'when pipeline exists' do
+      let(:pipeline) { create(:ci_pipeline) }
+      let(:pipeline_id) { pipeline.id }
+
+      it 'executes test failure history service' do
+        expect_next_instance_of(::Ci::TestFailureHistoryService) do |service|
+          expect(service).to receive(:execute)
+        end
+
+        perform
+      end
+    end
+
+    context 'when pipeline does not exist' do
+      let(:pipeline_id) { non_existing_record_id }
+
+      it 'does not execute test failure history service' do
+        expect(Ci::TestFailureHistoryService).not_to receive(:new)
+
+        perform
+      end
+    end
+  end
+
+  include_examples 'an idempotent worker' do
+    let(:pipeline) { create(:ci_pipeline) }
+    let(:job_args) { [pipeline.id] }
+
+    it 'tracks test failures' do
+      # The test report has 2 test case failures
+      create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: pipeline.project)
+
+      subject
+
+      expect(Ci::TestCase.count).to eq(2)
+      expect(Ci::TestCaseFailure.count).to eq(2)
+    end
+  end
+end