Commit 12476cb9 authored by Markus Koller's avatar Markus Koller

Merge branch '296520_improve_merge_reports_service' into 'master'

Improve MergeReportsService

See merge request gitlab-org/gitlab!59682
parents 2522d750 87508599
...@@ -2,112 +2,69 @@ ...@@ -2,112 +2,69 @@
module Security module Security
class MergeReportsService class MergeReportsService
ANALYZER_ORDER = { attr_reader :source_reports
"bundler_audit" => 1,
"retire.js" => 2,
"gemnasium" => 3,
"gemnasium-maven" => 3,
"gemnasium-python" => 3,
"bandit" => 1,
"semgrep" => 2,
"unknown" => 999
}.freeze
def initialize(*source_reports) def initialize(*source_reports)
@source_reports = source_reports @source_reports = source_reports
end
def execute
copy_resources_to_target_report
copy_findings_to_target
target_report
end
sort_by_analyzer_order! private
@target_report = ::Gitlab::Ci::Reports::Security::Report.new( def target_report
@source_reports.first.type, @target_report ||= ::Gitlab::Ci::Reports::Security::Report.new(
@source_reports.first.pipeline, source_reports.first.type,
@source_reports.first.created_at source_reports.first.pipeline,
source_reports.first.created_at
).tap { |report| report.errors = source_reports.flat_map(&:errors) } ).tap { |report| report.errors = source_reports.flat_map(&:errors) }
@findings = []
end end
def execute def copy_resources_to_target_report
@source_reports.each do |source| sorted_source_reports.each do |source_report|
copy_scanners_to_target(source) copy_scanners_to_target(source_report)
copy_identifiers_to_target(source) copy_identifiers_to_target(source_report)
copy_findings_to_buffer(source) copy_scanned_resources_to_target(source_report)
copy_scanned_resources_to_target(source)
end end
copy_findings_to_target
@target_report
end end
private def sorted_source_reports
source_reports.sort { |a, b| a.primary_scanner_order_to(b) }
end
def copy_scanners_to_target(source_report) def copy_scanners_to_target(source_report)
# no need for de-duping: it's done by Report internally # no need for de-duping: it's done by Report internally
source_report.scanners.values.each { |scanner| @target_report.add_scanner(scanner) } source_report.scanners.values.each { |scanner| target_report.add_scanner(scanner) }
end end
def copy_identifiers_to_target(source_report) def copy_identifiers_to_target(source_report)
# no need for de-duping: it's done by Report internally # no need for de-duping: it's done by Report internally
source_report.identifiers.values.each { |identifier| @target_report.add_identifier(identifier) } source_report.identifiers.values.each { |identifier| target_report.add_identifier(identifier) }
end end
def copy_findings_to_buffer(source) def copy_scanned_resources_to_target(source_report)
@findings.concat(source.findings) target_report.scanned_resources.concat(source_report.scanned_resources).uniq!
end end
def copy_scanned_resources_to_target(source_report) def copy_findings_to_target
@target_report.scanned_resources.concat(source_report.scanned_resources).uniq! deduplicated_findings.sort.each { |finding| target_report.add_finding(finding) }
end end
def deduplicate_findings! def deduplicated_findings
@findings, * = @findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)| prioritized_findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)|
next if seen_identifiers.intersect?(finding.keys.to_set) next if seen_identifiers.intersect?(finding.keys.to_set)
seen_identifiers.merge(finding.keys) seen_identifiers.merge(finding.keys)
deduplicated << finding deduplicated << finding
end end.first
end end
def sort_findings! def prioritized_findings
@findings.sort! do |a, b| source_reports.flat_map(&:findings).sort { |a, b| a.scanner_order_to(b) }
a_severity = a.severity
b_severity = b.severity
if a_severity == b_severity
a.compare_key <=> b.compare_key
else
::Enums::Vulnerability.severity_levels[b_severity] <=>
::Enums::Vulnerability.severity_levels[a_severity]
end
end
end
def copy_findings_to_target
deduplicate_findings!
sort_findings!
@findings.each { |finding| @target_report.add_finding(finding) }
end
def reports_sortable?
return true if @source_reports.all? { |x| x.type == :dependency_scanning }
return true if @source_reports.all? { |x| x.type == :sast }
false
end
def sort_by_analyzer_order!
return unless reports_sortable?
@source_reports.sort! do |a, b|
a_scanner_id = a.scanners.values[0].external_id
b_scanner_id = b.scanners.values[0].external_id
a_scanner_id = "unknown" if ANALYZER_ORDER[a_scanner_id].nil?
b_scanner_id = "unknown" if ANALYZER_ORDER[b_scanner_id].nil?
ANALYZER_ORDER[a_scanner_id] <=> ANALYZER_ORDER[b_scanner_id]
end
end end
end end
end end
...@@ -43,14 +43,12 @@ module Security ...@@ -43,14 +43,12 @@ module Security
end end
def sorted_artifacts def sorted_artifacts
@sorted_artifacts ||= artifacts.sort_by { |artifact| [scanner_order_for(artifact), artifact.job.name] } @sorted_artifacts ||= artifacts.sort do |a, b|
end report_a = a.security_report(validate: true)
report_b = b.security_report(validate: true)
# This method returns the priority of scanners for dependency_scanning and sast report_a.primary_scanner_order_to(report_b)
# and `INFINITY` for all the other scan types. There is no problem with end
# calling this method for all the scan types to get rid of branching.
def scanner_order_for(artifact)
MergeReportsService::ANALYZER_ORDER.fetch(artifact.security_report(validate: true).primary_scanner&.external_id, Float::INFINITY)
end end
def store_scan_for(artifact, deduplicate) def store_scan_for(artifact, deduplicate)
......
---
title: Make the MergeReportsService more reliable
merge_request: 59682
author:
type: fixed
...@@ -123,6 +123,22 @@ module Gitlab ...@@ -123,6 +123,22 @@ module Gitlab
primary_identifier&.fingerprint primary_identifier&.fingerprint
end end
def <=>(other)
if severity == other.severity
compare_key <=> other.compare_key
else
::Enums::Vulnerability.severity_levels[other.severity] <=>
::Enums::Vulnerability.severity_levels[severity]
end
end
def scanner_order_to(other)
return 1 unless scanner
return -1 unless other&.scanner
scanner <=> other.scanner
end
private private
def generate_project_fingerprint def generate_project_fingerprint
......
...@@ -62,6 +62,13 @@ module Gitlab ...@@ -62,6 +62,13 @@ module Gitlab
def primary_scanner def primary_scanner
scanners.first&.second scanners.first&.second
end end
def primary_scanner_order_to(other)
return 1 unless primary_scanner
return -1 unless other.primary_scanner
primary_scanner <=> other.primary_scanner
end
end end
end end
end end
......
...@@ -5,18 +5,26 @@ module Gitlab ...@@ -5,18 +5,26 @@ module Gitlab
module Reports module Reports
module Security module Security
class Scanner class Scanner
ANALYZER_ORDER = {
"bundler_audit" => 1,
"retire.js" => 2,
"gemnasium" => 3,
"gemnasium-maven" => 3,
"gemnasium-python" => 3,
"bandit" => 1,
"semgrep" => 2
}.freeze
attr_accessor :external_id, :name, :vendor attr_accessor :external_id, :name, :vendor
alias_method :key, :external_id
def initialize(external_id:, name:, vendor:) def initialize(external_id:, name:, vendor:)
@external_id = external_id @external_id = external_id
@name = name @name = name
@vendor = vendor @vendor = vendor
end end
def key
external_id
end
def to_hash def to_hash
{ {
external_id: external_id.to_s, external_id: external_id.to_s,
...@@ -28,6 +36,22 @@ module Gitlab ...@@ -28,6 +36,22 @@ module Gitlab
def ==(other) def ==(other)
other.external_id == external_id other.external_id == external_id
end end
def <=>(other)
sort_keys <=> other.sort_keys
end
protected
def sort_keys
@sort_keys ||= [order, external_id, name, vendor]
end
private
def order
ANALYZER_ORDER.fetch(external_id, Float::INFINITY)
end
end end
end end
end end
......
...@@ -408,4 +408,56 @@ RSpec.describe Gitlab::Ci::Reports::Security::Finding do ...@@ -408,4 +408,56 @@ RSpec.describe Gitlab::Ci::Reports::Security::Finding do
end end
end end
end end
describe '#scanner_order_to' do
let(:scanner_1) { build(:ci_reports_security_scanner) }
let(:scanner_2) { build(:ci_reports_security_scanner) }
let(:finding_1) { build(:ci_reports_security_finding, scanner: scanner_1) }
let(:finding_2) { build(:ci_reports_security_finding, scanner: scanner_2) }
subject(:compare_based_on_scanners) { finding_1.scanner_order_to(finding_2) }
context 'when the scanner of the receiver is nil' do
let(:scanner_1) { nil }
context 'when the scanner of the other is nil' do
let(:scanner_2) { nil }
it { is_expected.to be(1) }
end
context 'when the scanner of the other is not nil' do
it { is_expected.to be(1) }
end
end
context 'when the scanner of the receiver is not nil' do
context 'when the scanner of the other is nil' do
let(:scanner_2) { nil }
it { is_expected.to be(-1) }
end
context 'when the scanner of the other is not nil' do
before do
allow(scanner_1).to receive(:<=>).and_return(0)
end
it 'compares two scanners' do
expect(compare_based_on_scanners).to be(0)
expect(scanner_1).to have_received(:<=>).with(scanner_2)
end
end
end
end
describe '#<=>' do
let(:finding_1) { build(:ci_reports_security_finding, severity: :critical, compare_key: 'b') }
let(:finding_2) { build(:ci_reports_security_finding, severity: :critical, compare_key: 'a') }
let(:finding_3) { build(:ci_reports_security_finding, severity: :high) }
subject { [finding_1, finding_2, finding_3].sort }
it { is_expected.to eq([finding_2, finding_1, finding_3]) }
end
end end
...@@ -173,4 +173,52 @@ RSpec.describe Gitlab::Ci::Reports::Security::Report do ...@@ -173,4 +173,52 @@ RSpec.describe Gitlab::Ci::Reports::Security::Report do
it { is_expected.to be_truthy } it { is_expected.to be_truthy }
end end
end end
describe '#primary_scanner_order_to' do
let(:scanner_1) { build(:ci_reports_security_scanner) }
let(:scanner_2) { build(:ci_reports_security_scanner) }
let(:report_1) { described_class.new('sast', pipeline, created_at) }
let(:report_2) { described_class.new('sast', pipeline, created_at) }
subject(:compare_based_on_primary_scanners) { report_1.primary_scanner_order_to(report_2) }
context 'when the primary scanner of the receiver is nil' do
context 'when the primary scanner of the other is nil' do
it { is_expected.to be(1) }
end
context 'when the primary scanner of the other is not nil' do
before do
report_2.add_scanner(scanner_2)
end
it { is_expected.to be(1) }
end
end
context 'when the primary scanner of the receiver is not nil' do
before do
report_1.add_scanner(scanner_1)
end
context 'when the primary scanner of the other is nil' do
let(:scanner_2) { nil }
it { is_expected.to be(-1) }
end
context 'when the primary scanner of the other is not nil' do
before do
report_2.add_scanner(scanner_2)
allow(scanner_1).to receive(:<=>).and_return(0)
end
it 'compares two scanners' do
expect(compare_based_on_primary_scanners).to be(0)
expect(scanner_1).to have_received(:<=>).with(scanner_2)
end
end
end
end
end end
...@@ -91,4 +91,54 @@ RSpec.describe Gitlab::Ci::Reports::Security::Scanner do ...@@ -91,4 +91,54 @@ RSpec.describe Gitlab::Ci::Reports::Security::Scanner do
end end
end end
end end
describe '#<=>' do
using RSpec::Parameterized::TableSyntax
let(:scanner_1) { create(:ci_reports_security_scanner, **scanner_1_attributes) }
let(:scanner_2) { create(:ci_reports_security_scanner, **scanner_2_attributes) }
subject { scanner_1 <=> scanner_2 }
context 'when the `external_id` of the scanners are different' do
where(:scanner_1_attributes, :scanner_2_attributes, :expected_comparison_result) do
{ external_id: 'bundler_audit', name: 'foo', vendor: 'bar' } | { external_id: 'retire.js', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'retire.js', name: 'foo', vendor: 'bar' } | { external_id: 'gemnasium', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'gemnasium', name: 'foo', vendor: 'bar' } | { external_id: 'gemnasium-maven', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'gemnasium-maven', name: 'foo', vendor: 'bar' } | { external_id: 'gemnasium-python', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'gemnasium-python', name: 'foo', vendor: 'bar' } | { external_id: 'bandit', name: 'foo', vendor: 'bar' } | 1
{ external_id: 'bandit', name: 'foo', vendor: 'bar' } | { external_id: 'semgrep', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'semgrep', name: 'foo', vendor: 'bar' } | { external_id: 'unknown', name: 'foo', vendor: 'bar' } | -1
end
with_them do
it { is_expected.to eq(expected_comparison_result) }
end
end
context 'when the `external_id` of the scanners are equal' do
context 'when the `name` of the scanners are different' do
where(:scanner_1_attributes, :scanner_2_attributes, :expected_comparison_result) do
{ external_id: 'gemnasium', name: 'a', vendor: 'bar' } | { external_id: 'gemnasium', name: 'b', vendor: 'bar' } | -1
{ external_id: 'gemnasium', name: 'd', vendor: 'bar' } | { external_id: 'gemnasium', name: 'c', vendor: 'bar' } | 1
end
with_them do
it { is_expected.to eq(expected_comparison_result) }
end
end
context 'when the `name` of the scanners are equal' do
where(:scanner_1_attributes, :scanner_2_attributes, :expected_comparison_result) do
{ external_id: 'gemnasium', name: 'foo', vendor: 'a' } | { external_id: 'gemnasium', name: 'foo', vendor: 'a' } | 0 # rubocop:disable Lint/BinaryOperatorWithIdenticalOperands
{ external_id: 'gemnasium', name: 'foo', vendor: 'a' } | { external_id: 'gemnasium', name: 'foo', vendor: 'b' } | -1
{ external_id: 'gemnasium', name: 'foo', vendor: 'b' } | { external_id: 'gemnasium', name: 'foo', vendor: 'a' } | 1
end
with_them do
it { is_expected.to eq(expected_comparison_result) }
end
end
end
end
end end
...@@ -294,7 +294,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do ...@@ -294,7 +294,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do
subject(:merged_report) { described_class.new(pre_merged_report, retirejs_report).execute } subject(:merged_report) { described_class.new(pre_merged_report, retirejs_report).execute }
it 'keeps the finding from `retirejs` as it has higher priority', pending: 'https://gitlab.com/gitlab-org/gitlab/-/issues/296520' do it 'keeps the finding from `retirejs` as it has higher priority' do
expect(merged_report.findings).to include(finding_id_5) expect(merged_report.findings).to include(finding_id_5)
end end
end end
......
...@@ -4,9 +4,9 @@ require 'spec_helper' ...@@ -4,9 +4,9 @@ require 'spec_helper'
RSpec.describe Security::StoreGroupedScansService do RSpec.describe Security::StoreGroupedScansService do
let_it_be(:report_type) { :dast } let_it_be(:report_type) { :dast }
let_it_be(:build_1) { create(:ee_ci_build, name: 'Report 1') } let_it_be(:build_1) { create(:ee_ci_build) }
let_it_be(:build_2) { create(:ee_ci_build, name: 'Report 3') } let_it_be(:build_2) { create(:ee_ci_build) }
let_it_be(:build_3) { create(:ee_ci_build, name: 'Report 2') } let_it_be(:build_3) { create(:ee_ci_build) }
let_it_be(:artifact_1) { create(:ee_ci_job_artifact, report_type, job: build_1) } let_it_be(:artifact_1) { create(:ee_ci_job_artifact, report_type, job: build_1) }
let_it_be(:artifact_2) { create(:ee_ci_job_artifact, report_type, job: build_2) } let_it_be(:artifact_2) { create(:ee_ci_job_artifact, report_type, job: build_2) }
let_it_be(:artifact_3) { create(:ee_ci_job_artifact, report_type, job: build_3) } let_it_be(:artifact_3) { create(:ee_ci_job_artifact, report_type, job: build_3) }
...@@ -61,8 +61,7 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -61,8 +61,7 @@ RSpec.describe Security::StoreGroupedScansService do
end end
context 'schema validation' do context 'schema validation' do
let(:mock_scanner) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'unknown') } let(:mock_report) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: -1) }
let(:mock_report) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: mock_scanner) }
before do before do
allow(artifact_1).to receive(:security_report).and_return(mock_report) allow(artifact_1).to receive(:security_report).and_return(mock_report)
...@@ -73,9 +72,9 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -73,9 +72,9 @@ RSpec.describe Security::StoreGroupedScansService do
it 'accesses the validated security reports' do it 'accesses the validated security reports' do
store_scan_group store_scan_group
expect(artifact_1).to have_received(:security_report).with(validate: true) expect(artifact_1).to have_received(:security_report).with(validate: true).once
expect(artifact_2).to have_received(:security_report).with(validate: true) expect(artifact_2).to have_received(:security_report).with(validate: true).twice
expect(artifact_3).to have_received(:security_report).with(validate: true) expect(artifact_3).to have_received(:security_report).with(validate: true).once
end end
end end
...@@ -84,8 +83,8 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -84,8 +83,8 @@ RSpec.describe Security::StoreGroupedScansService do
store_scan_group store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, false).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
end end
end end
...@@ -94,12 +93,9 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -94,12 +93,9 @@ RSpec.describe Security::StoreGroupedScansService do
let_it_be(:sast_artifact_2) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) } let_it_be(:sast_artifact_2) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let_it_be(:sast_artifact_3) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) } let_it_be(:sast_artifact_3) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let(:scanner_1) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'unknown') } let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: 1) }
let(:scanner_2) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'bandit') } let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: -1) }
let(:scanner_3) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'semgrep') } let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report) }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_2) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_3) }
let(:artifacts) { [sast_artifact_1, sast_artifact_2, sast_artifact_3] } let(:artifacts) { [sast_artifact_1, sast_artifact_2, sast_artifact_3] }
before do before do
...@@ -119,28 +115,21 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -119,28 +115,21 @@ RSpec.describe Security::StoreGroupedScansService do
context 'when the artifacts are dependency_scanning' do context 'when the artifacts are dependency_scanning' do
let(:report_type) { :dependency_scanning } let(:report_type) { :dependency_scanning }
let(:build_4) { create(:ee_ci_build, name: 'Report 0') } let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: 1) }
let(:artifact_4) { create(:ee_ci_job_artifact, report_type, job: build_4) } let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: -1) }
let(:scanner_1) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'this is an unknown id') } let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report) }
let(:scanner_2) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'bundler_audit') } let(:artifacts) { [artifact_1, artifact_2, artifact_3] }
let(:scanner_3) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'retire.js') }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_2) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_3) }
let(:artifacts) { [artifact_1, artifact_2, artifact_3, artifact_4] }
before do before do
allow(artifact_1).to receive(:security_report).and_return(mock_report_1) allow(artifact_1).to receive(:security_report).and_return(mock_report_1)
allow(artifact_2).to receive(:security_report).and_return(mock_report_2) allow(artifact_2).to receive(:security_report).and_return(mock_report_2)
allow(artifact_3).to receive(:security_report).and_return(mock_report_3) allow(artifact_3).to receive(:security_report).and_return(mock_report_3)
allow(artifact_4).to receive(:security_report).and_return(mock_report_2)
end end
it 'calls the Security::StoreScanService with ordered artifacts' do it 'calls the Security::StoreScanService with ordered artifacts' do
store_scan_group store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(artifact_4, empty_set, false).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, true).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, true).ordered
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment