Commit 5e9410e9 authored by Etienne Baqué's avatar Etienne Baqué

Merge branch 'improve_the_execution_time_for_store_scans_worker' into 'master'

Improve the performance of StoreScansWorker

See merge request gitlab-org/gitlab!79394
parents 1ba3d52a 9716738c
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
module Security module Security
class Finding < ApplicationRecord class Finding < ApplicationRecord
include IgnorableColumns include IgnorableColumns
include EachBatch
self.table_name = 'security_findings' self.table_name = 'security_findings'
......
...@@ -3,15 +3,18 @@ ...@@ -3,15 +3,18 @@
module Security module Security
# This service class stores the findings metadata for all pipelines. # This service class stores the findings metadata for all pipelines.
class StoreFindingsMetadataService < ::BaseService class StoreFindingsMetadataService < ::BaseService
attr_reader :security_scan, :report BATCH_SIZE = 50
def self.execute(security_scan, report) attr_reader :security_scan, :report, :deduplicated_finding_uuids
new(security_scan, report).execute
def self.execute(security_scan, report, deduplicated_finding_uuids)
new(security_scan, report, deduplicated_finding_uuids).execute
end end
def initialize(security_scan, report) def initialize(security_scan, report, deduplicated_finding_uuids)
@security_scan = security_scan @security_scan = security_scan
@report = report @report = report
@deduplicated_finding_uuids = deduplicated_finding_uuids
end end
def execute def execute
...@@ -30,28 +33,39 @@ module Security ...@@ -30,28 +33,39 @@ module Security
end end
def store_findings def store_findings
report_findings.each { |report_finding| store_finding!(report_finding) } report_findings.each_slice(BATCH_SIZE) { |batch| store_finding_batch(batch) }
end end
def report_findings def report_findings
report.findings.select(&:valid?) report.findings.select(&:valid?)
end end
def store_finding!(report_finding) def store_finding_batch(batch)
security_scan.findings.create!(finding_data(report_finding)) batch.map(&method(:finding_data))
.then(&method(:import_batch))
end
def import_batch(report_finding_data)
Security::Finding.insert_all(report_finding_data)
end end
def finding_data(report_finding) def finding_data(report_finding)
{ {
scan_id: security_scan.id,
severity: report_finding.severity, severity: report_finding.severity,
confidence: report_finding.confidence, confidence: report_finding.confidence,
uuid: report_finding.uuid, uuid: report_finding.uuid,
overridden_uuid: report_finding.overridden_uuid, overridden_uuid: report_finding.overridden_uuid,
project_fingerprint: report_finding.project_fingerprint, project_fingerprint: report_finding.project_fingerprint,
scanner: persisted_scanner_for(report_finding.scanner) scanner_id: persisted_scanner_for(report_finding.scanner).id,
deduplicated: deduplicated?(report_finding)
} }
end end
def deduplicated?(report_finding)
deduplicated_finding_uuids.include?(report_finding.uuid)
end
def persisted_scanner_for(report_scanner) def persisted_scanner_for(report_scanner)
existing_scanners[report_scanner.key] ||= create_scanner!(report_scanner) existing_scanners[report_scanner.key] ||= create_scanner!(report_scanner)
end end
......
...@@ -8,6 +8,8 @@ ...@@ -8,6 +8,8 @@
# @param deduplicate [Boolean] attribute to force running deduplication logic. # @param deduplicate [Boolean] attribute to force running deduplication logic.
module Security module Security
class StoreScanService class StoreScanService
DEDUPLICATE_BATCH_SIZE = 50
def self.execute(artifact, known_keys, deduplicate) def self.execute(artifact, known_keys, deduplicate)
new(artifact, known_keys, deduplicate).execute new(artifact, known_keys, deduplicate).execute
end end
...@@ -49,8 +51,11 @@ module Security ...@@ -49,8 +51,11 @@ module Security
end end
def store_findings def store_findings
StoreFindingsMetadataService.execute(security_scan, security_report) StoreFindingsMetadataService.execute(security_scan, security_report, register_finding_keys).then do |result|
deduplicate_findings? ? update_deduplicated_findings : register_finding_keys # If `StoreFindingsMetadataService` returns error, it means the findings
# have already been stored before so we may re-run the deduplication logic.
update_deduplicated_findings if result[:status] == :error && deduplicate_findings?
end
deduplicate_findings? deduplicate_findings?
end end
...@@ -65,10 +70,19 @@ module Security ...@@ -65,10 +70,19 @@ module Security
def update_deduplicated_findings def update_deduplicated_findings
Security::Scan.transaction do Security::Scan.transaction do
security_scan.findings.update_all(deduplicated: false) mark_all_findings_as_duplicate
mark_unique_findings
end
end
def mark_all_findings_as_duplicate
security_scan.findings.deduplicated.each_batch(of: DEDUPLICATE_BATCH_SIZE) { |batch| batch.update_all(deduplicated: false) }
end
def mark_unique_findings
register_finding_keys.each_slice(DEDUPLICATE_BATCH_SIZE) do |batch|
security_scan.findings security_scan.findings
.by_uuid(register_finding_keys) .by_uuid(batch)
.update_all(deduplicated: true) .update_all(deduplicated: true)
end end
end end
......
...@@ -7,18 +7,20 @@ RSpec.describe Security::StoreFindingsMetadataService do ...@@ -7,18 +7,20 @@ RSpec.describe Security::StoreFindingsMetadataService do
let_it_be(:project) { security_scan.project } let_it_be(:project) { security_scan.project }
let_it_be(:security_finding_1) { build(:ci_reports_security_finding) } let_it_be(:security_finding_1) { build(:ci_reports_security_finding) }
let_it_be(:security_finding_2) { build(:ci_reports_security_finding) } let_it_be(:security_finding_2) { build(:ci_reports_security_finding) }
let_it_be(:security_finding_3) { build(:ci_reports_security_finding, uuid: nil) } let_it_be(:security_finding_3) { build(:ci_reports_security_finding) }
let_it_be(:security_finding_4) { build(:ci_reports_security_finding, uuid: nil) }
let_it_be(:deduplicated_finding_uuids) { [security_finding_1.uuid, security_finding_3.uuid] }
let_it_be(:security_scanner) { build(:ci_reports_security_scanner) } let_it_be(:security_scanner) { build(:ci_reports_security_scanner) }
let_it_be(:report) do let_it_be(:report) do
build( build(
:ci_reports_security_report, :ci_reports_security_report,
findings: [security_finding_1, security_finding_2, security_finding_3], findings: [security_finding_1, security_finding_2, security_finding_3, security_finding_4],
scanners: [security_scanner] scanners: [security_scanner]
) )
end end
describe '#execute' do describe '#execute' do
let(:service_object) { described_class.new(security_scan, report) } let(:service_object) { described_class.new(security_scan, report, deduplicated_finding_uuids) }
subject(:store_findings) { service_object.execute } subject(:store_findings) { service_object.execute }
...@@ -27,6 +29,10 @@ RSpec.describe Security::StoreFindingsMetadataService do ...@@ -27,6 +29,10 @@ RSpec.describe Security::StoreFindingsMetadataService do
create(:security_finding, scan: security_scan) create(:security_finding, scan: security_scan)
end end
it 'returns error message' do
expect(store_findings).to eq({ status: :error, message: "Findings are already stored!" })
end
it 'does not create new findings in database' do it 'does not create new findings in database' do
expect { store_findings }.not_to change { Security::Finding.count } expect { store_findings }.not_to change { Security::Finding.count }
end end
...@@ -38,11 +44,14 @@ RSpec.describe Security::StoreFindingsMetadataService do ...@@ -38,11 +44,14 @@ RSpec.describe Security::StoreFindingsMetadataService do
end end
it 'creates the security finding entries in database' do it 'creates the security finding entries in database' do
expect { store_findings }.to change { security_scan.findings.count }.by(2) store_findings
.and change { security_scan.findings.first&.severity }.to(security_finding_1.severity.to_s)
.and change { security_scan.findings.first&.confidence }.to(security_finding_1.confidence.to_s) expect(security_scan.findings.reload.as_json(only: [:uuid, :deduplicated]))
.and change { security_scan.findings.first&.uuid }.to(security_finding_1.uuid) .to match_array([
.and change { security_scan.findings.last&.uuid }.to(security_finding_2.uuid) { "uuid" => security_finding_1.uuid, "deduplicated" => true },
{ "uuid" => security_finding_2.uuid, "deduplicated" => false },
{ "uuid" => security_finding_3.uuid, "deduplicated" => true }
])
end end
context 'when the scanners already exist in the database' do context 'when the scanners already exist in the database' do
......
...@@ -56,7 +56,7 @@ RSpec.describe Security::StoreScanService do ...@@ -56,7 +56,7 @@ RSpec.describe Security::StoreScanService do
subject(:store_scan) { service_object.execute } subject(:store_scan) { service_object.execute }
before do before do
allow(Security::StoreFindingsMetadataService).to receive(:execute) allow(Security::StoreFindingsMetadataService).to receive(:execute).and_return(status: :success)
known_keys.add(finding_key) known_keys.add(finding_key)
end end
...@@ -170,7 +170,18 @@ RSpec.describe Security::StoreScanService do ...@@ -170,7 +170,18 @@ RSpec.describe Security::StoreScanService do
context 'when the `deduplicate` param is set as true' do context 'when the `deduplicate` param is set as true' do
let(:deduplicate) { true } let(:deduplicate) { true }
it 'does not change the deduplicated flag of duplicated finding false' do context 'when the `StoreFindingsMetadataService` returns success' do
it 'does not run the re-deduplicate logic' do
expect { store_scan }.not_to change { unique_security_finding.reload.deduplicated }.from(false)
end
end
context 'when the `StoreFindingsMetadataService` returns error' do
before do
allow(Security::StoreFindingsMetadataService).to receive(:execute).and_return({ status: :error })
end
it 'does not change the deduplicated flag of duplicated finding from false' do
expect { store_scan }.not_to change { duplicated_security_finding.reload.deduplicated }.from(false) expect { store_scan }.not_to change { duplicated_security_finding.reload.deduplicated }.from(false)
end end
...@@ -179,6 +190,7 @@ RSpec.describe Security::StoreScanService do ...@@ -179,6 +190,7 @@ RSpec.describe Security::StoreScanService do
end end
end end
end end
end
context 'when the security scan does not exist for the artifact' do context 'when the security scan does not exist for the artifact' do
let(:unique_finding_attribute) do let(:unique_finding_attribute) do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment