Commit cf1ba80c authored by Saikat Sarkar's avatar Saikat Sarkar

Remove N+1 query for updating Vulnerability links and identifier_objects

parent f19494db
...@@ -6,7 +6,7 @@ module Security ...@@ -6,7 +6,7 @@ module Security
class StoreReportService < ::BaseService class StoreReportService < ::BaseService
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
attr_reader :pipeline, :report, :project attr_reader :pipeline, :report, :project, :vulnerability_finding_to_finding_map
BATCH_SIZE = 1000 BATCH_SIZE = 1000
...@@ -14,6 +14,7 @@ module Security ...@@ -14,6 +14,7 @@ module Security
@pipeline = pipeline @pipeline = pipeline
@report = report @report = report
@project = @pipeline.project @project = @pipeline.project
@vulnerability_finding_to_finding_map = {}
end end
def execute def execute
...@@ -34,6 +35,12 @@ module Security ...@@ -34,6 +35,12 @@ module Security
pipeline.vulnerability_findings.report_type(@report.type).any? pipeline.vulnerability_findings.report_type(@report.type).any?
end end
def optimize_sql_query_for_security_report_enabled?
strong_memoize(:optimize_sql_query_for_security_report_enabled) do
Feature.enabled?(:optimize_sql_query_for_security_report, project)
end
end
def create_all_vulnerabilities! def create_all_vulnerabilities!
# Look for existing Findings using UUID # Look for existing Findings using UUID
finding_uuids = @report.findings.map(&:uuid) finding_uuids = @report.findings.map(&:uuid)
...@@ -41,11 +48,20 @@ module Security ...@@ -41,11 +48,20 @@ module Security
.where(uuid: finding_uuids) # rubocop: disable CodeReuse/ActiveRecord .where(uuid: finding_uuids) # rubocop: disable CodeReuse/ActiveRecord
.to_h { |vf| [vf.uuid, vf] } .to_h { |vf| [vf.uuid, vf] }
update_vulnerability_scanners!(@report.findings) if Feature.enabled?(:optimize_sql_query_for_security_report, project) update_vulnerability_scanners!(@report.findings) if optimize_sql_query_for_security_report_enabled?
@report.findings.map do |finding| vulnerability_ids = @report.findings.map do |finding|
create_vulnerability_finding(vulnerability_findings_by_uuid, finding)&.id create_vulnerability_finding(vulnerability_findings_by_uuid, finding)&.id
end.compact.uniq end.compact.uniq
if optimize_sql_query_for_security_report_enabled?
update_vulnerability_links_info
create_vulnerability_pipeline_objects
update_vulnerabilities_identifiers
update_vulnerabilities_finding_identifiers
end
vulnerability_ids
end end
def mark_as_resolved_except(vulnerability_ids) def mark_as_resolved_except(vulnerability_ids)
...@@ -67,7 +83,9 @@ module Security ...@@ -67,7 +83,9 @@ module Security
vulnerability_finding = vulnerability_findings_by_uuid[finding.uuid] || vulnerability_finding = vulnerability_findings_by_uuid[finding.uuid] ||
find_or_create_vulnerability_finding(finding, vulnerability_params.merge(entity_params)) find_or_create_vulnerability_finding(finding, vulnerability_params.merge(entity_params))
update_vulnerability_scanner(finding) unless Feature.enabled?(:optimize_sql_query_for_security_report, project) vulnerability_finding_to_finding_map[vulnerability_finding] = finding
update_vulnerability_scanner(finding) unless optimize_sql_query_for_security_report_enabled?
update_vulnerability_finding(vulnerability_finding, vulnerability_params) update_vulnerability_finding(vulnerability_finding, vulnerability_params)
reset_remediations_for(vulnerability_finding, finding) reset_remediations_for(vulnerability_finding, finding)
...@@ -77,15 +95,16 @@ module Security ...@@ -77,15 +95,16 @@ module Security
update_finding_signatures(finding, vulnerability_finding) update_finding_signatures(finding, vulnerability_finding)
end end
# The maximum number of identifiers is not used in validation unless optimize_sql_query_for_security_report_enabled?
# we just want to ignore the rest if a finding has more than that. # The maximum number of identifiers is not used in validation
finding.identifiers.take(Vulnerabilities::Finding::MAX_NUMBER_OF_IDENTIFIERS).map do |identifier| # rubocop: disable CodeReuse/ActiveRecord # we just want to ignore the rest if a finding has more than that.
create_or_update_vulnerability_identifier_object(vulnerability_finding, identifier) finding.identifiers.take(Vulnerabilities::Finding::MAX_NUMBER_OF_IDENTIFIERS).map do |identifier| # rubocop: disable CodeReuse/ActiveRecord
end create_or_update_vulnerability_identifier_object(vulnerability_finding, identifier)
end
create_or_update_vulnerability_links(finding, vulnerability_finding)
create_vulnerability_pipeline_object(vulnerability_finding, pipeline) create_or_update_vulnerability_links(finding, vulnerability_finding)
create_vulnerability_pipeline_object(vulnerability_finding, pipeline)
end
create_vulnerability(vulnerability_finding, pipeline) create_vulnerability(vulnerability_finding, pipeline)
end end
...@@ -235,6 +254,67 @@ module Security ...@@ -235,6 +254,67 @@ module Security
vulnerability_finding.update!(update_params) vulnerability_finding.update!(update_params)
end end
def update_vulnerabilities_identifiers
vulnerability_finding_to_finding_map.keys.in_groups_of(BATCH_SIZE, false) do |vulnerability_findings|
identifier_object_records = get_vulnerability_identifier_objects_for(vulnerability_findings)
insert_new_vulnerability_identifiers_for(identifier_object_records)
update_existing_vulnerability_identifiers_for(identifier_object_records)
end
rescue StandardError => e
Gitlab::ErrorTracking.track_exception(e)
ensure
clear_memoization(:identifiers_objects)
clear_memoization(:existing_identifiers_objects)
end
def get_vulnerability_identifier_objects_for(vulnerability_findings)
timestamps = { created_at: Time.current, updated_at: Time.current }
vulnerability_findings.flat_map do |vulnerability_finding|
finding = vulnerability_finding_to_finding_map[vulnerability_finding]
finding.identifiers.take(Vulnerabilities::Finding::MAX_NUMBER_OF_IDENTIFIERS).map do |identifier|
identifier_object = identifiers_objects[identifier.key]
identifier_object.attributes.with_indifferent_access.merge(**timestamps)
.merge(identifier.to_hash).compact
end
end
end
def insert_new_vulnerability_identifiers_for(identifier_object_records)
identifier_object_records_without_id = identifier_object_records.select { |identifier| identifier[:id].nil? }.uniq
Vulnerabilities::Identifier.insert_all(identifier_object_records_without_id) if identifier_object_records_without_id.present?
end
def update_existing_vulnerability_identifiers_for(identifier_object_records)
identifier_object_records_with_id = identifier_object_records.select { |identifier| identifier[:id].present? }.uniq
Vulnerabilities::Identifier.upsert_all(identifier_object_records_with_id) if identifier_object_records_with_id.present?
end
def update_vulnerabilities_finding_identifiers
vulnerability_finding_to_finding_map.keys.in_groups_of(BATCH_SIZE, false) do |vulnerability_findings|
finding_identifier_records = get_finding_identifier_objects_for(vulnerability_findings)
finding_identifier_records.uniq!
Vulnerabilities::FindingIdentifier.insert_all(finding_identifier_records) if finding_identifier_records.present?
end
rescue StandardError => e
Gitlab::ErrorTracking.track_exception(e)
end
def get_finding_identifier_objects_for(vulnerability_findings)
timestamps = { created_at: Time.current, updated_at: Time.current }
vulnerability_findings.flat_map do |vulnerability_finding|
finding = vulnerability_finding_to_finding_map[vulnerability_finding]
finding.identifiers.take(Vulnerabilities::Finding::MAX_NUMBER_OF_IDENTIFIERS).map do |identifier|
identifier_object = identifiers_objects[identifier.key]
next nil unless identifier_object.id
{ occurrence_id: vulnerability_finding.id, identifier_id: identifier_object.id, **timestamps }.compact
end.compact
end
end
def create_or_update_vulnerability_identifier_object(vulnerability_finding, identifier) def create_or_update_vulnerability_identifier_object(vulnerability_finding, identifier)
identifier_object = identifiers_objects[identifier.key] identifier_object = identifiers_objects[identifier.key]
vulnerability_finding.finding_identifiers.find_or_create_by!(identifier: identifier_object) vulnerability_finding.finding_identifiers.find_or_create_by!(identifier: identifier_object)
...@@ -242,6 +322,22 @@ module Security ...@@ -242,6 +322,22 @@ module Security
rescue ActiveRecord::RecordNotUnique rescue ActiveRecord::RecordNotUnique
end end
def update_vulnerability_links_info
timestamps = { created_at: Time.current, updated_at: Time.current }
vulnerability_finding_to_finding_map.each_slice(BATCH_SIZE) do |vf_to_findings|
records = vf_to_findings.flat_map do |vulnerability_finding, finding|
finding.links.map { |link| { vulnerability_occurrence_id: vulnerability_finding.id, **link.to_hash, **timestamps } }
end
records.uniq!
Vulnerabilities::FindingLink.insert_all(records) if records.present?
end
rescue StandardError => e
Gitlab::ErrorTracking.track_exception(e)
end
def create_or_update_vulnerability_links(finding, vulnerability_finding) def create_or_update_vulnerability_links(finding, vulnerability_finding)
return if finding.links.blank? return if finding.links.blank?
...@@ -279,6 +375,22 @@ module Security ...@@ -279,6 +375,22 @@ module Security
@project.vulnerability_remediations.new(summary: remediation.summary, file: remediation.diff_file, checksum: remediation.checksum) @project.vulnerability_remediations.new(summary: remediation.summary, file: remediation.diff_file, checksum: remediation.checksum)
end end
def create_vulnerability_pipeline_objects
timestamps = { created_at: Time.current, updated_at: Time.current }
vulnerability_finding_to_finding_map.keys.in_groups_of(BATCH_SIZE, false) do |vulnerability_findings|
records = vulnerability_findings.map do |vulnerability_finding|
{ occurrence_id: vulnerability_finding.id, pipeline_id: pipeline.id, **timestamps }
end
records.uniq!
Vulnerabilities::FindingPipeline.insert_all(records) if records.present?
end
rescue StandardError => e
Gitlab::ErrorTracking.track_exception(e)
end
def create_vulnerability_pipeline_object(vulnerability_finding, pipeline) def create_vulnerability_pipeline_object(vulnerability_finding, pipeline)
vulnerability_finding.finding_pipelines.find_or_create_by!(pipeline: pipeline) vulnerability_finding.finding_pipelines.find_or_create_by!(pipeline: pipeline)
rescue ActiveRecord::RecordNotUnique rescue ActiveRecord::RecordNotUnique
......
...@@ -48,11 +48,11 @@ RSpec.describe Security::StoreReportService, '#execute' do ...@@ -48,11 +48,11 @@ RSpec.describe Security::StoreReportService, '#execute' do
end end
context 'for different security reports' do context 'for different security reports' do
where(:case_name, :trait, :scanners, :identifiers, :findings, :finding_identifiers, :finding_pipelines, :remediations, :signatures) do where(:case_name, :trait, :scanners, :identifiers, :findings, :finding_identifiers, :finding_pipelines, :remediations, :signatures, :finding_links) do
'with SAST report' | :sast | 1 | 6 | 5 | 7 | 5 | 0 | 2 'with SAST report' | :sast | 1 | 6 | 5 | 7 | 5 | 0 | 2 | 0
'with exceeding identifiers' | :with_exceeding_identifiers | 1 | 20 | 1 | 20 | 1 | 0 | 0 'with exceeding identifiers' | :with_exceeding_identifiers | 1 | 20 | 1 | 20 | 1 | 0 | 0 | 0
'with Dependency Scanning report' | :dependency_scanning_remediation | 1 | 3 | 2 | 3 | 2 | 1 | 0 'with Dependency Scanning report' | :dependency_scanning_remediation | 1 | 3 | 2 | 3 | 2 | 1 | 0 | 6
'with Container Scanning report' | :container_scanning | 1 | 8 | 8 | 8 | 8 | 0 | 0 'with Container Scanning report' | :container_scanning | 1 | 8 | 8 | 8 | 8 | 0 | 0 | 8
end end
with_them do with_them do
...@@ -68,6 +68,10 @@ RSpec.describe Security::StoreReportService, '#execute' do ...@@ -68,6 +68,10 @@ RSpec.describe Security::StoreReportService, '#execute' do
expect { subject }.to change { Vulnerabilities::Finding.count }.by(findings) expect { subject }.to change { Vulnerabilities::Finding.count }.by(findings)
end end
it 'inserts all finding links' do
expect { subject }.to change { Vulnerabilities::FindingLink.count }.by(finding_links)
end
it 'inserts all finding identifiers (join model)' do it 'inserts all finding identifiers (join model)' do
expect { subject }.to change { Vulnerabilities::FindingIdentifier.count }.by(finding_identifiers) expect { subject }.to change { Vulnerabilities::FindingIdentifier.count }.by(finding_identifiers)
end end
...@@ -105,6 +109,9 @@ RSpec.describe Security::StoreReportService, '#execute' do ...@@ -105,6 +109,9 @@ RSpec.describe Security::StoreReportService, '#execute' do
context 'when N+1 database queries have been removed' do context 'when N+1 database queries have been removed' do
let(:trait) { :sast } let(:trait) { :sast }
let(:bandit_scanner) { build(:ci_reports_security_scanner, external_id: 'bandit', name: 'Bandit') } let(:bandit_scanner) { build(:ci_reports_security_scanner, external_id: 'bandit', name: 'Bandit') }
let(:link) { build(:ci_reports_security_link) }
let(:bandit_finding) { build(:ci_reports_security_finding, scanner: bandit_scanner, links: [link]) }
let(:vulnerability_findings) { [] }
subject { described_class.new(pipeline, report) } subject { described_class.new(pipeline, report) }
...@@ -113,9 +120,60 @@ RSpec.describe Security::StoreReportService, '#execute' do ...@@ -113,9 +120,60 @@ RSpec.describe Security::StoreReportService, '#execute' do
control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { subject.send(:update_vulnerability_scanners!, report.findings) }.count control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { subject.send(:update_vulnerability_scanners!, report.findings) }.count
5.times { report.add_finding(build(:ci_reports_security_finding, scanner: bandit_scanner)) } 2.times { add_finding_to_report }
expect { subject.send(:update_vulnerability_scanners!, report.findings) }.not_to exceed_query_limit(control_count)
end
it "avoids N+1 database queries for updating finding_links", :use_sql_query_cache do
report.add_scanner(bandit_scanner)
add_finding_to_report
stub_vulnerability_finding_id_to_finding_map
control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { subject.send(:update_vulnerability_links_info) }.count
2.times { add_finding_to_report }
stub_vulnerability_finding_id_to_finding_map
expect { subject.send(:update_vulnerability_links_info) }.not_to exceed_query_limit(control_count)
end
it "avoids N+1 database queries for updating vulnerabilities_identifiers", :use_sql_query_cache do
report.add_scanner(bandit_scanner)
add_finding_to_report
expect { described_class.new(pipeline, report).send(:update_vulnerability_scanners!, report.findings) }.not_to exceed_query_limit(control_count) stub_vulnerability_finding_id_to_finding_map
stub_vulnerability_findings
control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { subject.send(:update_vulnerabilities_identifiers) }.count
2.times { add_finding_to_report }
stub_vulnerability_finding_id_to_finding_map
stub_vulnerability_findings
expect { subject.send(:update_vulnerabilities_identifiers) }.not_to exceed_query_limit(control_count)
end
def add_finding_to_report
report.add_finding(bandit_finding)
end
def stub_vulnerability_findings
allow(subject).to receive(:vulnerability_findings)
.and_return(vulnerability_findings)
end
def stub_vulnerability_finding_id_to_finding_map
allow(subject).to receive(:vulnerability_finding_id_to_finding_map)
.and_return(vulnerability_finding_id_to_finding_map)
end
def vulnerability_finding_id_to_finding_map
vulnerability_findings.clear
report.findings.to_h do |finding|
vulnerability_finding = create(:vulnerabilities_finding)
vulnerability_findings << vulnerability_finding
[vulnerability_finding.id, finding]
end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment