Commit 3be15804 authored by Mehmet Emin INAC's avatar Mehmet Emin INAC

Fix the deduplication logic in `StoreScanService`

This change also unifies the way we compare findings with each other in
`MergeReportsService` and `StoreScanService`.
parent babfbe0f
......@@ -11,20 +11,6 @@ module Security
"unknown" => 999
}.freeze
IdentifierKey = Struct.new(:location_sha, :identifier_type, :identifier_value) do
def ==(other)
location_sha == other.location_sha &&
identifier_type == other.identifier_type &&
identifier_value == other.identifier_value
end
def hash
location_sha.hash ^ identifier_type.hash ^ identifier_value.hash
end
alias_method :eql?, :==
end
def initialize(*source_reports)
@source_reports = source_reports
# temporary sort https://gitlab.com/gitlab-org/gitlab/-/issues/213839
......@@ -70,41 +56,13 @@ module Security
@target_report.scanned_resources.concat(source_report.scanned_resources).uniq!
end
# this method mutates the passed seen_identifiers set
def check_or_mark_seen_identifier!(identifier, location_fingerprint, seen_identifiers)
key = IdentifierKey.new(location_fingerprint, identifier.external_type, identifier.external_id)
if seen_identifiers.include?(key)
true
else
seen_identifiers.add(key)
false
end
end
def deduplicate_findings!
seen_identifiers = Set.new
deduplicated = []
@findings.each do |finding|
seen = false
# We are looping through all identifiers in order to find the same vulnerabilities reported for the same location
# but from different source reports and keeping only first of them
finding.identifiers.each do |identifier|
# TODO: remove .downcase here after the DAST parser is harmonized to the common library identifiers' keys format
# See https://gitlab.com/gitlab-org/gitlab/issues/11976#note_191257912
next if %w[cwe wasc].include?(identifier.external_type.downcase) # ignored because these describe a class of vulnerabilities
@findings, * = @findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)|
next if finding.keys.any? { |key| seen_identifiers.member?(key) }
seen = check_or_mark_seen_identifier!(identifier, finding.location.fingerprint, seen_identifiers)
break if seen
end
deduplicated << finding unless seen
seen_identifiers.merge(finding.keys)
deduplicated << finding
end
@findings = deduplicated
end
def sort_findings!
......
......@@ -57,7 +57,9 @@ module Security
end
def register_keys(keys)
keys.all? { |key| known_keys.add?(key) }
return false if keys.any? { |key| known_keys.member?(key) }
known_keys.merge(keys)
end
end
end
......@@ -92,7 +92,7 @@ module Gitlab
end
def keys
@keys ||= identifiers.map do |identifier|
@keys ||= identifiers.reject(&:type_identifier?).map do |identifier|
FindingKey.new(location_fingerprint: location&.fingerprint, identifier_fingerprint: identifier.fingerprint)
end
end
......
......@@ -11,8 +11,9 @@ module Gitlab
end
def ==(other)
location_fingerprint == other.location_fingerprint &&
identifier_fingerprint == other.identifier_fingerprint
has_fingerprints? && other.has_fingerprints? &&
location_fingerprint == other.location_fingerprint &&
identifier_fingerprint == other.identifier_fingerprint
end
def hash
......@@ -24,6 +25,10 @@ module Gitlab
protected
attr_reader :location_fingerprint, :identifier_fingerprint
def has_fingerprints?
location_fingerprint.present? && identifier_fingerprint.present?
end
end
end
end
......
......@@ -41,12 +41,20 @@ module Gitlab
other.external_id == external_id
end
def type_identifier?
cwe? || wasc?
end
def cve?
external_type.to_s.casecmp('cve') == 0
external_type.to_s.casecmp?('cve')
end
def cwe?
external_type.to_s.casecmp('cwe') == 0
external_type.to_s.casecmp?('cwe')
end
def wasc?
external_type.to_s.casecmp?('wasc')
end
private
......
......@@ -7,6 +7,10 @@ RSpec.describe Gitlab::Ci::Reports::Security::FindingKey do
describe '#==' do
where(:location_fp_1, :location_fp_2, :identifier_fp_1, :identifier_fp_2, :equals?) do
nil | 'different location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | nil | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'different location fp' | nil | 'different identifier fp' | false
'location fp' | 'different location fp' | 'identifier fp' | nil | false
'location fp' | 'different location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'different location fp' | 'identifier fp' | 'identifier fp' | false
'location fp' | 'location fp' | 'identifier fp' | 'different identifier fp' | false
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment