Commit 30d8b466 authored by Mehmet Emin INAC's avatar Mehmet Emin INAC

Download & parse only the necessary CI job artifact

Previously we were downloading & parsing all CI job artifacts for each
request to fetch the list of findings of a pipeline. This was causing
timeout errors for pipelines with tens of security scans.

With this change, we are downloading & parsing only the necessary CI
job artifacts which should perform better than the previous version.
parent fd8caac1
# frozen_string_literal: true
# Security::FindingsFinder
#
# Used to find Ci::Builds associated with requested findings.
#
# Arguments:
# pipeline - object to filter findings
# params:
# severity: Array<String>
# confidence: Array<String>
# report_type: Array<String>
# scope: String
# page: Int
# per_page: Int
module Security
class FindingsFinder
ResultSet = Struct.new(:relation, :findings) do
delegate :current_page, :limit_value, :total_pages, :total_count, :next_page, :prev_page, to: :relation
end
DEFAULT_PAGE = 1
DEFAULT_PER_PAGE = 20
def initialize(pipeline, params: {})
@pipeline = pipeline
@params = params
end
def execute
return unless can_use_security_findings?
ResultSet.new(security_findings, findings)
end
private
attr_reader :pipeline, :params
delegate :project, :has_security_findings?, to: :pipeline, private: true
def can_use_security_findings?
Feature.enabled?(:store_security_findings, project) && has_security_findings?
end
def findings
security_findings.map do |security_finding|
build_vulnerability_finding(security_finding)
end
end
def build_vulnerability_finding(security_finding)
report_finding = report_finding_for(security_finding)
return Vulnerabilities::Finding.new unless report_finding
finding_data = report_finding.to_hash.except(:compare_key, :identifiers, :location, :scanner, :links)
identifiers = report_finding.identifiers.map do |identifier|
Vulnerabilities::Identifier.new(identifier.to_hash)
end
Vulnerabilities::Finding.new(finding_data).tap do |finding|
finding.location_fingerprint = report_finding.location.fingerprint
finding.vulnerability = vulnerability_for(security_finding)
finding.project = project
finding.sha = pipeline.sha
finding.scanner = security_finding.scanner
finding.identifiers = identifiers
end
end
def report_finding_for(security_finding)
security_reports[security_finding.build.id].findings[security_finding.position]
end
def vulnerability_for(security_finding)
existing_vulnerabilities.dig(security_finding.scan.scan_type, security_finding.project_fingerprint)&.first
end
def existing_vulnerabilities
@existing_vulnerabilities ||= begin
project.vulnerabilities
.with_findings
.with_report_types(loaded_report_types)
.by_project_fingerprints(loaded_project_fingerprints)
.group_by(&:report_type)
.transform_values { |vulnerabilties| vulnerabilties.group_by { |v| v.finding.project_fingerprint } }
end
end
def loaded_report_types
security_findings.map(&:scan).flat_map(&:scan_type).uniq
end
def loaded_project_fingerprints
security_findings.map(&:project_fingerprint)
end
def security_reports
@security_reports ||= begin
builds.each_with_object({}) do |build, memo|
memo[build.id] = build.job_artifacts.map(&:security_report).compact.first
end
end
end
def builds
security_findings.map(&:build).uniq
end
def security_findings
@security_findings ||= include_dismissed? ? all_security_findings : all_security_findings.undismissed
end
def all_security_findings
pipeline.security_findings
.with_build_and_artifacts
.with_scan
.with_scanner
.deduplicated
.ordered
.page(page)
.per(per_page)
.then { |relation| by_confidence_levels(relation) }
.then { |relation| by_report_types(relation) }
.then { |relation| by_severity_levels(relation) }
end
def per_page
@per_page ||= params[:per_page] || DEFAULT_PER_PAGE
end
def page
@page ||= params[:page] || DEFAULT_PAGE
end
def include_dismissed?
params[:scope] == 'all'
end
def by_confidence_levels(relation)
return relation unless params[:confidence]
relation.by_confidence_levels(params[:confidence])
end
def by_report_types(relation)
return relation unless params[:report_type]
relation.by_report_types(params[:report_type])
end
def by_severity_levels(relation)
return relation unless params[:severity]
relation.by_severity_levels(params[:severity])
end
end
end
......@@ -23,6 +23,7 @@ module EE
# Subscriptions to this pipeline
has_many :downstream_bridges, class_name: '::Ci::Bridge', foreign_key: :upstream_pipeline_id
has_many :security_scans, class_name: 'Security::Scan', through: :builds
has_many :security_findings, class_name: 'Security::Finding', through: :security_scans, source: :findings
has_one :source_project, class_name: 'Ci::Sources::Project', foreign_key: :pipeline_id
......@@ -175,6 +176,10 @@ module EE
project.can_store_security_reports? && has_security_reports?
end
def has_security_findings?
security_findings.exists?
end
private
def has_security_reports?
......
......@@ -82,6 +82,7 @@ module EE
scope :with_states, -> (states) { where(state: states) }
scope :with_scanners, -> (scanners) { joins(findings: :scanner).merge(::Vulnerabilities::Scanner.with_external_id(scanners)) }
scope :grouped_by_severity, -> { reorder(severity: :desc).group(:severity) }
scope :by_project_fingerprints, -> (project_fingerprints) { joins(:findings).merge(Vulnerabilities::Finding.by_project_fingerprints(project_fingerprints)) }
scope :with_resolution, -> (has_resolution = true) { where(resolved_on_default_branch: has_resolution) }
scope :with_issues, -> (has_issues = true) do
......
......@@ -14,6 +14,8 @@ module Security
belongs_to :scan, inverse_of: :findings, optional: false
belongs_to :scanner, class_name: 'Vulnerabilities::Scanner', inverse_of: :security_findings, optional: false
has_one :build, through: :scan
# TODO: These are duplicated between this model and Vulnerabilities::Finding,
# we should create a shared module to encapculate this in one place.
enum confidence: Vulnerabilities::Finding::CONFIDENCE_LEVELS, _prefix: :confidence
......@@ -24,5 +26,17 @@ module Security
scope :by_position, -> (positions) { where(position: positions) }
scope :by_build_ids, -> (build_ids) { joins(scan: :build).where(ci_builds: { id: build_ids }) }
scope :by_project_fingerprints, -> (fingerprints) { where(project_fingerprint: fingerprints) }
scope :by_severity_levels, -> (severity_levels) { where(severity: severity_levels) }
scope :by_confidence_levels, -> (confidence_levels) { where(confidence: confidence_levels) }
scope :by_report_types, -> (report_types) { joins(:scan).merge(Scan.by_scan_types(report_types)) }
scope :undismissed, -> do
where('NOT EXISTS (?)', Scan.select(1).has_dismissal_feedback.where('vulnerability_feedback.project_fingerprint = security_findings.project_fingerprint'))
end
scope :ordered, -> { order(severity: :desc, confidence: :desc, id: :asc) }
scope :with_build_and_artifacts, -> { includes(build: :job_artifacts) }
scope :with_scan, -> { includes(:scan) }
scope :with_scanner, -> { includes(:scanner) }
scope :deduplicated, -> { where(deduplicated: true) }
end
end
......@@ -27,6 +27,16 @@ module Security
api_fuzzing: 7
}
scope :by_scan_types, -> (scan_types) { where(scan_type: scan_types) }
scope :has_dismissal_feedback, -> do
# The `category` enum on `vulnerability_feedback` table starts from 0 but the `scan_type` enum
# on `security_scans` from 1. For this reason, we have to decrease the value of `scan_type` by one
# to match with category values on `vulnerability_feedback` table.
joins(build: { project: :vulnerability_feedback })
.where('vulnerability_feedback.category = (security_scans.scan_type - 1)')
.merge(Vulnerabilities::Feedback.for_dismissal)
end
delegate :project, to: :build
end
end
......@@ -97,6 +97,7 @@ module Vulnerabilities
scope :by_projects, -> (values) { where(project_id: values) }
scope :by_severities, -> (values) { where(severity: values) }
scope :by_confidences, -> (values) { where(confidence: values) }
scope :by_project_fingerprints, -> (values) { where(project_fingerprint: values) }
scope :all_preloaded, -> do
preload(:scanner, :identifiers, project: [:namespace, :project_feature])
......
......@@ -18,11 +18,29 @@ module API
end
end
def vulnerability_findings_by(params)
return [] unless pipeline
def vulnerability_findings
@vulnerability_findings ||= begin
return paginate(Kaminari.paginate_array([])) unless pipeline
aggregated_report = Security::PipelineVulnerabilitiesFinder.new(pipeline: pipeline, params: params).execute
aggregated_report.findings
with_adaptive_finder || with_vulnerabilities_finder
end
end
def with_vulnerabilities_finder
aggregated_report = Security::PipelineVulnerabilitiesFinder.new(pipeline: pipeline, params: declared_params).execute
# We might have to add rubocop:disable annotation here in case
# https://gitlab.com/gitlab-org/gitlab/issues/32763 happens, using
# Kaminari.paginate_array here is correct
# See https://gitlab.com/gitlab-org/gitlab/issues/33588#note_291849433
# for discussion
paginate(Kaminari.paginate_array(aggregated_report.findings))
end
def with_adaptive_finder
Security::FindingsFinder.new(pipeline, params: declared_params).execute.tap do |result|
paginate(result) if result
end&.findings
end
end
......@@ -71,17 +89,6 @@ module API
get ':id/vulnerability_findings' do
authorize! :read_vulnerability, user_project
# We might have to add rubocop:disable annotation here in case
# https://gitlab.com/gitlab-org/gitlab/issues/32763 happens, using
# Kaminari.paginate_array here is correct
# See https://gitlab.com/gitlab-org/gitlab/issues/33588#note_291849433
# for discussion
vulnerability_findings = paginate(
Kaminari.paginate_array(
vulnerability_findings_by(declared_params)
)
)
Gitlab::Vulnerabilities::FindingsPreloader.preload_feedback!(vulnerability_findings)
present vulnerability_findings,
......
This diff is collapsed.
......@@ -13,6 +13,7 @@ RSpec.describe Ci::Pipeline do
end
it { is_expected.to have_many(:security_scans).through(:builds).class_name('Security::Scan') }
it { is_expected.to have_many(:security_findings).through(:security_scans).class_name('Security::Finding').source(:findings) }
it { is_expected.to have_many(:downstream_bridges) }
it { is_expected.to have_many(:vulnerability_findings).through(:vulnerabilities_finding_pipelines).class_name('Vulnerabilities::Finding') }
it { is_expected.to have_many(:vulnerabilities_finding_pipelines).class_name('Vulnerabilities::FindingPipeline') }
......@@ -588,4 +589,21 @@ RSpec.describe Ci::Pipeline do
end
end
end
describe '#has_security_findings?' do
subject { pipeline.has_security_findings? }
context 'when the pipeline has security_findings' do
before do
scan = create(:security_scan, pipeline: pipeline)
create(:security_finding, scan: scan)
end
it { is_expected.to be_truthy }
end
context 'when the pipeline does not have security_findings' do
it { is_expected.to be_falsey }
end
end
end
......@@ -492,6 +492,16 @@ RSpec.describe Vulnerability do
it { is_expected.to eq('critical' => 6, 'high' => 4, 'info' => 1, 'low' => 5, 'medium' => 2, 'unknown' => 3) }
end
describe '.by_project_fingerprints' do
let!(:vulnerability_1) { create(:vulnerability, :with_findings) }
let!(:vulnerability_2) { create(:vulnerability, :with_findings) }
let(:expected_vulnerabilities) { [vulnerability_1] }
subject { described_class.by_project_fingerprints(vulnerability_1.finding.project_fingerprint) }
it { is_expected.to match_array(expected_vulnerabilities) }
end
describe '#finding' do
let_it_be(:project) { create(:project, :with_vulnerability) }
let_it_be(:vulnerability) { project.vulnerabilities.first }
......
......@@ -6,6 +6,7 @@ RSpec.describe Security::Finding do
describe 'associations' do
it { is_expected.to belong_to(:scan).required }
it { is_expected.to belong_to(:scanner).required }
it { is_expected.to have_one(:build).through(:scan) }
end
describe 'validations' do
......@@ -32,4 +33,89 @@ RSpec.describe Security::Finding do
it { is_expected.to eq([finding_1]) }
end
describe '.by_severity_levels' do
let!(:critical_severity_finding) { create(:security_finding, severity: :critical) }
let!(:high_severity_finding) { create(:security_finding, severity: :high) }
let(:expected_findings) { [critical_severity_finding] }
subject { described_class.by_severity_levels(:critical) }
it { is_expected.to match_array(expected_findings) }
end
describe '.by_confidence_levels' do
let!(:high_confidence_finding) { create(:security_finding, confidence: :high) }
let!(:low_confidence_finding) { create(:security_finding, confidence: :low) }
let(:expected_findings) { [high_confidence_finding] }
subject { described_class.by_confidence_levels(:high) }
it { is_expected.to match_array(expected_findings) }
end
describe '.by_report_types' do
let!(:sast_scan) { create(:security_scan, scan_type: :sast) }
let!(:dast_scan) { create(:security_scan, scan_type: :dast) }
let!(:sast_finding) { create(:security_finding, scan: sast_scan) }
let!(:dast_finding) { create(:security_finding, scan: dast_scan) }
let(:expected_findings) { [sast_finding] }
subject { described_class.by_report_types(:sast) }
it { is_expected.to match_array(expected_findings) }
end
describe '.by_project_fingerprints' do
let!(:finding_1) { create(:security_finding) }
let!(:finding_2) { create(:security_finding) }
let(:expected_findings) { [finding_1] }
subject { described_class.by_project_fingerprints(finding_1.project_fingerprint) }
it { is_expected.to match_array(expected_findings) }
end
describe '.undismissed' do
let(:scan) { create(:security_scan) }
let!(:undismissed_finding) { create(:security_finding, scan: scan) }
let!(:dismissed_finding) { create(:security_finding, scan: scan) }
let(:expected_findings) { [undismissed_finding] }
subject { described_class.undismissed }
before do
create(:vulnerability_feedback,
:dismissal,
project: scan.project,
category: scan.scan_type,
project_fingerprint: dismissed_finding.project_fingerprint)
end
it { is_expected.to match_array(expected_findings) }
end
describe '.ordered' do
let!(:finding_1) { create(:security_finding, severity: :high, confidence: :unknown) }
let!(:finding_2) { create(:security_finding, severity: :low, confidence: :confirmed) }
let!(:finding_3) { create(:security_finding, severity: :critical, confidence: :confirmed) }
let!(:finding_4) { create(:security_finding, severity: :critical, confidence: :high) }
let(:expected_findings) { [finding_3, finding_4, finding_1, finding_2] }
subject { described_class.ordered }
it { is_expected.to eq(expected_findings) }
end
describe '.deduplicated' do
let!(:finding_1) { create(:security_finding, deduplicated: true) }
let!(:finding_2) { create(:security_finding, deduplicated: false) }
let(:expected_findings) { [finding_1] }
subject { described_class.deduplicated }
it { is_expected.to eq(expected_findings) }
end
end
......@@ -18,5 +18,30 @@ RSpec.describe Security::Scan do
it { is_expected.to delegate_method(:project).to(:build) }
end
describe '.by_scan_types' do
let!(:sast_scan) { create(:security_scan, scan_type: :sast) }
let!(:dast_scan) { create(:security_scan, scan_type: :dast) }
let(:expected_scans) { [sast_scan] }
subject { described_class.by_scan_types(:sast) }
it { is_expected.to match_array(expected_scans) }
end
describe '.has_dismissal_feedback' do
let(:scan_1) { create(:security_scan) }
let(:scan_2) { create(:security_scan) }
let(:expected_scans) { [scan_1] }
subject { described_class.has_dismissal_feedback }
before do
create(:vulnerability_feedback, :dismissal, project: scan_1.project, category: scan_1.scan_type)
create(:vulnerability_feedback, :issue, project: scan_2.project, category: scan_2.scan_type)
end
it { is_expected.to match_array(expected_scans) }
end
it_behaves_like 'having unique enum values'
end
......@@ -71,6 +71,37 @@ RSpec.describe API::VulnerabilityFindings do
expect { get api(project_vulnerability_findings_path, user) }.not_to exceed_query_limit(control_count).with_threshold(1)
end
describe 'using different finders' do
before do
allow(Security::PipelineVulnerabilitiesFinder).to receive(:new).and_call_original
allow_next_instance_of(Security::FindingsFinder) do |finder|
allow(finder).to receive(:execute).and_return(mock_result)
end
end
context 'when the project uses `security_findings`' do
let(:finding) { create(:vulnerability_finding) }
let(:mock_result) { double(findings: [finding]) }
it 'does not use `Security::PipelineVulnerabilitiesFinder`' do
get api(project_vulnerability_findings_path, user), params: pagination
expect(Security::PipelineVulnerabilitiesFinder).not_to have_received(:new)
end
end
context 'when the project does not use `security_findings`' do
let(:mock_result) { nil }
it 'fallsback to `Security::PipelineVulnerabilitiesFinder`' do
get api(project_vulnerability_findings_path, user), params: pagination
expect(Security::PipelineVulnerabilitiesFinder).to have_received(:new)
end
end
end
describe 'filtering' do
it 'returns vulnerabilities with sast report_type' do
finding_count = (sast_report.findings.count - 1).to_s # all SAST findings except one that was dismissed
......
......@@ -240,6 +240,7 @@ ci_pipelines:
- vulnerability_findings
- pipeline_config
- security_scans
- security_findings
- daily_build_group_report_results
- latest_builds
- daily_report_results
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment