Merge branch 'pipeline_vulnerabilities_finder_n_1_fix' into 'master'

Uses eager loading to avoid N+1 queries See merge request gitlab-org/gitlab!55692

Merge branch 'pipeline_vulnerabilities_finder_n_1_fix' into 'master'
Uses eager loading to avoid N+1 queries See merge request gitlab-org/gitlab!55692
6cc0f7c5 · Sean McGivern · 68016c39 · 7ac98184 · 6cc0f7c5 · 6cc0f7c5
Commit 6cc0f7c5 authored Mar 12, 2021 by Sean McGivern
7 changed files
--- a/db/migrate/20210310000627_add_idx_vulnerability_occurrences_dedup.rb
+++ b/db/migrate/20210310000627_add_idx_vulnerability_occurrences_dedup.rb
+# frozen_string_literal: true
+
+# See https://docs.gitlab.com/ee/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class AddIdxVulnerabilityOccurrencesDedup < ActiveRecord::Migration[6.0]
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+
+  TABLE = :vulnerability_occurrences
+  INDEX_NAME = 'index_vulnerability_occurrences_deduplication'
+  COLUMNS = %i[project_id report_type project_fingerprint]
+
+  disable_ddl_transaction!
+
+  def up
+    add_concurrent_index TABLE, COLUMNS, name: INDEX_NAME
+  end
+
+  def down
+    remove_concurrent_index TABLE, COLUMNS, name: INDEX_NAME
+  end
+end
--- a/db/schema_migrations/20210310000627
+++ b/db/schema_migrations/20210310000627
+71cea758ecb70049468531f70c52aec021060cd48e58155de0ba118786562ac6
\ No newline at end of file
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -23999,6 +23999,8 @@ CREATE UNIQUE INDEX index_vulnerability_occurrence_identifiers_on_unique_keys ON

 CREATE INDEX index_vulnerability_occurrence_pipelines_on_pipeline_id ON vulnerability_occurrence_pipelines USING btree (pipeline_id);

+CREATE INDEX index_vulnerability_occurrences_deduplication ON vulnerability_occurrences USING btree (project_id, report_type, project_fingerprint);
+
 CREATE INDEX index_vulnerability_occurrences_for_issue_links_migration ON vulnerability_occurrences USING btree (project_id, report_type, encode(project_fingerprint, 'hex'::text));

 CREATE INDEX index_vulnerability_occurrences_on_primary_identifier_id ON vulnerability_occurrences USING btree (primary_identifier_id);
--- a/ee/app/finders/security/pipeline_vulnerabilities_finder.rb
+++ b/ee/app/finders/security/pipeline_vulnerabilities_finder.rb
@@ -58,12 +58,12 @@ module Security

    def vulnerabilities_by_finding_fingerprint(report_type, report)
      Vulnerabilities::Finding
-        .with_vulnerabilities_for_state(
-          project: pipeline.project,
-          report_type: report_type,
-          project_fingerprints: report.findings.map(&:project_fingerprint))
+        .by_project_fingerprints(report.findings.map(&:project_fingerprint))
+        .by_projects(pipeline.project)
+        .by_report_types(report_type)
+        .select(:vulnerability_id, :project_fingerprint)
       .each_with_object({}) do |finding, hash|
-        hash[finding.project_fingerprint] = finding.vulnerability
+        hash[finding.project_fingerprint] = finding.vulnerability_id
      end
    end

@@ -80,7 +80,7 @@ module Security
        finding = Vulnerabilities::Finding.new(finding_hash)
        # assigning Vulnerabilities to Findings to enable the computed state
        finding.location_fingerprint = report_finding.location.fingerprint
-        finding.vulnerability = vulnerabilities[finding.project_fingerprint]
+        finding.vulnerability_id = vulnerabilities[finding.project_fingerprint]
        finding.project = pipeline.project
        finding.sha = pipeline.sha
        finding.build_scanner(report_finding.scanner&.to_hash)

--- a/ee/app/models/vulnerabilities/finding.rb
+++ b/ee/app/models/vulnerabilities/finding.rb
@@ -103,18 +103,6 @@ module Vulnerabilities
      end
    end

-    def self.with_vulnerabilities_for_state(project:, report_type:, project_fingerprints:)
-      Vulnerabilities::Finding
-        .joins(:vulnerability)
-        .where(
-          project: project,
-          report_type: report_type,
-          project_fingerprint: project_fingerprints
-        )
-        .select('vulnerability_occurrences.report_type, vulnerability_id, project_fingerprint, raw_metadata, '\
-                'vulnerabilities.id, vulnerabilities.state') # fetching only required attributes
-    end
-
    # sha can be sourced from a joined pipeline or set from the report
    def sha
      self[:sha] || @sha

--- a/ee/changelogs/unreleased/pipeline_vulnerabilities_finder_n_1_fix.yml
+++ b/ee/changelogs/unreleased/pipeline_vulnerabilities_finder_n_1_fix.yml
+---
+title: Pipline vulnerabilities loading performance improvements
+merge_request: 55692
+author:
+type: performance
--- a/ee/spec/finders/security/pipeline_vulnerabilities_finder_spec.rb
+++ b/ee/spec/finders/security/pipeline_vulnerabilities_finder_spec.rb
@@ -73,6 +73,56 @@ RSpec.describe Security::PipelineVulnerabilitiesFinder do
          end
        end
      end
+
+      it 'does not have N+1 queries' do
+        # We need to create a situation where we have one Vulnerabilities::Finding
+        # AND one Vulnerability for each finding in the sast and dast reports
+        #
+        # Running the pipeline vulnerabilities finder on both report types should
+        # use the same number of queries, regardless of the number of findings
+        # contained in the pipeline report.
+
+        sast_findings = pipeline.security_reports.reports['sast'].findings
+        dep_findings = pipeline.security_reports.reports['dependency_scanning'].findings
+        # this test is invalid if we don't have more sast findings than dep findings
+        expect(sast_findings.count).to be > dep_findings.count
+
+        (sast_findings + dep_findings).each do |report_finding|
+          # create a finding and a vulnerability for each report finding
+          # (the vulnerability is created with the :confirmed trait)
+          create(:vulnerabilities_finding,
+            :confirmed,
+            project: project,
+            report_type: report_finding.report_type,
+            project_fingerprint: report_finding.project_fingerprint)
+        end
+
+        # Need to warm the cache
+        described_class.new(pipeline: pipeline, params: { report_type: %w[dependency_scanning] }).execute
+
+        # should be the same number of queries between different report types
+        expect do
+          described_class.new(pipeline: pipeline, params: { report_type: %w[sast] }).execute
+        end.to issue_same_number_of_queries_as {
+          described_class.new(pipeline: pipeline, params: { report_type: %w[dependency_scanning] }).execute
+        }
+
+        # should also be the same number of queries on the same report type
+        # with a different number of findings
+        #
+        # The pipeline.security_reports object is created dynamically from
+        # pipeline artifacts. We're caching the value so that we can mock it
+        # and force it to include another finding.
+        orig_security_reports = pipeline.security_reports
+        new_finding = create(:ci_reports_security_finding)
+        expect do
+          described_class.new(pipeline: pipeline, params: { report_type: %w[sast] }).execute
+        end.to issue_same_number_of_queries_as {
+          orig_security_reports.reports['sast'].add_finding(new_finding)
+          allow(pipeline).to receive(:security_reports).and_return(orig_security_reports)
+          described_class.new(pipeline: pipeline, params: { report_type: %w[sast] }).execute
+        }
+      end
    end

    context 'by report type' do