Commit b0b08ac2 authored by Mark Chao

Merge branch...

Merge branch '227114_populate_resolved_on_default_branch_column_for_existing_vulnerabilities_second_try' into 'master'

Populate `resolved_on_default_branch` column for existing vulnerabilities

See merge request gitlab-org/gitlab!40755
parents 56fe59c2 7fc35578
# frozen_string_literal: true
# Adds a compound btree index on `vulnerabilities (project_id, id)`.
#
# The index is built and dropped concurrently, which is why the migration
# opts out of the wrapping DDL transaction.
class AddCompoundIndexOnVulnerabilitiesForBackgroundMigration < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  INDEX_NAME = 'index_vulnerabilities_on_project_id_and_id'

  disable_ddl_transaction!

  # Creates the compound index under INDEX_NAME.
  def up
    add_concurrent_index(:vulnerabilities, %i[project_id id], name: INDEX_NAME)
  end

  # Drops the index by name, mirroring `up`.
  def down
    remove_concurrent_index_by_name(:vulnerabilities, INDEX_NAME)
  end
end
# frozen_string_literal: true
# Schedules `PopulateResolvedOnDefaultBranchColumn` background jobs, one per
# batch of distinct project IDs found in the `vulnerabilities` table.
#
# Does nothing unless `Gitlab.ee?` is true.
class SchedulePopulateResolvedOnDefaultBranchColumn < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  BATCH_SIZE = 100
  DELAY_INTERVAL = 5.minutes.to_i
  MIGRATION_CLASS = 'PopulateResolvedOnDefaultBranchColumn'

  disable_ddl_transaction!

  def up
    return unless Gitlab.ee?

    vulnerability_model = EE::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn::Vulnerability
    distinct_rows = vulnerability_model.distinct

    # Each batch is delayed by a further DELAY_INTERVAL so jobs are spread out over time.
    distinct_rows.each_batch(of: BATCH_SIZE, column: :project_id) do |relation, batch_number|
      ids_in_batch = relation.pluck(:project_id)

      migrate_in(batch_number * DELAY_INTERVAL, MIGRATION_CLASS, ids_in_batch)
    end
  end

  def down
    # no-op
    # This migration schedules background tasks to populate
    # `resolved_on_default_branch` column of `vulnerabilities`
    # table so there is no rollback operation needed for this.
  end
end
ee38dd60087a8879c4686214da1d25a60ab74306eb07b938efb1a8dfc46cc73a
\ No newline at end of file
2564c387b727e557b2988996aa533ba5e4e6d7b01515407bd2692c09644ac2be
\ No newline at end of file
......@@ -21175,6 +21175,8 @@ CREATE INDEX index_vulnerabilities_on_milestone_id ON public.vulnerabilities USI
CREATE INDEX index_vulnerabilities_on_project_id ON public.vulnerabilities USING btree (project_id);
CREATE INDEX index_vulnerabilities_on_project_id_and_id ON public.vulnerabilities USING btree (project_id, id);
CREATE INDEX index_vulnerabilities_on_resolved_by_id ON public.vulnerabilities USING btree (resolved_by_id);
CREATE INDEX index_vulnerabilities_on_start_date_sourcing_milestone_id ON public.vulnerabilities USING btree (start_date_sourcing_milestone_id);
......
---
title: Populate `resolved_on_default_branch` column for existing vulnerabilities
merge_request: 40755
author:
type: added
# frozen_string_literal: true
module EE
module Gitlab
module BackgroundMigration
module PopulateResolvedOnDefaultBranchColumn
# Runs the per-project population for every given project ID.
#
# IDs may be passed individually or wrapped in (nested) arrays; they are
# flattened before being handed, one at a time, to
# PopulateResolvedOnDefaultBranchColumnForProject.perform.
def perform(*project_ids)
  project_ids.flatten.each do |id|
    PopulateResolvedOnDefaultBranchColumnForProject.perform(id)
  end
end
# Path helpers for records that own a polymorphic `route` (as `source`).
module Routable
  extend ActiveSupport::Concern

  included do
    has_one :route, as: :source
  end

  # Returns the path stored on the associated route, falling back to a path
  # assembled from the parent chain when there is no route (or no route path).
  def full_path
    stored_path = route&.path
    stored_path || build_full_path
  end

  # Joins the parent's full path and this record's own `path` with a slash.
  # Returns `path` unchanged when either the parent or the path is missing.
  def build_full_path
    return path unless parent && path

    parent.full_path + '/' + path
  end
end
# Visibility predicate for records exposing a numeric `visibility_level`.
module Visibility
  # `visibility_level` value that marks a record as public.
  PUBLIC_LEVEL = 20

  # True only when the record's visibility level equals PUBLIC_LEVEL.
  def public?
    level = visibility_level
    level == PUBLIC_LEVEL
  end
end
# This class depends on Gitlab::CurrentSettings
#
# Migration-local model over the `projects` table. Its public job here is
# `resolved_vulnerabilities`: the project's vulnerabilities that are not
# linked to the latest successful default-branch pipeline carrying
# security report artifacts.
class Project < ActiveRecord::Base
include Routable
include Visibility
include ::Gitlab::Utils::StrongMemoize
self.table_name = 'projects'
# These are the artifact file types to query
# only security report related artifacts.
# sast: 5
# dependency_scanning: 6
# container_scanning: 7
# dast: 8
# secret_detection: 21
# coverage_fuzzing: 23
FILE_TYPES = [5, 6, 7, 8, 21, 23].freeze
# SQL template filled in by `pipeline_with_reports_sql` via `format`.
# Placeholders: %{project_id}, %{ref} (already quoted by the caller) and
# %{file_types} (comma-separated integers).
# It considers the 100 most recent successful pipelines of the project on
# the given ref and returns the id of the newest one that has at least one
# non-retried `Ci::Build` with a job artifact of one of the file types.
LATEST_PIPELINE_WITH_REPORTS_SQL = <<~SQL
SELECT
"ci_pipelines"."id"
FROM
"ci_pipelines"
WHERE
("ci_pipelines"."id" IN (
SELECT
"ci_pipelines"."id"
FROM
"ci_pipelines"
WHERE
ci_pipelines.project_id = %{project_id}
AND ci_pipelines.ref = %{ref}
AND ci_pipelines.status IN ('success')
ORDER BY
"ci_pipelines"."id" DESC
LIMIT 100))
AND (EXISTS (
SELECT
1
FROM
"ci_builds"
WHERE
"ci_builds"."type" = 'Ci::Build'
AND ("ci_builds"."retried" IS FALSE OR "ci_builds"."retried" IS NULL)
AND (EXISTS (
SELECT
1
FROM
"ci_job_artifacts"
WHERE
(ci_builds.id = ci_job_artifacts.job_id)
AND "ci_job_artifacts"."file_type" IN (%{file_types})))
AND (ci_pipelines.id = ci_builds.commit_id)))
ORDER BY
"ci_pipelines"."id" DESC
LIMIT 1
SQL
belongs_to :namespace
# `Routable#build_full_path` walks `parent`; for a project that is its namespace.
alias_method :parent, :namespace
has_one :route, as: :source
has_many :vulnerabilities
# Reports the un-namespaced name 'Project'.
# NOTE(review): presumably so polymorphic columns (e.g. the route's source
# type) match rows written by the application model — confirm.
def self.polymorphic_name
'Project'
end
# Vulnerabilities of this project that are not linked to the latest
# pipeline with security reports. Returns an empty relation when no such
# pipeline is found.
def resolved_vulnerabilities
return Vulnerability.none unless latest_pipeline_id
vulnerabilities.not_found_in_pipeline_id(latest_pipeline_id)
end
private
delegate :connection, to: :'self.class', private: true
# Memoized id of the pipeline selected by LATEST_PIPELINE_WITH_REPORTS_SQL,
# or nil when the query returns no row.
def latest_pipeline_id
strong_memoize(:latest_pipeline_id) { pipeline_with_reports&.fetch('id') }
end
# First (and, given LIMIT 1, only) row of the raw query result, if any.
def pipeline_with_reports
connection.execute(pipeline_with_reports_sql).first
end
# Fills the SQL template; the branch name is quoted through the connection,
# while the project id and file types are interpolated as plain integers.
def pipeline_with_reports_sql
format(LATEST_PIPELINE_WITH_REPORTS_SQL, project_id: id, ref: connection.quote(default_branch), file_types: FILE_TYPES.join(', '))
end
# Root ref of the repository, falling back to the instance-wide default
# branch name (see `default_branch_from_preferences`).
def default_branch
@default_branch ||= repository.root_ref || default_branch_from_preferences
end
# Memoized migration-local Repository wrapper for this project.
def repository
@repository ||= Repository.new(full_path, self, shard: repository_storage, disk_path: storage.disk_path)
end
# Storage strategy used to compute the on-disk path: hashed when
# `hashed_repository_storage?` is true, legacy (full-path based) otherwise.
def storage
@storage ||=
if hashed_repository_storage?
Storage::Hashed.new(self)
else
Storage::LegacyProject.new(self)
end
end
# A nil `storage_version` is treated as 0 (legacy) because of `to_i`.
def hashed_repository_storage?
storage_version.to_i >= 1
end
# Only consulted when the repository is empty; returns nil otherwise.
def default_branch_from_preferences
::Gitlab::CurrentSettings.default_branch_name if repository.empty?
end
end
# Strategies for turning a project into its on-disk repository path.
module Storage
  # Hash-based layout: the path is derived from the SHA2 hex digest of the
  # container's id, e.g. "@hashed/ab/cd/abcd...".
  class Hashed
    REPOSITORY_PATH_PREFIX = '@hashed'

    attr_accessor :container

    def initialize(container)
      @container = container
    end

    # Directory that holds the repository: prefix plus the first two
    # two-character slices of the digest. Nil when the container has no id.
    def base_dir
      return unless disk_hash

      [REPOSITORY_PATH_PREFIX, disk_hash[0..1], disk_hash[2..3]].join('/')
    end

    # Full hashed path (base directory plus the whole digest).
    # Nil when the container has no id.
    def disk_path
      return unless disk_hash

      "#{base_dir}/#{disk_hash}"
    end

    private

    # Memoized SHA2 hex digest of the container id; nil without an id.
    def disk_hash
      return unless container.id

      @disk_hash ||= Digest::SHA2.hexdigest(container.id.to_s)
    end
  end

  # Legacy layout: the repository lives at the project's full path.
  class LegacyProject
    attr_accessor :project

    def initialize(project)
      @project = project
    end

    def disk_path
      project.full_path
    end
  end
end
# Migration-local model over the `namespaces` table.
class Namespace < ActiveRecord::Base
  include Routable
  include Visibility

  self.table_name = 'namespaces'

  belongs_to :parent, class_name: 'Namespace'

  class << self
    # Resolves STI type names to the classes defined inside this migration's
    # namespace by prefixing the stored type name before delegating to super.
    def find_sti_class(type_name)
      super("EE::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn::#{type_name}")
    end
  end
end
# STI subclass of the migration-local Namespace.
class Group < Namespace
  class << self
    # Reports the un-namespaced name 'Group' for polymorphic associations.
    def polymorphic_name
      'Group'
    end
  end
end
# Migration-local model over the `routes` table; it is the target of the
# `has_one :route, as: :source` association declared in Routable.
class Route < ActiveRecord::Base
self.table_name = 'routes'
end
# Migration-local model over the `vulnerabilities` table.
class Vulnerability < ActiveRecord::Base
  include EachBatch

  self.table_name = 'vulnerabilities'

  # Vulnerabilities that have no occurrence linked to the given pipeline.
  #
  # The pipeline id is passed as a named bind value so ActiveRecord quotes
  # it, instead of being spliced into the SQL text verbatim.
  scope :not_found_in_pipeline_id, -> (pipeline_id) do
    where(<<~SQL, pipeline_id: pipeline_id)
      NOT EXISTS (
        SELECT 1
        FROM vulnerability_occurrences vo
        INNER JOIN vulnerability_occurrence_pipelines vop ON vop.occurrence_id = vo.id
        WHERE vo.vulnerability_id = vulnerabilities.id AND vop.pipeline_id = :pipeline_id
      )
    SQL
  end
end
# This class depends on following classes
#   GlRepository class defined in `lib/gitlab/gl_repository.rb`
#   Repository class defined in `lib/gitlab/git/repository.rb`.
#
# Thin wrapper around `::Gitlab::Git::Repository` exposing only what the
# migration needs: the root ref and an emptiness check.
class Repository
  # @param full_path [String, nil] project full path; when nil no raw repository is built
  # @param container [Object] owner of the repository (responds to `full_path`)
  # @param shard [String] storage shard passed to the raw repository
  # @param disk_path [String, nil] on-disk path without the `.git` suffix; defaults to `full_path`
  # @param repo_type [Object] repository type used to derive the container identifier
  def initialize(full_path, container, shard:, disk_path: nil, repo_type: ::Gitlab::GlRepository::PROJECT)
    @full_path = full_path
    @shard = shard
    @disk_path = disk_path || full_path
    @container = container
    @commit_cache = {}
    @repo_type = repo_type
  end

  # Root ref of the raw repository, or nil when there is no raw repository
  # or it reports `NoRepository`.
  def root_ref
    raw_repository&.root_ref
  # Fully qualified on purpose: inside `EE::Gitlab` a bare `Gitlab` would be
  # looked up as `EE::Gitlab` first, and the rescue clause could then raise a
  # NameError instead of matching the exception.
  rescue ::Gitlab::Git::Repository::NoRepository
    nil
  end

  # True when the repository does not exist or has no visible content.
  def empty?
    return true unless exists?

    !has_visible_content?
  end

  private

  attr_reader :full_path, :shard, :disk_path, :container, :repo_type

  delegate :has_visible_content?, to: :raw_repository, private: true

  def exists?
    return false unless full_path

    raw_repository.exists?
  end

  # Lazily built raw repository; nil when there is no full path.
  def raw_repository
    return unless full_path

    @raw_repository ||= initialize_raw_repository
  end

  def initialize_raw_repository
    ::Gitlab::Git::Repository.new(shard,
                                  disk_path + '.git',
                                  repo_type.identifier_for_container(container),
                                  container.full_path)
  end
end
# Sets `resolved_on_default_branch` to true on the resolved vulnerabilities
# of a single project and logs the outcome. Any StandardError raised while
# doing so is logged instead of being propagated.
class PopulateResolvedOnDefaultBranchColumnForProject
  MIGRATOR = 'PopulateResolvedOnDefaultBranchColumnForProject'
  UPDATE_BATCH_SIZE = 100

  attr_accessor :project_id, :updated_count

  # Convenience entry point: builds the object and runs it.
  def self.perform(project_id)
    new(project_id).perform
  end

  def initialize(project_id)
    @project_id = project_id
    @updated_count = 0
  end

  # Updates the project's vulnerabilities, then logs an info entry with the
  # number of updated rows; on failure logs the error message instead.
  def perform
    update_vulnerabilities
    log_info
  rescue StandardError => e
    log_error(e)
  end

  private

  # Walks the project's vulnerabilities in batches and flags those that are
  # also part of the project's resolved set. Skipped entirely when the
  # project has no resolved vulnerabilities.
  def update_vulnerabilities
    return if resolved_scope.none?

    project.vulnerabilities.each_batch(of: UPDATE_BATCH_SIZE) do |batch|
      affected_rows = batch.merge(resolved_scope).update_all(resolved_on_default_branch: true)
      self.updated_count += affected_rows
    end
  end

  def resolved_scope
    project.resolved_vulnerabilities
  end

  def log_info
    ::Gitlab::BackgroundMigration::Logger.info(
      migrator: MIGRATOR,
      message: 'Project migrated',
      updated_count: updated_count,
      project_id: project_id
    )
  end

  def log_error(error)
    ::Gitlab::BackgroundMigration::Logger.error(
      migrator: MIGRATOR,
      message: error.message,
      project_id: project_id
    )
  end

  def project
    @project ||= Project.find(project_id)
  end
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the background migration: the outer `#perform` only checks the
# fan-out to the per-project utility class; the nested describe exercises
# the per-project class against real table rows.
RSpec.describe ::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn do
let(:users) { table(:users) }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:pipelines) { table(:ci_pipelines) }
let(:vulnerabilities) { table(:vulnerabilities) }
let(:findings) { table(:vulnerability_occurrences) }
let(:finding_pipelines) { table(:vulnerability_occurrence_pipelines) }
let(:builds) { table(:ci_builds) }
let(:artifacts) { table(:ci_job_artifacts) }
let(:scanners) { table(:vulnerability_scanners) }
let(:vulnerability_identifiers) { table(:vulnerability_identifiers) }
let(:namespace) { namespaces.create!(name: "foo", path: "bar") }
describe '#perform' do
let!(:project_1) { projects.create!(namespace_id: namespace.id) }
let!(:project_2) { projects.create!(namespace_id: namespace.id) }
let(:utility_class) { described_class::PopulateResolvedOnDefaultBranchColumnForProject }
# The ids are passed as a single array argument; `perform` flattens it.
subject(:populate_resolved_on_default_branch_column) { described_class.new.perform([project_1.id, project_2.id]) }
before do
# Stubbed so this example only verifies delegation, not the actual update.
allow(utility_class).to receive(:perform)
end
it 'calls `PopulateResolvedOnDefaultBranchColumnForProject.perform` for each project by given ids' do
populate_resolved_on_default_branch_column
expect(utility_class).to have_received(:perform).twice
expect(utility_class).to have_received(:perform).with(project_1.id)
expect(utility_class).to have_received(:perform).with(project_2.id)
end
end
describe EE::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn::PopulateResolvedOnDefaultBranchColumnForProject do
describe '.perform' do
let(:project_id) { 1 }
let(:mock_utility_object) { instance_double(described_class, perform: true) }
subject(:populate_for_project) { described_class.perform(project_id) }
before do
allow(described_class).to receive(:new).and_return(mock_utility_object)
end
it 'instantiates the utility service object and calls #perform on it' do
populate_for_project
expect(described_class).to have_received(:new).with(project_id)
expect(mock_utility_object).to have_received(:perform)
end
end
describe '#perform' do
let(:user) { users.create!(name: 'John Doe', email: 'test@example.com', projects_limit: 5) }
let(:project) { projects.create!(namespace_id: namespace.id) }
# Successful pipeline on `master`, which the `before` block below makes the stubbed default branch name.
let(:pipeline) { pipelines.create!(project_id: project.id, ref: 'master', sha: 'adf43c3a', status: 'success') }
let(:utility_object) { described_class.new(project.id) }
let(:scanner) { scanners.create!(project_id: project.id, external_id: 'bandit', name: 'Bandit') }
let(:sha_attribute) { Gitlab::Database::ShaAttribute.new }
let(:vulnerability_identifier) do
vulnerability_identifiers.create!(
project_id: project.id,
name: 'identifier',
fingerprint: sha_attribute.serialize('e6dd15eda2137be0034977a85b300a94a4f243a3'),
external_type: 'bar',
external_id: 'zoo')
end
# No finding is created for this vulnerability, so it is not linked to the
# pipeline and is expected to be flagged as resolved.
let(:disappeared_vulnerability) do
vulnerabilities.create!(
project_id: project.id,
author_id: user.id,
title: 'Vulnerability',
severity: 5,
confidence: 5,
report_type: 5
)
end
# Linked to the pipeline through the finding created in the `before` block,
# so it must stay untouched.
let(:existing_vulnerability) do
vulnerabilities.create!(
project_id: project.id,
author_id: user.id,
title: 'Vulnerability',
severity: 5,
confidence: 5,
report_type: 5
)
end
subject(:populate_for_project) { utility_object.perform }
before do
# A non-retried build with a file_type 5 artifact makes `pipeline` qualify
# as a pipeline with security reports.
build = builds.create!(commit_id: pipeline.id, retried: false, type: 'Ci::Build')
artifacts.create!(project_id: project.id, job_id: build.id, file_type: 5, file_format: 1)
finding = findings.create!(
project_id: project.id,
vulnerability_id: existing_vulnerability.id,
severity: 5,
confidence: 5,
report_type: 5,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identifier.id,
project_fingerprint: 'foo',
location_fingerprint: sha_attribute.serialize('d869ba3f0b3347eb2749135a437dc07c8ae0f420'),
uuid: SecureRandom.uuid,
name: 'Solar blast vulnerability',
metadata_version: '1',
raw_metadata: '')
finding_pipelines.create!(occurrence_id: finding.id, pipeline_id: pipeline.id)
# NOTE(review): presumably the project has no repository in this spec, so the
# default branch comes from this stubbed setting — confirm.
allow(::Gitlab::CurrentSettings).to receive(:default_branch_name).and_return(:master)
end
it 'sets `resolved_on_default_branch` attribute of disappeared vulnerabilities' do
expect { populate_for_project }.to change { disappeared_vulnerability.reload[:resolved_on_default_branch] }.from(false).to(true)
.and not_change { existing_vulnerability.reload[:resolved_on_default_branch] }
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
# Specs for the scheduling migration: nothing is enqueued on CE, and on EE
# one delayed job is enqueued per batch of project IDs.
RSpec.describe SchedulePopulateResolvedOnDefaultBranchColumn do
  before do
    # The migration calls `Gitlab.ee?` on the module itself, so the module
    # must be stubbed directly; `allow_any_instance_of(Gitlab)` would only
    # touch instance methods and leave `Gitlab.ee?` unstubbed.
    allow(Gitlab).to receive(:ee?).and_return(ee?)
  end

  around do |example|
    Timecop.freeze { Sidekiq::Testing.fake! { example.run } }
  end

  context 'when the Gitlab instance is CE' do
    let(:ee?) { false }

    it 'does not run the migration' do
      expect { migrate! }.not_to change { BackgroundMigrationWorker.jobs.size }
    end
  end

  context 'when the Gitlab instance is EE' do
    let(:ee?) { true }
    let(:namespaces) { table(:namespaces) }
    let(:projects) { table(:projects) }
    let(:vulnerabilities) { table(:vulnerabilities) }
    let(:users) { table(:users) }
    let(:namespace) { namespaces.create!(name: "foo", path: "bar") }
    let!(:project_1) { projects.create!(namespace_id: namespace.id) }
    let!(:project_2) { projects.create!(namespace_id: namespace.id) }
    # project_3 has no vulnerabilities and therefore must not get a job.
    let!(:project_3) { projects.create!(namespace_id: namespace.id) }
    let(:user) { users.create!(name: 'John Doe', email: 'test@example.com', projects_limit: 1) }
    let(:vulnerability_data) do
      {
        author_id: user.id,
        title: 'Vulnerability',
        severity: 5,
        confidence: 5,
        report_type: 5
      }
    end

    before do
      vulnerabilities.create!(**vulnerability_data, project_id: project_1.id)
      vulnerabilities.create!(**vulnerability_data, project_id: project_2.id)

      # One project per batch, so each project gets its own delayed job.
      stub_const("#{described_class.name}::BATCH_SIZE", 1)
    end

    it 'schedules the background jobs', :aggregate_failures do
      migrate!

      expect(BackgroundMigrationWorker.jobs.size).to be(2)
      expect(described_class::MIGRATION_CLASS).to be_scheduled_delayed_migration(5.minutes, project_1.id)
      expect(described_class::MIGRATION_CLASS).to be_scheduled_delayed_migration(10.minutes, project_2.id)
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # No-op base implementation: `perform` accepts any arguments and does nothing.
    # rubocop:disable Style/Documentation
    class PopulateResolvedOnDefaultBranchColumn
      def perform(*_project_ids)
        # Intentionally empty.
      end
    end
  end
end
# Hooks the EE implementation onto the no-op class above.
# NOTE(review): `prepend_if_ee` is defined outside this file; going by its
# name it presumably prepends the named module only on EE — confirm.
Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn.prepend_if_ee('EE::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment