Commit e82018c4 authored by Jarka Košanová

Merge branch '217811-add-vulnerability-historical-statistcs-populate-migration' into 'master'

Add post migration for populating Historical Statistics

See merge request gitlab-org/gitlab!37554
parents 999f7608 8d0dff57
---
title: Migrate vulnerability statistics historical data to vulnerability historical
  statistics
merge_request: 37554
author:
type: added
# frozen_string_literal: true
# Post-deployment migration that enqueues the
# PopulateVulnerabilityHistoricalStatistics background migration for the
# distinct project ids found in the vulnerabilities table.
class SchedulePopulateVulnerabilityHistoricalStatistics < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  # Spacing, in seconds, between two consecutively scheduled jobs.
  DELAY_INTERVAL = 2.minutes.to_i
  # How many distinct project ids are passed to a single job.
  BATCH_SIZE = 50
  MIGRATION = 'PopulateVulnerabilityHistoricalStatistics'

  disable_ddl_transaction!

  # Migration-local model bound to the vulnerabilities table, so the
  # migration does not depend on the application model.
  class Vulnerability < ActiveRecord::Base
    self.table_name = 'vulnerabilities'

    include ::EachBatch
  end

  # Schedules one delayed job per batch of project ids. Does nothing unless
  # Gitlab.ee? is true.
  def up
    return unless Gitlab.ee?

    distinct_project_ids = Vulnerability.select('project_id').distinct

    distinct_project_ids.each_batch(of: BATCH_SIZE, column: 'project_id') do |batch, batch_number|
      # Each batch is delayed by a multiple of DELAY_INTERVAL so the jobs are
      # spread out over time instead of running all at once.
      delay = batch_number * DELAY_INTERVAL

      migrate_in(delay, MIGRATION, [batch.pluck(:project_id)])
    end
  end

  def down
    # no-op
  end
end
f5f7cfe4a8f42a7116ecf50230df2705135bea4e5fd4c75512bb9ca15867919b
\ No newline at end of file
......@@ -6,6 +6,8 @@ module Vulnerabilities
self.table_name = 'vulnerability_historical_statistics'
RETENTION_PERIOD = 90.days
belongs_to :project, optional: false
validates :date, presence: true
......
# frozen_string_literal: true
module EE
  module Gitlab
    module BackgroundMigration
      # This class creates/updates those project historical vulnerability statistics
      # that haven't been created nor initialized.
      #
      # For every given project it computes, per day of the retention window,
      # how many vulnerabilities of each severity were open, and bulk-inserts
      # the result into vulnerability_historical_statistics.
      class PopulateVulnerabilityHistoricalStatistics
        # Length of the window (ending today, exclusive) that is back-filled.
        RETENTION_PERIOD = 90.days
        # Upper bound on the number of days covered by one SQL query.
        MAX_DAYS_IN_SINGLE_QUERY = 10
        # Baseline attributes; severities without any row for a day stay at 0.
        EMPTY_STATISTIC = {
          total: 0,
          critical: 0,
          high: 0,
          medium: 0,
          low: 0,
          unknown: 0,
          info: 0
        }.freeze

        # Migration-local model for the vulnerabilities table.
        class Vulnerability < ActiveRecord::Base
          self.table_name = 'vulnerabilities'

          enum severity: { info: 1, unknown: 2, low: 4, medium: 5, high: 6, critical: 7 }
        end

        # Migration-local model for the vulnerability_historical_statistics table.
        class VulnerabilityHistoricalStatistic < ActiveRecord::Base
          self.table_name = 'vulnerability_historical_statistics'

          enum letter_grade: { a: 0, b: 1, c: 2, d: 3, f: 4 }
        end

        # Populates the statistics of each project in turn.
        #
        # @param project_ids [Array<Integer>] ids of the projects to process
        #
        # A failure for one project is logged and does not prevent the
        # remaining projects from being processed.
        def perform(project_ids)
          project_ids.each do |project_id|
            upsert_vulnerability_historical_statistics(project_id)
          rescue => e
            error_message("Error updating statistics for project #{project_id}: #{e.message}")
          end
        end

        private

        # Builds the per-day statistics of one project and inserts them.
        #
        # @param project_id [Integer]
        # @return the result of insert_all, or nil when there is nothing to insert
        def upsert_vulnerability_historical_statistics(project_id)
          end_date = Date.today
          start_date = end_date - RETENTION_PERIOD
          time_now = Time.current

          statistics =
            counts_by_day_and_severity_in_batches(project_id, start_date, end_date, of: MAX_DAYS_IN_SINGLE_QUERY)
              .map { |day, day_statistics| prepare_historical_statistic_attributes(project_id, day, day_statistics, time_now) }
              .then { |attributes| deduplicate_statistics(attributes) }

          # On Rails 6.0 insert_all raises ArgumentError for an empty list.
          # A project without any countable vulnerability in the window is
          # not an error, so skip the insert instead of logging a failure.
          return if statistics.empty?

          VulnerabilityHistoricalStatistic.insert_all(statistics)
        end

        # Queries the window in slices of at most `of` days and groups the
        # returned rows by day. end_date itself is excluded from the window.
        #
        # @return [Hash] day => rows (one row per severity present on that day)
        def counts_by_day_and_severity_in_batches(project_id, start_date, end_date, of:)
          (start_date...end_date)
            .each_slice(of)
            .flat_map { |date_range| counts_by_day_and_severity(project_id, date_range.first, date_range.last) }
            .group_by(&:day)
        end

        # Builds the relation that yields one row per (day, severity) between
        # start_date and end_date (both inclusive) with the number of
        # vulnerabilities that were created on or before that day and were
        # neither dismissed nor resolved by then.
        def counts_by_day_and_severity(project_id, start_date, end_date)
          quoted_start_date = ActiveRecord::Base.connection.quote(start_date)
          quoted_end_date = ActiveRecord::Base.connection.quote(end_date)

          Vulnerability
            .where(project_id: project_id)
            .select('DATE(calendar.entry) AS day, severity, COUNT(*)')
            .from("generate_series(DATE #{quoted_start_date}, DATE #{quoted_end_date}, INTERVAL '1 day') as calendar(entry)")
            .joins('INNER JOIN vulnerabilities ON vulnerabilities.created_at <= calendar.entry')
            .where('(vulnerabilities.dismissed_at IS NULL OR vulnerabilities.dismissed_at > calendar.entry) AND (vulnerabilities.resolved_at IS NULL OR vulnerabilities.resolved_at > calendar.entry)')
            .group(:day, :severity)
        end

        # Turns the per-severity rows of a single day into one attribute hash
        # suitable for insert_all.
        def prepare_historical_statistic_attributes(project_id, day, statistics, time_now)
          severity_counts = statistics.map { |statistic| { statistic.severity.to_sym => statistic.count } }.inject(:merge)

          EMPTY_STATISTIC.merge(
            date: day,
            total: statistics.sum(&:count),
            letter_grade: letter_grade_for(severity_counts),
            created_at: time_now,
            updated_at: time_now,
            project_id: project_id,
            **severity_counts
          )
        end

        # Collapses each run of consecutive days whose attributes (ignoring
        # :date) are identical, keeping only the earliest day of the run.
        def deduplicate_statistics(statistics)
          statistics
            .sort_by { |statistic| statistic[:date] }
            .slice_when { |statistic_before, statistic| statistic_before.except(:date) != statistic.except(:date) }
            .map(&:first)
        end

        # Maps severity counts to a letter grade value: the worst severity
        # with a non-zero count decides the grade.
        def letter_grade_for(statistic)
          if statistic[:critical].to_i > 0
            VulnerabilityHistoricalStatistic.letter_grades[:f]
          elsif statistic[:high].to_i > 0 || statistic[:unknown].to_i > 0
            VulnerabilityHistoricalStatistic.letter_grades[:d]
          elsif statistic[:medium].to_i > 0
            VulnerabilityHistoricalStatistic.letter_grades[:c]
          elsif statistic[:low].to_i > 0
            VulnerabilityHistoricalStatistic.letter_grades[:b]
          else
            VulnerabilityHistoricalStatistic.letter_grades[:a]
          end
        end

        def logger
          @logger ||= ::Gitlab::BackgroundMigration::Logger.build
        end

        # Logs the message at error level with a migration-specific prefix.
        def error_message(message)
          logger.error(message: "Vulnerability Historical Statistics Migration: #{message}")
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe EE::Gitlab::BackgroundMigration::PopulateVulnerabilityHistoricalStatistics, schema: 2020_08_21_224343 do
  let(:users) { table(:users) }
  let(:namespaces) { table(:namespaces) }
  let(:vulnerabilities) { table(:vulnerabilities) }
  let(:historical_statistics) { table(:vulnerability_historical_statistics) }
  let(:projects) { table(:projects) }
  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
  let(:user) { users.create!(name: 'test', email: 'test@example.com', projects_limit: 5) }

  # Creates a project with three vulnerabilities:
  # * critical, created 5 days ago, dismissed today
  # * high, created 5 days ago, dismissed 1 day ago
  # * critical, created 4 days ago, resolved 2 days ago
  def create_project(id)
    project_params = {
      id: id,
      namespace_id: namespace.id,
      name: 'foo'
    }
    project = projects.create!(project_params)

    vulnerability_params = { title: 'title', state: 1, confidence: 5, report_type: 2, project_id: project.id, author_id: user.id }
    vulnerabilities.create!(vulnerability_params.merge(created_at: 5.days.ago, dismissed_at: Date.current, severity: 7))
    vulnerabilities.create!(vulnerability_params.merge(created_at: 5.days.ago, dismissed_at: 1.day.ago, severity: 6))
    vulnerabilities.create!(vulnerability_params.merge(created_at: 4.days.ago, resolved_at: 2.days.ago, severity: 7))
  end

  around do |example|
    Timecop.freeze(Date.parse('2020-07-28')) { example.run }
  end

  before do
    create_project(1)
    create_project(2)
  end

  describe '#perform' do
    it 'creates historical statistic rows according to projects', :aggregate_failures do
      # Count through the table helper rather than the application model, so
      # the spec only depends on the schema it is pinned to.
      expect { subject.perform([1, 2]) }.to change { historical_statistics.count }.by(8)

      # 2020-07-25 is absent on purpose: its counts equal those of 2020-07-24.
      created_rows = [
        { 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-23') },
        { 'letter_grade' => 4, 'project_id' => 1, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-24') },
        { 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-26') },
        { 'letter_grade' => 4, 'project_id' => 1, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-27') },
        { 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-23') },
        { 'letter_grade' => 4, 'project_id' => 2, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-24') },
        { 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-26') },
        { 'letter_grade' => 4, 'project_id' => 2, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-27') }
      ]

      rows = historical_statistics.order(:project_id, :date).map do |row|
        row.attributes.slice(*%w(letter_grade project_id total critical high medium low unknown info date))
      end

      expect(rows).to match_array(created_rows)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200821224343_schedule_populate_vulnerability_historical_statistics.rb')
RSpec.describe SchedulePopulateVulnerabilityHistoricalStatistics do
  let(:users) { table(:users) }
  let(:namespaces) { table(:namespaces) }
  let(:vulnerabilities) { table(:vulnerabilities) }
  let(:projects) { table(:projects) }
  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
  let(:user) { users.create!(name: 'test', email: 'test@example.com', projects_limit: 5) }

  # Creates a project with the given id and, when requested, a single
  # vulnerability that belongs to it.
  def create_project(id, with_vulnerabilities: false)
    project_params = {
      id: id,
      namespace_id: namespace.id,
      name: 'foo'
    }
    project = projects.create!(project_params)

    return unless with_vulnerabilities

    vulnerabilities.create!(title: 'title', state: 1, severity: 0, confidence: 5, report_type: 2, project_id: project.id, author_id: user.id)
  end

  it 'correctly schedules background migrations with projects with vulnerabilities only', :aggregate_failures do
    create_project(1, with_vulnerabilities: true)
    create_project(2, with_vulnerabilities: true)
    create_project(5, with_vulnerabilities: false)
    create_project(6, with_vulnerabilities: false)

    # One project per batch, so every project gets its own job and delay.
    stub_const("#{described_class.name}::BATCH_SIZE", 1)

    Sidekiq::Testing.fake! do
      Timecop.freeze do
        migrate!

        expect(described_class::MIGRATION)
          .to be_scheduled_delayed_migration(2.minutes, [1])
        expect(described_class::MIGRATION)
          .to be_scheduled_delayed_migration(4.minutes, [2])
        expect(BackgroundMigrationWorker.jobs.size).to eq(2)
      end
    end
  end

  context 'for FOSS version' do
    before do
      allow(Gitlab).to receive(:ee?).and_return(false)
    end

    it 'does not schedule any jobs' do
      # The project must own a vulnerability: without one nothing would be
      # scheduled on EE either, and this example could never fail.
      create_project(2, with_vulnerabilities: true)

      Sidekiq::Testing.fake! do
        Timecop.freeze do
          migrate!

          expect(BackgroundMigrationWorker.jobs.size).to eq(0)
        end
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment