Commit 8d0dff57 authored by Alan (Maciej) Paruszewski's avatar Alan (Maciej) Paruszewski Committed by Jarka Košanová

Add post migration for populating Vulnerability Historical Statistics

This change adds a post-deployment migration that populates the Vulnerability
Historical Statistics table with data from the last 90 days.
parent ac5ed9ff
---
title: Migrate vulnerability statistics historical data to vulnerability historical
statistics
merge_request: 37554
author:
type: added
# frozen_string_literal: true
# Post-deployment migration that schedules the
# `PopulateVulnerabilityHistoricalStatistics` background migration for every
# project that has at least one row in `vulnerabilities`.
#
# Nothing is scheduled unless `Gitlab.ee?` is true.
class SchedulePopulateVulnerabilityHistoricalStatistics < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  # Spacing, in seconds, between two consecutive scheduled jobs.
  DELAY_INTERVAL = 2.minutes.to_i
  # Number of distinct project ids handed to a single job.
  BATCH_SIZE = 50
  MIGRATION = 'PopulateVulnerabilityHistoricalStatistics'

  disable_ddl_transaction!

  # Migration-local model over `vulnerabilities`, extended with batching support.
  class Vulnerability < ActiveRecord::Base
    self.table_name = 'vulnerabilities'

    include ::EachBatch
  end

  # Schedules one delayed job per batch of distinct project ids; the delay
  # grows with the batch index so jobs are spread out over time.
  def up
    return unless Gitlab.ee?

    projects_with_vulnerabilities = Vulnerability.select('project_id').distinct

    projects_with_vulnerabilities.each_batch(of: BATCH_SIZE, column: 'project_id') do |batch, batch_number|
      delay = batch_number * DELAY_INTERVAL
      project_ids = batch.pluck(:project_id)

      migrate_in(delay, MIGRATION, [project_ids])
    end
  end

  # Intentionally does nothing.
  def down
    # no-op
  end
end
f5f7cfe4a8f42a7116ecf50230df2705135bea4e5fd4c75512bb9ca15867919b
\ No newline at end of file
......@@ -6,6 +6,8 @@ module Vulnerabilities
self.table_name = 'vulnerability_historical_statistics'
RETENTION_PERIOD = 90.days
belongs_to :project, optional: false
validates :date, presence: true
......
# frozen_string_literal: true
module EE
module Gitlab
module BackgroundMigration
# This class creates/updates those project historical vulnerability statistics
# that haven't been created nor initialized.
class PopulateVulnerabilityHistoricalStatistics
# Populates historical statistics for each of the given projects.
#
# A failure while processing one project is reported through +error_message+
# and does not prevent the remaining projects from being processed.
#
# @param project_ids [Array] ids of the projects to process
# @return [Array] the +project_ids+ argument (the result of +each+)
def perform(project_ids)
  project_ids.each do |project_id|
    begin
      upsert_vulnerability_historical_statistics(project_id)
    rescue StandardError => error
      error_message("Error updating statistics for project #{project_id}: #{error.message}")
    end
  end
end
private
# Length of the look-back window: statistics are computed for the days
# between (today - RETENTION_PERIOD) and today.
RETENTION_PERIOD = 90.days
# Maximum number of calendar days covered by one counting query.
MAX_DAYS_IN_SINGLE_QUERY = 10
# Zeroed counters used as the base of every generated row, so that a severity
# with no counted vulnerabilities on a given day still gets an explicit 0.
EMPTY_STATISTIC = %i[total critical high medium low unknown info]
  .map { |counter| [counter, 0] }
  .to_h
  .freeze
# Migration-local model over the `vulnerabilities` table.
# It declares only the severity enum, so query results expose `severity` by
# name (info, unknown, low, medium, high, critical) rather than by integer.
class Vulnerability < ActiveRecord::Base
self.table_name = 'vulnerabilities'
enum severity: { info: 1, unknown: 2, low: 4, medium: 5, high: 6, critical: 7 }
end
# Migration-local model over the `vulnerability_historical_statistics` table,
# which is where the computed rows are inserted.
# The letter_grade enum supplies the integer stored for each grade (a..f).
class VulnerabilityHistoricalStatistic < ActiveRecord::Base
self.table_name = 'vulnerability_historical_statistics'
enum letter_grade: { a: 0, b: 1, c: 2, d: 3, f: 4 }
end
# Computes and stores the historical statistics of one project for the
# RETENTION_PERIOD ending today.
#
# Per-day severity counts are fetched in slices of MAX_DAYS_IN_SINGLE_QUERY
# days, turned into row attributes, reduced so that runs of consecutive days
# with identical figures keep only their first day, and bulk-inserted.
#
# @param project_id [Integer] id of the project to process
# @return [Object, nil] the result of +insert_all+, or nil when there was
#   nothing to insert
def upsert_vulnerability_historical_statistics(project_id)
  end_date = Date.today
  start_date = end_date - RETENTION_PERIOD
  time_now = Time.current # shared by all rows of this run (created_at / updated_at)

  counts_by_day = counts_by_day_and_severity_in_batches(project_id, start_date, end_date, of: MAX_DAYS_IN_SINGLE_QUERY)

  rows = counts_by_day.map do |day, statistics|
    prepare_historical_statistic_attributes(project_id, day, statistics, time_now)
  end
  rows = deduplicate_statistics(rows)

  # A project can legitimately have no countable vulnerability in the window
  # (for example, all of them were created today). `insert_all` rejects an
  # empty list with ArgumentError on Rails 6.0, which the caller would log as
  # a failure, so skip the insert instead.
  return if rows.empty?

  VulnerabilityHistoricalStatistic.insert_all(rows)
end
# Collects the per-day, per-severity counts of a project, querying the
# period in slices of at most +of+ days.
#
# The period covers +start_date+ up to, but not including, +end_date+.
#
# @param project_id [Integer] id of the project
# @param start_date [Date] first day of the period
# @param end_date [Date] day after the last day of the period
# @param of [Integer] maximum number of days per query
# @return [Hash] rows returned by +counts_by_day_and_severity+, keyed by
#   their +day+
def counts_by_day_and_severity_in_batches(project_id, start_date, end_date, of:)
  day_slices = (start_date...end_date).each_slice(of)

  rows = day_slices.flat_map do |days|
    first_day = days.first
    last_day = days.last

    counts_by_day_and_severity(project_id, first_day, last_day)
  end

  rows.group_by { |row| row.day }
end
# Builds the query that counts, for every day from +start_date+ to
# +end_date+ (both included), the vulnerabilities of a project per severity.
#
# A vulnerability is counted for a calendar entry when it was created at or
# before that entry and was neither dismissed nor resolved at or before it.
#
# @param project_id [Integer] id of the project
# @param start_date [Date] first day of the generated calendar
# @param end_date [Date] last day of the generated calendar
# @return [ActiveRecord::Relation] rows exposing +day+, +severity+ and +count+
def counts_by_day_and_severity(project_id, start_date, end_date)
  connection = ActiveRecord::Base.connection
  quoted_start_date = connection.quote(start_date)
  quoted_end_date = connection.quote(end_date)

  # One calendar row per day of the requested period.
  calendar = "generate_series(DATE #{quoted_start_date}, DATE #{quoted_end_date}, INTERVAL '1 day') as calendar(entry)"
  # Pairs each calendar entry with the vulnerabilities that already existed.
  existing_on_entry = 'INNER JOIN vulnerabilities ON vulnerabilities.created_at <= calendar.entry'
  # Keeps only vulnerabilities not yet dismissed or resolved at that entry.
  open_on_entry = '(vulnerabilities.dismissed_at IS NULL OR vulnerabilities.dismissed_at > calendar.entry) AND (vulnerabilities.resolved_at IS NULL OR vulnerabilities.resolved_at > calendar.entry)'

  Vulnerability
    .where(project_id: project_id)
    .select('DATE(calendar.entry) AS day, severity, COUNT(*)')
    .from(calendar)
    .joins(existing_on_entry)
    .where(open_on_entry)
    .group(:day, :severity)
end
# Builds the attributes of one historical statistic row.
#
# Starts from EMPTY_STATISTIC so that every severity counter is present,
# then fills in the day, the total, the letter grade, the timestamps, the
# project and the severities that were actually counted.
#
# @param project_id [Integer] id of the project
# @param day [Date] day the row describes
# @param statistics [Array] counted rows of that day, each responding to
#   +severity+ and +count+
# @param time_now [Time] value used for both created_at and updated_at
# @return [Hash] attributes ready for insertion
def prepare_historical_statistic_attributes(project_id, day, statistics, time_now)
  severity_counts = statistics
    .map { |row| { row.severity.to_sym => row.count } }
    .reduce { |merged, single| merged.merge(single) }

  row_attributes = {
    date: day,
    total: statistics.sum(&:count),
    letter_grade: letter_grade_for(severity_counts),
    created_at: time_now,
    updated_at: time_now,
    project_id: project_id
  }

  EMPTY_STATISTIC.merge(row_attributes).merge(severity_counts)
end
# Collapses runs of consecutive days whose figures are identical.
#
# Rows are ordered by date; within each run of rows that are equal apart
# from +:date+, only the earliest row is kept.
#
# @param statistics [Array<Hash>] row attributes, each with a +:date+ key
# @return [Array<Hash>] the first row of every run, in date order
def deduplicate_statistics(statistics)
  chronological = statistics.sort_by { |row| row[:date] }

  runs = chronological.chunk_while do |previous_row, row|
    previous_row.except(:date) == row.except(:date)
  end

  runs.map(&:first)
end
# Picks the letter grade matching the worst severity present.
#
# critical => f; high or unknown => d; medium => c; low => b; otherwise a.
# Missing counters are treated as 0.
#
# @param statistic [Hash] counters keyed by severity
# @return [Integer] value of the grade in the +letter_grade+ enum
def letter_grade_for(statistic)
  any = ->(severity) { statistic[severity].to_i > 0 }
  grades = VulnerabilityHistoricalStatistic.letter_grades

  return grades[:f] if any.call(:critical)
  return grades[:d] if any.call(:high) || any.call(:unknown)
  return grades[:c] if any.call(:medium)
  return grades[:b] if any.call(:low)

  grades[:a]
end
# Returns the background migration logger, building it on first use and
# reusing the same instance afterwards.
def logger
  return @logger if @logger

  @logger = ::Gitlab::BackgroundMigration::Logger.build
end
# Logs +message+ at error level, prefixed with the name of this migration.
#
# @param message [String] description of the failure
def error_message(message)
  prefixed = "Vulnerability Historical Statistics Migration: #{message}"

  logger.error(message: prefixed)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Verifies that the background migration writes the expected historical
# statistic rows. All records are created and read through schema-pinned
# `table` helpers, so the spec does not depend on application models.
RSpec.describe EE::Gitlab::BackgroundMigration::PopulateVulnerabilityHistoricalStatistics, schema: 2020_08_21_224343 do
  let(:users) { table(:users) }
  let(:namespaces) { table(:namespaces) }
  let(:vulnerabilities) { table(:vulnerabilities) }
  let(:historical_statistics) { table(:vulnerability_historical_statistics) }
  let(:projects) { table(:projects) }
  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
  let(:user) { users.create!(name: 'test', email: 'test@example.com', projects_limit: 5) }

  # Creates a project with three vulnerabilities (dates relative to the frozen time):
  # - severity 7, created 5 days ago, dismissed on the current date
  # - severity 6, created 5 days ago, dismissed 1 day ago
  # - severity 7, created 4 days ago, resolved 2 days ago
  def create_project(id)
    project_params = {
      id: id,
      namespace_id: namespace.id,
      name: 'foo'
    }
    project = projects.create!(project_params)
    vulnerability_params = { title: 'title', state: 1, confidence: 5, report_type: 2, project_id: project.id, author_id: user.id }

    vulnerabilities.create!(vulnerability_params.merge(created_at: 5.days.ago, dismissed_at: Date.current, severity: 7))
    vulnerabilities.create!(vulnerability_params.merge(created_at: 5.days.ago, dismissed_at: 1.day.ago, severity: 6))
    vulnerabilities.create!(vulnerability_params.merge(created_at: 4.days.ago, resolved_at: 2.days.ago, severity: 7))
  end

  around do |example|
    Timecop.freeze(Date.parse('2020-07-28')) { example.run }
  end

  before do
    create_project(1)
    create_project(2)
  end

  describe '#perform' do
    it 'creates historical statistic rows according to projects', :aggregate_failures do
      # Count through the table helper rather than the application model.
      # Four rows per project: 2020-07-25 repeats the figures of 2020-07-24
      # and is therefore not stored.
      expect { subject.perform([1, 2]) }.to change { historical_statistics.count }.by(8)

      created_rows = [
        { 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-23') },
        { 'letter_grade' => 4, 'project_id' => 1, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-24') },
        { 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-26') },
        { 'letter_grade' => 4, 'project_id' => 1, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-27') },
        { 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-23') },
        { 'letter_grade' => 4, 'project_id' => 2, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-24') },
        { 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-26') },
        { 'letter_grade' => 4, 'project_id' => 2, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-27') }
      ]

      rows = historical_statistics.order(:project_id, :date).map do |row|
        row.attributes.slice(*%w(letter_grade project_id total critical high medium low unknown info date))
      end

      expect(rows).to match_array(created_rows)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200821224343_schedule_populate_vulnerability_historical_statistics.rb')
# Verifies that the post-deployment migration schedules background jobs only
# for projects that have vulnerabilities, and schedules nothing on FOSS.
RSpec.describe SchedulePopulateVulnerabilityHistoricalStatistics do
  let(:users) { table(:users) }
  let(:namespaces) { table(:namespaces) }
  let(:vulnerabilities) { table(:vulnerabilities) }
  let(:projects) { table(:projects) }
  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
  let(:user) { users.create!(name: 'test', email: 'test@example.com', projects_limit: 5) }

  # Creates a project with the given id and, when requested, one vulnerability
  # belonging to it.
  def create_project(id, with_vulnerabilities: false)
    project_params = {
      id: id,
      namespace_id: namespace.id,
      name: 'foo'
    }
    project = projects.create!(project_params)

    return unless with_vulnerabilities

    vulnerabilities.create!(title: 'title', state: 1, severity: 0, confidence: 5, report_type: 2, project_id: project.id, author_id: user.id)
  end

  it 'correctly schedules background migrations with projects with vulnerabilities only', :aggregate_failures do
    create_project(1, with_vulnerabilities: true)
    create_project(2, with_vulnerabilities: true)
    create_project(5, with_vulnerabilities: false)
    create_project(6, with_vulnerabilities: false)

    # One project per job, so each project id maps to its own delay.
    stub_const("#{described_class.name}::BATCH_SIZE", 1)

    Sidekiq::Testing.fake! do
      Timecop.freeze do
        migrate!

        expect(described_class::MIGRATION)
          .to be_scheduled_delayed_migration(2.minutes, [1])
        expect(described_class::MIGRATION)
          .to be_scheduled_delayed_migration(4.minutes, [2])
        expect(BackgroundMigrationWorker.jobs.size).to eq(2)
      end
    end
  end

  context 'for FOSS version' do
    before do
      allow(Gitlab).to receive(:ee?).and_return(false)
    end

    it 'does not schedule any jobs' do
      # The project must have a vulnerability: without one, no job would be
      # scheduled on EE either and this example could not detect a missing
      # `Gitlab.ee?` guard.
      create_project(2, with_vulnerabilities: true)

      Sidekiq::Testing.fake! do
        Timecop.freeze do
          migrate!

          expect(BackgroundMigrationWorker.jobs.size).to eq(0)
        end
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment