Commit 30631669 authored by Patrick Bair

Merge branch '323139-create-alert-escalations' into 'master'

Add Pending Alert Escalations table, model, services and worker

See merge request gitlab-org/gitlab!64274
parents 8b3a22a6 78ca2362
......@@ -225,6 +225,10 @@ module AlertManagement
open_statuses.include?(status)
end
def open?
self.class.open_status?(status_name)
end
def status_event_for(status)
self.class.state_machines[:status].events.transitions_for(self, to: status.to_s.to_sym).first&.event
end
......
......@@ -37,7 +37,6 @@ module AlertManagement
private
attr_reader :alert, :current_user, :params, :param_errors, :status
delegate :resolved?, to: :alert
def allowed?
current_user&.can?(:update_alert_management_alert, alert)
......@@ -129,7 +128,7 @@ module AlertManagement
def handle_status_change
add_status_change_system_note
resolve_todos if resolved?
resolve_todos if alert.resolved?
end
def add_status_change_system_note
......@@ -177,3 +176,5 @@ module AlertManagement
end
end
end
AlertManagement::Alerts::UpdateService.prepend_mod
......@@ -644,6 +644,9 @@ Gitlab.ee do
Settings.cron_jobs['incident_management_persist_oncall_rotation_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['incident_management_persist_oncall_rotation_worker']['cron'] ||= '*/5 * * * *'
Settings.cron_jobs['incident_management_persist_oncall_rotation_worker']['job_class'] = 'IncidentManagement::OncallRotations::PersistAllRotationsShiftsJob'
Settings.cron_jobs['incident_management_schedule_escalation_check_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['incident_management_schedule_escalation_check_worker']['cron'] ||= '*/1 * * * *'
Settings.cron_jobs['incident_management_schedule_escalation_check_worker']['job_class'] = 'IncidentManagement::PendingEscalations::ScheduleCheckCronWorker'
Settings.cron_jobs['import_software_licenses_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['import_software_licenses_worker']['cron'] ||= '0 3 * * 0'
Settings.cron_jobs['import_software_licenses_worker']['job_class'] = 'ImportSoftwareLicensesWorker'
......
......@@ -6,6 +6,10 @@
Gitlab::Database::Partitioning::PartitionCreator.register(AuditEvent)
Gitlab::Database::Partitioning::PartitionCreator.register(WebHookLog)
if Gitlab.ee?
Gitlab::Database::Partitioning::PartitionCreator.register(IncidentManagement::PendingEscalations::Alert)
end
begin
Gitlab::Database::Partitioning::PartitionCreator.new.create_partitions unless ENV['DISABLE_POSTGRES_PARTITION_CREATION_ON_STARTUP']
rescue ActiveRecord::ActiveRecordError, PG::Error
......
......@@ -184,6 +184,10 @@
- 1
- - incident_management_oncall_rotations_persist_shifts_job
- 1
- - incident_management_pending_escalations_alert_check
- 1
- - incident_management_pending_escalations_alert_create
- 1
- - invalid_gpg_signature_update
- 2
- - irker
......
# frozen_string_literal: true
# Creates the incident_management_pending_alert_escalations table, which is
# range-partitioned on process_at, together with its lookup indexes and
# foreign keys.
class CreateIncidentManagementPendingAlertEscalations < ActiveRecord::Migration[6.1]
  include Gitlab::Database::MigrationHelpers

  # Creates the table, three btree indexes and three foreign keys in one
  # statement batch, wrapped in lock retries.
  def up
    with_lock_retries do
      # The partition key (process_at) is part of the primary key.
      #
      # PostgreSQL truncates identifiers to 63 bytes. The natural name for the
      # schedule_id index ("..._on_schedule_id") is 66 characters long, so the
      # name PostgreSQL would actually store is written out explicitly here to
      # keep the migration and the schema dump in agreement.
      execute(<<~SQL)
        CREATE TABLE incident_management_pending_alert_escalations (
          id bigserial NOT NULL,
          rule_id bigint,
          alert_id bigint NOT NULL,
          schedule_id bigint NOT NULL,
          process_at timestamp with time zone NOT NULL,
          created_at timestamp with time zone NOT NULL,
          updated_at timestamp with time zone NOT NULL,
          status smallint NOT NULL,
          PRIMARY KEY (id, process_at)
        ) PARTITION BY RANGE (process_at);

        CREATE INDEX index_incident_management_pending_alert_escalations_on_alert_id
          ON incident_management_pending_alert_escalations USING btree (alert_id);

        CREATE INDEX index_incident_management_pending_alert_escalations_on_rule_id
          ON incident_management_pending_alert_escalations USING btree (rule_id);

        CREATE INDEX index_incident_management_pending_alert_escalations_on_schedule
          ON incident_management_pending_alert_escalations USING btree (schedule_id);

        ALTER TABLE incident_management_pending_alert_escalations ADD CONSTRAINT fk_rails_fcbfd9338b
          FOREIGN KEY (schedule_id) REFERENCES incident_management_oncall_schedules(id) ON DELETE CASCADE;

        ALTER TABLE incident_management_pending_alert_escalations ADD CONSTRAINT fk_rails_057c1e3d87
          FOREIGN KEY (rule_id) REFERENCES incident_management_escalation_rules(id) ON DELETE SET NULL;

        ALTER TABLE incident_management_pending_alert_escalations ADD CONSTRAINT fk_rails_8d8de95da9
          FOREIGN KEY (alert_id) REFERENCES alert_management_alerts(id) ON DELETE CASCADE;
      SQL
    end
  end

  # Drops the table again, under lock retries.
  def down
    with_lock_retries do
      drop_table :incident_management_pending_alert_escalations
    end
  end
end
fa4f1ec80e7039e59d283dc6effd6904ca33c637d27c687d990822eb2f6198e5
\ No newline at end of file
......@@ -190,6 +190,18 @@ CREATE TABLE audit_events (
)
PARTITION BY RANGE (created_at);
CREATE TABLE incident_management_pending_alert_escalations (
id bigint NOT NULL,
rule_id bigint,
alert_id bigint NOT NULL,
schedule_id bigint NOT NULL,
process_at timestamp with time zone NOT NULL,
created_at timestamp with time zone NOT NULL,
updated_at timestamp with time zone NOT NULL,
status smallint NOT NULL
)
PARTITION BY RANGE (process_at);
CREATE TABLE web_hook_logs (
id bigint NOT NULL,
web_hook_id integer NOT NULL,
......@@ -13943,6 +13955,15 @@ CREATE SEQUENCE incident_management_oncall_shifts_id_seq
ALTER SEQUENCE incident_management_oncall_shifts_id_seq OWNED BY incident_management_oncall_shifts.id;
CREATE SEQUENCE incident_management_pending_alert_escalations_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE incident_management_pending_alert_escalations_id_seq OWNED BY incident_management_pending_alert_escalations.id;
CREATE TABLE index_statuses (
id integer NOT NULL,
project_id integer NOT NULL,
......@@ -20054,6 +20075,8 @@ ALTER TABLE ONLY incident_management_oncall_schedules ALTER COLUMN id SET DEFAUL
ALTER TABLE ONLY incident_management_oncall_shifts ALTER COLUMN id SET DEFAULT nextval('incident_management_oncall_shifts_id_seq'::regclass);
ALTER TABLE ONLY incident_management_pending_alert_escalations ALTER COLUMN id SET DEFAULT nextval('incident_management_pending_alert_escalations_id_seq'::regclass);
ALTER TABLE ONLY index_statuses ALTER COLUMN id SET DEFAULT nextval('index_statuses_id_seq'::regclass);
ALTER TABLE ONLY insights ALTER COLUMN id SET DEFAULT nextval('insights_id_seq'::regclass);
......@@ -21438,6 +21461,9 @@ ALTER TABLE ONLY incident_management_oncall_schedules
ALTER TABLE ONLY incident_management_oncall_shifts
ADD CONSTRAINT incident_management_oncall_shifts_pkey PRIMARY KEY (id);
ALTER TABLE ONLY incident_management_pending_alert_escalations
ADD CONSTRAINT incident_management_pending_alert_escalations_pkey PRIMARY KEY (id, process_at);
ALTER TABLE ONLY index_statuses
ADD CONSTRAINT index_statuses_pkey PRIMARY KEY (id);
......@@ -23652,6 +23678,12 @@ CREATE INDEX index_incident_management_oncall_schedules_on_project_id ON inciden
CREATE INDEX index_incident_management_oncall_shifts_on_participant_id ON incident_management_oncall_shifts USING btree (participant_id);
CREATE INDEX index_incident_management_pending_alert_escalations_on_alert_id ON ONLY incident_management_pending_alert_escalations USING btree (alert_id);
CREATE INDEX index_incident_management_pending_alert_escalations_on_rule_id ON ONLY incident_management_pending_alert_escalations USING btree (rule_id);
CREATE INDEX index_incident_management_pending_alert_escalations_on_schedule ON ONLY incident_management_pending_alert_escalations USING btree (schedule_id);
CREATE UNIQUE INDEX index_index_statuses_on_project_id ON index_statuses USING btree (project_id);
CREATE INDEX index_insights_on_namespace_id ON insights USING btree (namespace_id);
......@@ -26374,6 +26406,9 @@ ALTER TABLE ONLY terraform_state_versions
ALTER TABLE ONLY ci_build_report_results
ADD CONSTRAINT fk_rails_056d298d48 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE;
ALTER TABLE incident_management_pending_alert_escalations
ADD CONSTRAINT fk_rails_057c1e3d87 FOREIGN KEY (rule_id) REFERENCES incident_management_escalation_rules(id) ON DELETE SET NULL;
ALTER TABLE ONLY ci_daily_build_group_report_results
ADD CONSTRAINT fk_rails_0667f7608c FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE;
......@@ -27217,6 +27252,9 @@ ALTER TABLE ONLY vulnerability_feedback
ALTER TABLE ONLY ci_pipeline_messages
ADD CONSTRAINT fk_rails_8d3b04e3e1 FOREIGN KEY (pipeline_id) REFERENCES ci_pipelines(id) ON DELETE CASCADE;
ALTER TABLE incident_management_pending_alert_escalations
ADD CONSTRAINT fk_rails_8d8de95da9 FOREIGN KEY (alert_id) REFERENCES alert_management_alerts(id) ON DELETE CASCADE;
ALTER TABLE ONLY approval_merge_request_rules_approved_approvers
ADD CONSTRAINT fk_rails_8dc94cff4d FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE;
......@@ -27877,6 +27915,9 @@ ALTER TABLE ONLY ci_job_variables
ALTER TABLE ONLY packages_nuget_metadata
ADD CONSTRAINT fk_rails_fc0c19f5b4 FOREIGN KEY (package_id) REFERENCES packages_packages(id) ON DELETE CASCADE;
ALTER TABLE incident_management_pending_alert_escalations
ADD CONSTRAINT fk_rails_fcbfd9338b FOREIGN KEY (schedule_id) REFERENCES incident_management_oncall_schedules(id) ON DELETE CASCADE;
ALTER TABLE ONLY external_approval_rules
ADD CONSTRAINT fk_rails_fd4f9ac573 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE;
......@@ -13,9 +13,13 @@ module IncidentManagement
# @option oncall_at [ActiveSupport::TimeWithZone]
# Limits users to only those
# on-call at the specified time.
def initialize(project, oncall_at: Time.current)
# @option schedule [IncidentManagement::OncallSchedule]
# Limits the users to rotations within a
# specific schedule
def initialize(project, oncall_at: Time.current, schedule: nil)
@project = project
@oncall_at = oncall_at
@schedule = schedule
end
# @return [User::ActiveRecord_Relation]
......@@ -28,7 +32,7 @@ module IncidentManagement
private
attr_reader :project, :oncall_at
attr_reader :project, :oncall_at, :schedule
def user_ids
strong_memoize(:user_ids) do
......@@ -44,11 +48,17 @@ module IncidentManagement
ids_for_persisted_shifts.flat_map(&:first)
end
# Rotations in which on-call users are looked up: the rotations of the given
# schedule when one was supplied, otherwise all rotations of the project.
# The result is cached through strong_memoize.
def rotations
  strong_memoize(:rotations) do
    if schedule
      schedule.rotations
    else
      project.incident_management_oncall_rotations
    end
  end
end
# @return [Array<[rotation_id, user_id]>]
# @example - [ [1, 16], [2, 200] ]
def ids_for_persisted_shifts
strong_memoize(:ids_for_persisted_shifts) do
project.incident_management_oncall_rotations
rotations
.merge(IncidentManagement::OncallShift.for_timestamp(oncall_at))
.pluck_id_and_user_id
end
......@@ -63,7 +73,7 @@ module IncidentManagement
end
def rotations_without_persisted_shifts
project.incident_management_oncall_rotations
rotations
.except_ids(rotation_ids_for_persisted_shifts)
.with_shift_generation_associations
end
......
......@@ -8,6 +8,8 @@ module EE
prepended do
include AfterCommitQueue
has_many :pending_escalations, class_name: 'IncidentManagement::PendingEscalations::Alert', foreign_key: :alert_id, inverse_of: :alert
after_create do |alert|
run_after_commit { alert.trigger_auto_rollback }
end
......
......@@ -13,5 +13,7 @@ module IncidentManagement
validates :rules, presence: true
accepts_nested_attributes_for :rules
scope :with_rules, -> { includes(:rules) }
end
end
......@@ -4,6 +4,8 @@ module IncidentManagement
class EscalationRule < ApplicationRecord
self.table_name = 'incident_management_escalation_rules'
MAX_RULE_PER_POLICY_COUNT = 10
belongs_to :policy, class_name: 'EscalationPolicy', inverse_of: 'rules', foreign_key: 'policy_id'
belongs_to :oncall_schedule, class_name: 'OncallSchedule', inverse_of: 'rotations', foreign_key: 'oncall_schedule_id'
......@@ -16,5 +18,16 @@ module IncidentManagement
numericality: { only_integer: true, greater_than_or_equal_to: 0, less_than_or_equal_to: 24.hours }
validates :policy_id, uniqueness: { scope: [:oncall_schedule_id, :status, :elapsed_time_seconds], message: _('must have a unique schedule, status, and elapsed time') }
validate :rules_count_not_exceeded, on: :create, if: :policy
private
# Validation hook: adds a base error when the policy would hold more than
# MAX_RULE_PER_POLICY_COUNT rules.
def rules_count_not_exceeded
  rules_count = policy.rules.size
  # When the rule is not being created together with a new policy, this rule
  # has to be counted in addition to the policy's current rules.
  rules_count += 1 unless policy.new_record?
  return unless rules_count > MAX_RULE_PER_POLICY_COUNT

  errors.add(:base, "cannot have more than #{MAX_RULE_PER_POLICY_COUNT} rules")
end
end
end
# frozen_string_literal: true
module IncidentManagement
module PendingEscalations
# A pending escalation for an alert, stored in
# incident_management_pending_alert_escalations and partitioned monthly on
# process_at.
class Alert < ApplicationRecord
include PartitionedTable
include EachBatch
# Lets callers address the associated alert as `target`.
alias_attribute :target, :alert
# NOTE(review): presumably set explicitly because the table's primary key is
# composite (id, process_at) - confirm.
self.primary_key = :id
self.table_name = 'incident_management_pending_alert_escalations'
# How far into the past the `processable` scope reaches.
ESCALATION_BUFFER = 1.month.freeze
partitioned_by :process_at, strategy: :monthly
belongs_to :oncall_schedule, class_name: 'OncallSchedule', foreign_key: 'schedule_id'
belongs_to :alert, class_name: 'AlertManagement::Alert', foreign_key: 'alert_id', inverse_of: :pending_escalations
belongs_to :rule, class_name: 'EscalationRule', foreign_key: 'rule_id', optional: true
# Escalations whose process_at lies between ESCALATION_BUFFER ago and now.
# No ordering is applied.
scope :processable, -> { where(process_at: ESCALATION_BUFFER.ago..Time.current) }
# Only the :acknowledged and :resolved entries of the alert statuses are
# accepted here.
enum status: AlertManagement::Alert::STATUSES.slice(:acknowledged, :resolved)
validates :process_at, presence: true
validates :status, presence: true
# NOTE(review): rule_id presence is required here although the :rule
# association is declared optional - confirm which of the two is intended.
validates :rule_id, presence: true, uniqueness: { scope: [:alert_id] }
delegate :project, to: :alert
end
end
end
......@@ -11,7 +11,16 @@ module EE
def complete_post_processing_tasks
super
notify_oncall if oncall_notification_recipients.present? && notifying_alert?
notify_oncall if !escalation_policies_available? && oncall_notification_recipients.present? && notifying_alert?
process_escalations
end
# Keeps pending escalations in step with the alert: removes them once the
# alert is resolved or ignored, and schedules their creation when the alert
# record has only just been created. Does nothing otherwise.
def process_escalations
  return delete_pending_escalations if alert.resolved? || alert.ignored?

  create_pending_escalations if alert.previously_new_record?
end
def notify_oncall
......@@ -25,6 +34,19 @@ module EE
::IncidentManagement::OncallUsersFinder.new(project).execute
end
end
def escalation_policies_available?
::Gitlab::IncidentManagement.escalation_policies_available?(project)
end
def delete_pending_escalations
# We use :delete_all here to avoid null constraint errors. (the default is :nullify).
alert.pending_escalations.delete_all(:delete_all)
end
def create_pending_escalations
::IncidentManagement::PendingEscalations::AlertCreateWorker.perform_async(alert.id)
end
end
end
end
# frozen_string_literal: true
module EE
  module AlertManagement
    module Alerts
      # EE extension of the alert update service that keeps an alert's pending
      # escalations in step with its status changes.
      module UpdateService
        extend ::Gitlab::Utils::Override

        # Runs the base status-change handling, then:
        # - deletes pending escalations when the alert is now resolved or ignored;
        # - schedules escalation creation when the alert moved from a non-open
        #   status to an open one.
        override :handle_status_change
        def handle_status_change
          super

          delete_pending_escalations if alert.resolved? || alert.ignored?

          # status_previously_was is the raw stored status value, while
          # open_status? is called with status names elsewhere (Alert#open? passes
          # status_name). Translate the value to its name before asking, otherwise
          # the previous status never counts as open and a move between two open
          # statuses would schedule escalation creation again.
          previous_status = ::AlertManagement::Alert::STATUSES.key(alert.status_previously_was)&.to_sym
          was_open = ::AlertManagement::Alert.open_status?(previous_status)

          create_pending_escalations if !was_open && alert.open?
        end

        private

        # Removes every pending escalation of the alert with a direct delete
        # (:delete_all) instead of the association's default strategy.
        def delete_pending_escalations
          alert.pending_escalations.delete_all(:delete_all)
        end

        # Enqueues the worker that creates the alert's pending escalations.
        def create_pending_escalations
          ::IncidentManagement::PendingEscalations::AlertCreateWorker.perform_async(alert.id)
        end
      end
    end
  end
end
# frozen_string_literal: true
module IncidentManagement
  module PendingEscalations
    # Creates one pending escalation per rule of the project's first escalation
    # policy for the given target and enqueues a check for each of them.
    class CreateService < BaseService
      # @param target [#project, #resolved?] the record to escalate (an alert)
      def initialize(target)
        @target = target
        @project = target.project
        @process_time = Time.current
      end

      # Does nothing when escalation policies are unavailable for the project,
      # when the target is resolved, or when the project has no policy.
      def execute
        return unless ::Gitlab::IncidentManagement.escalation_policies_available?(project) && !target.resolved?

        policy = escalation_policies.first

        return unless policy

        create_escalations(policy.rules)
      end

      private

      attr_reader :target, :project, :process_time

      def escalation_policies
        project.incident_management_escalation_policies.with_rules
      end

      # Persists an escalation for every rule and then enqueues the checks.
      # The records are created in a single transaction so that a failure
      # part-way through does not leave a partial set behind.
      def create_escalations(rules)
        escalation_ids = IncidentManagement::PendingEscalations::Alert.transaction do
          rules.map { |rule| create_escalation(rule).id }
        end

        process_escalations(escalation_ids)
      end

      # Builds the escalation from the rule; it becomes due
      # rule.elapsed_time_seconds after the time this service was instantiated.
      def create_escalation(rule)
        IncidentManagement::PendingEscalations::Alert.create!(
          target: target,
          rule: rule,
          schedule_id: rule.oncall_schedule_id,
          status: rule.status,
          process_at: rule.elapsed_time_seconds.seconds.after(process_time)
        )
      end

      # Enqueues one AlertCheckWorker job per escalation id.
      def process_escalations(escalation_ids)
        args = escalation_ids.map { |id| [id] }

        ::IncidentManagement::PendingEscalations::AlertCheckWorker.bulk_perform_async(args) # rubocop:disable Scalability/BulkPerformWithContext
      end
    end
  end
end
# frozen_string_literal: true
module IncidentManagement
  module PendingEscalations
    # Processes a single pending escalation: notifies the on-call users of the
    # escalation's schedule and then removes the escalation.
    class ProcessService < BaseService
      # @param escalation [IncidentManagement::PendingEscalations::Alert]
      def initialize(escalation)
        @escalation = escalation
        @project = escalation.project
        @oncall_schedule = escalation.oncall_schedule
        @target = escalation.target
      end

      # Returns early, without notifying anyone, when escalation policies are
      # unavailable for the project, when process_at has not been reached yet,
      # when the target is already resolved (the escalation is destroyed in that
      # case), or when the target's status is at or beyond the escalation's.
      def execute
        return unless ::Gitlab::IncidentManagement.escalation_policies_available?(project)
        return if too_early_to_process?
        return if target_already_resolved?
        return if target_status_exceeded_rule?

        notify_recipients
        destroy_escalation!
      end

      private

      attr_reader :escalation, :project, :target, :oncall_schedule

      # Destroys the escalation as a side effect when the target is resolved.
      # Returns an explicit boolean rather than relying on the truthiness of
      # whatever destroy! returns.
      def target_already_resolved?
        return false unless target.resolved?

        destroy_escalation!
        true
      end

      # Compares the target's stored status value with the escalation's raw
      # status value. The escalation is kept in this case.
      def target_status_exceeded_rule?
        target.status >= escalation.status_before_type_cast
      end

      def too_early_to_process?
        Time.current < escalation.process_at
      end

      def notify_recipients
        NotificationService
          .new
          .async
          .notify_oncall_users_of_alert(oncall_notification_recipients.to_a, target)
      end

      # Users found by the on-call finder for the escalation's schedule.
      def oncall_notification_recipients
        ::IncidentManagement::OncallUsersFinder.new(project, schedule: oncall_schedule).execute
      end

      def destroy_escalation!
        escalation.destroy!
      end
    end
  end
end
......@@ -314,6 +314,15 @@
:idempotent: true
:tags:
- :exclude_from_kubernetes
- :name: cronjob:incident_management_pending_escalations_schedule_check_cron
:worker_name: IncidentManagement::PendingEscalations::ScheduleCheckCronWorker
:feature_category: :incident_management
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:iterations_cadences_create_iterations
:worker_name: Iterations::Cadences::CreateIterationsWorker
:feature_category: :issue_tracking
......@@ -1063,6 +1072,24 @@
:idempotent: true
:tags:
- :exclude_from_kubernetes
- :name: incident_management_pending_escalations_alert_check
:worker_name: IncidentManagement::PendingEscalations::AlertCheckWorker
:feature_category: :incident_management
:has_external_dependencies:
:urgency: :high
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: incident_management_pending_escalations_alert_create
:worker_name: IncidentManagement::PendingEscalations::AlertCreateWorker
:feature_category: :incident_management
:has_external_dependencies:
:urgency: :high
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: ldap_group_sync
:worker_name: LdapGroupSyncWorker
:feature_category: :authentication_and_authorization
......
# frozen_string_literal: true
module IncidentManagement
  module PendingEscalations
    # Runs the ProcessService for a single pending alert escalation.
    class AlertCheckWorker
      include ApplicationWorker

      urgency :high

      idempotent!
      feature_category :incident_management

      # @param escalation_id [Integer] id of the pending escalation to check
      def perform(escalation_id)
        # The escalation may be gone by the time this job runs: it is destroyed
        # once processed and deleted when its alert is resolved or ignored, and
        # jobs for the same id are enqueued from more than one place. A missing
        # record is therefore a no-op rather than an error.
        escalation = IncidentManagement::PendingEscalations::Alert.find_by_id(escalation_id)
        return unless escalation

        IncidentManagement::PendingEscalations::ProcessService.new(escalation).execute
      end
    end
  end
end
# frozen_string_literal: true
module IncidentManagement
  module PendingEscalations
    # Runs the CreateService to set up pending escalations for one alert.
    class AlertCreateWorker
      include ApplicationWorker

      urgency :high

      idempotent!
      feature_category :incident_management

      # @param alert_id [Integer] id of the alert to create escalations for
      def perform(alert_id)
        # The job runs asynchronously, so the alert may no longer exist when it
        # is picked up; there is nothing to escalate in that case.
        alert = ::AlertManagement::Alert.find_by_id(alert_id)
        return unless alert

        ::IncidentManagement::PendingEscalations::CreateService.new(alert).execute
      end
    end
  end
end
# frozen_string_literal: true
module IncidentManagement
  module PendingEscalations
    # Cron worker that enqueues an AlertCheckWorker job for every pending
    # escalation returned by the `processable` scope.
    class ScheduleCheckCronWorker
      include ApplicationWorker
      # This worker does not perform work scoped to a context
      include CronjobQueue # rubocop:disable Scalability/CronWorkerContext

      idempotent!
      feature_category :incident_management

      # Iterates over the processable escalations in batches and bulk-enqueues
      # one check job per escalation id.
      def perform
        processable_escalations = ::IncidentManagement::PendingEscalations::Alert.processable

        processable_escalations.each_batch do |batch|
          job_args = batch.pluck(:id).map { |escalation_id| [escalation_id] } # rubocop:disable CodeReuse/ActiveRecord

          ::IncidentManagement::PendingEscalations::AlertCheckWorker.bulk_perform_async(job_args) # rubocop:disable Scalability/BulkPerformWithContext
        end
      end
    end
  end
end
......@@ -10,5 +10,17 @@ FactoryBot.define do
trait :utc do
timezone { 'Etc/UTC' }
end
trait :with_rotation do
transient do
rotation_count { 1 }
end
after(:create) do |schedule, evaluator|
evaluator.rotation_count.times do
schedule.rotations << create(:incident_management_oncall_rotation, :with_participants, schedule: schedule)
end
end
end
end
end
# frozen_string_literal: true
# Factory for IncidentManagement::PendingEscalations::Alert records.
FactoryBot.define do
factory :incident_management_pending_alert_escalation, class: 'IncidentManagement::PendingEscalations::Alert' do
# Transient helpers: the same project is handed to the policy, the on-call
# schedule and the alert below, and the policy is handed to the rule.
transient do
project { create(:project) } # rubocop:disable FactoryBot/InlineAssociation
policy { create(:incident_management_escalation_policy, project: project) } # rubocop:disable FactoryBot/InlineAssociation
end
rule { association :incident_management_escalation_rule, policy: policy }
oncall_schedule { association :incident_management_oncall_schedule, project: project }
alert { association :alert_management_alert, project: project }
# Defaults: escalate at the :acknowledged rule status, due five minutes from now.
status { IncidentManagement::EscalationRule.statuses[:acknowledged] }
process_at { 5.minutes.from_now }
end
end
......@@ -54,9 +54,10 @@ RSpec.describe IncidentManagement::OncallUsersFinder do
let_it_be(:proj2_s1_r1_shift2) { create(:incident_management_oncall_shift, participant: proj2_s1_r1_p2, starts_at: proj2_s1_r1_shift1.ends_at) }
let(:oncall_at) { Time.current }
let(:schedule) { nil }
describe '#execute' do
subject(:execute) { described_class.new(project, oncall_at: oncall_at).execute }
subject(:execute) { described_class.new(project, oncall_at: oncall_at, schedule: schedule).execute }
context 'when feature is available' do
before do
......@@ -69,6 +70,12 @@ RSpec.describe IncidentManagement::OncallUsersFinder do
it { is_expected.to contain_exactly(user_1, user_2, user_4) }
end
context 'with :schedule paramater specified' do
let(:schedule) { s1 }
it { is_expected.to contain_exactly(user_1, user_2) }
end
context 'with :oncall_at parameter specified' do
let(:during_first_shift) { Time.current }
let(:during_second_shift) { s1_r2_shift2.starts_at + 5.minutes }
......
......@@ -2,10 +2,14 @@
require 'spec_helper'
RSpec.describe EE::AlertManagement::Alert do
RSpec.describe AlertManagement::Alert do
let_it_be(:project, refind: true) { create(:project) }
let_it_be(:environment, refind: true) { create(:environment, project: project) }
describe 'associations' do
it { is_expected.to have_many(:pending_escalations).class_name('IncidentManagement::PendingEscalations::Alert') }
end
describe 'after_create' do
it 'attempts to trigger auto rollback' do
alert = build(:alert_management_alert, :triggered, :critical)
......
......@@ -20,5 +20,13 @@ RSpec.describe IncidentManagement::EscalationRule do
it { is_expected.to validate_presence_of(:elapsed_time_seconds) }
it { is_expected.to validate_numericality_of(:elapsed_time_seconds).is_greater_than_or_equal_to(0).is_less_than_or_equal_to(24.hours) }
it { is_expected.to validate_uniqueness_of(:policy_id).scoped_to([:oncall_schedule_id, :status, :elapsed_time_seconds] ).with_message('must have a unique schedule, status, and elapsed time') }
it 'validates the number of rules' do
policy = create(:incident_management_escalation_policy, rule_count: 10)
rule = build(:incident_management_escalation_rule, policy: policy)
expect(rule).not_to be_valid
expect(rule.errors).to contain_exactly("cannot have more than #{described_class::MAX_RULE_PER_POLICY_COUNT} rules")
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Model spec for pending alert escalations: validations, associations and the
# `.processable` scope.
RSpec.describe IncidentManagement::PendingEscalations::Alert do
  subject { create(:incident_management_pending_alert_escalation) }

  it { is_expected.to be_valid }

  describe 'validations' do
    it { is_expected.to validate_presence_of(:process_at) }
    it { is_expected.to validate_presence_of(:status) }
    it { is_expected.to delegate_method(:project).to(:alert) }
    it { is_expected.to validate_uniqueness_of(:rule_id).scoped_to([:alert_id]) }
  end

  describe 'associations' do
    it { is_expected.to belong_to(:oncall_schedule) }
    it { is_expected.to belong_to(:alert) }
    it { is_expected.to belong_to(:rule) }
  end

  describe 'scopes' do
    describe '.processable' do
      subject { described_class.processable }

      let_it_be(:policy) { create(:incident_management_escalation_policy) }
      let_it_be(:rule) { policy.rules.first }

      let_it_be(:two_months_ago_escalation) { create(:incident_management_pending_alert_escalation, rule: rule, process_at: 2.months.ago) }
      let_it_be(:three_weeks_ago_escalation) { create(:incident_management_pending_alert_escalation, rule: rule, process_at: 3.weeks.ago) }
      let_it_be(:three_days_ago_escalation) { create(:incident_management_pending_alert_escalation, rule: rule, process_at: 3.days.ago) }
      let_it_be(:future_escalation) { create(:incident_management_pending_alert_escalation, rule: rule, process_at: 5.minutes.from_now) }

      # The scope applies no ordering, so only membership is asserted; an
      # order-sensitive comparison could fail depending on how rows come back.
      it { is_expected.to contain_exactly(three_weeks_ago_escalation, three_days_ago_escalation) }
    end
  end
end
......@@ -13,7 +13,8 @@ RSpec.describe AlertManagement::ProcessPrometheusAlertService do
context 'when alert payload is valid' do
let_it_be(:starts_at) { '2020-04-27T10:10:22.265949279Z' }
let_it_be(:title) { 'Alert title' }
let_it_be(:gitlab_fingerprint) { Digest::SHA1.hexdigest([starts_at, title, 'vector(1)'].join('/')) }
let_it_be(:plain_fingerprint) { [starts_at, title, 'vector(1)'].join('/') }
let_it_be(:gitlab_fingerprint) { Digest::SHA1.hexdigest(plain_fingerprint) }
let(:payload) { raw_payload }
let(:raw_payload) do
......@@ -45,6 +46,35 @@ RSpec.describe AlertManagement::ProcessPrometheusAlertService do
include_examples 'oncall users are correctly notified of recovery alert'
end
context 'with escalation policies ready' do
let_it_be(:project) { schedule.project }
let_it_be(:policy) { create(:incident_management_escalation_policy, project: project) }
before do
stub_licensed_features(oncall_schedules: true, escalation_policies: true)
stub_feature_flags(escalation_policies_mvc: project)
end
it_behaves_like 'does not send on-call notification'
include_examples 'creates an escalation', 1
context 'existing alert is now resolved' do
let(:payload) { raw_payload.merge('status' => 'resolved') }
let!(:target) { create(:alert_management_alert, :from_payload, project: project, payload: payload, fingerprint: gitlab_fingerprint) }
let!(:pending_escalation) { create(:incident_management_pending_alert_escalation, alert: target) }
include_examples "deletes the target's escalations"
context 'with escalation policy feature disabled' do
before do
stub_feature_flags(escalation_policies_mvc: false)
end
include_examples "deletes the target's escalations"
end
end
end
end
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe AlertManagement::Alerts::UpdateService do
let_it_be(:user_with_permissions) { create(:user) }
let_it_be(:project) { create(:project) }
let_it_be(:escalation_policy) { create(:incident_management_escalation_policy, project: project) }
let_it_be(:alert, reload: true) { create(:alert_management_alert, :triggered, project: project) }
let(:current_user) { user_with_permissions }
let(:params) { {} }
let(:service) { described_class.new(alert, current_user, params) }
before do
stub_licensed_features(oncall_schedules: true, escalation_policies: true)
stub_feature_flags(escalation_policies_mvc: project)
end
before_all do
project.add_developer(user_with_permissions)
end
describe '#execute' do
context 'when a status is included' do
let(:params) { { status: new_status } }
subject(:execute) { service.execute }
context 'when moving from a closed status to an open status' do
let_it_be(:alert, reload: true) { create(:alert_management_alert, :resolved, project: project) }
let(:new_status) { :triggered }
it_behaves_like 'creates an escalation'
end
context 'moving from an open status to closed status' do
let_it_be(:alert) { create(:alert_management_alert, :triggered, project: project) }
let_it_be(:escalation) { create(:incident_management_pending_alert_escalation, alert: alert) }
let(:new_status) { :resolved }
let(:target) { alert }
include_examples "deletes the target's escalations"
context 'with escalation policy feature disabled' do
before do
stub_feature_flags(escalation_policies_mvc: false)
end
include_examples "deletes the target's escalations"
end
end
context 'moving from a status of the same group' do
let(:new_status) { :ignored }
it 'does not create or delete escalations' do
expect { execute }.to change { IncidentManagement::PendingEscalations::Alert.count }.by(0)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe IncidentManagement::PendingEscalations::CreateService do
let_it_be(:project) { create(:project) }
let_it_be(:target) { create(:alert_management_alert, project: project) }
let_it_be(:rule_count) { 2 }
let!(:escalation_policy) { create(:incident_management_escalation_policy, project: project, rule_count: rule_count) }
let(:rules) { escalation_policy.rules }
let(:service) { described_class.new(target) }
subject(:execute) { service.execute }
context 'feature not available' do
it 'does nothing' do
expect { execute }.not_to change { IncidentManagement::PendingEscalations::Alert.count }
end
end
# Exercises the service with the on-call/escalation-policy licensed features
# stubbed on and the `escalation_policies_mvc` flag enabled for the project.
context 'feature available' do
  before do
    stub_licensed_features(oncall_schedules: true, escalation_policies: true)
    stub_feature_flags(escalation_policies_mvc: project)
  end

  context 'target is resolved' do
    let(:target) { create(:alert_management_alert, :resolved, project: project) }

    it 'does nothing' do
      expect { execute }.not_to change { IncidentManagement::PendingEscalations::Alert.count }
    end
  end

  it 'creates an escalation for each rule for the policy' do
    # Captured before execution so `process_at` can be compared against it.
    execution_time = Time.current

    expect { execute }.to change { IncidentManagement::PendingEscalations::Alert.count }.by(rule_count)

    first_escalation, second_escalation = target.pending_escalations.order(created_at: :asc)
    first_rule, second_rule = rules

    expect_escalation_attributes_with(escalation: first_escalation, target: target, rule: first_rule, execution_time: execution_time)
    expect_escalation_attributes_with(escalation: second_escalation, target: target, rule: second_rule, execution_time: execution_time)
  end

  context 'when there is no escalation policy for the project' do
    # Overrides the eagerly-created policy so none exists for the project.
    let!(:escalation_policy) { nil }

    it 'does nothing' do
      expect { execute }.not_to change { IncidentManagement::PendingEscalations::Alert.count }
    end
  end

  it 'creates the escalations and queues the escalation process check' do
    # One single-element argument list per created escalation. Built from
    # `rule_count` so the expectation cannot drift from the fixture size.
    expected_job_args = Array.new(rule_count) { [a_kind_of(Integer)] }

    expect(IncidentManagement::PendingEscalations::AlertCheckWorker)
      .to receive(:bulk_perform_async)
      .with(expected_job_args)

    expect { execute }.to change { IncidentManagement::PendingEscalations::Alert.count }.by(rule_count)
  end

  # Asserts that `escalation` was built from `rule` for `target`.
  #
  # escalation     - pending escalation record to inspect
  # target         - alert the escalation should point at
  # rule           - escalation rule whose id, schedule and status are expected
  # execution_time - reference time; `process_at` must be within one minute of
  #                  this time plus the rule's `elapsed_time_seconds`
  def expect_escalation_attributes_with(escalation:, target:, rule:, execution_time: Time.current)
    expect(escalation).to have_attributes(
      rule_id: rule.id,
      alert_id: target.id,
      schedule_id: rule.oncall_schedule_id,
      status: rule.status,
      process_at: be_within(1.minute).of(rule.elapsed_time_seconds.seconds.after(execution_time))
    )
  end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the service that processes a single pending alert escalation:
# depending on the escalation and its target it either notifies the on-call
# users and deletes the escalation, deletes it silently, or leaves it alone.
RSpec.describe IncidentManagement::PendingEscalations::ProcessService do
  let_it_be(:project) { create(:project) }
  let_it_be(:schedule_1) { create(:incident_management_oncall_schedule, :with_rotation, project: project) }
  let_it_be(:schedule_1_users) { schedule_1.participants.map(&:user) }

  let(:escalation_rule) { build(:incident_management_escalation_rule, oncall_schedule: schedule_1) }
  let!(:escalation_policy) { create(:incident_management_escalation_policy, project: project, rules: [escalation_rule]) }

  # Default target: an alert in the triggered status.
  let(:alert) { create(:alert_management_alert, project: project, **alert_params) }
  let(:alert_params) { { status: AlertManagement::Alert::STATUSES[:triggered] } }
  let(:target) { alert }

  # Default escalation is already due (process_at in the past) and uses the
  # rule's `acknowledged` status.
  let(:process_at) { 5.minutes.ago }
  let(:escalation) { create(:incident_management_pending_alert_escalation, rule: escalation_rule, oncall_schedule: schedule_1, target: target, status: IncidentManagement::EscalationRule.statuses[:acknowledged], process_at: process_at) }

  let(:service) { described_class.new(escalation) }

  before do
    stub_licensed_features(oncall_schedules: true, escalation_policies: true)
    stub_feature_flags(escalation_policies_mvc: project)
  end

  describe '#execute' do
    subject(:execute) { service.execute }

    shared_examples 'it does not escalate' do
      it_behaves_like 'does not send on-call notification'

      it 'does not delete the escalation' do
        subject

        # Bare `not_to raise_error`: negating a specific error class would
        # also pass when any *other* error is raised (RSpec warns about that
        # false positive), hiding real failures of `reload`.
        expect { escalation.reload }.not_to raise_error
      end
    end

    shared_examples 'deletes the escalation' do
      specify do
        subject

        expect { escalation.reload }.to raise_error(ActiveRecord::RecordNotFound)
      end
    end

    context 'all conditions are met' do
      # `users` and `alert` are consumed by the 'sends on-call notification'
      # shared example.
      let(:users) { schedule_1_users }

      it_behaves_like 'sends on-call notification'
      it_behaves_like 'deletes the escalation'

      context 'feature flag is off' do
        before do
          stub_feature_flags(escalation_policies_mvc: false)
        end

        it_behaves_like 'it does not escalate'
      end
    end

    context 'target is already resolved' do
      let(:target) { create(:alert_management_alert, :resolved, project: project) }

      it_behaves_like 'does not send on-call notification'
      it_behaves_like 'deletes the escalation'
    end

    context 'target status is not above threshold' do
      # Target is acknowledged, the same status the escalation is created with.
      let(:target) { create(:alert_management_alert, :acknowledged, project: project) }

      it_behaves_like 'it does not escalate'
    end

    context 'escalation is not ready to be processed' do
      let(:process_at) { 5.minutes.from_now }

      it_behaves_like 'it does not escalate'
    end
  end
end
......@@ -16,6 +16,10 @@ RSpec.describe Projects::Alerting::NotifyService do
}
end
# The escalation-policy flag is switched off by default here; contexts that
# exercise escalations re-enable it for the project.
before do
stub_feature_flags(escalation_policies_mvc: false)
end
# `service`, `token` and `integration` are defined outside this excerpt.
subject { service.execute(token, integration) }
context 'existing alert with same payload fingerprint' do
......@@ -91,6 +95,52 @@ RSpec.describe Projects::Alerting::NotifyService do
include_examples 'oncall users are correctly notified of recovery alert'
end
# With an escalation policy present and the feature enabled, the examples
# expect no direct on-call notification; an escalation is created instead.
context 'with escalation policies ready' do
  let_it_be(:policy) { create(:incident_management_escalation_policy, project: project) }

  before do
    # Licensed features are stubbed with booleans, matching the other specs;
    # only the feature-flag stub is scoped to the project.
    stub_licensed_features(oncall_schedules: true, escalation_policies: true)
    stub_feature_flags(escalation_policies_mvc: project)
  end

  it_behaves_like 'does not send on-call notification'
  include_examples 'creates an escalation'

  context 'existing alert with same payload fingerprint' do
    let_it_be(:alert) { create(:alert_management_alert, fingerprint: gitlab_fingerprint, project: project) }
    let_it_be(:pending_escalation) { create(:incident_management_pending_alert_escalation, alert: alert) }

    it 'does not create an escalation' do
      expect { subject }.not_to change { alert.pending_escalations.count }
    end

    context 'with resolving payload' do
      # An `end_time` in the payload marks the alert as resolving.
      let(:payload) do
        {
          'fingerprint' => fingerprint,
          'end_time' => Time.current.iso8601
        }
      end

      context 'with existing alert escalation' do
        # The alert's escalation comes from `pending_escalation` in the
        # enclosing 'existing alert' context; no second record is needed.
        let(:target) { alert }

        include_examples "deletes the target's escalations"

        context 'with escalation policy feature disabled' do
          before do
            stub_feature_flags(escalation_policies_mvc: false)
          end

          include_examples "deletes the target's escalations"
        end
      end
    end
  end
end
end
end
end
# frozen_string_literal: true
# Shared example: evaluating `subject` must enqueue the alert escalation
# creation worker with a single Integer argument.
RSpec.shared_examples 'creates an escalation' do
  specify do
    create_worker = IncidentManagement::PendingEscalations::AlertCreateWorker

    expect(create_worker).to receive(:perform_async).with(kind_of(Integer))

    subject
  end
end
# Shared example: `target` must start with at least one pending escalation
# and have none left after `subject` is evaluated.
RSpec.shared_examples "deletes the target's escalations" do
  specify do
    initial_count = target.pending_escalations.count
    # Guard against a vacuous pass when there was nothing to delete.
    expect(initial_count).to be > 0

    expect { subject }
      .to change { target.pending_escalations.reload.count }
      .from(initial_count)
      .to(0)
  end
end
......@@ -5,16 +5,17 @@
# - `gitlab_fingerprint`, SHA which is used to uniquely identify the alert
RSpec.shared_examples 'sends on-call notification if enabled' do
context 'with on-call schedules enabled' do
let(:notification_async) { double(NotificationService::Async) }
let(:alert) { having_attributes(class: AlertManagement::Alert, fingerprint: gitlab_fingerprint) }
it 'sends on-call notification' do
allow(NotificationService).to receive_message_chain(:new, :async).and_return(notification_async)
expect(notification_async).to receive(:notify_oncall_users_of_alert).with(
users,
having_attributes(class: AlertManagement::Alert, fingerprint: gitlab_fingerprint)
)
it_behaves_like 'sends on-call notification'
subject
context 'escalation policy features are disabled' do
before do
stub_licensed_features(oncall_schedules: true, escalation_policies: false)
stub_feature_flags(escalation_policies_mvc: false)
end
it_behaves_like 'sends on-call notification'
end
end
......@@ -27,6 +28,20 @@ RSpec.shared_examples 'sends on-call notification if enabled' do
end
end
# Shared example: evaluating `subject` must notify `users` about `alert`
# through the async notification service. The including context supplies
# `users`, `alert` and `subject`.
RSpec.shared_examples 'sends on-call notification' do
  let(:notification_async) { double(NotificationService::Async) }

  specify do
    # Route `NotificationService.new.async` to the double.
    allow(NotificationService).to receive_message_chain(:new, :async).and_return(notification_async)
    expect(notification_async).to receive(:notify_oncall_users_of_alert).with(users, alert)

    subject
  end
end
RSpec.shared_examples 'does not send on-call notification' do
specify do
expect(NotificationService).not_to receive(:new)
......
# frozen_string_literal: true
require 'spec_helper'
# Verifies the worker looks up the escalation by id and delegates to the
# process service.
RSpec.describe IncidentManagement::PendingEscalations::AlertCheckWorker do
  let_it_be(:escalation) { create(:incident_management_pending_alert_escalation) }

  let(:worker) { described_class.new }
  let(:process_service) { spy(IncidentManagement::PendingEscalations::ProcessService) }

  describe '#perform' do
    subject { worker.perform(escalation.id) }

    it 'processes the escalation' do
      expect(IncidentManagement::PendingEscalations::ProcessService)
        .to receive(:new)
        .with(escalation)
        .and_return(process_service)

      subject

      expect(process_service).to have_received(:execute)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Verifies the cron worker enqueues a check job for every pending escalation
# that is due, and only for those.
RSpec.describe IncidentManagement::PendingEscalations::ScheduleCheckCronWorker do
  let(:worker) { described_class.new }

  let_it_be(:escalation_1) { create(:incident_management_pending_alert_escalation, process_at: 5.minutes.ago) }
  let_it_be(:escalation_2) { create(:incident_management_pending_alert_escalation, process_at: 2.days.ago) }
  # NOTE(review): relies on the factory's default `process_at` not being due
  # yet — confirm against the factory definition.
  let_it_be(:escalation_not_ready_to_process) { create(:incident_management_pending_alert_escalation) }

  describe '#perform' do
    subject { worker.perform }

    it 'schedules a job for each processable escalation' do
      # `contain_exactly` is order-independent but rejects extra entries, so
      # the example also fails if the not-yet-due escalation is scheduled.
      expect(IncidentManagement::PendingEscalations::AlertCheckWorker).to receive(:bulk_perform_async)
        .with(contain_exactly([escalation_1.id], [escalation_2.id]))

      subject
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment