Commit b33aaccf authored by Hordur Freyr Yngvason, committed by Gabriel Mazetto

Port prometheus health check to cluster integration

parent ac6d25e8
...@@ -150,9 +150,7 @@ module Clusters ...@@ -150,9 +150,7 @@ module Clusters
scope :for_project_namespace, -> (namespace_id) { joins(:projects).where(projects: { namespace_id: namespace_id }) } scope :for_project_namespace, -> (namespace_id) { joins(:projects).where(projects: { namespace_id: namespace_id }) }
scope :with_name, -> (name) { where(name: name) } scope :with_name, -> (name) { where(name: name) }
# with_application_prometheus scope is deprecated, and scheduled for removal scope :with_integration_prometheus, -> { includes(:integration_prometheus).joins(:integration_prometheus) }
# in %14.0. See https://gitlab.com/groups/gitlab-org/-/epics/4280
scope :with_application_prometheus, -> { includes(:application_prometheus).joins(:application_prometheus) }
scope :with_project_http_integrations, -> (project_ids) do scope :with_project_http_integrations, -> (project_ids) do
conditions = { projects: :alert_management_http_integrations } conditions = { projects: :alert_management_http_integrations }
includes(conditions).joins(conditions).where(projects: { id: project_ids }) includes(conditions).joins(conditions).where(projects: { id: project_ids })
......
...@@ -14,6 +14,13 @@ module Clusters ...@@ -14,6 +14,13 @@ module Clusters
validates :cluster, presence: true validates :cluster, presence: true
validates :enabled, inclusion: { in: [true, false] } validates :enabled, inclusion: { in: [true, false] }
# Periodically checked and kept up to date for Monitor demo projects
enum health_status: {
unknown: 0,
healthy: 1,
unhealthy: 2
}
attr_encrypted :alert_manager_token, attr_encrypted :alert_manager_token,
mode: :per_attribute_iv, mode: :per_attribute_iv,
key: Settings.attr_encrypted_db_key_base_32, key: Settings.attr_encrypted_db_key_base_32,
......
# frozen_string_literal: true # frozen_string_literal: true
module Clusters module Clusters
module Applications module Integrations
class PrometheusHealthCheckService class PrometheusHealthCheckService
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
include Gitlab::Routing include Gitlab::Routing
...@@ -14,7 +14,7 @@ module Clusters ...@@ -14,7 +14,7 @@ module Clusters
def execute def execute
raise 'Invalid cluster type. Only project types are allowed.' unless @cluster.project_type? raise 'Invalid cluster type. Only project types are allowed.' unless @cluster.project_type?
return unless prometheus_application.installed? return unless prometheus_integration.enabled
project = @cluster.clusterable project = @cluster.clusterable
...@@ -28,32 +28,46 @@ module Clusters ...@@ -28,32 +28,46 @@ module Clusters
send_notification(project) if became_unhealthy? send_notification(project) if became_unhealthy?
prometheus_application.update_columns(healthy: currently_healthy?) if health_changed? prometheus_integration.update_columns(health_status: current_health_status) if health_changed?
end end
private private
def prometheus_application def prometheus_integration
strong_memoize(:prometheus_application) do strong_memoize(:prometheus_integration) do
@cluster.application_prometheus @cluster.integration_prometheus
end
end
def current_health_status
if currently_healthy?
:healthy
else
:unhealthy
end end
end end
def currently_healthy? def currently_healthy?
strong_memoize(:currently_healthy) do strong_memoize(:currently_healthy) do
prometheus_application.prometheus_client.healthy? prometheus_integration.prometheus_client.healthy?
end end
end end
def became_unhealthy? def became_unhealthy?
strong_memoize(:became_unhealthy) do strong_memoize(:became_unhealthy) do
(was_healthy? || was_healthy?.nil?) && !currently_healthy? (was_healthy? || was_unknown?) && !currently_healthy?
end end
end end
def was_healthy? def was_healthy?
strong_memoize(:was_healthy) do strong_memoize(:was_healthy) do
prometheus_application.healthy prometheus_integration.healthy?
end
end
def was_unknown?
strong_memoize(:was_unknown) do
prometheus_integration.unknown?
end end
end end
......
...@@ -1069,8 +1069,8 @@ ...@@ -1069,8 +1069,8 @@
:idempotent: :idempotent:
:tags: :tags:
- :needs_own_queue - :needs_own_queue
- :name: incident_management:clusters_applications_check_prometheus_health - :name: incident_management:clusters_integrations_check_prometheus_health
:worker_name: Clusters::Applications::CheckPrometheusHealthWorker :worker_name: Clusters::Integrations::CheckPrometheusHealthWorker
:feature_category: :incident_management :feature_category: :incident_management
:has_external_dependencies: true :has_external_dependencies: true
:urgency: :low :urgency: :low
......
# frozen_string_literal: true # frozen_string_literal: true
module Clusters module Clusters
module Applications module Integrations
class CheckPrometheusHealthWorker class CheckPrometheusHealthWorker
include ApplicationWorker include ApplicationWorker
...@@ -22,11 +22,11 @@ module Clusters ...@@ -22,11 +22,11 @@ module Clusters
def perform def perform
demo_project_ids = Gitlab::Monitor::DemoProjects.primary_keys demo_project_ids = Gitlab::Monitor::DemoProjects.primary_keys
clusters = Clusters::Cluster.with_application_prometheus clusters = Clusters::Cluster.with_integration_prometheus
.with_project_http_integrations(demo_project_ids) .with_project_http_integrations(demo_project_ids)
# Move to a separate worker with scoped context if expanded to do work on customer projects # Move to a separate worker with scoped context if expanded to do work on customer projects
clusters.each { |cluster| Clusters::Applications::PrometheusHealthCheckService.new(cluster).execute } clusters.each { |cluster| Clusters::Integrations::PrometheusHealthCheckService.new(cluster).execute }
end end
end end
end end
......
# frozen_string_literal: true
# Schema migration that adds a `health_status` column to the
# `clusters_integration_prometheus` table.
#
# The column is a non-null smallint defaulting to 0. In the integration
# model's `health_status` enum shown in this commit, 0 maps to `unknown`
# (1 is `healthy`, 2 is `unhealthy`), so existing rows start out as `unknown`.
class AddHealthStatusColumnOnClustersIntegrationPrometheus < Gitlab::Database::Migration[1.0]
# Adds the column; the resulting DDL in this commit's structure dump is
# `health_status smallint DEFAULT 0 NOT NULL`.
def change
# For now, health checks will only run on monitor demo projects
add_column :clusters_integration_prometheus, :health_status, :smallint, limit: 2, default: 0, null: false
end
end
97efc3bb2039b66dac98135d93baefc780a62571bd80aa39d7458f37ce92905b
\ No newline at end of file
...@@ -12620,7 +12620,8 @@ CREATE TABLE clusters_integration_prometheus ( ...@@ -12620,7 +12620,8 @@ CREATE TABLE clusters_integration_prometheus (
cluster_id bigint NOT NULL, cluster_id bigint NOT NULL,
enabled boolean DEFAULT false NOT NULL, enabled boolean DEFAULT false NOT NULL,
encrypted_alert_manager_token text, encrypted_alert_manager_token text,
encrypted_alert_manager_token_iv text encrypted_alert_manager_token_iv text,
health_status smallint DEFAULT 0 NOT NULL
); );
CREATE TABLE clusters_kubernetes_namespaces ( CREATE TABLE clusters_kubernetes_namespaces (
...@@ -178,13 +178,13 @@ RSpec.describe Clusters::Cluster, :use_clean_rails_memory_store_caching do ...@@ -178,13 +178,13 @@ RSpec.describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
end end
end end
describe '.with_application_prometheus' do describe '.with_integration_prometheus' do
subject { described_class.with_application_prometheus } subject { described_class.with_integration_prometheus }
let!(:cluster) { create(:cluster) } let!(:cluster) { create(:cluster) }
context 'cluster has prometheus application' do context 'cluster has prometheus application' do
let!(:application) { create(:clusters_applications_prometheus, :installed, cluster: cluster) } let!(:application) { create(:clusters_integrations_prometheus, cluster: cluster) }
it { is_expected.to include(cluster) } it { is_expected.to include(cluster) }
end end
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' do RSpec.describe Clusters::Integrations::PrometheusHealthCheckService, '#execute' do
let(:service) { described_class.new(cluster) } let(:service) { described_class.new(cluster) }
subject { service.execute } subject { service.execute }
...@@ -26,10 +26,10 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' ...@@ -26,10 +26,10 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end end
RSpec.shared_examples 'correct health stored' do RSpec.shared_examples 'correct health stored' do
it 'stores the correct health of prometheus app' do it 'stores the correct health of prometheus' do
subject subject
expect(prometheus.healthy).to eq(client_healthy) expect(prometheus.healthy?).to eq(client_healthy)
end end
end end
...@@ -43,19 +43,19 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' ...@@ -43,19 +43,19 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
let_it_be(:project) { create(:project) } let_it_be(:project) { create(:project) }
let_it_be(:integration) { create(:alert_management_http_integration, project: project) } let_it_be(:integration) { create(:alert_management_http_integration, project: project) }
let(:applications_prometheus_healthy) { true } let(:previous_health_status) { :healthy }
let(:prometheus) { create(:clusters_applications_prometheus, status: prometheus_status_value, healthy: applications_prometheus_healthy) } let(:prometheus) { create(:clusters_integrations_prometheus, enabled: prometheus_enabled, health_status: previous_health_status) }
let(:cluster) { create(:cluster, :project, application_prometheus: prometheus, projects: [project]) } let(:cluster) { create(:cluster, :project, integration_prometheus: prometheus, projects: [project]) }
context 'when prometheus not installed' do context 'when prometheus not enabled' do
let(:prometheus_status_value) { Clusters::Applications::Prometheus.state_machine.states[:installing].value } let(:prometheus_enabled) { false }
it { expect(subject).to eq(nil) } it { expect(subject).to eq(nil) }
include_examples 'no alert' include_examples 'no alert'
end end
context 'when prometheus installed' do context 'when prometheus enabled' do
let(:prometheus_status_value) { Clusters::Applications::Prometheus.state_machine.states[:installed].value } let(:prometheus_enabled) { true }
before do before do
client = instance_double('PrometheusClient', healthy?: client_healthy) client = instance_double('PrometheusClient', healthy?: client_healthy)
...@@ -63,7 +63,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' ...@@ -63,7 +63,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end end
context 'when newly unhealthy' do context 'when newly unhealthy' do
let(:applications_prometheus_healthy) { true } let(:previous_health_status) { :healthy }
let(:client_healthy) { false } let(:client_healthy) { false }
include_examples 'sends alert' include_examples 'sends alert'
...@@ -71,7 +71,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' ...@@ -71,7 +71,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end end
context 'when newly healthy' do context 'when newly healthy' do
let(:applications_prometheus_healthy) { false } let(:previous_health_status) { :unhealthy }
let(:client_healthy) { true } let(:client_healthy) { true }
include_examples 'no alert' include_examples 'no alert'
...@@ -79,7 +79,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' ...@@ -79,7 +79,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end end
context 'when continuously unhealthy' do context 'when continuously unhealthy' do
let(:applications_prometheus_healthy) { false } let(:previous_health_status) { :unhealthy }
let(:client_healthy) { false } let(:client_healthy) { false }
include_examples 'no alert' include_examples 'no alert'
...@@ -87,7 +87,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' ...@@ -87,7 +87,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end end
context 'when continuously healthy' do context 'when continuously healthy' do
let(:applications_prometheus_healthy) { true } let(:previous_health_status) { :healthy }
let(:client_healthy) { true } let(:client_healthy) { true }
include_examples 'no alert' include_examples 'no alert'
...@@ -95,7 +95,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' ...@@ -95,7 +95,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end end
context 'when first health check and healthy' do context 'when first health check and healthy' do
let(:applications_prometheus_healthy) { nil } let(:previous_health_status) { :unknown }
let(:client_healthy) { true } let(:client_healthy) { true }
include_examples 'no alert' include_examples 'no alert'
...@@ -103,7 +103,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' ...@@ -103,7 +103,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end end
context 'when first health check and not healthy' do context 'when first health check and not healthy' do
let(:applications_prometheus_healthy) { nil } let(:previous_health_status) { :unknown }
let(:client_healthy) { false } let(:client_healthy) { false }
include_examples 'sends alert' include_examples 'sends alert'
......
...@@ -2,16 +2,16 @@ ...@@ -2,16 +2,16 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Clusters::Applications::CheckPrometheusHealthWorker, '#perform' do RSpec.describe Clusters::Integrations::CheckPrometheusHealthWorker, '#perform' do
subject { described_class.new.perform } subject { described_class.new.perform }
it 'triggers health service' do it 'triggers health service' do
cluster = create(:cluster) cluster = create(:cluster)
allow(Gitlab::Monitor::DemoProjects).to receive(:primary_keys) allow(Gitlab::Monitor::DemoProjects).to receive(:primary_keys)
allow(Clusters::Cluster).to receive_message_chain(:with_application_prometheus, :with_project_http_integrations).and_return([cluster]) allow(Clusters::Cluster).to receive_message_chain(:with_integration_prometheus, :with_project_http_integrations).and_return([cluster])
service_instance = instance_double(Clusters::Applications::PrometheusHealthCheckService) service_instance = instance_double(Clusters::Integrations::PrometheusHealthCheckService)
expect(Clusters::Applications::PrometheusHealthCheckService).to receive(:new).with(cluster).and_return(service_instance) expect(Clusters::Integrations::PrometheusHealthCheckService).to receive(:new).with(cluster).and_return(service_instance)
expect(service_instance).to receive(:execute) expect(service_instance).to receive(:execute)
subject subject
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment