Commit 4e499a76 authored by Matthias Käppler, committed by James Lopez

Track failures in usage ping payload

This exports a `failures` field carrying information
about failed queries during topology data collection.
parent 0d34cf28
...@@ -594,6 +594,7 @@ appear to be associated to any of the services running, since they all appear to ...@@ -594,6 +594,7 @@ appear to be associated to any of the services running, since they all appear to
| `ldap_enabled` | | | | | | | `ldap_enabled` | | | | | |
| `mattermost_enabled` | | | | | | | `mattermost_enabled` | | | | | |
| `omniauth_enabled` | | | | | | | `omniauth_enabled` | | | | | |
| `prometheus_enabled` | | | | | Whether the bundled Prometheus is enabled |
| `prometheus_metrics_enabled` | | | | | | | `prometheus_metrics_enabled` | | | | | |
| `reply_by_email_enabled` | | | | | | | `reply_by_email_enabled` | | | | | |
| `average` | `avg_cycle_analytics - code` | | | | | | `average` | `avg_cycle_analytics - code` | | | | |
...@@ -671,6 +672,7 @@ appear to be associated to any of the services running, since they all appear to ...@@ -671,6 +672,7 @@ appear to be associated to any of the services running, since they all appear to
| `merge_requests_users` | `usage_activity_by_stage_monthly` | `create` | | | Unique count of users who used a merge request | | `merge_requests_users` | `usage_activity_by_stage_monthly` | `create` | | | Unique count of users who used a merge request |
| `duration_s` | `topology` | `enablement` | | | Time it took to collect topology data | | `duration_s` | `topology` | `enablement` | | | Time it took to collect topology data |
| `application_requests_per_hour` | `topology` | `enablement` | | | Number of requests to the web application per hour | | `application_requests_per_hour` | `topology` | `enablement` | | | Number of requests to the web application per hour |
| `failures` | `topology` | `enablement` | | | Contains information about failed queries |
| `nodes` | `topology` | `enablement` | | | The list of server nodes on which GitLab components are running | | `nodes` | `topology` | `enablement` | | | The list of server nodes on which GitLab components are running |
| `node_memory_total_bytes` | `topology > nodes` | `enablement` | | | The total available memory of this node | | `node_memory_total_bytes` | `topology > nodes` | `enablement` | | | The total available memory of this node |
| `node_cpus` | `topology > nodes` | `enablement` | | | The number of CPU cores of this node | | `node_cpus` | `topology > nodes` | `enablement` | | | The number of CPU cores of this node |
...@@ -723,6 +725,7 @@ The following is example content of the Usage Ping payload. ...@@ -723,6 +725,7 @@ The following is example content of the Usage Ping payload.
"ldap_enabled": false, "ldap_enabled": false,
"mattermost_enabled": false, "mattermost_enabled": false,
"omniauth_enabled": true, "omniauth_enabled": true,
"prometheus_enabled": false,
"prometheus_metrics_enabled": false, "prometheus_metrics_enabled": false,
"reply_by_email_enabled": "incoming+%{key}@incoming.gitlab.com", "reply_by_email_enabled": "incoming+%{key}@incoming.gitlab.com",
"signup_enabled": true, "signup_enabled": true,
...@@ -879,6 +882,7 @@ The following is example content of the Usage Ping payload. ...@@ -879,6 +882,7 @@ The following is example content of the Usage Ping payload.
"topology": { "topology": {
"duration_s": 0.013836685999194742, "duration_s": 0.013836685999194742,
"application_requests_per_hour": 4224, "application_requests_per_hour": 4224,
"failures": [],
"nodes": [ "nodes": [
{ {
"node_memory_total_bytes": 33269903360, "node_memory_total_bytes": 33269903360,
......
...@@ -5,6 +5,8 @@ module Gitlab ...@@ -5,6 +5,8 @@ module Gitlab
class PrometheusClient class PrometheusClient
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
Error = Class.new(StandardError) Error = Class.new(StandardError)
ConnectionError = Class.new(Gitlab::PrometheusClient::Error)
UnexpectedResponseError = Class.new(Gitlab::PrometheusClient::Error)
QueryError = Class.new(Gitlab::PrometheusClient::Error) QueryError = Class.new(Gitlab::PrometheusClient::Error)
HEALTHY_RESPONSE = "Prometheus is Healthy.\n" HEALTHY_RESPONSE = "Prometheus is Healthy.\n"
...@@ -44,7 +46,7 @@ module Gitlab ...@@ -44,7 +46,7 @@ module Gitlab
path = api_path(type) path = api_path(type)
get(path, args) get(path, args)
rescue Gitlab::HTTP::ResponseError => ex rescue Gitlab::HTTP::ResponseError => ex
raise PrometheusClient::Error, "Network connection error" unless ex.response && ex.response.try(:code) raise PrometheusClient::ConnectionError, "Network connection error" unless ex.response && ex.response.try(:code)
handle_querying_api_response(ex.response) handle_querying_api_response(ex.response)
end end
...@@ -115,7 +117,7 @@ module Gitlab ...@@ -115,7 +117,7 @@ module Gitlab
response = get(path, args) response = get(path, args)
handle_querying_api_response(response) handle_querying_api_response(response)
rescue Gitlab::HTTP::ResponseError => ex rescue Gitlab::HTTP::ResponseError => ex
raise PrometheusClient::Error, "Network connection error" unless ex.response && ex.response.try(:code) raise PrometheusClient::ConnectionError, "Network connection error" unless ex.response && ex.response.try(:code)
handle_querying_api_response(ex.response) handle_querying_api_response(ex.response)
end end
...@@ -137,18 +139,18 @@ module Gitlab ...@@ -137,18 +139,18 @@ module Gitlab
def get(path, args) def get(path, args)
Gitlab::HTTP.get(path, { query: args }.merge(http_options) ) Gitlab::HTTP.get(path, { query: args }.merge(http_options) )
rescue SocketError rescue SocketError
raise PrometheusClient::Error, "Can't connect to #{api_url}" raise PrometheusClient::ConnectionError, "Can't connect to #{api_url}"
rescue OpenSSL::SSL::SSLError rescue OpenSSL::SSL::SSLError
raise PrometheusClient::Error, "#{api_url} contains invalid SSL data" raise PrometheusClient::ConnectionError, "#{api_url} contains invalid SSL data"
rescue Errno::ECONNREFUSED rescue Errno::ECONNREFUSED
raise PrometheusClient::Error, 'Connection refused' raise PrometheusClient::ConnectionError, 'Connection refused'
end end
def handle_management_api_response(response) def handle_management_api_response(response)
if response.code == 200 if response.code == 200
response.body response.body
else else
raise PrometheusClient::Error, "#{response.code} - #{response.body}" raise PrometheusClient::UnexpectedResponseError, "#{response.code} - #{response.body}"
end end
end end
...@@ -156,7 +158,7 @@ module Gitlab ...@@ -156,7 +158,7 @@ module Gitlab
response_code = response.try(:code) response_code = response.try(:code)
response_body = response.try(:body) response_body = response.try(:body)
raise PrometheusClient::Error, "#{response_code} - #{response_body}" unless response_code raise PrometheusClient::UnexpectedResponseError, "#{response_code} - #{response_body}" unless response_code
json_data = parse_json(response_body) if [200, 400].include?(response_code) json_data = parse_json(response_body) if [200, 400].include?(response_code)
...@@ -166,7 +168,7 @@ module Gitlab ...@@ -166,7 +168,7 @@ module Gitlab
when 400 when 400
raise PrometheusClient::QueryError, json_data['error'] || 'Bad data received' raise PrometheusClient::QueryError, json_data['error'] || 'Bad data received'
else else
raise PrometheusClient::Error, "#{response_code} - #{response_body}" raise PrometheusClient::UnexpectedResponseError, "#{response_code} - #{response_body}"
end end
end end
...@@ -178,7 +180,7 @@ module Gitlab ...@@ -178,7 +180,7 @@ module Gitlab
def parse_json(response_body) def parse_json(response_body)
Gitlab::Json.parse(response_body, legacy_mode: true) Gitlab::Json.parse(response_body, legacy_mode: true)
rescue JSON::ParserError rescue JSON::ParserError
raise PrometheusClient::Error, 'Parsing response failed' raise PrometheusClient::UnexpectedResponseError, 'Parsing response failed'
end end
end end
end end
...@@ -18,7 +18,6 @@ module Gitlab ...@@ -18,7 +18,6 @@ module Gitlab
class << self class << self
include Gitlab::Utils::UsageData include Gitlab::Utils::UsageData
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
include Gitlab::UsageDataConcerns::Topology
def data(force_refresh: false) def data(force_refresh: false)
Rails.cache.fetch('usage_data', force: force_refresh, expires_in: 2.weeks) do Rails.cache.fetch('usage_data', force: force_refresh, expires_in: 2.weeks) do
...@@ -210,6 +209,7 @@ module Gitlab ...@@ -210,6 +209,7 @@ module Gitlab
ldap_enabled: alt_usage_data(fallback: nil) { Gitlab.config.ldap.enabled }, ldap_enabled: alt_usage_data(fallback: nil) { Gitlab.config.ldap.enabled },
mattermost_enabled: alt_usage_data(fallback: nil) { Gitlab.config.mattermost.enabled }, mattermost_enabled: alt_usage_data(fallback: nil) { Gitlab.config.mattermost.enabled },
omniauth_enabled: alt_usage_data(fallback: nil) { Gitlab::Auth.omniauth_enabled? }, omniauth_enabled: alt_usage_data(fallback: nil) { Gitlab::Auth.omniauth_enabled? },
prometheus_enabled: alt_usage_data(fallback: nil) { Gitlab::Prometheus::Internal.prometheus_enabled? },
prometheus_metrics_enabled: alt_usage_data(fallback: nil) { Gitlab::Metrics.prometheus_metrics_enabled? }, prometheus_metrics_enabled: alt_usage_data(fallback: nil) { Gitlab::Metrics.prometheus_metrics_enabled? },
reply_by_email_enabled: alt_usage_data(fallback: nil) { Gitlab::IncomingEmail.enabled? }, reply_by_email_enabled: alt_usage_data(fallback: nil) { Gitlab::IncomingEmail.enabled? },
signup_enabled: alt_usage_data(fallback: nil) { Gitlab::CurrentSettings.allow_signup? }, signup_enabled: alt_usage_data(fallback: nil) { Gitlab::CurrentSettings.allow_signup? },
...@@ -303,6 +303,10 @@ module Gitlab ...@@ -303,6 +303,10 @@ module Gitlab
} }
end end
def topology_usage_data
Gitlab::UsageData::Topology.new.topology_usage_data
end
def ingress_modsecurity_usage def ingress_modsecurity_usage
## ##
# This method measures usage of the Modsecurity Web Application Firewall across the entire # This method measures usage of the Modsecurity Web Application Firewall across the entire
......
# frozen_string_literal: true # frozen_string_literal: true
module Gitlab module Gitlab
module UsageDataConcerns class UsageData
module Topology class Topology
include Gitlab::Utils::UsageData include Gitlab::Utils::UsageData
JOB_TO_SERVICE_NAME = { JOB_TO_SERVICE_NAME = {
...@@ -16,11 +16,20 @@ module Gitlab ...@@ -16,11 +16,20 @@ module Gitlab
'node' => 'node-exporter' 'node' => 'node-exporter'
}.freeze }.freeze
def topology_usage_data CollectionFailure = Struct.new(:query, :error) do
topology_data, duration = measure_duration do def to_h
alt_usage_data(fallback: {}) { topology_fetch_all_data } { query => error }
end end
{ topology: topology_data.merge(duration_s: duration) } end
def topology_usage_data
@failures = []
topology_data, duration = measure_duration { topology_fetch_all_data }
{
topology: topology_data
.merge(duration_s: duration)
.merge(failures: @failures.map(&:to_h))
}
end end
private private
...@@ -32,10 +41,17 @@ module Gitlab ...@@ -32,10 +41,17 @@ module Gitlab
nodes: topology_node_data(client) nodes: topology_node_data(client)
}.compact }.compact
end end
rescue => e
@failures << CollectionFailure.new('other', e.class.to_s)
{}
end end
def topology_app_requests_per_hour(client) def topology_app_requests_per_hour(client)
result = client.query(one_week_average('gitlab_usage_ping:ops:rate5m')).first result = query_safely('gitlab_usage_ping:ops:rate5m', 'app_requests', fallback: nil) do |query|
client.query(one_week_average(query)).first
end
return unless result return unless result
# the metric is recorded as a per-second rate # the metric is recorded as a per-second rate
...@@ -62,11 +78,15 @@ module Gitlab ...@@ -62,11 +78,15 @@ module Gitlab
end end
def topology_node_memory(client) def topology_node_memory(client)
aggregate_by_instance(client, 'gitlab_usage_ping:node_memory_total_bytes:avg') query_safely('gitlab_usage_ping:node_memory_total_bytes:avg', 'node_memory', fallback: {}) do |query|
aggregate_by_instance(client, query)
end
end end
def topology_node_cpus(client) def topology_node_cpus(client)
aggregate_by_instance(client, 'gitlab_usage_ping:node_cpus:count') query_safely('gitlab_usage_ping:node_cpus:count', 'node_cpus', fallback: {}) do |query|
aggregate_by_instance(client, query)
end
end end
def topology_all_service_memory(client) def topology_all_service_memory(client)
...@@ -78,19 +98,39 @@ module Gitlab ...@@ -78,19 +98,39 @@ module Gitlab
end end
def topology_service_memory_rss(client) def topology_service_memory_rss(client)
aggregate_by_labels(client, 'gitlab_usage_ping:node_service_process_resident_memory_bytes:avg') query_safely(
'gitlab_usage_ping:node_service_process_resident_memory_bytes:avg', 'service_rss', fallback: []
) { |query| aggregate_by_labels(client, query) }
end end
def topology_service_memory_uss(client) def topology_service_memory_uss(client)
aggregate_by_labels(client, 'gitlab_usage_ping:node_service_process_unique_memory_bytes:avg') query_safely(
'gitlab_usage_ping:node_service_process_unique_memory_bytes:avg', 'service_uss', fallback: []
) { |query| aggregate_by_labels(client, query) }
end end
def topology_service_memory_pss(client) def topology_service_memory_pss(client)
aggregate_by_labels(client, 'gitlab_usage_ping:node_service_process_proportional_memory_bytes:avg') query_safely(
'gitlab_usage_ping:node_service_process_proportional_memory_bytes:avg', 'service_pss', fallback: []
) { |query| aggregate_by_labels(client, query) }
end end
def topology_all_service_process_count(client) def topology_all_service_process_count(client)
aggregate_by_labels(client, 'gitlab_usage_ping:node_service_process:count') query_safely(
'gitlab_usage_ping:node_service_process:count', 'service_process_count', fallback: []
) { |query| aggregate_by_labels(client, query) }
end
def query_safely(query, query_name, fallback:)
result = yield query
return result if result.present?
@failures << CollectionFailure.new(query_name, 'empty_result')
fallback
rescue => e
@failures << CollectionFailure.new(query_name, e.class.to_s)
fallback
end end
def topology_node_services(instance, all_process_counts, all_process_memory) def topology_node_services(instance, all_process_counts, all_process_memory)
......
...@@ -32,7 +32,7 @@ RSpec.describe Gitlab::PrometheusClient do ...@@ -32,7 +32,7 @@ RSpec.describe Gitlab::PrometheusClient do
it 'raises error when status code not 200' do it 'raises error when status code not 200' do
stub_request(:get, subject.health_url).to_return(status: 500, body: '') stub_request(:get, subject.health_url).to_return(status: 500, body: '')
expect { subject.healthy? }.to raise_error(Gitlab::PrometheusClient::Error) expect { subject.healthy? }.to raise_error(Gitlab::PrometheusClient::UnexpectedResponseError)
end end
end end
...@@ -41,41 +41,41 @@ RSpec.describe Gitlab::PrometheusClient do ...@@ -41,41 +41,41 @@ RSpec.describe Gitlab::PrometheusClient do
# - execute_query: A query call # - execute_query: A query call
shared_examples 'failure response' do shared_examples 'failure response' do
context 'when request returns 400 with an error message' do context 'when request returns 400 with an error message' do
it 'raises a Gitlab::PrometheusClient::Error error' do it 'raises a Gitlab::PrometheusClient::QueryError error' do
req_stub = stub_prometheus_request(query_url, status: 400, body: { error: 'bar!' }) req_stub = stub_prometheus_request(query_url, status: 400, body: { error: 'bar!' })
expect { execute_query } expect { execute_query }
.to raise_error(Gitlab::PrometheusClient::Error, 'bar!') .to raise_error(Gitlab::PrometheusClient::QueryError, 'bar!')
expect(req_stub).to have_been_requested expect(req_stub).to have_been_requested
end end
end end
context 'when request returns 400 without an error message' do context 'when request returns 400 without an error message' do
it 'raises a Gitlab::PrometheusClient::Error error' do it 'raises a Gitlab::PrometheusClient::QueryError error' do
req_stub = stub_prometheus_request(query_url, status: 400) req_stub = stub_prometheus_request(query_url, status: 400)
expect { execute_query } expect { execute_query }
.to raise_error(Gitlab::PrometheusClient::Error, 'Bad data received') .to raise_error(Gitlab::PrometheusClient::QueryError, 'Bad data received')
expect(req_stub).to have_been_requested expect(req_stub).to have_been_requested
end end
end end
context 'when request returns 500' do context 'when request returns 500' do
it 'raises a Gitlab::PrometheusClient::Error error' do it 'raises a Gitlab::PrometheusClient::UnexpectedResponseError error' do
req_stub = stub_prometheus_request(query_url, status: 500, body: { message: 'FAIL!' }) req_stub = stub_prometheus_request(query_url, status: 500, body: { message: 'FAIL!' })
expect { execute_query } expect { execute_query }
.to raise_error(Gitlab::PrometheusClient::Error, '500 - {"message":"FAIL!"}') .to raise_error(Gitlab::PrometheusClient::UnexpectedResponseError, '500 - {"message":"FAIL!"}')
expect(req_stub).to have_been_requested expect(req_stub).to have_been_requested
end end
end end
context 'when request returns non json data' do context 'when request returns non json data' do
it 'raises a Gitlab::PrometheusClient::Error error' do it 'raises a Gitlab::PrometheusClient::UnexpectedResponseError error' do
req_stub = stub_prometheus_request(query_url, status: 200, body: 'not json') req_stub = stub_prometheus_request(query_url, status: 200, body: 'not json')
expect { execute_query } expect { execute_query }
.to raise_error(Gitlab::PrometheusClient::Error, 'Parsing response failed') .to raise_error(Gitlab::PrometheusClient::UnexpectedResponseError, 'Parsing response failed')
expect(req_stub).to have_been_requested expect(req_stub).to have_been_requested
end end
end end
...@@ -85,35 +85,35 @@ RSpec.describe Gitlab::PrometheusClient do ...@@ -85,35 +85,35 @@ RSpec.describe Gitlab::PrometheusClient do
let(:prometheus_url) {"https://prometheus.invalid.example.com/api/v1/query?query=1"} let(:prometheus_url) {"https://prometheus.invalid.example.com/api/v1/query?query=1"}
shared_examples 'exceptions are raised' do shared_examples 'exceptions are raised' do
it 'raises a Gitlab::PrometheusClient::Error error when a SocketError is rescued' do it 'raises a Gitlab::PrometheusClient::ConnectionError error when a SocketError is rescued' do
req_stub = stub_prometheus_request_with_exception(prometheus_url, SocketError) req_stub = stub_prometheus_request_with_exception(prometheus_url, SocketError)
expect { subject } expect { subject }
.to raise_error(Gitlab::PrometheusClient::Error, "Can't connect to #{prometheus_url}") .to raise_error(Gitlab::PrometheusClient::ConnectionError, "Can't connect to #{prometheus_url}")
expect(req_stub).to have_been_requested expect(req_stub).to have_been_requested
end end
it 'raises a Gitlab::PrometheusClient::Error error when a SSLError is rescued' do it 'raises a Gitlab::PrometheusClient::ConnectionError error when a SSLError is rescued' do
req_stub = stub_prometheus_request_with_exception(prometheus_url, OpenSSL::SSL::SSLError) req_stub = stub_prometheus_request_with_exception(prometheus_url, OpenSSL::SSL::SSLError)
expect { subject } expect { subject }
.to raise_error(Gitlab::PrometheusClient::Error, "#{prometheus_url} contains invalid SSL data") .to raise_error(Gitlab::PrometheusClient::ConnectionError, "#{prometheus_url} contains invalid SSL data")
expect(req_stub).to have_been_requested expect(req_stub).to have_been_requested
end end
it 'raises a Gitlab::PrometheusClient::Error error when a Gitlab::HTTP::ResponseError is rescued' do it 'raises a Gitlab::PrometheusClient::ConnectionError error when a Gitlab::HTTP::ResponseError is rescued' do
req_stub = stub_prometheus_request_with_exception(prometheus_url, Gitlab::HTTP::ResponseError) req_stub = stub_prometheus_request_with_exception(prometheus_url, Gitlab::HTTP::ResponseError)
expect { subject } expect { subject }
.to raise_error(Gitlab::PrometheusClient::Error, "Network connection error") .to raise_error(Gitlab::PrometheusClient::ConnectionError, "Network connection error")
expect(req_stub).to have_been_requested expect(req_stub).to have_been_requested
end end
it 'raises a Gitlab::PrometheusClient::Error error when a Gitlab::HTTP::ResponseError with a code is rescued' do it 'raises a Gitlab::PrometheusClient::ConnectionError error when a Gitlab::HTTP::ResponseError with a code is rescued' do
req_stub = stub_prometheus_request_with_exception(prometheus_url, Gitlab::HTTP::ResponseError.new(code: 400)) req_stub = stub_prometheus_request_with_exception(prometheus_url, Gitlab::HTTP::ResponseError.new(code: 400))
expect { subject } expect { subject }
.to raise_error(Gitlab::PrometheusClient::Error, "Network connection error") .to raise_error(Gitlab::PrometheusClient::ConnectionError, "Network connection error")
expect(req_stub).to have_been_requested expect(req_stub).to have_been_requested
end end
end end
...@@ -400,9 +400,9 @@ RSpec.describe Gitlab::PrometheusClient do ...@@ -400,9 +400,9 @@ RSpec.describe Gitlab::PrometheusClient do
context "without response code" do context "without response code" do
let(:response_error) { Gitlab::HTTP::ResponseError } let(:response_error) { Gitlab::HTTP::ResponseError }
it 'raises PrometheusClient::Error' do it 'raises PrometheusClient::ConnectionError' do
expect { subject.proxy('query', { query: prometheus_query }) }.to( expect { subject.proxy('query', { query: prometheus_query }) }.to(
raise_error(Gitlab::PrometheusClient::Error, 'Network connection error') raise_error(Gitlab::PrometheusClient::ConnectionError, 'Network connection error')
) )
end end
end end
......
...@@ -2,11 +2,11 @@ ...@@ -2,11 +2,11 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::UsageDataConcerns::Topology do RSpec.describe Gitlab::UsageData::Topology do
include UsageDataHelpers include UsageDataHelpers
describe '#topology_usage_data' do describe '#topology_usage_data' do
subject { Class.new.extend(described_class).topology_usage_data } subject { described_class.new.topology_usage_data }
before do before do
# this pins down time shifts when benchmarking durations # this pins down time shifts when benchmarking durations
...@@ -34,6 +34,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do ...@@ -34,6 +34,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
expect(subject[:topology]).to eq({ expect(subject[:topology]).to eq({
duration_s: 0, duration_s: 0,
application_requests_per_hour: 36, application_requests_per_hour: 36,
failures: [],
nodes: [ nodes: [
{ {
node_memory_total_bytes: 512, node_memory_total_bytes: 512,
...@@ -76,7 +77,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do ...@@ -76,7 +77,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
end end
context 'and some node memory metrics are missing' do context 'and some node memory metrics are missing' do
it 'removes the respective entries' do it 'removes the respective entries and includes the failures' do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query(result: []), receive_app_request_volume_query(result: []),
receive_node_memory_query(result: []), receive_node_memory_query(result: []),
...@@ -89,6 +90,12 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do ...@@ -89,6 +90,12 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
expect(subject[:topology]).to eq({ expect(subject[:topology]).to eq({
duration_s: 0, duration_s: 0,
failures: [
{ 'app_requests' => 'empty_result' },
{ 'node_memory' => 'empty_result' },
{ 'service_rss' => 'empty_result' },
{ 'service_uss' => 'empty_result' }
],
nodes: [ nodes: [
{ {
node_cpus: 16, node_cpus: 16,
...@@ -123,31 +130,50 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do ...@@ -123,31 +130,50 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
end end
end end
context 'and no results are found' do context 'and an error is raised when querying Prometheus' do
it 'does not report anything' do it 'returns empty result with failures' do
expect_prometheus_api_to receive(:query).at_least(:once).and_return({}) expect_prometheus_api_to receive(:query)
.at_least(:once)
.and_raise(Gitlab::PrometheusClient::ConnectionError)
expect(subject[:topology]).to eq({ expect(subject[:topology]).to eq({
duration_s: 0, duration_s: 0,
failures: [
{ 'app_requests' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_memory' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_cpus' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_rss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_uss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_pss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_process_count' => 'Gitlab::PrometheusClient::ConnectionError' }
],
nodes: [] nodes: []
}) })
end end
end end
end
context 'and a connection error is raised' do context 'when embedded Prometheus server is disabled' do
it 'does not report anything' do it 'returns empty result with no failures' do
expect_prometheus_api_to receive(:query).and_raise('Connection failed') expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(subject[:topology]).to eq({ duration_s: 0 }) expect(subject[:topology]).to eq({
end duration_s: 0,
failures: []
})
end end
end end
context 'when embedded Prometheus server is disabled' do context 'when top-level function raises error' do
it 'does not report anything' do it 'returns empty result with generic failure' do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false) allow(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_raise(RuntimeError)
expect(subject[:topology]).to eq({ duration_s: 0 }) expect(subject[:topology]).to eq({
duration_s: 0,
failures: [
{ 'other' => 'RuntimeError' }
]
})
end end
end end
end end
......
...@@ -347,6 +347,20 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do ...@@ -347,6 +347,20 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
expect(subject[:grafana_link_enabled]).to eq(Gitlab::CurrentSettings.grafana_enabled?) expect(subject[:grafana_link_enabled]).to eq(Gitlab::CurrentSettings.grafana_enabled?)
end end
context 'with embedded Prometheus' do
it 'returns true when embedded Prometheus is enabled' do
allow(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
expect(subject[:prometheus_enabled]).to eq(true)
end
it 'returns false when embedded Prometheus is disabled' do
allow(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(subject[:prometheus_enabled]).to eq(false)
end
end
context 'with embedded grafana' do context 'with embedded grafana' do
it 'returns true when embedded grafana is enabled' do it 'returns true when embedded grafana is enabled' do
stub_application_setting(grafana_enabled: true) stub_application_setting(grafana_enabled: true)
......
...@@ -23,7 +23,7 @@ RSpec.describe PrometheusService, :use_clean_rails_memory_store_caching do ...@@ -23,7 +23,7 @@ RSpec.describe PrometheusService, :use_clean_rails_memory_store_caching do
# result = { success: false, result: error } # result = { success: false, result: error }
expect(result[:success]).to be_falsy expect(result[:success]).to be_falsy
expect(result[:result]).to be_instance_of(Gitlab::PrometheusClient::Error) expect(result[:result]).to be_instance_of(Gitlab::PrometheusClient::UnexpectedResponseError)
expect(redirect_req_stub).to have_been_requested expect(redirect_req_stub).to have_been_requested
expect(redirected_req_stub).not_to have_been_requested expect(redirected_req_stub).not_to have_been_requested
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment