Commit 6c2d2266 authored by Matthias Kaeppler

Track unmapped services in Usage Ping

This will help us understand whether we are merely missing a mapping
or whether customers run services that we do not track at all.
parent 3c6ee540
......@@ -730,7 +730,7 @@ appear to be associated to any of the services running, since they all appear to
| `merge_requests_users` | `usage_activity_by_stage_monthly` | `create` | | | Unique count of users who used a merge request |
| `duration_s` | `topology` | `enablement` | | | Time it took to collect topology data |
| `application_requests_per_hour` | `topology` | `enablement` | | | Number of requests to the web application per hour |
| `failures` | `topology` | `enablement` | | | Contains information about failed queries |
| `failures` | `topology` | `enablement` | | | Contains information about failed queries, including types of exceptions and names of unsupported services |
| `nodes` | `topology` | `enablement` | | | The list of server nodes on which GitLab components are running |
| `node_memory_total_bytes` | `topology > nodes` | `enablement` | | | The total available memory of this node |
| `node_cpus` | `topology > nodes` | `enablement` | | | The number of CPU cores of this node |
......
......@@ -160,14 +160,17 @@ module Gitlab
.deep_merge(topology_instance_service_memory(instance, all_process_memory))
.deep_merge(topology_instance_service_server_types(instance, all_server_types))
# map to list of hashes where service names become values instead, and remove
# map to list of hashes where service names become values instead, and skip
# unknown services, since they might not be ours
instance_service_data.each_with_object([]) do |entry, list|
service, service_metrics = entry
gitlab_service = JOB_TO_SERVICE_NAME[service.to_s]
next unless gitlab_service
service_name = service.to_s.strip
list << { name: gitlab_service }.merge(service_metrics)
if gitlab_service = JOB_TO_SERVICE_NAME[service_name]
list << { name: gitlab_service }.merge(service_metrics)
else
@failures << CollectionFailure.new('service_unknown', service_name)
end
end
end
......
......@@ -335,6 +335,40 @@ RSpec.describe Gitlab::UsageData::Topology do
end
end
# Exercises topology collection when the process-count query reports jobs that are
# expected to be unknown (presumably absent from JOB_TO_SERVICE_NAME — confirm against
# the class under test).
context 'and unknown services are encountered' do
# Fixture handed to the stubbed process-count query below: two samples, both on
# instance2, each carrying a different unrecognised 'job' label.
let(:unknown_service_process_count_response) do
[
{
'metric' => { 'instance' => 'instance2:9000', 'job' => 'unknown-service-A' },
'value' => [1000, '42']
},
{
'metric' => { 'instance' => 'instance2:9001', 'job' => 'unknown-service-B' },
'value' => [1000, '42']
}
]
end
it 'filters out unknown service data and reports the unknown services as a failure' do
# Every stubbed query returns an empty result except the process-count query, so the
# two unknown jobs are the only service data supplied to the collector.
expect_prometheus_api_to(
receive_app_request_volume_query(result: []),
receive_node_memory_query(result: []),
receive_node_cpu_count_query(result: []),
receive_node_uname_info_query(result: []),
receive_node_service_memory_rss_query(result: []),
receive_node_service_memory_uss_query(result: []),
receive_node_service_memory_pss_query(result: []),
receive_node_service_process_count_query(result: unknown_service_process_count_response),
receive_node_service_app_server_workers_query(result: [])
)
# Each unknown job name must appear as its own 'service_unknown' entry; `include`
# does not require these to be the only failures reported.
expect(subject.dig(:topology, :failures)).to include(
{ 'service_unknown' => 'unknown-service-A' },
{ 'service_unknown' => 'unknown-service-B' }
)
end
end
context 'and an error is raised when querying Prometheus' do
it 'returns empty result with failures' do
expect_prometheus_api_to receive(:query)
......@@ -534,11 +568,6 @@ RSpec.describe Gitlab::UsageData::Topology do
{
'metric' => { 'instance' => 'instance2:8080', 'job' => 'registry' },
'value' => [1000, '1']
},
# unknown service => should be stripped out
{
'metric' => { 'instance' => 'instance2:9000', 'job' => 'not-a-gitlab-service' },
'value' => [1000, '42']
}
])
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment