Commit 21f96271 authored by Matthias Kaeppler

Add app server type to topology ping

This already existed top-level, but it was broken.

Moreover, this approach allows us to know exactly
where the service is running, even if the customer runs
a combination of puma & unicorn.
parent acf5e90f
...@@ -682,6 +682,7 @@ appear to be associated to any of the services running, since they all appear to ...@@ -682,6 +682,7 @@ appear to be associated to any of the services running, since they all appear to
| `process_memory_rss` | `topology > nodes > node_services` | `enablement` | | | The average Resident Set Size of a service process | | `process_memory_rss` | `topology > nodes > node_services` | `enablement` | | | The average Resident Set Size of a service process |
| `process_memory_uss` | `topology > nodes > node_services` | `enablement` | | | The average Unique Set Size of a service process | | `process_memory_uss` | `topology > nodes > node_services` | `enablement` | | | The average Unique Set Size of a service process |
| `process_memory_pss` | `topology > nodes > node_services` | `enablement` | | | The average Proportional Set Size of a service process | | `process_memory_pss` | `topology > nodes > node_services` | `enablement` | | | The average Proportional Set Size of a service process |
| `server` | `topology > nodes > node_services` | `enablement` | | | The type of web server used (Unicorn or Puma) |
## Example Usage Ping payload ## Example Usage Ping payload
...@@ -893,7 +894,8 @@ The following is example content of the Usage Ping payload. ...@@ -893,7 +894,8 @@ The following is example content of the Usage Ping payload.
"process_count": 16, "process_count": 16,
"process_memory_pss": 233349888, "process_memory_pss": 233349888,
"process_memory_rss": 788220927, "process_memory_rss": 788220927,
"process_memory_uss": 195295487 "process_memory_uss": 195295487,
"server": "puma"
}, },
{ {
"name": "sidekiq", "name": "sidekiq",
......
...@@ -65,6 +65,7 @@ module Gitlab ...@@ -65,6 +65,7 @@ module Gitlab
# service-level data # service-level data
by_instance_by_job_by_type_memory = topology_all_service_memory(client) by_instance_by_job_by_type_memory = topology_all_service_memory(client)
by_instance_by_job_process_count = topology_all_service_process_count(client) by_instance_by_job_process_count = topology_all_service_process_count(client)
by_instance_by_job_server_types = topology_all_service_server_types(client)
instances = Set.new(by_instance_mem.keys + by_instance_cpus.keys) instances = Set.new(by_instance_mem.keys + by_instance_cpus.keys)
instances.map do |instance| instances.map do |instance|
...@@ -72,20 +73,22 @@ module Gitlab ...@@ -72,20 +73,22 @@ module Gitlab
node_memory_total_bytes: by_instance_mem[instance], node_memory_total_bytes: by_instance_mem[instance],
node_cpus: by_instance_cpus[instance], node_cpus: by_instance_cpus[instance],
node_services: node_services:
topology_node_services(instance, by_instance_by_job_process_count, by_instance_by_job_by_type_memory) topology_node_services(
instance, by_instance_by_job_process_count, by_instance_by_job_by_type_memory, by_instance_by_job_server_types
)
}.compact }.compact
end end
end end
def topology_node_memory(client) def topology_node_memory(client)
query_safely('gitlab_usage_ping:node_memory_total_bytes:avg', 'node_memory', fallback: {}) do |query| query_safely('gitlab_usage_ping:node_memory_total_bytes:avg', 'node_memory', fallback: {}) do |query|
aggregate_by_instance(client, query) aggregate_by_instance(client, one_week_average(query))
end end
end end
def topology_node_cpus(client) def topology_node_cpus(client)
query_safely('gitlab_usage_ping:node_cpus:count', 'node_cpus', fallback: {}) do |query| query_safely('gitlab_usage_ping:node_cpus:count', 'node_cpus', fallback: {}) do |query|
aggregate_by_instance(client, query) aggregate_by_instance(client, one_week_average(query))
end end
end end
...@@ -100,24 +103,30 @@ module Gitlab ...@@ -100,24 +103,30 @@ module Gitlab
def topology_service_memory_rss(client) def topology_service_memory_rss(client)
query_safely( query_safely(
'gitlab_usage_ping:node_service_process_resident_memory_bytes:avg', 'service_rss', fallback: [] 'gitlab_usage_ping:node_service_process_resident_memory_bytes:avg', 'service_rss', fallback: []
) { |query| aggregate_by_labels(client, query) } ) { |query| aggregate_by_labels(client, one_week_average(query)) }
end end
def topology_service_memory_uss(client) def topology_service_memory_uss(client)
query_safely( query_safely(
'gitlab_usage_ping:node_service_process_unique_memory_bytes:avg', 'service_uss', fallback: [] 'gitlab_usage_ping:node_service_process_unique_memory_bytes:avg', 'service_uss', fallback: []
) { |query| aggregate_by_labels(client, query) } ) { |query| aggregate_by_labels(client, one_week_average(query)) }
end end
def topology_service_memory_pss(client) def topology_service_memory_pss(client)
query_safely( query_safely(
'gitlab_usage_ping:node_service_process_proportional_memory_bytes:avg', 'service_pss', fallback: [] 'gitlab_usage_ping:node_service_process_proportional_memory_bytes:avg', 'service_pss', fallback: []
) { |query| aggregate_by_labels(client, query) } ) { |query| aggregate_by_labels(client, one_week_average(query)) }
end end
def topology_all_service_process_count(client) def topology_all_service_process_count(client)
query_safely( query_safely(
'gitlab_usage_ping:node_service_process:count', 'service_process_count', fallback: [] 'gitlab_usage_ping:node_service_process:count', 'service_process_count', fallback: []
) { |query| aggregate_by_labels(client, one_week_average(query)) }
end
def topology_all_service_server_types(client)
query_safely(
'gitlab_usage_ping:node_service_app_server_workers:sum', 'service_workers', fallback: []
) { |query| aggregate_by_labels(client, query) } ) { |query| aggregate_by_labels(client, query) }
end end
...@@ -133,11 +142,12 @@ module Gitlab ...@@ -133,11 +142,12 @@ module Gitlab
fallback fallback
end end
def topology_node_services(instance, all_process_counts, all_process_memory) def topology_node_services(instance, all_process_counts, all_process_memory, all_server_types)
# returns all node service data grouped by service name as the key # returns all node service data grouped by service name as the key
instance_service_data = instance_service_data =
topology_instance_service_process_count(instance, all_process_counts) topology_instance_service_process_count(instance, all_process_counts)
.deep_merge(topology_instance_service_memory(instance, all_process_memory)) .deep_merge(topology_instance_service_memory(instance, all_process_memory))
.deep_merge(topology_instance_service_server_types(instance, all_server_types))
# map to list of hashes where service names become values instead, and remove # map to list of hashes where service names become values instead, and remove
# unknown services, since they might not be ours # unknown services, since they might not be ours
...@@ -173,6 +183,12 @@ module Gitlab ...@@ -173,6 +183,12 @@ module Gitlab
result result
end end
# Maps each service (the metric's `job` label) found on the given instance to
# a `{ server: <server label> }` hash, so it can be deep-merged with the other
# per-service data.
#
# Only entries selected by `topology_data_for_instance` are considered. When
# several entries share the same `job`, the last one encountered wins.
def topology_instance_service_server_types(instance, all_instance_data)
  instance_metrics = topology_data_for_instance(instance, all_instance_data)

  instance_metrics.each_with_object({}) do |(labels, _sample), server_by_job|
    server_by_job[labels['job']] = { server: labels['server'] }
  end
end
def topology_data_for_instance(instance, all_instance_data) def topology_data_for_instance(instance, all_instance_data)
all_instance_data.filter { |metric, _value| metric['instance'] == instance } all_instance_data.filter { |metric, _value| metric['instance'] == instance }
end end
...@@ -186,12 +202,12 @@ module Gitlab ...@@ -186,12 +202,12 @@ module Gitlab
end end
def aggregate_by_instance(client, query) def aggregate_by_instance(client, query)
client.aggregate(one_week_average(query)) { |metric| drop_port(metric['instance']) } client.aggregate(query) { |metric| drop_port(metric['instance']) }
end end
# Will retain a composite key that values are mapped to # Will retain a composite key that values are mapped to
def aggregate_by_labels(client, query) def aggregate_by_labels(client, query)
client.aggregate(one_week_average(query)) do |metric| client.aggregate(query) do |metric|
metric['instance'] = drop_port(metric['instance']) metric['instance'] = drop_port(metric['instance'])
metric metric
end end
......
...@@ -28,7 +28,8 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -28,7 +28,8 @@ RSpec.describe Gitlab::UsageData::Topology do
receive_node_service_memory_rss_query, receive_node_service_memory_rss_query,
receive_node_service_memory_uss_query, receive_node_service_memory_uss_query,
receive_node_service_memory_pss_query, receive_node_service_memory_pss_query,
receive_node_service_process_count_query receive_node_service_process_count_query,
receive_node_service_app_server_workers_query
) )
expect(subject[:topology]).to eq({ expect(subject[:topology]).to eq({
...@@ -45,7 +46,8 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -45,7 +46,8 @@ RSpec.describe Gitlab::UsageData::Topology do
process_count: 10, process_count: 10,
process_memory_rss: 300, process_memory_rss: 300,
process_memory_uss: 301, process_memory_uss: 301,
process_memory_pss: 302 process_memory_pss: 302,
server: 'puma'
}, },
{ {
name: 'sidekiq', name: 'sidekiq',
...@@ -68,6 +70,10 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -68,6 +70,10 @@ RSpec.describe Gitlab::UsageData::Topology do
name: 'redis', name: 'redis',
process_count: 1, process_count: 1,
process_memory_rss: 402 process_memory_rss: 402
},
{
name: 'web',
server: 'unicorn'
} }
] ]
} }
...@@ -85,7 +91,8 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -85,7 +91,8 @@ RSpec.describe Gitlab::UsageData::Topology do
receive_node_service_memory_rss_query(result: []), receive_node_service_memory_rss_query(result: []),
receive_node_service_memory_uss_query(result: []), receive_node_service_memory_uss_query(result: []),
receive_node_service_memory_pss_query, receive_node_service_memory_pss_query,
receive_node_service_process_count_query receive_node_service_process_count_query,
receive_node_service_app_server_workers_query(result: [])
) )
expect(subject[:topology]).to eq({ expect(subject[:topology]).to eq({
...@@ -94,7 +101,8 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -94,7 +101,8 @@ RSpec.describe Gitlab::UsageData::Topology do
{ 'app_requests' => 'empty_result' }, { 'app_requests' => 'empty_result' },
{ 'node_memory' => 'empty_result' }, { 'node_memory' => 'empty_result' },
{ 'service_rss' => 'empty_result' }, { 'service_rss' => 'empty_result' },
{ 'service_uss' => 'empty_result' } { 'service_uss' => 'empty_result' },
{ 'service_workers' => 'empty_result' }
], ],
nodes: [ nodes: [
{ {
...@@ -145,7 +153,8 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -145,7 +153,8 @@ RSpec.describe Gitlab::UsageData::Topology do
{ 'service_rss' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'service_rss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_uss' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'service_uss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_pss' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'service_pss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_process_count' => 'Gitlab::PrometheusClient::ConnectionError' } { 'service_process_count' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_workers' => 'Gitlab::PrometheusClient::ConnectionError' }
], ],
nodes: [] nodes: []
}) })
...@@ -298,4 +307,21 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -298,4 +307,21 @@ RSpec.describe Gitlab::UsageData::Topology do
} }
]) ])
end end
# Builds the `receive(:query)` expectation for the app server workers query
# (matched by /app_server_workers/ plus a Hash argument).
#
# result - the canned response to return; when nil, a default response with
#          one sample per instance is used (instance1 => puma,
#          instance2 => unicorn). An explicit empty array is returned as-is.
def receive_node_service_app_server_workers_query(result: nil)
  # [instance, server type, worker count] for each default sample
  default_samples = [
    ['instance1:8080', 'puma', '2'],
    ['instance2:8080', 'unicorn', '1']
  ].map do |instance, server, worker_count|
    {
      'metric' => { 'instance' => instance, 'job' => 'gitlab-rails', 'server' => server },
      'value' => [1000, worker_count]
    }
  end

  receive(:query)
    .with(/app_server_workers/, an_instance_of(Hash))
    .and_return(result || default_samples)
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment