Commit 3098e1be authored by Qingyu Zhao, committed by Imre Farkas

Collect node CPU and memory utilization in usage ping

parent 3da460b8
---
title: Collect node CPU and memory utilization in usage ping
merge_request: 38681
author:
type: other
...@@ -593,7 +593,9 @@ The following is example content of the Usage Ping payload. ...@@ -593,7 +593,9 @@ The following is example content of the Usage Ping payload.
"nodes": [ "nodes": [
{ {
"node_memory_total_bytes": 33269903360, "node_memory_total_bytes": 33269903360,
"node_memory_utilization": 0.35,
"node_cpus": 16, "node_cpus": 16,
"node_cpu_utilization": 0.2,
"node_uname_info": { "node_uname_info": {
"machine": "x86_64", "machine": "x86_64",
"sysname": "Linux", "sysname": "Linux",
......
...@@ -77,12 +77,12 @@ module Gitlab ...@@ -77,12 +77,12 @@ module Gitlab
# metric labels to their respective values. # metric labels to their respective values.
# #
# @return [Hash] mapping labels to their aggregate numeric values, or the empty hash if no results were found # @return [Hash] mapping labels to their aggregate numeric values, or the empty hash if no results were found
def aggregate(aggregate_query, time: Time.now) def aggregate(aggregate_query, time: Time.now, transform_value: :to_f)
response = query(aggregate_query, time: time) response = query(aggregate_query, time: time)
response.to_h do |result| response.to_h do |result|
key = block_given? ? yield(result['metric']) : result['metric'] key = block_given? ? yield(result['metric']) : result['metric']
_timestamp, value = result['value'] _timestamp, value = result['value']
[key, value.to_i] [key, value.public_send(transform_value)] # rubocop:disable GitlabSecurity/PublicSend
end end
end end
......
...@@ -63,7 +63,9 @@ module Gitlab ...@@ -63,7 +63,9 @@ module Gitlab
def topology_node_data(client) def topology_node_data(client)
# node-level data # node-level data
by_instance_mem = topology_node_memory(client) by_instance_mem = topology_node_memory(client)
by_instance_mem_utilization = topology_node_memory_utilization(client)
by_instance_cpus = topology_node_cpus(client) by_instance_cpus = topology_node_cpus(client)
by_instance_cpu_utilization = topology_node_cpu_utilization(client)
by_instance_uname_info = topology_node_uname_info(client) by_instance_uname_info = topology_node_uname_info(client)
# service-level data # service-level data
by_instance_by_job_by_type_memory = topology_all_service_memory(client) by_instance_by_job_by_type_memory = topology_all_service_memory(client)
...@@ -73,7 +75,9 @@ module Gitlab ...@@ -73,7 +75,9 @@ module Gitlab
@instances.map do |instance| @instances.map do |instance|
{ {
node_memory_total_bytes: by_instance_mem[instance], node_memory_total_bytes: by_instance_mem[instance],
node_memory_utilization: by_instance_mem_utilization[instance],
node_cpus: by_instance_cpus[instance], node_cpus: by_instance_cpus[instance],
node_cpu_utilization: by_instance_cpu_utilization[instance],
node_uname_info: by_instance_uname_info[instance], node_uname_info: by_instance_uname_info[instance],
node_services: node_services:
topology_node_services( topology_node_services(
...@@ -89,12 +93,24 @@ module Gitlab ...@@ -89,12 +93,24 @@ module Gitlab
end end
end end
# Looks up per-instance node memory utilization from the
# `gitlab_usage_ping:node_memory_utilization:avg` series, aggregated over one week.
# Values are requested as floats (`transform_value: :to_f`).
#
# The lookup is wrapped in `query_safely` under the name 'node_memory_utilization'
# with an empty hash as the fallback.
def topology_node_memory_utilization(client)
  series = 'gitlab_usage_ping:node_memory_utilization:avg'

  query_safely(series, 'node_memory_utilization', fallback: {}) do |metric_query|
    weekly_query = aggregate_one_week(metric_query)
    aggregate_by_instance(client, weekly_query, transform_value: :to_f)
  end
end
def topology_node_cpus(client) def topology_node_cpus(client)
query_safely('gitlab_usage_ping:node_cpus:count', 'node_cpus', fallback: {}) do |query| query_safely('gitlab_usage_ping:node_cpus:count', 'node_cpus', fallback: {}) do |query|
aggregate_by_instance(client, aggregate_one_week(query, aggregation: :max)) aggregate_by_instance(client, aggregate_one_week(query, aggregation: :max))
end end
end end
# Looks up per-instance node CPU utilization from the
# `gitlab_usage_ping:node_cpu_utilization:avg` series, aggregated over one week.
# Values are requested as floats (`transform_value: :to_f`).
#
# The lookup is wrapped in `query_safely` under the name 'node_cpu_utilization'
# with an empty hash as the fallback.
def topology_node_cpu_utilization(client)
  series = 'gitlab_usage_ping:node_cpu_utilization:avg'

  query_safely(series, 'node_cpu_utilization', fallback: {}) do |metric_query|
    weekly_query = aggregate_one_week(metric_query)
    aggregate_by_instance(client, weekly_query, transform_value: :to_f)
  end
end
def topology_node_uname_info(client) def topology_node_uname_info(client)
node_uname_info = query_safely('node_uname_info', 'node_uname_info', fallback: []) do |query| node_uname_info = query_safely('node_uname_info', 'node_uname_info', fallback: []) do |query|
client.query(query) client.query(query)
...@@ -235,13 +251,13 @@ module Gitlab ...@@ -235,13 +251,13 @@ module Gitlab
"#{aggregation}_over_time (#{query}[1w])" "#{aggregation}_over_time (#{query}[1w])"
end end
def aggregate_by_instance(client, query) def aggregate_by_instance(client, query, transform_value: :to_i)
client.aggregate(query) { |metric| normalize_and_track_instance(metric['instance']) } client.aggregate(query, transform_value: transform_value) { |metric| normalize_and_track_instance(metric['instance']) }
end end
# Will retain a composite key that values are mapped to # Will retain a composite key that values are mapped to
def aggregate_by_labels(client, query) def aggregate_by_labels(client, query, transform_value: :to_i)
client.aggregate(query) do |metric| client.aggregate(query, transform_value: transform_value) do |metric|
metric['instance'] = normalize_and_track_instance(metric['instance']) metric['instance'] = normalize_and_track_instance(metric['instance'])
metric metric
end end
......
...@@ -24,7 +24,9 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -24,7 +24,9 @@ RSpec.describe Gitlab::UsageData::Topology do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query, receive_app_request_volume_query,
receive_node_memory_query, receive_node_memory_query,
receive_node_memory_utilization_query,
receive_node_cpu_count_query, receive_node_cpu_count_query,
receive_node_cpu_utilization_query,
receive_node_uname_info_query, receive_node_uname_info_query,
receive_node_service_memory_rss_query, receive_node_service_memory_rss_query,
receive_node_service_memory_uss_query, receive_node_service_memory_uss_query,
...@@ -40,7 +42,9 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -40,7 +42,9 @@ RSpec.describe Gitlab::UsageData::Topology do
nodes: [ nodes: [
{ {
node_memory_total_bytes: 512, node_memory_total_bytes: 512,
node_memory_utilization: 0.45,
node_cpus: 8, node_cpus: 8,
node_cpu_utilization: 0.1,
node_uname_info: { node_uname_info: {
machine: 'x86_64', machine: 'x86_64',
sysname: 'Linux', sysname: 'Linux',
...@@ -64,7 +68,9 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -64,7 +68,9 @@ RSpec.describe Gitlab::UsageData::Topology do
}, },
{ {
node_memory_total_bytes: 1024, node_memory_total_bytes: 1024,
node_memory_utilization: 0.25,
node_cpus: 16, node_cpus: 16,
node_cpu_utilization: 0.2,
node_uname_info: { node_uname_info: {
machine: 'x86_64', machine: 'x86_64',
sysname: 'Linux', sysname: 'Linux',
...@@ -102,7 +108,9 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -102,7 +108,9 @@ RSpec.describe Gitlab::UsageData::Topology do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query(result: []), receive_app_request_volume_query(result: []),
receive_node_memory_query(result: []), receive_node_memory_query(result: []),
receive_node_memory_utilization_query(result: []),
receive_node_cpu_count_query, receive_node_cpu_count_query,
receive_node_cpu_utilization_query,
receive_node_uname_info_query, receive_node_uname_info_query,
receive_node_service_memory_rss_query(result: []), receive_node_service_memory_rss_query(result: []),
receive_node_service_memory_uss_query(result: []), receive_node_service_memory_uss_query(result: []),
...@@ -116,6 +124,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -116,6 +124,7 @@ RSpec.describe Gitlab::UsageData::Topology do
failures: [ failures: [
{ 'app_requests' => 'empty_result' }, { 'app_requests' => 'empty_result' },
{ 'node_memory' => 'empty_result' }, { 'node_memory' => 'empty_result' },
{ 'node_memory_utilization' => 'empty_result' },
{ 'service_rss' => 'empty_result' }, { 'service_rss' => 'empty_result' },
{ 'service_uss' => 'empty_result' }, { 'service_uss' => 'empty_result' },
{ 'service_workers' => 'empty_result' } { 'service_workers' => 'empty_result' }
...@@ -123,6 +132,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -123,6 +132,7 @@ RSpec.describe Gitlab::UsageData::Topology do
nodes: [ nodes: [
{ {
node_cpus: 16, node_cpus: 16,
node_cpu_utilization: 0.2,
node_uname_info: { node_uname_info: {
machine: 'x86_64', machine: 'x86_64',
release: '4.15.0-101-generic', release: '4.15.0-101-generic',
...@@ -146,6 +156,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -146,6 +156,7 @@ RSpec.describe Gitlab::UsageData::Topology do
}, },
{ {
node_cpus: 8, node_cpus: 8,
node_cpu_utilization: 0.1,
node_uname_info: { node_uname_info: {
machine: 'x86_64', machine: 'x86_64',
release: '4.19.76-linuxkit', release: '4.19.76-linuxkit',
...@@ -178,6 +189,15 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -178,6 +189,15 @@ RSpec.describe Gitlab::UsageData::Topology do
] ]
end end
# Canned query result: one sample for localhost:9100 whose value string is '0.35'.
let(:node_memory_utilization_response) do
  instance_labels = { 'instance' => 'localhost:9100' }

  [{ 'metric' => instance_labels, 'value' => [1000, '0.35'] }]
end
let(:node_uname_info_response) do let(:node_uname_info_response) do
[ [
{ {
...@@ -226,7 +246,9 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -226,7 +246,9 @@ RSpec.describe Gitlab::UsageData::Topology do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query(result: []), receive_app_request_volume_query(result: []),
receive_node_memory_query(result: node_memory_response), receive_node_memory_query(result: node_memory_response),
receive_node_memory_utilization_query(result: node_memory_utilization_response),
receive_node_cpu_count_query(result: []), receive_node_cpu_count_query(result: []),
receive_node_cpu_utilization_query(result: []),
receive_node_uname_info_query(result: node_uname_info_response), receive_node_uname_info_query(result: node_uname_info_response),
receive_node_service_memory_rss_query(result: service_memory_response), receive_node_service_memory_rss_query(result: service_memory_response),
receive_node_service_memory_uss_query(result: []), receive_node_service_memory_uss_query(result: []),
...@@ -240,6 +262,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -240,6 +262,7 @@ RSpec.describe Gitlab::UsageData::Topology do
failures: [ failures: [
{ 'app_requests' => 'empty_result' }, { 'app_requests' => 'empty_result' },
{ 'node_cpus' => 'empty_result' }, { 'node_cpus' => 'empty_result' },
{ 'node_cpu_utilization' => 'empty_result' },
{ 'service_uss' => 'empty_result' }, { 'service_uss' => 'empty_result' },
{ 'service_pss' => 'empty_result' }, { 'service_pss' => 'empty_result' },
{ 'service_process_count' => 'empty_result' }, { 'service_process_count' => 'empty_result' },
...@@ -248,6 +271,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -248,6 +271,7 @@ RSpec.describe Gitlab::UsageData::Topology do
nodes: [ nodes: [
{ {
node_memory_total_bytes: 512, node_memory_total_bytes: 512,
node_memory_utilization: 0.35,
node_uname_info: { node_uname_info: {
machine: 'x86_64', machine: 'x86_64',
sysname: 'Linux', sysname: 'Linux',
...@@ -286,7 +310,9 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -286,7 +310,9 @@ RSpec.describe Gitlab::UsageData::Topology do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query(result: []), receive_app_request_volume_query(result: []),
receive_node_memory_query(result: []), receive_node_memory_query(result: []),
receive_node_memory_utilization_query(result: []),
receive_node_cpu_count_query(result: []), receive_node_cpu_count_query(result: []),
receive_node_cpu_utilization_query(result: []),
receive_node_uname_info_query(result: []), receive_node_uname_info_query(result: []),
receive_node_service_memory_rss_query, receive_node_service_memory_rss_query,
receive_node_service_memory_uss_query(result: []), receive_node_service_memory_uss_query(result: []),
...@@ -300,7 +326,9 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -300,7 +326,9 @@ RSpec.describe Gitlab::UsageData::Topology do
failures: [ failures: [
{ 'app_requests' => 'empty_result' }, { 'app_requests' => 'empty_result' },
{ 'node_memory' => 'empty_result' }, { 'node_memory' => 'empty_result' },
{ 'node_memory_utilization' => 'empty_result' },
{ 'node_cpus' => 'empty_result' }, { 'node_cpus' => 'empty_result' },
{ 'node_cpu_utilization' => 'empty_result' },
{ 'node_uname_info' => 'empty_result' }, { 'node_uname_info' => 'empty_result' },
{ 'service_uss' => 'empty_result' }, { 'service_uss' => 'empty_result' },
{ 'service_pss' => 'empty_result' }, { 'service_pss' => 'empty_result' },
...@@ -355,7 +383,9 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -355,7 +383,9 @@ RSpec.describe Gitlab::UsageData::Topology do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query(result: []), receive_app_request_volume_query(result: []),
receive_node_memory_query(result: []), receive_node_memory_query(result: []),
receive_node_memory_utilization_query(result: []),
receive_node_cpu_count_query(result: []), receive_node_cpu_count_query(result: []),
receive_node_cpu_utilization_query(result: []),
receive_node_uname_info_query(result: []), receive_node_uname_info_query(result: []),
receive_node_service_memory_rss_query(result: []), receive_node_service_memory_rss_query(result: []),
receive_node_service_memory_uss_query(result: []), receive_node_service_memory_uss_query(result: []),
...@@ -382,7 +412,9 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -382,7 +412,9 @@ RSpec.describe Gitlab::UsageData::Topology do
failures: [ failures: [
{ 'app_requests' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'app_requests' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_memory' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'node_memory' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_memory_utilization' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_cpus' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'node_cpus' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_cpu_utilization' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_uname_info' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'node_uname_info' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_rss' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'service_rss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_uss' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'service_uss' => 'Gitlab::PrometheusClient::ConnectionError' },
...@@ -447,6 +479,21 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -447,6 +479,21 @@ RSpec.describe Gitlab::UsageData::Topology do
]) ])
end end
# Builds a `receive(:query)` expectation matching any query that mentions
# node_memory_utilization and is called with a Hash second argument.
#
# result: the canned response to return; when nil (or false), a default
#         two-instance response is used instead.
def receive_node_memory_utilization_query(result: nil)
  default_samples = [
    { 'metric' => { 'instance' => 'instance1:8080' }, 'value' => [1000, '0.45'] },
    { 'metric' => { 'instance' => 'instance2:8090' }, 'value' => [1000, '0.25'] }
  ]

  expectation = receive(:query).with(/node_memory_utilization/, an_instance_of(Hash))
  expectation.and_return(result || default_samples)
end
def receive_node_cpu_count_query(result: nil) def receive_node_cpu_count_query(result: nil)
receive(:query) receive(:query)
.with(/node_cpus/, an_instance_of(Hash)) .with(/node_cpus/, an_instance_of(Hash))
...@@ -462,6 +509,21 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -462,6 +509,21 @@ RSpec.describe Gitlab::UsageData::Topology do
]) ])
end end
# Builds a `receive(:query)` expectation matching any query that mentions
# node_cpu_utilization and is called with a Hash second argument.
#
# result: the canned response to return; when nil (or false), a default
#         two-instance response is used instead (instance2 listed first).
def receive_node_cpu_utilization_query(result: nil)
  default_samples = [
    { 'metric' => { 'instance' => 'instance2:8090' }, 'value' => [1000, '0.2'] },
    { 'metric' => { 'instance' => 'instance1:8080' }, 'value' => [1000, '0.1'] }
  ]

  expectation = receive(:query).with(/node_cpu_utilization/, an_instance_of(Hash))
  expectation.and_return(result || default_samples)
end
def receive_node_uname_info_query(result: nil) def receive_node_uname_info_query(result: nil)
receive(:query) receive(:query)
.with('node_uname_info') .with('node_uname_info')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment