Commit 4ce1ad96 authored by Reuben Pereira, committed by Sean McGivern

Improve Prometheus queries used in the Pod health dashboard

- Aggregate by container where possible.
- Correct the disk I/O queries.
parent b9892e9e
......@@ -7,7 +7,7 @@ module Metrics
DASHBOARD_NAME = N_('K8s pod health')
# SHA256 hash of dashboard content
DASHBOARD_VERSION = '0515db7a99078a2423b037f99251ba16bd163603c0a30229ae8aa7386e96421c'
DASHBOARD_VERSION = '3a91b32f91b2dd3d90275333c0ea3630b3f3f37c4296ede5b5eef59bf523d66b'
SEQUENCE = [
STAGES::MetricEndpointInserter,
......
......@@ -15,55 +15,101 @@ panel_groups:
panels:
- title: "CPU usage"
type: "line-chart"
y_label: "Cores per pod"
y_label: "Cores per container"
metrics:
- id: pod_cpu_usage_seconds_total
query_range: 'rate(container_cpu_usage_seconds_total{pod="{{pod}}",container="POD"}[5m])'
query_range: >-
sum(
rate(container_cpu_usage_seconds_total{pod="{{pod}}",container!="POD"}[5m])
)
by (container)
unit: "cores"
label: pod
label: container
- title: "CPU throttling"
type: "line-chart"
y_label: "Cores per container"
metrics:
- id: pod_cpu_cfs_throttle
query_range: >-
sum(
rate(container_cpu_cfs_throttled_seconds_total{pod="{{pod}}"}[5m])
)
by (container)
unit: "cores"
label: container
- group: Memory metrics
panels:
- title: "Memory usage working set"
type: "line-chart"
y_label: "Working set memory (MiB)"
y_label: "Working set memory"
metrics:
- id: pod_memory_working_set
query_range: 'container_memory_working_set_bytes{pod="{{pod}}",container="POD"}/1024/1024'
unit: "MiB"
label: pod
query_range: >-
sum(
container_memory_working_set_bytes{pod="{{pod}}",container!="POD"}
) by (container)
unit: "bytes"
label: container
- group: Network metrics
panels:
- title: "Network Receive (In)"
type: "line-chart"
y_label: "Received (KiB/sec)"
y_label: "Received (bytes/sec)"
metrics:
- id: pod_network_receive
query_range: 'rate(container_network_receive_bytes_total{pod="{{pod}}",container="POD"}[5m])/1024'
unit: "KiB / sec"
query_range: >-
sum(
rate(
container_network_receive_bytes_total{pod="{{pod}}"}[5m]
)
) by (pod)
unit: "bytes"
label: pod
- title: "Network Transmit (Out)"
type: "line-chart"
y_label: "Transmitted (KiB/sec)"
y_label: "Transmitted (bytes/sec)"
metrics:
- id: pod_network_transmit
query_range: 'rate(container_network_transmit_bytes_total{pod="{{pod}}",container="POD"}[5m])/1024'
unit: "KiB / sec"
query_range: >-
sum(
rate(
container_network_transmit_bytes_total{pod="{{pod}}"}[5m]
)
) by (pod)
unit: bytes
label: pod
- group: Disk metrics
panels:
- title: "Disk Reads"
type: "line-chart"
y_label: "Disk reads (KiB/sec)"
y_label: "Disk reads (bytes/sec)"
metrics:
- id: pod_disk_reads
query_range: 'rate(container_fs_reads_bytes_total{container="POD",pod="{{pod}}"}[5m])/1024'
unit: "KiB / sec"
label: pod
query_range: >-
sum(
rate(
container_fs_reads_bytes_total{pod="{{pod}}", container!="POD"}[5m]
)
) by (container,device)
unit: "bytes / sec"
label: "{{container}} {{device}}"
- title: "Disk Writes"
type: "line-chart"
y_label: "Disk writes (KiB/sec)"
y_label: "Disk writes (bytes/sec)"
metrics:
- id: pod_disk_writes
query_range: 'rate(container_fs_writes_bytes_total{container="POD",pod="{{pod}}"}[5m])/1024'
unit: "KiB / sec"
label: pod
query_range: >-
sum(
rate(
container_fs_writes_bytes_total{pod="{{pod}}", container!="POD"}[5m]
)
) by (container,device)
unit: "bytes / sec"
label: "{{container}} {{device}}"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment