Commit 4ce1ad96, authored by Reuben Pereira and committed by Sean McGivern

Improve prometheus queries used in Pod health dashboard

- Aggregate by container where possible.
- Correct the disk I/O queries.
parent b9892e9e
...@@ -7,7 +7,7 @@ module Metrics ...@@ -7,7 +7,7 @@ module Metrics
DASHBOARD_NAME = N_('K8s pod health') DASHBOARD_NAME = N_('K8s pod health')
# SHA256 hash of dashboard content # SHA256 hash of dashboard content
DASHBOARD_VERSION = '0515db7a99078a2423b037f99251ba16bd163603c0a30229ae8aa7386e96421c' DASHBOARD_VERSION = '3a91b32f91b2dd3d90275333c0ea3630b3f3f37c4296ede5b5eef59bf523d66b'
SEQUENCE = [ SEQUENCE = [
STAGES::MetricEndpointInserter, STAGES::MetricEndpointInserter,
......
...@@ -15,55 +15,101 @@ panel_groups: ...@@ -15,55 +15,101 @@ panel_groups:
panels: panels:
- title: "CPU usage" - title: "CPU usage"
type: "line-chart" type: "line-chart"
y_label: "Cores per pod" y_label: "Cores per container"
metrics: metrics:
- id: pod_cpu_usage_seconds_total - id: pod_cpu_usage_seconds_total
query_range: 'rate(container_cpu_usage_seconds_total{pod="{{pod}}",container="POD"}[5m])' query_range: >-
sum(
rate(container_cpu_usage_seconds_total{pod="{{pod}}",container!="POD"}[5m])
)
by (container)
unit: "cores" unit: "cores"
label: pod label: container
- title: "CPU throttling"
type: "line-chart"
y_label: "Cores per container"
metrics:
- id: pod_cpu_cfs_throttle
query_range: >-
sum(
rate(container_cpu_cfs_throttled_seconds_total{pod="{{pod}}"}[5m])
)
by (container)
unit: "cores"
label: container
- group: Memory metrics - group: Memory metrics
panels: panels:
- title: "Memory usage working set" - title: "Memory usage working set"
type: "line-chart" type: "line-chart"
y_label: "Working set memory (MiB)" y_label: "Working set memory"
metrics: metrics:
- id: pod_memory_working_set - id: pod_memory_working_set
query_range: 'container_memory_working_set_bytes{pod="{{pod}}",container="POD"}/1024/1024' query_range: >-
unit: "MiB" sum(
label: pod container_memory_working_set_bytes{pod="{{pod}}",container!="POD"}
) by (container)
unit: "bytes"
label: container
- group: Network metrics - group: Network metrics
panels: panels:
- title: "Network Receive (In)" - title: "Network Receive (In)"
type: "line-chart" type: "line-chart"
y_label: "Received (KiB/sec)" y_label: "Received (bytes/sec)"
metrics: metrics:
- id: pod_network_receive - id: pod_network_receive
query_range: 'rate(container_network_receive_bytes_total{pod="{{pod}}",container="POD"}[5m])/1024' query_range: >-
unit: "KiB / sec" sum(
rate(
container_network_receive_bytes_total{pod="{{pod}}"}[5m]
)
) by (pod)
unit: "bytes"
label: pod label: pod
- title: "Network Transmit (Out)" - title: "Network Transmit (Out)"
type: "line-chart" type: "line-chart"
y_label: "Transmitted (KiB/sec)" y_label: "Transmitted (bytes/sec)"
metrics: metrics:
- id: pod_network_transmit - id: pod_network_transmit
query_range: 'rate(container_network_transmit_bytes_total{pod="{{pod}}",container="POD"}[5m])/1024' query_range: >-
unit: "KiB / sec" sum(
rate(
container_network_transmit_bytes_total{pod="{{pod}}"}[5m]
)
) by (pod)
unit: bytes
label: pod label: pod
- group: Disk metrics - group: Disk metrics
panels: panels:
- title: "Disk Reads" - title: "Disk Reads"
type: "line-chart" type: "line-chart"
y_label: "Disk reads (KiB/sec)" y_label: "Disk reads (bytes/sec)"
metrics: metrics:
- id: pod_disk_reads - id: pod_disk_reads
query_range: 'rate(container_fs_reads_bytes_total{container="POD",pod="{{pod}}"}[5m])/1024' query_range: >-
unit: "KiB / sec" sum(
label: pod rate(
container_fs_reads_bytes_total{pod="{{pod}}", container!="POD"}[5m]
)
) by (container,device)
unit: "bytes / sec"
label: "{{container}} {{device}}"
- title: "Disk Writes" - title: "Disk Writes"
type: "line-chart" type: "line-chart"
y_label: "Disk writes (KiB/sec)" y_label: "Disk writes (bytes/sec)"
metrics: metrics:
- id: pod_disk_writes - id: pod_disk_writes
query_range: 'rate(container_fs_writes_bytes_total{container="POD",pod="{{pod}}"}[5m])/1024' query_range: >-
unit: "KiB / sec" sum(
label: pod rate(
container_fs_writes_bytes_total{pod="{{pod}}", container!="POD"}[5m]
)
) by (container,device)
unit: "bytes / sec"
label: "{{container}} {{device}}"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment