Skip to content

Commit 0a5f952

Browse files
committed
fixup! fixup! Use kube-scheduler's metrics instead of kube-state-metrics
1 parent 758d714 commit 0a5f952

11 files changed (+100 / -100 lines changed)

alerts/resource_alerts.libsonnet

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
} +
3535
if $._config.showMultiCluster then {
3636
expr: |||
37-
sum(namespace_cpu:kube_pod_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0
37+
sum(namespace_cpu:kube_pod_resource_request:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0
3838
and
3939
(sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0
4040
||| % $._config,
@@ -43,7 +43,7 @@
4343
},
4444
} else {
4545
expr: |||
46-
sum(namespace_cpu:kube_pod_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
46+
sum(namespace_cpu:kube_pod_resource_request:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
4747
and
4848
(sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
4949
||| % $._config,
@@ -63,7 +63,7 @@
6363
} +
6464
if $._config.showMultiCluster then {
6565
expr: |||
66-
sum(namespace_memory:kube_pod_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0
66+
sum(namespace_memory:kube_pod_resource_request:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0
6767
and
6868
(sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0
6969
||| % $._config,
@@ -73,7 +73,7 @@
7373
} else
7474
{
7575
expr: |||
76-
sum(namespace_memory:kube_pod_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
76+
sum(namespace_memory:kube_pod_resource_request:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
7777
and
7878
(sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
7979
||| % $._config,

dashboards/resources/cluster.libsonnet

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -144,23 +144,23 @@ local template = grafana.template;
144144
)
145145
.addPanel(
146146
g.panel('CPU Requests Commitment') +
147-
g.statPanel('sum(namespace_cpu:kube_pod_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config)
147+
g.statPanel('sum(namespace_cpu:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config)
148148
)
149149
.addPanel(
150150
g.panel('CPU Limits Commitment') +
151-
g.statPanel('sum(namespace_cpu:kube_pod_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config)
151+
g.statPanel('sum(namespace_cpu:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config)
152152
)
153153
.addPanel(
154154
g.panel('Memory Utilisation') +
155155
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(node_memory_MemTotal_bytes{%(nodeExporterSelector)s,%(clusterLabel)s="$cluster"})' % $._config)
156156
)
157157
.addPanel(
158158
g.panel('Memory Requests Commitment') +
159-
g.statPanel('sum(namespace_memory:kube_pod_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config)
159+
g.statPanel('sum(namespace_memory:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config)
160160
)
161161
.addPanel(
162162
g.panel('Memory Limits Commitment') +
163-
g.statPanel('sum(namespace_memory:kube_pod_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config)
163+
g.statPanel('sum(namespace_memory:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config)
164164
)
165165
)
166166
.addRow(
@@ -177,10 +177,10 @@ local template = grafana.template;
177177
g.panel('CPU Quota') +
178178
g.tablePanel(podWorkloadColumns + [
179179
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
180-
'sum(namespace_cpu:kube_pod_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
181-
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
182-
'sum(namespace_cpu:kube_pod_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
183-
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
180+
'sum(namespace_cpu:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
181+
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
182+
'sum(namespace_cpu:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
183+
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
184184
], tableStyles {
185185
'Value #C': { alias: 'CPU Usage' },
186186
'Value #D': { alias: 'CPU Requests' },
@@ -207,10 +207,10 @@ local template = grafana.template;
207207
g.tablePanel(podWorkloadColumns + [
208208
// Not using container_memory_usage_bytes here because that includes page cache
209209
'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config,
210-
'sum(namespace_memory:kube_pod_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
211-
'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
212-
'sum(namespace_memory:kube_pod_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
213-
'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
210+
'sum(namespace_memory:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
211+
'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
212+
'sum(namespace_memory:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
213+
'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
214214
], tableStyles {
215215
'Value #C': { alias: 'Memory Usage', unit: 'bytes' },
216216
'Value #D': { alias: 'Memory Requests', unit: 'bytes' },

dashboards/resources/multi-cluster.libsonnet

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,23 +26,23 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
2626
)
2727
.addPanel(
2828
g.panel('CPU Requests Commitment') +
29-
g.statPanel('sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"})' % $._config)
29+
g.statPanel('sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"})' % $._config)
3030
)
3131
.addPanel(
3232
g.panel('CPU Limits Commitment') +
33-
g.statPanel('sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"})' % $._config)
33+
g.statPanel('sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"})' % $._config)
3434
)
3535
.addPanel(
3636
g.panel('Memory Utilisation') +
3737
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(node_memory_MemTotal_bytes{%(nodeExporterSelector)s})' % $._config)
3838
)
3939
.addPanel(
4040
g.panel('Memory Requests Commitment') +
41-
g.statPanel('sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"})' % $._config)
41+
g.statPanel('sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"})' % $._config)
4242
)
4343
.addPanel(
4444
g.panel('Memory Limits Commitment') +
45-
g.statPanel('sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"})' % $._config)
45+
g.statPanel('sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"})' % $._config)
4646
)
4747
)
4848
.addRow(
@@ -59,10 +59,10 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
5959
g.panel('CPU Quota') +
6060
g.tablePanel([
6161
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config,
62-
'sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
63-
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
64-
'sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
65-
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
62+
'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
63+
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
64+
'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
65+
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
6666
], tableStyles {
6767
'Value #A': { alias: 'CPU Usage' },
6868
'Value #B': { alias: 'CPU Requests' },
@@ -88,10 +88,10 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
8888
g.tablePanel([
8989
// Not using container_memory_usage_bytes here because that includes page cache
9090
'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s)' % $._config,
91-
'sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
92-
'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
93-
'sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
94-
'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
91+
'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
92+
'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
93+
'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
94+
'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
9595
], tableStyles {
9696
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
9797
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },

0 commit comments

Comments
 (0)