From b3788310647d557bd53d8b73d51eb4308645f065 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Thu, 27 Jul 2023 22:35:12 +0200 Subject: extend promethues metrics scraping from standalone-kubelet --- .../dashboard-standalone-kubelet-overview.json | 1649 ++++++++++++++++++++ .../exporter/standalone-kubelet/tasks/main.yml | 10 + .../prometheus/server/defaults/main/main.yml | 2 + .../main/rules_standalone-kubelet__probes.yml | 3 + .../main/rules_standalone-kubelet__resource.yml | 3 + .../server/templates/jobs/standalone-kubelet.j2 | 14 + 6 files changed, 1681 insertions(+) create mode 100644 roles/monitoring/grafana/files/dashboard-standalone-kubelet-overview.json create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_standalone-kubelet__probes.yml create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_standalone-kubelet__resource.yml create mode 100644 roles/monitoring/prometheus/server/templates/jobs/standalone-kubelet.j2 (limited to 'roles/monitoring') diff --git a/roles/monitoring/grafana/files/dashboard-standalone-kubelet-overview.json b/roles/monitoring/grafana/files/dashboard-standalone-kubelet-overview.json new file mode 100644 index 00000000..50003a81 --- /dev/null +++ b/roles/monitoring/grafana/files/dashboard-standalone-kubelet-overview.json @@ -0,0 +1,1649 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.0.2" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Kubelet and Cadvisor Metrics", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 16361, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 2, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(up{job=\"standalone-kubelet\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Running Kubelets", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 0 + }, + "id": 3, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(kubelet_desired_pods{job=\"standalone-kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Desired Pods", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 0 + }, + "id": 26, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(kubelet_running_pods{job=\"standalone-kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Running Pods", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 9, + "y": 0 + }, + "id": 27, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", + "expr": "sum(kubelet_running_containers{job=\"standalone-kubelet\", instance=~\"$instance\", container_state=\"created\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Containers (created)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 12, + "y": 0 + }, + "id": 4, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", + "expr": "sum(kubelet_running_containers{job=\"standalone-kubelet\", instance=~\"$instance\", container_state=\"running\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Containers (running)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 15, + "y": 0 + }, + "id": 28, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", + "expr": "sum(kubelet_running_containers{job=\"standalone-kubelet\", instance=~\"$instance\", container_state=\"exited\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Containers (exited)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 6, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(volume_manager_total_volumes{job=\"standalone-kubelet\", instance=~\"$instance\", state=\"desired_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Desired Volume Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 5, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(volume_manager_total_volumes{job=\"standalone-kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Actual Volume Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 33, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(node_cpu_usage_seconds_total{instance=~\"$instance\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "15s", + "intervalFactor": 1, + "legendFormat": "{{ pod }}", + "metric": "container_cpu", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "Node CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 30, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "node_memory_working_set_bytes{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "interval": "15s", + "intervalFactor": 1, + "legendFormat": "{{ pod }}", + "metric": "container_cpu", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "Node Memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 29, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(pod_cpu_usage_seconds_total{instance=~\"$instance\", pod=~\"$pod\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "15s", + "intervalFactor": 1, + "legendFormat": "{{ pod }}", + "metric": "container_cpu", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "Pod CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 34, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "pod_memory_working_set_bytes{instance=~\"$instance\", pod=~\"$pod\"}", + "format": "time_series", + "hide": false, + "interval": "15s", + "intervalFactor": 1, + "legendFormat": "{{ pod }}", + "metric": "container_cpu", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "Pod Memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 31, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(container_cpu_usage_seconds_total{instance=~\"$instance\", pod=~\"$pod\", container=~\"$container\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "15s", + "intervalFactor": 1, + "legendFormat": "{{ pod }} / {{ container }}", + "metric": "container_cpu", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "Container CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 32, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "container_memory_working_set_bytes{instance=~\"$instance\", pod=~\"$pod\", container=~\"$container\"}", + "format": "time_series", + "hide": false, + "interval": "15s", + "intervalFactor": 1, + "legendFormat": "{{ pod }} / {{ container }}", + "metric": "container_cpu", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "Container Memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 26 + }, + "id": 23, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "process_resident_memory_bytes{job=\"standalone-kubelet\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Kubelet Memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 26 + }, + "id": 24, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(process_cpu_seconds_total{job=\"standalone-kubelet\",instance=~\"$instance\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Kubelet CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 26 + }, + "id": 25, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "go_goroutines{job=\"standalone-kubelet\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Kubelet Goroutines", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 10, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{job=\"standalone-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}/{{operation_type}}", + "range": true, + "refId": "A" + } + ], + "title": "Kubelet Operation duration 99th quantile", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(up{job=\"standalone-kubelet\"},instance)", + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(up{job=\"standalone-kubelet\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": {}, + "definition": "label_values(pod_cpu_usage_seconds_total{instance=~\"$instance\"},pod)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "pod", + "options": [], + "query": { + "query": "label_values(pod_cpu_usage_seconds_total{instance=~\"$instance\"},pod)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "definition": "label_values(container_cpu_usage_seconds_total{instance=~\"$instance\", pod=~\"$pod\"},container)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "container", + "options": [], + "query": { + "query": "label_values(container_cpu_usage_seconds_total{instance=~\"$instance\", pod=~\"$pod\"},container)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Standalone Kubelet Overview", + "uid": "3138fa155d5915769fbded898ac09fd9", + "version": 3, + "weekStart": "" +} \ No newline at end of file diff --git a/roles/monitoring/prometheus/exporter/standalone-kubelet/tasks/main.yml b/roles/monitoring/prometheus/exporter/standalone-kubelet/tasks/main.yml index e676f263..ffeb974f 100644 --- a/roles/monitoring/prometheus/exporter/standalone-kubelet/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/standalone-kubelet/tasks/main.yml @@ -7,5 +7,15 @@ proxy_ssl_certificate /etc/ssl/standalone-kubelet/client/crt.pem; proxy_ssl_certificate_key /etc/ssl/standalone-kubelet/client/key.pem; } + location = /standalone-kubelet/resource { + proxy_pass https://{{ kubernetes_standalone_address | default('127.0.0.1') }}:{{ kubernetes_standalone_port | default(10250) }}/metrics/resource; + proxy_ssl_certificate /etc/ssl/standalone-kubelet/client/crt.pem; + proxy_ssl_certificate_key /etc/ssl/standalone-kubelet/client/key.pem; + } + location = /standalone-kubelet/probes { + proxy_pass https://{{ kubernetes_standalone_address | default('127.0.0.1') }}:{{ kubernetes_standalone_port | default(10250) }}/metrics/probes; + proxy_ssl_certificate /etc/ssl/standalone-kubelet/client/crt.pem; + proxy_ssl_certificate_key /etc/ssl/standalone-kubelet/client/key.pem; + } dest: /etc/prometheus/exporter/standalone-kubelet.locations notify: reload nginx diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml index 1e0dcf32..21430a3d 100644 --- a/roles/monitoring/prometheus/server/defaults/main/main.yml +++ b/roles/monitoring/prometheus/server/defaults/main/main.yml @@ -36,6 +36,8 @@ prometheus_server_rules: snmp: "{{ prometheus_server_rules_snmp + prometheus_server_rules_snmp_extra }}" snmp/probe: "{{ prometheus_server_rules_snmp__probe + prometheus_server_rules_snmp__probe_extra }}" standalone-kubelet: "{{ prometheus_server_rules_standalone_kubelet + prometheus_server_rules_standalone_kubelet_extra }}" + standalone-kubelet/resource: "{{ prometheus_server_rules_standalone_kubelet__resource + prometheus_server_rules_standalone_kubelet__resource_extra }}" + standalone-kubelet/probes: "{{ prometheus_server_rules_standalone_kubelet__probes + prometheus_server_rules_standalone_kubelet__probes_extra }}" modbus: "{{ prometheus_server_rules_modbus + prometheus_server_rules_modbus_extra }}" modbus/probe: "{{ prometheus_server_rules_modbus__probe + prometheus_server_rules_modbus__probe_extra }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_standalone-kubelet__probes.yml b/roles/monitoring/prometheus/server/defaults/main/rules_standalone-kubelet__probes.yml new file mode 100644 index 00000000..a384045c --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_standalone-kubelet__probes.yml @@ -0,0 +1,3 @@ +--- +prometheus_server_rules_standalone_kubelet__probes_extra: [] +prometheus_server_rules_standalone_kubelet__probes: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_standalone-kubelet__resource.yml b/roles/monitoring/prometheus/server/defaults/main/rules_standalone-kubelet__resource.yml new file mode 100644 index 00000000..6894e16f --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_standalone-kubelet__resource.yml @@ -0,0 +1,3 @@ +--- +prometheus_server_rules_standalone_kubelet__resource_extra: [] +prometheus_server_rules_standalone_kubelet__resource: [] diff --git a/roles/monitoring/prometheus/server/templates/jobs/standalone-kubelet.j2 b/roles/monitoring/prometheus/server/templates/jobs/standalone-kubelet.j2 new file mode 100644 index 00000000..eafd740a --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/jobs/standalone-kubelet.j2 @@ -0,0 +1,14 @@ + - job_name: '{{ job }}' + metrics_path: /{{ job }} + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" + metric_relabel_configs: + - source_labels: [ "__name__" ] + regex: "(kubernetes_feature_enabled|apiserver_.*)" + action: drop -- cgit v1.2.3