diff options
Diffstat (limited to 'roles/monitoring')
20 files changed, 2547 insertions, 6 deletions
diff --git a/roles/monitoring/grafana/files/dashboard-standalone-kubelet-overview.json b/roles/monitoring/grafana/files/dashboard-apps/standalone-kubelet-overview.json index 5ff9a338..5ff9a338 100644 --- a/roles/monitoring/grafana/files/dashboard-standalone-kubelet-overview.json +++ b/roles/monitoring/grafana/files/dashboard-apps/standalone-kubelet-overview.json diff --git a/roles/monitoring/grafana/files/dashboard-network-ups-tools.json b/roles/monitoring/grafana/files/dashboard-environment/network-ups-tools.json index 367e9b56..367e9b56 100644 --- a/roles/monitoring/grafana/files/dashboard-network-ups-tools.json +++ b/roles/monitoring/grafana/files/dashboard-environment/network-ups-tools.json diff --git a/roles/monitoring/grafana/files/dashboard-environment-sensors.json b/roles/monitoring/grafana/files/dashboard-environment/sensors.json index 78734478..78734478 100644 --- a/roles/monitoring/grafana/files/dashboard-environment-sensors.json +++ b/roles/monitoring/grafana/files/dashboard-environment/sensors.json diff --git a/roles/monitoring/grafana/files/dashboard-bind.json b/roles/monitoring/grafana/files/dashboard-net/bind.json index 1a18bf04..1a18bf04 100644 --- a/roles/monitoring/grafana/files/dashboard-bind.json +++ b/roles/monitoring/grafana/files/dashboard-net/bind.json diff --git a/roles/monitoring/grafana/files/dashboard-chrony.json b/roles/monitoring/grafana/files/dashboard-net/chrony.json index 2c093cf6..2c093cf6 100644 --- a/roles/monitoring/grafana/files/dashboard-chrony.json +++ b/roles/monitoring/grafana/files/dashboard-net/chrony.json diff --git a/roles/monitoring/grafana/files/dashboard-net/coredns.json b/roles/monitoring/grafana/files/dashboard-net/coredns.json new file mode 100644 index 00000000..61776b09 --- /dev/null +++ b/roles/monitoring/grafana/files/dashboard-net/coredns.json @@ -0,0 +1,2463 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.1.0" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "A dashboard for the CoreDNS DNS server with updated metrics for version 1.7.0+. Based on the CoreDNS 1.7.0+ dashboard by ejkinger", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 14981, + "graphTooltip": 0, + "id": null, + "links": [ + { + "$$hashKey": "object:94", + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "CoreDNS.io", + "type": "link", + "url": "https://coredns.io" + } + ], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 44, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], + "title": "Global stats", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 39, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_dns_requests_total[5m])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 60 + } + ], + "title": "Requests (by instance)", + "type": "piechart" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 41, + "panels": [], + "repeat": "instance", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], + "title": "Health: $instance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "super-light-blue", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 0, + "y": 12 + }, + "id": 42, + "maxPerRow": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "name", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "coredns_build_info{instance=~\"$instance\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{version}}", + "refId": "A", + "step": 60 + } + ], + "title": "Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 3, + "y": 12 + }, + "id": 35, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_forward_healthcheck_broken_total{instance=~\"$instance\"}[5m]))", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Upstream Health Check Fails", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 6, + "y": 12 + }, + "id": 36, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_forward_max_concurrent_rejects_total{instance=~\"$instance\"}[5m]))", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Upstream Rejected Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 9, + "y": 12 + }, + "id": 81, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_panics_total{instance=~\"$instance\"}[5m]))", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Panics", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 12, + "y": 12 + }, + "id": 92, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_reload_failed_total{instance=~\"$instance\"}[5m]))", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Failed Reloads", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 0.03, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 60 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 15, + "y": 12 + }, + "id": 119, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto", + "text": {} + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(process_cpu_seconds_total{job=\"coredns\", instance=~\"$instance\"}[5m]))", + "interval": "", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Time", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "super-light-blue", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 19, + "y": 12 + }, + "id": 134, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "go_memstats_alloc_bytes{job=\"coredns\", instance=~\"$instance\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 26, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], + "title": "Local", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 2, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\"}[5m])) by (server)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{server}}", + "refId": "A", + "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_cache_requests_total{instance=~\"$instance\"}[5m]))", + "hide": false, + "interval": "", + "legendFormat": "cache", + "refId": "B" + } + ], + "title": "Requests (total)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 6, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\"}[5m])) by (zone)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{zone}}", + "refId": "A", + "step": 60 + } + ], + "title": "Requests (by zone)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 32, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~\"$instance\", zone=\".\"}[5m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99%", + "refId": "A", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~\"$instance\", zone=\".\"}[5m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "90%", + "refId": "B", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~\"$instance\", zone=\".\"}[5m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "50%", + "refId": "C", + "step": 40 + } + ], + "title": "Responses (latency, internet zone)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 4, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\"}[5m])) by (type)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 60 + } + ], + "title": "Requests (by type)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 24, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_cache_hits_total{instance=~\"$instance\", type=\"success\"}[5m])) / sum(rate(coredns_cache_requests_total{instance=~\"$instance\"}[5m]))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "hits: success", + "refId": "A", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_cache_hits_total{instance=~\"$instance\", type=\"denial\"}[5m])) / sum(rate(coredns_cache_requests_total{instance=~\"$instance\"}[5m]))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "hits: denial", + "refId": "B", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "(sum(rate(coredns_cache_requests_total{instance=~\"$instance\"}[5m])) - sum(rate(coredns_cache_hits_total{instance=~\"$instance\", type=\"success\"}[5m]))) / sum(rate(coredns_cache_requests_total{instance=~\"$instance\"}[5m]))", + "hide": false, + "interval": "", + "legendFormat": "misses", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_dnssec_cache_hits_total{instance=~\"$instance\"}[5m])) / sum(rate(coredns_cache_requests_total{instance=~\"$instance\"}[5m]))", + "hide": false, + "interval": "", + "legendFormat": "hits: DNSSEC", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "(sum(rate(coredns_cache_requests_total{instance=~\"$instance\"}[5m])) - sum(rate(coredns_dnssec_cache_hits_total{instance=~\"$instance\"}[5m]))) / sum(rate(coredns_cache_requests_total{instance=~\"$instance\"}[5m]))", + "hide": false, + "interval": "", + "legendFormat": "misses: DNSSEC", + "refId": "E" + } + ], + "title": "Cache (hitrate)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 8, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_dns_do_requests_total{instance=~\"$instance\"}[5m])) by (zone)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{zone}}", + "refId": "A", + "step": 40 + } + ], + "title": "Requests (DNSSEC by zone)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 40 + }, + "id": 14, + "options": { + "displayLabels": [], + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_dns_responses_total{instance=~\"$instance\"}[5m])) by (rcode)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{rcode}}", + "refId": "A", + "step": 40 + } + ], + "title": "Responses (by code)", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 40 + }, + "id": 18, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\", zone=\".\"}[5m])) by (le))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "99%", + "refId": "A", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\", zone=\".\"}[5m])) by (le))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "90%", + "refId": "B", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\", zone=\".\"}[5m])) by (le))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "50%", + "metric": "", + "refId": "C", + "step": 40 + } + ], + "title": "Requests (size, internet zone)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 40 + }, + "id": 33, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\", zone=\".\"}[5m])) by (le))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "99%", + "refId": "A", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\", zone=\".\"}[5m])) by (le))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "90%", + "refId": "B", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\", zone=\".\"}[5m])) by (le))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "50%", + "metric": "", + "refId": "C", + "step": 40 + } + ], + "title": "Responses (size, internet zone)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 47 + }, + "id": 22, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(coredns_cache_entries{instance=~\"$instance\"}) by (type)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(coredns_dnssec_cache_entries{instance=~\"$instance\"})", + "hide": false, + "interval": "", + "legendFormat": "DNSSEC", + "refId": "B" + } + ], + "title": "Cache (size)", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 63, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], + "title": "Upstream", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 55 + }, + "id": 72, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_forward_requests_total{instance=~\"$instance\"}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "upstream", + "refId": "A", + "step": 60 + } + ], + "title": "Requests (total)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 55 + }, + "id": 38, + "maxPerRow": 6, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_forward_conn_cache_hits_total{instance=~\"$instance\"}[5m])) / sum(rate(coredns_forward_requests_total{instance=~\"$instance\"}[5m]))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "hits", + "refId": "A", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "(sum(rate(coredns_forward_requests_total{instance=~\"$instance\"}[5m])) - sum(rate(coredns_forward_conn_cache_hits_total{instance=~\"$instance\"}[5m]))) / sum(rate(coredns_forward_requests_total{instance=~\"$instance\"}[5m]))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "misses", + "refId": "B", + "step": 40 + } + ], + "title": "Cache (hitrate)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 37, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{instance=~\"$instance\"}[5m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99%", + "refId": "A", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(coredns_forward_request_duration_seconds_bucket{instance=~\"$instance\"}[5m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "90%", + "refId": "B", + "step": 40 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(rate(coredns_forward_request_duration_seconds_bucket{instance=~\"$instance\"}[5m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "50%", + "refId": "C", + "step": 40 + } + ], + "title": "Responses (latency)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 62 + }, + "id": 105, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_forward_requests_total{instance=~\"$instance\"}[5m])) by (to)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{to}}", + "refId": "A", + "step": 40 + } + ], + "title": "Requests (by upstream)", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "Time", + "1.0.0.1:853", + "1.1.1.1:853", + "8.8.4.4:853", + "8.8.8.8:853" + ] + } + } + } + ], + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 62 + }, + "id": 53, + "options": { + "displayLabels": [], + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(coredns_forward_responses_total{instance=~\"$instance\"}[5m])) by (rcode)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{rcode}}", + "refId": "A", + "step": 40 + } + ], + "title": "Responses (by code)", + "type": "piechart" + } + ], + "refresh": "5s", + "schemaVersion": 39, + "tags": [ + "dns", + "coredns" + ], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(up{job=\"coredns\"}, instance)", + "hide": 0, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(up{job=\"coredns\"}, instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "CoreDNS", + "uid": "wY4blRMGz", + "version": 1, + "weekStart": "" +}
\ No newline at end of file diff --git a/roles/monitoring/grafana/files/dashboard-mosquitto.json b/roles/monitoring/grafana/files/dashboard-net/mosquitto.json index 3c0ce190..3c0ce190 100644 --- a/roles/monitoring/grafana/files/dashboard-mosquitto.json +++ b/roles/monitoring/grafana/files/dashboard-net/mosquitto.json diff --git a/roles/monitoring/grafana/files/dashboard-smokeping.json b/roles/monitoring/grafana/files/dashboard-net/smokeping.json index ef26a1f9..ef26a1f9 100644 --- a/roles/monitoring/grafana/files/dashboard-smokeping.json +++ b/roles/monitoring/grafana/files/dashboard-net/smokeping.json diff --git a/roles/monitoring/grafana/files/dashboard-ipmi.json b/roles/monitoring/grafana/files/dashboard-sys/ipmi.json index 62920b61..62920b61 100644 --- a/roles/monitoring/grafana/files/dashboard-ipmi.json +++ b/roles/monitoring/grafana/files/dashboard-sys/ipmi.json diff --git a/roles/monitoring/grafana/files/dashboard-node-full.json b/roles/monitoring/grafana/files/dashboard-sys/node-full.json index e887f9b5..e887f9b5 100644 --- a/roles/monitoring/grafana/files/dashboard-node-full.json +++ b/roles/monitoring/grafana/files/dashboard-sys/node-full.json diff --git a/roles/monitoring/grafana/files/dashboard-openwrt.json b/roles/monitoring/grafana/files/dashboard-sys/openwrt.json index b3bf4cea..b3bf4cea 100644 --- a/roles/monitoring/grafana/files/dashboard-openwrt.json +++ b/roles/monitoring/grafana/files/dashboard-sys/openwrt.json diff --git a/roles/monitoring/grafana/tasks/main.yml b/roles/monitoring/grafana/tasks/main.yml index 5d6c9c17..240d8125 100644 --- a/roles/monitoring/grafana/tasks/main.yml +++ b/roles/monitoring/grafana/tasks/main.yml @@ -126,7 +126,7 @@ mode: 0750 - name: create sub-directories for dashboards - loop: "{{ grafana_dashboards | grafana_dashboard_subdirs }}" + loop: "{{ grafana_dashboards | grafana_dashboard_subdirs | unique }}" file: path: "/var/lib/grafana/dashboards/ansible/{{ item }}" state: directory diff --git a/roles/monitoring/prometheus/exporter/coredns/handlers/main.yml b/roles/monitoring/prometheus/exporter/coredns/handlers/main.yml new file mode 100644 index 00000000..d4e42ca0 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/coredns/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: reload nginx + service: + name: nginx + state: reloaded diff --git a/roles/monitoring/prometheus/exporter/coredns/tasks/main.yml b/roles/monitoring/prometheus/exporter/coredns/tasks/main.yml new file mode 100644 index 00000000..884eb4bf --- /dev/null +++ b/roles/monitoring/prometheus/exporter/coredns/tasks/main.yml @@ -0,0 +1,9 @@ +--- +- name: register exporter + copy: + content: | + location = /coredns { + proxy_pass http://127.0.0.1:9153/metrics; + } + dest: /etc/prometheus/exporter/coredns.locations + notify: reload nginx diff --git a/roles/monitoring/prometheus/exporter/meta/main.yml b/roles/monitoring/prometheus/exporter/meta/main.yml index b60d0dbc..0580861a 100644 --- a/roles/monitoring/prometheus/exporter/meta/main.yml +++ b/roles/monitoring/prometheus/exporter/meta/main.yml @@ -29,4 +29,6 @@ dependencies: when: "'nftables' in (prometheus_exporters_default | union(prometheus_exporters_extra))" - role: monitoring/prometheus/exporter/mosquitto when: "'mosquitto' in (prometheus_exporters_default | union(prometheus_exporters_extra))" + - role: monitoring/prometheus/exporter/coredns + when: "'coredns' in (prometheus_exporters_default | union(prometheus_exporters_extra))" - role: monitoring/prometheus/exporter/register diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml index 9f867568..dd290e9e 100644 --- a/roles/monitoring/prometheus/server/defaults/main/main.yml +++ b/roles/monitoring/prometheus/server/defaults/main/main.yml @@ -15,7 +15,7 @@ prometheus_server_evaluation_interval: "15s" prometheus_server_jobs: - node -#prometheus_server_jobs_extra: | +#prometheus_server_jobs_extra: |- # - job_name: ... prometheus_server_rules: @@ -41,6 +41,7 @@ prometheus_server_rules: nftables: "{{ prometheus_server_rules_nftables + prometheus_server_rules_nftables_extra }}" whawty-nginx-sso: "{{ prometheus_server_rules_whawty_nginx_sso + prometheus_server_rules_whawty_nginx_sso_extra }}" mosquitto: "{{ prometheus_server_rules_mosquitto + prometheus_server_rules_mosquitto_extra }}" + coredns: "{{ prometheus_server_rules_coredns + prometheus_server_rules_coredns_extra }}" # prometheus_server_alertmanager: # url: "127.0.0.1:9093" @@ -48,6 +49,7 @@ prometheus_server_rules: # basic_auth: # username: server # password: geheim +# scrape_instance: <inventory-hostname> prometheus_server_web_listen_address: 127.0.0.1:9090 # prometheus_server_web_route_prefix: /prometheus/ @@ -73,5 +75,27 @@ prometheus_server_web_listen_address: 127.0.0.1:9090 # - node # - blackbox +prometheus_server_remote_write_receiver: no + +# prometheus_server_remote_write_destinations: +# example: +# url: "https://mon.example.com/prometheus/api/v1/write" +# basic_auth: +# username: remote +# password_file: /etc/prometheus/prometheus-remote.secret +# tls_config: +# ca: | +# -----BEGIN CERTIFICATE----- +# ... +# -----END CERTIFICATE----- +# write_relabel_configs: +# - source_labels: ['__name__'] +# regex: 'go_gc_.*' +# action: 'drop' +# - source_labels: ['job'] +# regex: 'alertmanager' +# action: 'drop' + # prometheus_server_secret_files: # user: secret +# remote: othersecret diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_coredns.yml b/roles/monitoring/prometheus/server/defaults/main/rules_coredns.yml new file mode 100644 index 00000000..126a7ba4 --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_coredns.yml @@ -0,0 +1,29 @@ +--- +prometheus_server_rules_coredns_extra: [] +prometheus_server_rules_coredns: + - alert: CorednsPanicCount + expr: increase(coredns_panics_total[15m]) > 0 + for: 0m + labels: + severity: critical + annotations: + summary: CoreDNS Panic (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Number of CoreDNS panics encountered has been increasing in the last 15 minutes\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: CorednsFailedReloadCount + expr: increase(coredns_reload_failed_total[15m]) > 0 + for: 0m + labels: + severity: critical + annotations: + summary: CoreDNS reload failed (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Number of CoreDNS failed reloads has been increasing in the last 15 minutes\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: CorednsBrokenForwardHealthchecks + expr: increase(coredns_forward_healthcheck_broken_total[15m]) > 0 + for: 0m + labels: + severity: warning + annotations: + summary: CoreDNS broken forward healthchecks (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Number of CoreDNS broken forward healthchecks has been increasing in the last 15 minutes\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_prometheus.yml b/roles/monitoring/prometheus/server/defaults/main/rules_prometheus.yml index 4db6cd17..5cb27264 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_prometheus.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_prometheus.yml @@ -93,13 +93,13 @@ prometheus_server_rules_prometheus: description: "Prometheus has no target in service discovery\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - alert: PrometheusTargetScrapingSlow - expr: prometheus_target_interval_length_seconds{quantile="0.9"} > 60 + expr: prometheus_target_interval_length_seconds{quantile="0.9"} / on (interval, instance, job) prometheus_target_interval_length_seconds{quantile="0.5"} > 1.05 for: 5m labels: severity: warning annotations: summary: Prometheus target scraping slow (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "Prometheus is scraping exporters slowly\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + description: "Prometheus is scraping exporters slowly since it exceeded the requested interval time\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - alert: PrometheusLargeScrape expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total[10m]) > 10 diff --git a/roles/monitoring/prometheus/server/templates/prometheus.service.j2 b/roles/monitoring/prometheus/server/templates/prometheus.service.j2 index e65e9425..86c30cbd 100644 --- a/roles/monitoring/prometheus/server/templates/prometheus.service.j2 +++ b/roles/monitoring/prometheus/server/templates/prometheus.service.j2 @@ -6,7 +6,7 @@ After=time-sync.target [Service] Restart=on-failure User=prometheus -ExecStart=/usr/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/var/lib/prometheus/metrics2/ --storage.tsdb.retention.time={{ prometheus_server_retention }}{% if prometheus_server_web_external_url is defined %} --web.external-url={{ prometheus_server_web_external_url }}{% endif %}{% if prometheus_server_web_route_prefix is defined %} --web.route-prefix={{ prometheus_server_web_route_prefix }}{% endif %}{% if prometheus_server_auth_users is defined %} --web.config.file=/etc/prometheus/prometheus-web.yml{% endif %} --web.listen-address={{ prometheus_server_web_listen_address }} +ExecStart=/usr/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/var/lib/prometheus/metrics2/ --storage.tsdb.retention.time={{ prometheus_server_retention }}{% if prometheus_server_web_external_url is defined %} --web.external-url={{ prometheus_server_web_external_url }}{% endif %}{% if prometheus_server_web_route_prefix is defined %} --web.route-prefix={{ prometheus_server_web_route_prefix }}{% endif %}{% if prometheus_server_auth_users is defined %} --web.config.file=/etc/prometheus/prometheus-web.yml{% endif %}{% if prometheus_server_remote_write_receiver %} --web.enable-remote-write-receiver{% endif %} --web.listen-address={{ prometheus_server_web_listen_address }} ExecReload=/bin/kill -HUP $MAINPID TimeoutStopSec=20s SendSIGKILL=no diff --git a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 index 09d5452d..d72a4815 100644 --- a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 @@ -45,7 +45,7 @@ scrape_configs: - targets: ['localhost:9090'] labels: instance: '{{ inventory_hostname }}' -{% if prometheus_server_alertmanager is defined %} +{% if prometheus_server_alertmanager is defined and 'scrape_instance' in prometheus_server_alertmanager %} - job_name: 'alertmanager' {% if 'path_prefix' in prometheus_server_alertmanager %} @@ -58,6 +58,8 @@ scrape_configs: {% endif %} static_configs: - targets: ['{{ prometheus_server_alertmanager.url }}'] + labels: + instance: '{{ prometheus_server_alertmanager.scrape_instance }}' {% endif %} {% for job in (prometheus_server_jobs) %} @@ -94,3 +96,10 @@ scrape_configs: - targets: ['{{ config.url }}'] {% endfor %} {% endfor %} +{% if prometheus_server_remote_write_destinations is defined %} + +remote_write: +{% for name, config in prometheus_server_remote_write_destinations.items() %} + - {{ config | combine({'name': name }) | to_nice_yaml(indent=2) | indent(4) }} +{% endfor %} +{% endif %} |