summary | refs | log | tree | commit | diff
path: root/roles/monitoring
diff options
context:
space:
mode:
author: Christian Pointner <equinox@spreadspace.org> 2022-11-06 22:45:38 +0100
committer: Christian Pointner <equinox@spreadspace.org> 2022-11-06 22:45:38 +0100
commit: e290141439b60d387c3719dbb54efe3fcd7a41b4 (patch)
tree: 6835573cf67752d0681ccb41da0ecbaa7bfc2e4e /roles/monitoring
parent: monitoring/prometheus: add smokeping-prober (diff)
monitoring/prometheus: improve grafana dashboard for smokeping-prober
Diffstat (limited to 'roles/monitoring')
-rw-r--r-- roles/monitoring/grafana/files/dashboard-smokeping.json 320
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_smokeping.yml 9
2 files changed, 226 insertions, 103 deletions
diff --git a/roles/monitoring/grafana/files/dashboard-smokeping.json b/roles/monitoring/grafana/files/dashboard-smokeping.json
index aab228d4..b6882813 100644
--- a/roles/monitoring/grafana/files/dashboard-smokeping.json
+++ b/roles/monitoring/grafana/files/dashboard-smokeping.json
@@ -9,12 +9,13 @@
"pluginName": "Prometheus"
}
],
+ "__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
- "version": "6.4.0"
+ "version": "9.2.3"
},
{
"type": "panel",
@@ -27,175 +28,288 @@
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "timeseries",
+ "name": "Time series",
+ "version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
- "datasource": "-- Grafana --",
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
"type": "dashboard"
}
]
},
"editable": true,
- "gnetId": null,
+ "fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
- "iteration": 1573496641574,
"links": [],
+ "liveNow": false,
"panels": [
{
- "cards": {
- "cardPadding": null,
- "cardRound": null
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "color": {
- "cardColor": "#b4ff00",
- "colorScale": "sqrt",
- "colorScheme": "interpolateOranges",
- "exponent": 0.5,
- "mode": "opacity"
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "smooth",
+ "lineStyle": {
+ "fill": "solid"
+ },
+ "lineWidth": 2,
+ "pointSize": 6,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "#E24D42",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": []
},
- "dataFormat": "tsbuckets",
- "datasource": "${DS_PROMETHEUS}",
"gridPos": {
- "h": 17,
+ "h": 9,
"w": 24,
"x": 0,
"y": 0
},
- "heatmap": {},
- "hideZeroBuckets": false,
- "highlightCards": true,
"id": 2,
- "legend": {
- "show": false
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
},
- "links": [],
- "options": {},
- "reverseYBuckets": false,
"targets": [
{
- "expr": "sum(rate(smokeping_response_duration_seconds_bucket{instance=~\"$prober\",host=\"$target\"}[5m])) by (le)",
- "format": "heatmap",
- "intervalFactor": 1,
- "legendFormat": "{{le}}",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "1- instance:smokeping_probe_success:ratio1m{instance=\"$prober\", host=~\"$target\"}",
+ "interval": "",
+ "legendFormat": "{{ ip }}",
+ "range": true,
"refId": "A"
}
],
- "title": "Smokeping",
- "tooltip": {
- "show": true,
- "showHistogram": false
+ "title": "Packet Loss",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "type": "heatmap",
- "xAxis": {
- "show": true
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "scaleDistribution": {
+ "type": "linear"
+ }
+ }
+ },
+ "overrides": []
},
- "xBucketNumber": null,
- "xBucketSize": null,
- "yAxis": {
- "decimals": 0,
- "format": "s",
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true,
- "splitFactor": null
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 9
+ },
+ "id": 4,
+ "maxPerRow": 2,
+ "options": {
+ "calculate": false,
+ "cellGap": 1,
+ "color": {
+ "exponent": 1,
+ "fill": "green",
+ "mode": "scheme",
+ "reverse": false,
+ "scale": "exponential",
+ "scheme": "Inferno",
+ "steps": 64
+ },
+ "exemplars": {
+ "color": "rgba(255,0,255,0.7)"
+ },
+ "filterValues": {
+ "le": 1e-9
+ },
+ "legend": {
+ "show": false
+ },
+ "rowsFrame": {
+ "layout": "auto"
+ },
+ "tooltip": {
+ "show": true,
+ "yHistogram": false
+ },
+ "yAxis": {
+ "axisPlacement": "left",
+ "reverse": false,
+ "unit": "s"
+ }
},
- "yBucketBound": "auto",
- "yBucketNumber": null,
- "yBucketSize": null
+ "pluginVersion": "9.2.3",
+ "repeat": "target",
+ "repeatDirection": "h",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum (rate(smokeping_response_duration_seconds_bucket{instance=\"$prober\", host=~\"$target\"}[$__rate_interval])) by (le)",
+ "format": "heatmap",
+ "instant": false,
+ "interval": "",
+ "legendFormat": "{{le}}",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "$target",
+ "type": "heatmap"
}
],
- "refresh": "30s",
- "schemaVersion": 20,
+ "refresh": false,
+ "schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
- "allValue": null,
"current": {},
- "datasource": "${DS_PROMETHEUS}",
- "definition": "",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "definition": "label_values(smokeping_prober_build_info, instance)",
"hide": 0,
"includeAll": false,
- "label": null,
- "multi": true,
+ "multi": false,
"name": "prober",
"options": [],
- "query": "label_values(smokeping_prober_build_info, instance)",
+ "query": {
+ "query": "label_values(smokeping_prober_build_info, instance)",
+ "refId": "StandardVariableQuery"
+ },
"refresh": 1,
"regex": "",
"skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
+ "sort": 0,
+ "type": "query"
},
{
- "allValue": null,
"current": {},
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
"definition": "label_values(smokeping_response_duration_seconds_bucket, host)",
"hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
+ "includeAll": true,
+ "multi": true,
"name": "target",
"options": [],
- "query": "label_values(smokeping_response_duration_seconds_bucket, host)",
+ "query": {
+ "query": "label_values(smokeping_response_duration_seconds_bucket, host)",
+ "refId": "StandardVariableQuery"
+ },
"refresh": 1,
"regex": "",
"skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
+ "sort": 0,
+ "type": "query"
}
]
},
"time": {
- "from": "now-1h",
+ "from": "now-6h",
"to": "now"
},
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
+ "timepicker": {},
"timezone": "",
"title": "Smokeping",
- "uid": "i5aRaLaik",
- "version": 11
-}
+ "uid": "kHGhzOv4z",
+ "version": 5,
+ "weekStart": ""
+} \ No newline at end of file
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_smokeping.yml b/roles/monitoring/prometheus/server/defaults/main/rules_smokeping.yml
index 5bab05f1..6dfcd2f9 100644
--- a/roles/monitoring/prometheus/server/defaults/main/rules_smokeping.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_smokeping.yml
@@ -1,6 +1,15 @@
---
prometheus_server_rules_smokeping_extra: []
prometheus_server_rules_smokeping:
+ - record: instance:smokeping_probe_success:ratio1m
+ expr: increase(smokeping_response_duration_seconds_count[1m]) / increase(smokeping_requests_total[1m])
+ - record: instance:smokeping_response_duration_seconds:q50
+ expr: histogram_quantile(0.50, rate(smokeping_response_duration_seconds_bucket[1m]))
+ - record: instance:smokeping_response_duration_seconds:q90
+ expr: histogram_quantile(0.90, rate(smokeping_response_duration_seconds_bucket[1m]))
+ - record: instance:smokeping_response_duration_seconds:q99
+ expr: histogram_quantile(0.99, rate(smokeping_response_duration_seconds_bucket[1m]))
+
- alert: SmokepingProberConfigReloadFailure
expr: smokeping_prober_config_last_reload_successful != 1
for: 0m