summary refs log tree commit diff
path: root/roles/monitoring
diff options
context:
space:
mode:
author Christian Pointner <equinox@spreadspace.org> 2023-05-26 21:34:14 +0200
committer Christian Pointner <equinox@spreadspace.org> 2023-07-17 21:51:22 +0200
commit 695131994b5a749e129fb304e8ba709acd37afe8 (patch)
tree 02111746a1cd0cfc31c5736170b12aafadcb771e /roles/monitoring
parent make textfile collector for apt packages configurable (diff)
add support for chrony_exporter (replaces textfile collector)
Diffstat (limited to 'roles/monitoring')
-rw-r--r-- roles/monitoring/grafana/defaults/main.yml 2
-rw-r--r-- roles/monitoring/grafana/files/dashboard-chrony.json (renamed from roles/monitoring/grafana/files/dashboard-chronyd.json) 758
-rw-r--r-- roles/monitoring/prometheus/exporter/chrony/defaults/main.yml 6
-rw-r--r-- roles/monitoring/prometheus/exporter/chrony/handlers/main.yml 15
-rw-r--r-- roles/monitoring/prometheus/exporter/chrony/tasks/main.yml 65
-rw-r--r-- roles/monitoring/prometheus/exporter/chrony/templates/service.j2 31
-rw-r--r-- roles/monitoring/prometheus/exporter/meta/main.yml 2
-rw-r--r-- roles/monitoring/prometheus/exporter/node/defaults/main.yml 1
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 138
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 33
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 9
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/main.yml 1
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_chrony.yml 14
13 files changed, 546 insertions, 529 deletions
diff --git a/roles/monitoring/grafana/defaults/main.yml b/roles/monitoring/grafana/defaults/main.yml
index 10eac947..8a113e2d 100644
--- a/roles/monitoring/grafana/defaults/main.yml
+++ b/roles/monitoring/grafana/defaults/main.yml
@@ -41,7 +41,7 @@ grafana_datasources: []
grafana_dashboards: []
# - file: node-full
# datasource: "Prometheus"
-# - file: chronyd
+# - file: chrony
# datasource: "Prometheus"
# - file: environment-sensors
# datasource: "Prometheus"
diff --git a/roles/monitoring/grafana/files/dashboard-chronyd.json b/roles/monitoring/grafana/files/dashboard-chrony.json
index 1a401ffa..d6ae5c4f 100644
--- a/roles/monitoring/grafana/files/dashboard-chronyd.json
+++ b/roles/monitoring/grafana/files/dashboard-chrony.json
@@ -9,12 +9,13 @@
"pluginName": "Prometheus"
}
],
+ "__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
- "version": "8.2.2"
+ "version": "10.0.2"
},
{
"type": "datasource",
@@ -30,12 +31,6 @@
},
{
"type": "panel",
- "id": "state-timeline",
- "name": "State timeline",
- "version": ""
- },
- {
- "type": "panel",
"id": "table",
"name": "Table",
"version": ""
@@ -51,7 +46,10 @@
"list": [
{
"builtIn": 1,
- "datasource": "-- Grafana --",
+ "datasource": {
+ "type": "datasource",
+ "uid": "grafana"
+ },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
@@ -68,16 +66,17 @@
},
"editable": true,
"fiscalYearStartMonth": 0,
- "gnetId": null,
"graphTooltip": 1,
"id": null,
- "iteration": 1642434896568,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
"gridPos": {
"h": 1,
"w": 24,
@@ -86,11 +85,23 @@
},
"id": 11,
"panels": [],
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "refId": "A"
+ }
+ ],
"title": "Tracking",
"type": "row"
},
{
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
"description": "",
"fieldConfig": {
"defaults": {
@@ -132,24 +143,31 @@
"text": {},
"textMode": "name"
},
- "pluginVersion": "8.2.2",
- "repeat": null,
+ "pluginVersion": "10.0.2",
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": false,
- "expr": "chronyd_tracking_source{instance=\"$instance\"}",
+ "expr": "chrony_tracking_info{instance=\"$instance\"}",
"format": "time_series",
"instant": true,
"interval": "",
- "legendFormat": "{{ value }}",
+ "legendFormat": "{{ tracking_address }}",
"refId": "A"
}
],
- "title": "Reference Source",
+ "title": "Reference Source Address",
"type": "stat"
},
{
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
"description": "",
"fieldConfig": {
"defaults": {
@@ -199,11 +217,16 @@
"text": {},
"textMode": "value"
},
- "pluginVersion": "8.2.2",
+ "pluginVersion": "10.0.2",
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": false,
- "expr": "chronyd_tracking_stratum{instance=\"$instance\"}",
+ "expr": "chrony_tracking_stratum{instance=\"$instance\"}",
"instant": true,
"interval": "",
"legendFormat": "",
@@ -214,14 +237,18 @@
"type": "stat"
},
{
- "datasource": null,
- "description": "",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
@@ -256,60 +283,87 @@
{
"color": "green",
"value": null
+ },
+ {
+ "color": "red",
+ "value": 80
}
]
},
- "unit": "ppm"
+ "unit": "s"
},
"overrides": []
},
"gridPos": {
- "h": 8,
+ "h": 7,
"w": 12,
"x": 12,
"y": 1
},
- "id": 6,
+ "id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
- "placement": "bottom"
+ "placement": "bottom",
+ "showLegend": true
},
"tooltip": {
- "mode": "single"
+ "mode": "single",
+ "sort": "none"
}
},
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": true,
- "expr": "chronyd_tracking_frequency_error{instance=\"$instance\"}",
+ "expr": "chrony_tracking_last_offset_seconds{instance=\"$instance\"}",
"interval": "",
- "legendFormat": "Frequency",
+ "legendFormat": "Last Offset",
+ "range": true,
"refId": "A"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": true,
- "expr": "chronyd_tracking_frequency_residual{instance=\"$instance\"}",
+ "expr": "chrony_tracking_rms_offset_seconds{instance=\"$instance\"}",
"hide": false,
"interval": "",
- "legendFormat": "Residual Frequency",
+ "legendFormat": "RMS Offset (long term average)",
+ "range": true,
"refId": "B"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": true,
- "expr": "chronyd_tracking_frequency_skew{instance=\"$instance\"}",
+ "expr": "chrony_tracking_system_time_seconds{instance=\"$instance\"}",
"hide": false,
"interval": "",
- "legendFormat": "Skew",
+ "legendFormat": "System Time",
+ "range": true,
"refId": "C"
}
],
- "title": "Frequency",
+ "title": "Offset",
"type": "timeseries"
},
{
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
"description": "",
"fieldConfig": {
"defaults": {
@@ -351,28 +405,39 @@
"text": {},
"textMode": "name"
},
- "pluginVersion": "8.2.2",
+ "pluginVersion": "10.0.2",
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": true,
- "expr": "chronyd_tracking_leap_status{instance=\"$instance\"}",
+ "expr": "chrony_tracking_info{instance=\"$instance\"}",
"instant": true,
"interval": "",
- "legendFormat": "{{ value }}",
+ "legendFormat": "{{ tracking_refid }}",
"refId": "A"
}
],
- "title": "Leap second status",
+ "title": "Reference Source ID",
"type": "stat"
},
{
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "description": "Absolute bound on the computer’s clock accuracy (assuming the stratum-1 computer is correct)",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
@@ -407,10 +472,6 @@
{
"color": "green",
"value": null
- },
- {
- "color": "red",
- "value": 80
}
]
},
@@ -419,58 +480,57 @@
"overrides": []
},
"gridPos": {
- "h": 10,
+ "h": 8,
"w": 12,
"x": 0,
"y": 7
},
- "id": 8,
+ "id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
- "placement": "bottom"
+ "placement": "bottom",
+ "showLegend": true
},
"tooltip": {
- "mode": "single"
+ "mode": "single",
+ "sort": "none"
}
},
"targets": [
{
- "exemplar": true,
- "expr": "chronyd_tracking_last_offset{instance=\"$instance\"}",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "instance:chrony_clock_error_seconds:abs{instance=\"$instance\"}",
+ "format": "time_series",
+ "instant": false,
"interval": "",
- "legendFormat": "Last Offset",
+ "legendFormat": "Clock Error",
+ "range": true,
"refId": "A"
- },
- {
- "exemplar": true,
- "expr": "chronyd_tracking_rms_offset{instance=\"$instance\"}",
- "hide": false,
- "interval": "",
- "legendFormat": "RMS Offse (long term average)",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "chronyd_tracking_system_time{instance=\"$instance\"}",
- "hide": false,
- "interval": "",
- "legendFormat": "System Time",
- "refId": "C"
}
],
- "title": "Offset",
+ "title": "Maximum Clock Error",
"type": "timeseries"
},
{
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
@@ -517,36 +577,50 @@
"overrides": []
},
"gridPos": {
- "h": 8,
+ "h": 7,
"w": 12,
"x": 12,
- "y": 9
+ "y": 8
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
- "placement": "bottom"
+ "placement": "bottom",
+ "showLegend": true
},
"tooltip": {
- "mode": "single"
+ "mode": "single",
+ "sort": "none"
}
},
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": true,
- "expr": "chronyd_tracking_root_delay{instance=\"$instance\"}",
+ "expr": "chrony_tracking_root_delay_seconds{instance=\"$instance\"}",
"interval": "",
"legendFormat": "Root delay",
+ "range": true,
"refId": "A"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": true,
- "expr": "chronyd_tracking_root_dispersion{instance=\"$instance\"}",
+ "expr": "chrony_tracking_root_dispersion_seconds{instance=\"$instance\"}",
"hide": false,
"interval": "",
"legendFormat": "Root dispersion",
+ "range": true,
"refId": "B"
}
],
@@ -555,20 +629,35 @@
},
{
"collapsed": false,
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
- "y": 17
+ "y": 15
},
"id": 13,
"panels": [],
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "refId": "A"
+ }
+ ],
"title": "Sources",
"type": "row"
},
{
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
"description": "",
"fieldConfig": {
"defaults": {
@@ -577,211 +666,234 @@
},
"custom": {
"align": "auto",
- "displayMode": "color-background-solid",
- "filterable": false
+ "cellOptions": {
+ "mode": "basic",
+ "type": "color-background"
+ },
+ "filterable": false,
+ "inspect": false
},
- "mappings": [
- {
- "options": {
- "0": {
- "index": 0,
- "text": "may be in error"
- },
- "1": {
- "index": 1,
- "text": "unusable"
- },
- "2": {
- "index": 2,
- "text": "not combined"
- },
- "3": {
- "index": 3,
- "text": "combined"
- },
- "4": {
- "index": 4,
- "text": "current best"
- }
- },
- "type": "value"
- }
- ],
+ "mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
- },
- {
- "color": "red",
- "value": 0
- },
- {
- "color": "orange",
- "value": 1
- },
- {
- "color": "blue",
- "value": 2
- },
- {
- "color": "light-green",
- "value": 3
- },
- {
- "color": "green",
- "value": 4
}
]
},
"unit": "none"
},
- "overrides": []
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "State"
+ },
+ "properties": [
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "candidate": {
+ "color": "light-green",
+ "index": 4,
+ "text": "combined"
+ },
+ "falseticker": {
+ "color": "orange",
+ "index": 2,
+ "text": "falseticker"
+ },
+ "jittery": {
+ "color": "yellow",
+ "index": 3,
+ "text": "jittery"
+ },
+ "outlier": {
+ "color": "transparent",
+ "index": 5,
+ "text": "not combined"
+ },
+ "sync": {
+ "color": "green",
+ "index": 0,
+ "text": "synchronized"
+ },
+ "unreach": {
+ "color": "red",
+ "index": 1,
+ "text": "unreachable"
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Polling Interval"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "s"
+ }
+ ]
+ }
+ ]
},
"gridPos": {
- "h": 7,
- "w": 8,
+ "h": 8,
+ "w": 24,
"x": 0,
- "y": 18
+ "y": 16
},
"id": 19,
"options": {
+ "cellHeight": "sm",
+ "footer": {
+ "countRows": false,
+ "fields": "",
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
"frameIndex": 0,
- "showHeader": false
+ "showHeader": true,
+ "sortBy": []
},
- "pluginVersion": "8.2.2",
- "repeat": null,
+ "pluginVersion": "10.0.2",
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "builder",
"exemplar": false,
- "expr": "chronyd_peer_status{instance=\"$instance\"}",
+ "expr": "chrony_sources_state_info{instance=\"$instance\"}",
"format": "table",
"instant": true,
"interval": "",
- "legendFormat": "{{ remote }}",
+ "legendFormat": "__auto",
+ "range": false,
"refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "builder",
+ "exemplar": false,
+ "expr": "chrony_sources_stratum{instance=\"$instance\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "builder",
+ "exemplar": false,
+ "expr": "chrony_sources_polling_interval_seconds{instance=\"$instance\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "C"
}
],
"title": "Current Status",
"transformations": [
{
+ "id": "joinByField",
+ "options": {
+ "byField": "source_address",
+ "mode": "outer"
+ }
+ },
+ {
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
+ "Time 2": true,
+ "Value #A": true,
+ "Value #B": false,
"__name__": true,
+ "__name__ 2": true,
"instance": true,
+ "instance 2": true,
"job": true,
- "mode": true,
- "stratum": true
+ "job 2": true,
+ "source_mode": true,
+ "source_name 2": true,
+ "source_name 3": true
+ },
+ "indexByName": {
+ "Time 1": 1,
+ "Time 2": 9,
+ "Time 3": 15,
+ "Value #A": 8,
+ "Value #B": 14,
+ "Value #C": 20,
+ "__name__ 1": 2,
+ "__name__ 2": 10,
+ "__name__ 3": 16,
+ "instance 1": 3,
+ "instance 2": 11,
+ "instance 3": 17,
+ "job 1": 4,
+ "job 2": 12,
+ "job 3": 18,
+ "source_address": 0,
+ "source_mode": 5,
+ "source_name 1": 6,
+ "source_name 2": 13,
+ "source_name 3": 19,
+ "source_state": 7
},
- "indexByName": {},
"renameByName": {
- "Time": ""
+ "Value #B": "Stratum",
+ "Value #C": "Polling Interval",
+ "source_address": "Address",
+ "source_name": "Name",
+ "source_name 1": "Name",
+ "source_state": "State"
}
}
- },
- {
- "id": "sortBy",
- "options": {
- "fields": {},
- "sort": [
- {
- "desc": true,
- "field": "Value"
- }
- ]
- }
}
],
"type": "table"
},
{
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "thresholds"
- },
- "custom": {
- "fillOpacity": 74,
- "lineWidth": 0
- },
- "mappings": [
- {
- "options": {
- "0": {
- "index": 0,
- "text": "unreachable"
- },
- "1": {
- "index": 1,
- "text": "ok"
- }
- },
- "type": "value"
- }
- ],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "red",
- "value": null
- },
- {
- "color": "green",
- "value": 1
- }
- ]
- },
- "unit": "none"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 18
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "id": 16,
- "options": {
- "alignValue": "center",
- "legend": {
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "mergeValues": true,
- "rowHeight": 0.5,
- "showValue": "never",
- "tooltip": {
- "mode": "single"
- }
- },
- "targets": [
- {
- "exemplar": false,
- "expr": "max without (stratum) (chronyd_peer_reachable{instance=\"$instance\"})",
- "instant": false,
- "interval": "",
- "legendFormat": "{{ remote }}",
- "refId": "A"
- }
- ],
- "title": "Reachability",
- "type": "state-timeline"
- },
- {
- "datasource": null,
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
@@ -830,122 +942,53 @@
"gridPos": {
"h": 7,
"w": 8,
- "x": 16,
- "y": 18
- },
- "id": 17,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
- }
- },
- "targets": [
- {
- "exemplar": true,
- "expr": "chronyd_offset_seconds{instance=\"$instance\"}",
- "interval": "",
- "legendFormat": "{{ remote }}",
- "refId": "A"
- }
- ],
- "title": "Offset",
- "type": "timeseries"
- },
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "smooth",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "ppm"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 7,
- "w": 8,
"x": 0,
- "y": 25
+ "y": 24
},
- "id": 15,
+ "id": 18,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
- "placement": "bottom"
+ "placement": "bottom",
+ "showLegend": true
},
"tooltip": {
- "mode": "single"
+ "mode": "single",
+ "sort": "none"
}
},
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "builder",
"exemplar": true,
- "expr": "chronyd_freq_ppm{instance=\"$instance\"}",
+ "expr": "chrony_sources_last_sample_age_seconds{instance=\"$instance\"} < 4294967295",
"interval": "",
- "legendFormat": "{{ remote }}",
+ "legendFormat": "{{ source_address }} ({{ source_name }})",
+ "range": true,
"refId": "A"
}
],
- "title": "Frequency",
+ "title": "Last Sample: Age",
"type": "timeseries"
},
{
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
@@ -987,7 +1030,7 @@
}
]
},
- "unit": "ppm"
+ "unit": "s"
},
"overrides": []
},
@@ -995,39 +1038,52 @@
"h": 7,
"w": 8,
"x": 8,
- "y": 25
+ "y": 24
},
- "id": 20,
+ "id": 21,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
- "placement": "bottom"
+ "placement": "bottom",
+ "showLegend": true
},
"tooltip": {
- "mode": "single"
+ "mode": "single",
+ "sort": "none"
}
},
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": true,
- "expr": "chronyd_freq_skew_ppm{instance=\"$instance\"}",
+ "expr": "chrony_sources_last_sample_offset_seconds{instance=\"$instance\"}",
"interval": "",
- "legendFormat": "{{ remote }}",
+ "legendFormat": "{{ source_address }} ({{ source_name }})",
+ "range": true,
"refId": "A"
}
],
- "title": "Frequency Skew",
+ "title": "Last Sample: Offset",
"type": "timeseries"
},
{
- "datasource": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
@@ -1077,54 +1133,61 @@
"h": 7,
"w": 8,
"x": 16,
- "y": 25
+ "y": 24
},
- "id": 18,
+ "id": 20,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
- "placement": "bottom"
+ "placement": "bottom",
+ "showLegend": true
},
"tooltip": {
- "mode": "single"
+ "mode": "single",
+ "sort": "none"
}
},
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
"exemplar": true,
- "expr": "chronyd_std_dev_seconds{instance=\"$instance\"}",
+ "expr": "chrony_sources_last_sample_error_margin_seconds{instance=\"$instance\"}",
"interval": "",
- "legendFormat": "{{ remote }}",
+ "legendFormat": "{{ source_address }} ({{ source_name }})",
+ "range": true,
"refId": "A"
}
],
- "title": "Std. Dev.",
+ "title": "Last Sample: Error Margin",
"type": "timeseries"
}
],
"refresh": "30s",
- "schemaVersion": 31,
+ "schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
- "allValue": null,
"current": {},
- "datasource": "${DS_PROMETHEUS}",
- "definition": "label_values(chronyd_tracking_source, instance)",
- "description": null,
- "error": null,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "definition": "label_values(chrony_up,instance)",
"hide": 0,
"includeAll": false,
- "label": null,
"multi": false,
"name": "instance",
"options": [],
"query": {
- "query": "label_values(chronyd_tracking_source, instance)",
- "refId": "StandardVariableQuery"
+ "query": "label_values(chrony_up,instance)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
@@ -1140,7 +1203,8 @@
},
"timepicker": {},
"timezone": "",
- "title": "chronyd",
- "uid": "eIO_Uyd7k",
- "version": 3
-}
+ "title": "chrony",
+ "uid": "c252be85-471b-41c9-b1cb-f0e1b8206593",
+ "version": 3,
+ "weekStart": ""
+}
\ No newline at end of file
diff --git a/roles/monitoring/prometheus/exporter/chrony/defaults/main.yml b/roles/monitoring/prometheus/exporter/chrony/defaults/main.yml
new file mode 100644
index 00000000..699ed580
--- /dev/null
+++ b/roles/monitoring/prometheus/exporter/chrony/defaults/main.yml
@@ -0,0 +1,6 @@
+---
+# prometheus_exporter_chrony_version:
+
+prometheus_exporter_chrony_enable_collectors:
+ - sources
+ - tracking
diff --git a/roles/monitoring/prometheus/exporter/chrony/handlers/main.yml b/roles/monitoring/prometheus/exporter/chrony/handlers/main.yml
new file mode 100644
index 00000000..0c940ca9
--- /dev/null
+++ b/roles/monitoring/prometheus/exporter/chrony/handlers/main.yml
@@ -0,0 +1,15 @@
+---
+- name: restart prometheus-chrony-exporter
+ service:
+ name: prometheus-chrony-exporter
+ state: restarted
+
+- name: reload nginx
+ service:
+ name: nginx
+ state: reloaded
+
+### TODO: remove this once all hosts have been migrated
+- name: reload systemd
+ systemd:
+ daemon_reload: yes
diff --git a/roles/monitoring/prometheus/exporter/chrony/tasks/main.yml b/roles/monitoring/prometheus/exporter/chrony/tasks/main.yml
new file mode 100644
index 00000000..f15037ec
--- /dev/null
+++ b/roles/monitoring/prometheus/exporter/chrony/tasks/main.yml
@@ -0,0 +1,65 @@
+---
+- name: generate apt pin file for exporter-chrony package
+ when: prometheus_exporter_chrony_version is defined
+ copy:
+ dest: "/etc/apt/preferences.d/prom-exporter-chrony.pref"
+ content: |
+ Package: prom-exporter-chrony
+ Pin: version {{ prometheus_exporter_chrony_version }}-1
+ Pin-Priority: 1001
+
+- name: remove apt pin file for exporter-chrony package
+ when: prometheus_exporter_chrony_version is not defined
+ file:
+ path: "/etc/apt/preferences.d/prom-exporter-chrony.pref"
+ state: absent
+
+- name: install apt packages
+ apt:
+ name: "prom-exporter-chrony{% if prometheus_exporter_chrony_version is defined %}={{ prometheus_exporter_chrony_version }}-1{% endif %}"
+ state: present
+ allow_downgrade: yes
+ notify: restart prometheus-chrony-exporter
+
+- name: generate systemd service unit
+ template:
+ src: service.j2
+ dest: /etc/systemd/system/prometheus-chrony-exporter.service
+ notify: restart prometheus-chrony-exporter
+
+- name: make sure prometheus-chrony-exporter is enabled and started
+ systemd:
+ name: prometheus-chrony-exporter.service
+ daemon_reload: yes
+ state: started
+ enabled: yes
+
+- name: register exporter
+ copy:
+ content: |
+ location = /chrony {
+ proxy_pass http://127.0.0.1:9123/metrics;
+ }
+ dest: /etc/prometheus/exporter/chrony.locations
+ notify: reload nginx
+
+
+## TODO: remove these tasks once all hosts have been migrated
+- name: make sure the systemd timer for chrony textfile collector is disabled and stopped
+ systemd:
+ service: prometheus-node-exporter_chrony.timer
+ enabled: no
+ state: stopped
+ register: result_systemd_stop
+ failed_when: "result_systemd_stop is failed and 'Could not find the requested service' not in result_systemd_stop.msg"
+
+- name: remove files from chrony textfile collector
+ loop:
+ - /etc/systemd/system/prometheus-node-exporter_chrony.timer
+ - /etc/systemd/system/prometheus-node-exporter_chrony.service
+ - /usr/local/share/prometheus-node-exporter/chrony
+ - /var/lib/prometheus-node-exporter/textfile-collector/chrony.prom
+ file:
+ path: "{{ item }}"
+ state: absent
+ notify: reload systemd
diff --git a/roles/monitoring/prometheus/exporter/chrony/templates/service.j2 b/roles/monitoring/prometheus/exporter/chrony/templates/service.j2
new file mode 100644
index 00000000..cb806649
--- /dev/null
+++ b/roles/monitoring/prometheus/exporter/chrony/templates/service.j2
@@ -0,0 +1,31 @@
+[Unit]
+Description=Prometheus chrony exporter
+
+[Service]
+Restart=always
+User=_chrony
+ExecStart=/usr/bin/prometheus-chrony-exporter --web.listen-address="127.0.0.1:9123" --chrony.address=unix:///run/chrony/chronyd.sock {% for collector in prometheus_exporter_chrony_enable_collectors %} --collector.{{ collector }}{% endfor %}{{ '' }}
+
+# systemd hardening-options
+AmbientCapabilities=
+CapabilityBoundingSet=
+DeviceAllow=/dev/null rw
+DevicePolicy=strict
+LockPersonality=true
+MemoryDenyWriteExecute=true
+NoNewPrivileges=true
+PrivateDevices=true
+PrivateTmp=true
+ProtectControlGroups=true
+ProtectHome=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectSystem=strict
+ReadWritePaths=/run/chrony
+RemoveIPC=true
+RestrictNamespaces=true
+RestrictRealtime=true
+SystemCallArchitectures=native
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/monitoring/prometheus/exporter/meta/main.yml b/roles/monitoring/prometheus/exporter/meta/main.yml
index 4a427770..10a251f4 100644
--- a/roles/monitoring/prometheus/exporter/meta/main.yml
+++ b/roles/monitoring/prometheus/exporter/meta/main.yml
@@ -23,4 +23,6 @@ dependencies:
when: "'standalone-kubelet' in (prometheus_exporters_default | union(prometheus_exporters_extra))"
- role: monitoring/prometheus/exporter/modbus
when: "'modbus' in (prometheus_exporters_default | union(prometheus_exporters_extra))"
+ - role: monitoring/prometheus/exporter/chrony
+ when: "'chrony' in (prometheus_exporters_default | union(prometheus_exporters_extra))"
- role: monitoring/prometheus/exporter/register
diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml
index 3b961a4f..ab4cee38 100644
--- a/roles/monitoring/prometheus/exporter/node/defaults/main.yml
+++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml
@@ -18,7 +18,6 @@ prometheus_exporter_node_install_apt_textfile_collector_script: "{{ ansible_pkg_
prometheus_exporter_node_textfile_collector_scripts:
- deleted-libraries
# - smartmon
-# - chrony
# - sensors
# prometheus_exporter_node_textfile_collector__sensors:
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2
deleted file mode 100644
index 95c6a5d3..00000000
--- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2
+++ /dev/null
@@ -1,138 +0,0 @@
-#!/usr/bin/env {{ python_basename }}
-#
-# Description: Extract chronyd metrics from chronyc -c.
-# Author: Aanchal Malhotra <aanch...@bu.edu>
-#
-# Works with chrony version 2.4 and higher
-#
-# this is from: https://www.mail-archive.com/chrony-users@chrony.tuxfamily.org/msg02179.html
-
-import subprocess
-import sys
-
-chrony_sourcestats_cmd = ['chronyc', '-n', '-c', 'sourcestats']
-chrony_source_cmd = ['chronyc', '-n', '-c', 'sources']
-chrony_tracking_cmd = ['chronyc', '-n', '-c', 'tracking']
-
-metrics_fields = [
- "Name/IP Address",
- "NP",
- "NR",
- "Span",
- "Frequency",
- "Freq Skew",
- "Offset",
- "Std Dev"]
-
-status_types = {'x': 0, '?': 1, '-': 2, '+': 3, '*': 4}
-
-metrics_source = {
- "*": "synchronized (system peer)",
- "+": "synchronized",
- "?": "unreachable",
- "x": "Falseticker",
- "-": "reference clock"}
-
-metrics_mode = {
- '^': "server",
- '=': "peer",
- "#": "reference clock"}
-
-
-def get_cmdoutput(command):
- proc = subprocess.Popen(command, stdout=subprocess.PIPE)
- out, err = proc.communicate()
- return_code = proc.poll()
- if return_code:
- raise RuntimeError('Call to "{}" returned error: \
- {}'.format(command, return_code))
- return out.decode("utf-8")
-
-
-def printPrometheusformat(metric, values):
- print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric))
- print("# TYPE chronyd_%s gauge" % (metric))
- for labels in values:
- if labels is None:
- print("chronyd_%s %f" % (metric, values[labels]))
- else:
- print("chronyd_%s{{ '{%' }}s} %f" % (metric, labels, values[labels]))
-
-
-def printPrometheusscalar(metric, value):
- print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric))
- print("# TYPE chronyd_%s gauge" % (metric))
- print("chronyd_%s %f" % (metric, value))
-
-
-def printPrometheusEnum(metric, name):
- print("# HELP chronyd_%s enum for %s" % (metric, metric))
- print("# TYPE chronyd_%s gauge" % (metric))
- print("chronyd_%s{value=\"%s\"} 1" % (metric, name))
-
-
-def weight(value):
- val_int = int(value, 8)
- return bin(val_int).count('1')/8.0
-
-
-def main(argv):
- peer_status_metrics = {}
- peer_reach_metrics = {}
- offset_metrics = {}
- freq_skew_metrics = {}
- freq_metrics = {}
- std_dev_metrics = {}
- chrony_sourcestats = get_cmdoutput(chrony_sourcestats_cmd)
- for line in chrony_sourcestats.split('\n'):
- if (len(line)) > 0:
- x = line.split(',')
- common_labels = "remote=\"%s\"" % (x[0])
- freq_metrics[common_labels] = float(x[4])
- freq_skew_metrics[common_labels] = float(x[5])
- std_dev_metrics[common_labels] = float(x[7])
-
- printPrometheusformat('freq_skew_ppm', freq_skew_metrics)
- printPrometheusformat('freq_ppm', freq_metrics)
- printPrometheusformat('std_dev_seconds', std_dev_metrics)
-
- chrony_source = get_cmdoutput(chrony_source_cmd)
- for line in chrony_source.split('\n'):
- if (len(line)) > 0:
- x = line.split(',')
- stratum = x[3]
- reach = x[5]
- mode = metrics_mode[x[0]]
- common_labels = "remote=\"%s\"" % (x[2])
- peer_labels = "%s,stratum=\"%s\",mode=\"%s\"" % (
- common_labels,
- stratum,
- mode,
- )
- peer_status_metrics[peer_labels] = float(status_types[x[1]])
- peer_reach_metrics[peer_labels] = weight(reach)
- offset_metrics[common_labels] = float(x[8])
-
- printPrometheusformat('peer_status', peer_status_metrics)
- printPrometheusformat('offset_seconds', offset_metrics)
- printPrometheusformat('peer_reachable', peer_reach_metrics)
-
- chrony_tracking_stats = get_cmdoutput(chrony_tracking_cmd).rstrip()
- fields = chrony_tracking_stats.split(",")
- printPrometheusEnum("tracking_source", fields[1])
- printPrometheusscalar("tracking_stratum", float(fields[2]))
- printPrometheusscalar("tracking_ref_time", float(fields[3]))
- printPrometheusscalar("tracking_system_time", float(fields[4]))
- printPrometheusscalar("tracking_last_offset", float(fields[5]))
- printPrometheusscalar("tracking_rms_offset", float(fields[6]))
- printPrometheusscalar("tracking_frequency_error", float(fields[7]))
- printPrometheusscalar("tracking_frequency_residual", float(fields[8]))
- printPrometheusscalar("tracking_frequency_skew", float(fields[9]))
- printPrometheusscalar("tracking_root_delay", float(fields[10]))
- printPrometheusscalar("tracking_root_dispersion", float(fields[11]))
- printPrometheusscalar("tracking_update_interval", float(fields[12]))
- printPrometheusEnum("tracking_leap_status", fields[13])
-
-
-if __name__ == "__main__":
- main(sys.argv[1:])
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2
deleted file mode 100644
index 49b15185..00000000
--- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2
+++ /dev/null
@@ -1,33 +0,0 @@
-[Unit]
-Description=Promethues node exporter textfile collector chrony
-
-[Service]
-Type=oneshot
-Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector
-ExecStart=bash -o pipefail -c "/usr/local/share/prometheus-node-exporter/chrony | sponge /var/lib/prometheus-node-exporter/textfile-collector/chrony.prom"
-TimeoutStartSec=30s
-
-# systemd hardening-options
-AmbientCapabilities=CAP_DAC_OVERRIDE
-CapabilityBoundingSet=CAP_DAC_OVERRIDE
-DeviceAllow=/dev/null rw
-DevicePolicy=strict
-LockPersonality=true
-MemoryDenyWriteExecute=true
-NoNewPrivileges=true
-PrivateDevices=true
-PrivateTmp=true
-ProtectControlGroups=true
-ProtectHome=true
-ProtectKernelModules=true
-ProtectKernelTunables=true
-ProtectSystem=strict
-ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector /var/run/chrony
-RemoveIPC=true
-RestrictNamespaces=true
-RestrictRealtime=true
-RestrictAddressFamilies=AF_UNIX
-SystemCallArchitectures=native
-
-[Install]
-WantedBy=multi-user.target
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2
deleted file mode 100644
index eecc70e2..00000000
--- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2
+++ /dev/null
@@ -1,9 +0,0 @@
-[Unit]
-Description=Promethues node exporter textfile collector chrony
-
-[Timer]
-OnBootSec=40s
-OnUnitActiveSec=2min
-
-[Install]
-WantedBy=timers.target
diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml
index d778bad8..1e0dcf32 100644
--- a/roles/monitoring/prometheus/server/defaults/main/main.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/main.yml
@@ -22,6 +22,7 @@ prometheus_server_rules:
prometheus: "{{ prometheus_server_rules_prometheus + ((prometheus_server_alertmanager is defined) | ternary(prometheus_server_rules_prometheus_alertmanager, [])) + prometheus_server_rules_prometheus_extra }}"
node: "{{ prometheus_server_rules_node + prometheus_server_rules_node_extra }}"
openwrt: "{{ prometheus_server_rules_openwrt + prometheus_server_rules_openwrt_extra }}"
+ chrony: "{{ prometheus_server_rules_chrony + prometheus_server_rules_chrony_extra }}"
nut: "{{ prometheus_server_rules_nut + prometheus_server_rules_nut_extra }}"
nut/ups: "{{ prometheus_server_rules_nut__ups + prometheus_server_rules_nut__ups_extra }}"
blackbox: "{{ prometheus_server_rules_blackbox + prometheus_server_rules_blackbox_extra }}"
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_chrony.yml b/roles/monitoring/prometheus/server/defaults/main/rules_chrony.yml
new file mode 100644
index 00000000..e845a60b
--- /dev/null
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_chrony.yml
@@ -0,0 +1,14 @@
+---
+prometheus_server_rules_chrony_extra: []  # extra rules; appended to the defaults below to form prometheus_server_rules.chrony
+prometheus_server_rules_chrony:  # default recording/alerting rules for the chrony rule group
+ - record: instance:chrony_clock_error_seconds:abs  # sums |last offset|, root dispersion and half of the root delay
+ expr: abs(chrony_tracking_last_offset_seconds) + chrony_tracking_root_dispersion_seconds + (0.5 * chrony_tracking_root_delay_seconds)
+
+ - alert: ChronyUnreachable  # triggers when the chrony_up metric equals 0
+ expr: chrony_up == 0
+ for: 0m  # zero pending duration
+ labels:
+ severity: critical
+ annotations:  # NOTE(review): the quoted-brace expressions below look like Jinja escapes that emit literal double braces for Prometheus templating - confirm
+ summary: Unable to scrape chrony metrics (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "The chrony process might have crashed.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"