diff options
Diffstat (limited to 'roles/monitoring')
13 files changed, 546 insertions, 529 deletions
diff --git a/roles/monitoring/grafana/defaults/main.yml b/roles/monitoring/grafana/defaults/main.yml index 10eac947..8a113e2d 100644 --- a/roles/monitoring/grafana/defaults/main.yml +++ b/roles/monitoring/grafana/defaults/main.yml @@ -41,7 +41,7 @@ grafana_datasources: [] grafana_dashboards: [] # - file: node-full # datasource: "Prometheus" -# - file: chronyd +# - file: chrony # datasource: "Prometheus" # - file: environment-sensors # datasource: "Prometheus" diff --git a/roles/monitoring/grafana/files/dashboard-chronyd.json b/roles/monitoring/grafana/files/dashboard-chrony.json index 1a401ffa..d6ae5c4f 100644 --- a/roles/monitoring/grafana/files/dashboard-chronyd.json +++ b/roles/monitoring/grafana/files/dashboard-chrony.json @@ -9,12 +9,13 @@ "pluginName": "Prometheus" } ], + "__elements": {}, "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "8.2.2" + "version": "10.0.2" }, { "type": "datasource", @@ -30,12 +31,6 @@ }, { "type": "panel", - "id": "state-timeline", - "name": "State timeline", - "version": "" - }, - { - "type": "panel", "id": "table", "name": "Table", "version": "" @@ -51,7 +46,10 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", @@ -68,16 +66,17 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1642434896568, "links": [], "liveNow": false, "panels": [ { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -86,11 +85,23 @@ }, "id": 11, "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Tracking", "type": "row" }, { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "", "fieldConfig": { "defaults": { @@ -132,24 +143,31 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.2", - "repeat": null, + "pluginVersion": "10.0.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": false, - "expr": "chronyd_tracking_source{instance=\"$instance\"}", + "expr": "chrony_tracking_info{instance=\"$instance\"}", "format": "time_series", "instant": true, "interval": "", - "legendFormat": "{{ value }}", + "legendFormat": "{{ tracking_address }}", "refId": "A" } ], - "title": "Reference Source", + "title": "Reference Source Address", "type": "stat" }, { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "", "fieldConfig": { "defaults": { @@ -199,11 +217,16 @@ "text": {}, "textMode": "value" }, - "pluginVersion": "8.2.2", + "pluginVersion": "10.0.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": false, - "expr": "chronyd_tracking_stratum{instance=\"$instance\"}", + "expr": "chrony_tracking_stratum{instance=\"$instance\"}", "instant": true, "interval": "", "legendFormat": "", @@ -214,14 +237,18 @@ "type": "stat" }, { - "datasource": null, - "description": "", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -256,60 +283,87 @@ { "color": "green", "value": null + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "ppm" + "unit": "s" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, "y": 1 }, - "id": 6, + "id": 8, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom" + "placement": "bottom", + "showLegend": true }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": true, - "expr": "chronyd_tracking_frequency_error{instance=\"$instance\"}", + "expr": "chrony_tracking_last_offset_seconds{instance=\"$instance\"}", "interval": "", - "legendFormat": "Frequency", + "legendFormat": "Last Offset", + "range": true, "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": true, - "expr": "chronyd_tracking_frequency_residual{instance=\"$instance\"}", + "expr": "chrony_tracking_rms_offset_seconds{instance=\"$instance\"}", "hide": false, "interval": "", - "legendFormat": "Residual Frequency", + "legendFormat": "RMS Offset (long term average)", + "range": true, "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": true, - "expr": "chronyd_tracking_frequency_skew{instance=\"$instance\"}", + "expr": "chrony_tracking_system_time_seconds{instance=\"$instance\"}", "hide": false, "interval": "", - "legendFormat": "Skew", + "legendFormat": "System Time", + "range": true, "refId": "C" } ], - "title": "Frequency", + "title": "Offset", "type": "timeseries" }, { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "", "fieldConfig": { "defaults": { @@ -351,28 +405,39 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.2", + "pluginVersion": "10.0.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": true, - "expr": "chronyd_tracking_leap_status{instance=\"$instance\"}", + "expr": "chrony_tracking_info{instance=\"$instance\"}", "instant": true, "interval": "", - "legendFormat": "{{ value }}", + "legendFormat": "{{ tracking_refid }}", "refId": "A" } ], - "title": "Leap second status", + "title": "Reference Source ID", "type": "stat" }, { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Absolute bound on the computer’s clock accuracy (assuming the stratum-1 computer is correct)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -407,10 +472,6 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, @@ -419,58 +480,57 @@ "overrides": [] }, "gridPos": { - "h": 10, + "h": 8, "w": 12, "x": 0, "y": 7 }, - "id": 8, + "id": 6, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom" + "placement": "bottom", + "showLegend": true }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "targets": [ { - "exemplar": true, - "expr": "chronyd_tracking_last_offset{instance=\"$instance\"}", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "instance:chrony_clock_error_seconds:abs{instance=\"$instance\"}", + "format": "time_series", + "instant": false, "interval": "", - "legendFormat": "Last Offset", + "legendFormat": "Clock Error", + "range": true, "refId": "A" - }, - { - "exemplar": true, - "expr": "chronyd_tracking_rms_offset{instance=\"$instance\"}", - "hide": false, - "interval": "", - "legendFormat": "RMS Offse (long term average)", - "refId": "B" - }, - { - "exemplar": true, - "expr": "chronyd_tracking_system_time{instance=\"$instance\"}", - "hide": false, - "interval": "", - "legendFormat": "System Time", - "refId": "C" } ], - "title": "Offset", + "title": "Maximum Clock Error", "type": "timeseries" }, { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -517,36 +577,50 @@ "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 9 + "y": 8 }, "id": 9, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom" + "placement": "bottom", + "showLegend": true }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": true, - "expr": "chronyd_tracking_root_delay{instance=\"$instance\"}", + "expr": "chrony_tracking_root_delay_seconds{instance=\"$instance\"}", "interval": "", "legendFormat": "Root delay", + "range": true, "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": true, - "expr": "chronyd_tracking_root_dispersion{instance=\"$instance\"}", + "expr": "chrony_tracking_root_dispersion_seconds{instance=\"$instance\"}", "hide": false, "interval": "", "legendFormat": "Root dispersion", + "range": true, "refId": "B" } ], @@ -555,20 +629,35 @@ }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 15 }, "id": 13, "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Sources", "type": "row" }, { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "", "fieldConfig": { "defaults": { @@ -577,211 +666,234 @@ }, "custom": { "align": "auto", - "displayMode": "color-background-solid", - "filterable": false + "cellOptions": { + "mode": "basic", + "type": "color-background" + }, + "filterable": false, + "inspect": false }, - "mappings": [ - { - "options": { - "0": { - "index": 0, - "text": "may be in error" - }, - "1": { - "index": 1, - "text": "unusable" - }, - "2": { - "index": 2, - "text": "not combined" - }, - "3": { - "index": 3, - "text": "combined" - }, - "4": { - "index": 4, - "text": "current best" - } - }, - "type": "value" - } - ], + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "transparent", "value": null - }, - { - "color": "red", - "value": 0 - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "blue", - "value": 2 - }, - { - "color": "light-green", - "value": 3 - }, - { - "color": "green", - "value": 4 } ] }, "unit": "none" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "State" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "candidate": { + "color": "light-green", + "index": 4, + "text": "combined" + }, + "falseticker": { + "color": "orange", + "index": 2, + "text": "falseticker" + }, + "jittery": { + "color": "yellow", + "index": 3, + "text": "jittery" + }, + "outlier": { + "color": "transparent", + "index": 5, + "text": "not combined" + }, + "sync": { + "color": "green", + "index": 0, + "text": "syncronized" + }, + "unreach": { + "color": "red", + "index": 1, + "text": "unreachable" + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Polling Interval" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + } + ] }, "gridPos": { - "h": 7, - "w": 8, + "h": 8, + "w": 24, "x": 0, - "y": 18 + "y": 16 }, "id": 19, "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "frameIndex": 0, - "showHeader": false + "showHeader": true, + "sortBy": [] }, - "pluginVersion": "8.2.2", - "repeat": null, + "pluginVersion": "10.0.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", "exemplar": false, - "expr": "chronyd_peer_status{instance=\"$instance\"}", + "expr": "chrony_sources_state_info{instance=\"$instance\"}", "format": "table", "instant": true, "interval": "", - "legendFormat": "{{ remote }}", + "legendFormat": "__auto", + "range": false, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "chrony_sources_stratum{instance=\"$instance\"}", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "chrony_sources_polling_interval_seconds{instance=\"$instance\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "C" } ], "title": "Current Status", "transformations": [ { + "id": "joinByField", + "options": { + "byField": "source_address", + "mode": "outer" + } + }, + { "id": "organize", "options": { "excludeByName": { "Time": true, + "Time 2": true, + "Value #A": true, + "Value #B": false, "__name__": true, + "__name__ 2": true, "instance": true, + "instance 2": true, "job": true, - "mode": true, - "stratum": true + "job 2": true, + "source_mode": true, + "source_name 2": true, + "source_name 3": true + }, + "indexByName": { + "Time 1": 1, + "Time 2": 9, + "Time 3": 15, + "Value #A": 8, + "Value #B": 14, + "Value #C": 20, + "__name__ 1": 2, + "__name__ 2": 10, + "__name__ 3": 16, + "instance 1": 3, + "instance 2": 11, + "instance 3": 17, + "job 1": 4, + "job 2": 12, + "job 3": 18, + "source_address": 0, + "source_mode": 5, + "source_name 1": 6, + "source_name 2": 13, + "source_name 3": 19, + "source_state": 7 }, - "indexByName": {}, "renameByName": { - "Time": "" + "Value #B": "Stratum", + "Value #C": "Polling Interval", + "source_address": "Address", + "source_name": "Name", + "source_name 1": "Name", + "source_state": "State" } } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "desc": true, - "field": "Value" - } - ] - } } ], "type": "table" }, { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "fillOpacity": 74, - "lineWidth": 0 - }, - "mappings": [ - { - "options": { - "0": { - "index": 0, - "text": "unreachable" - }, - "1": { - "index": 1, - "text": "ok" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 18 + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "id": 16, - "options": { - "alignValue": "center", - "legend": { - "displayMode": "hidden", - "placement": "bottom" - }, - "mergeValues": true, - "rowHeight": 0.5, - "showValue": "never", - "tooltip": { - "mode": "single" - } - }, - "targets": [ - { - "exemplar": false, - "expr": "max without (stratum) (chronyd_peer_reachable{instance=\"$instance\"})", - "instant": false, - "interval": "", - "legendFormat": "{{ remote }}", - "refId": "A" - } - ], - "title": "Reachability", - "type": "state-timeline" - }, - { - "datasource": null, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -830,122 +942,53 @@ "gridPos": { "h": 7, "w": 8, - "x": 16, - "y": 18 - }, - "id": 17, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "targets": [ - { - "exemplar": true, - "expr": "chronyd_offset_seconds{instance=\"$instance\"}", - "interval": "", - "legendFormat": "{{ remote }}", - "refId": "A" - } - ], - "title": "Offset", - "type": "timeseries" - }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ppm" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, "x": 0, - "y": 25 + "y": 24 }, - "id": 15, + "id": 18, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom" + "placement": "bottom", + "showLegend": true }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", "exemplar": true, - "expr": "chronyd_freq_ppm{instance=\"$instance\"}", + "expr": "chrony_sources_last_sample_age_seconds{instance=\"$instance\"} < 4294967295", "interval": "", - "legendFormat": "{{ remote }}", + "legendFormat": "{{ source_address }} ({{ source_name }})", + "range": true, "refId": "A" } ], - "title": "Frequency", + "title": "Last Sample: Age", "type": "timeseries" }, { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -987,7 +1030,7 @@ } ] }, - "unit": "ppm" + "unit": "s" }, "overrides": [] }, @@ -995,39 +1038,52 @@ "h": 7, "w": 8, "x": 8, - "y": 25 + "y": 24 }, - "id": 20, + "id": 21, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom" + "placement": "bottom", + "showLegend": true }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": true, - "expr": "chronyd_freq_skew_ppm{instance=\"$instance\"}", + "expr": "chrony_sources_last_sample_offset_seconds{instance=\"$instance\"}", "interval": "", - "legendFormat": "{{ remote }}", + "legendFormat": "{{ source_address }} ({{ source_name }})", + "range": true, "refId": "A" } ], - "title": "Frequency Skew", + "title": "Last Sample: Offset", "type": "timeseries" }, { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -1077,54 +1133,61 @@ "h": 7, "w": 8, "x": 16, - "y": 25 + "y": 24 }, - "id": 18, + "id": 20, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom" + "placement": "bottom", + "showLegend": true }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", "exemplar": true, - "expr": "chronyd_std_dev_seconds{instance=\"$instance\"}", + "expr": "chrony_sources_last_sample_error_margin_seconds{instance=\"$instance\"}", "interval": "", - "legendFormat": "{{ remote }}", + "legendFormat": "{{ source_address }} ({{ source_name }})", + "range": true, "refId": "A" } ], - "title": "Std. Dev.", + "title": "Last Sample: Error Margin", "type": "timeseries" } ], "refresh": "30s", - "schemaVersion": 31, + "schemaVersion": 38, "style": "dark", "tags": [], "templating": { "list": [ { - "allValue": null, "current": {}, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(chronyd_tracking_source, instance)", - "description": null, - "error": null, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(chrony_up,instance)", "hide": 0, "includeAll": false, - "label": null, "multi": false, "name": "instance", "options": [], "query": { - "query": "label_values(chronyd_tracking_source, instance)", - "refId": "StandardVariableQuery" + "query": "label_values(chrony_up,instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", @@ -1140,7 +1203,8 @@ }, "timepicker": {}, "timezone": "", - "title": "chronyd", - "uid": "eIO_Uyd7k", - "version": 3 -} + "title": "chrony", + "uid": "c252be85-471b-41c9-b1cb-f0e1b8206593", + "version": 3, + "weekStart": "" +}
\ No newline at end of file diff --git a/roles/monitoring/prometheus/exporter/chrony/defaults/main.yml b/roles/monitoring/prometheus/exporter/chrony/defaults/main.yml new file mode 100644 index 00000000..699ed580 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/chrony/defaults/main.yml @@ -0,0 +1,6 @@ +--- +# prometheus_exporter_chrony_version: + +prometheus_exporter_chrony_enable_collectors: + - sources + - tracking diff --git a/roles/monitoring/prometheus/exporter/chrony/handlers/main.yml b/roles/monitoring/prometheus/exporter/chrony/handlers/main.yml new file mode 100644 index 00000000..0c940ca9 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/chrony/handlers/main.yml @@ -0,0 +1,15 @@ +--- +- name: restart prometheus-chrony-exporter + service: + name: prometheus-chrony-exporter + state: restarted + +- name: reload nginx + service: + name: nginx + state: reloaded + +### TODO: remove this once all hosts have been migrated +- name: reload systemd + systemd: + daemon_reload: yes diff --git a/roles/monitoring/prometheus/exporter/chrony/tasks/main.yml b/roles/monitoring/prometheus/exporter/chrony/tasks/main.yml new file mode 100644 index 00000000..f15037ec --- /dev/null +++ b/roles/monitoring/prometheus/exporter/chrony/tasks/main.yml @@ -0,0 +1,65 @@ +--- +- name: generate apt pin file for exporter-chrony package + when: prometheus_exporter_chrony_version is defined + copy: + dest: "/etc/apt/preferences.d/prom-exporter-chrony.pref" + content: | + Package: prom-exporter-chrony + Pin: version {{ prometheus_exporter_chrony_version }}-1 + Pin-Priority: 1001 + +- name: remove apt pin file for exporter-chrony package + when: prometheus_exporter_chrony_version is not defined + file: + path: "/etc/apt/preferences.d/prom-exporter-chrony.pref" + state: absent + +- name: install apt packages + apt: + name: "prom-exporter-chrony{% if prometheus_exporter_chrony_version is defined %}={{ prometheus_exporter_chrony_version }}-1{% endif %}" + state: present + allow_downgrade: yes + notify: restart prometheus-chrony-exporter + +- name: generate systemd service unit + template: + src: service.j2 + dest: /etc/systemd/system/prometheus-chrony-exporter.service + notify: restart prometheus-chrony-exporter + +- name: make sure prometheus-chrony-exporter is enabled and started + systemd: + name: prometheus-chrony-exporter.service + daemon_reload: yes + state: started + enabled: yes + +- name: register exporter + copy: + content: | + location = /chrony { + proxy_pass http://127.0.0.1:9123/metrics; + } + dest: /etc/prometheus/exporter/chrony.locations + notify: reload nginx + + +## TODO: remove these tasks once all hosts have been migrated +- name: make sure the systemd timer for chrony textfile collector is disabled and stopped + systemd: + service: prometheus-node-exporter_chrony.timer + enabled: no + state: stopped + register: result_systemd_stop + failed_when: "result_systemd_stop is failed and 'Could not find the requested service' not in result_systemd_stop.msg" + +- name: remove files from chrony textfile collector + loop: + - /etc/systemd/system/prometheus-node-exporter_chrony.timer + - /etc/systemd/system/prometheus-node-exporter_chrony.service + - /usr/local/share/prometheus-node-exporter/chrony + - /var/lib/prometheus-node-exporter/textfile-collector/chrony.prom + file: + path: "{{ item }}" + state: absent + notify: reload systemd diff --git a/roles/monitoring/prometheus/exporter/chrony/templates/service.j2 b/roles/monitoring/prometheus/exporter/chrony/templates/service.j2 new file mode 100644 index 00000000..cb806649 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/chrony/templates/service.j2 @@ -0,0 +1,31 @@ +[Unit] +Description=Prometheus chrony exporter + +[Service] +Restart=always +User=_chrony +ExecStart=/usr/bin/prometheus-chrony-exporter --web.listen-address="127.0.0.1:9123" --chrony.address=unix:///run/chrony/chronyd.sock {% for collector in prometheus_exporter_chrony_enable_collectors %} --collector.{{ collector }}{% endfor %}{{ '' }} + +# systemd hardening-options +AmbientCapabilities= +CapabilityBoundingSet= +DeviceAllow=/dev/null rw +DevicePolicy=strict +LockPersonality=true +MemoryDenyWriteExecute=true +NoNewPrivileges=true +PrivateDevices=true +PrivateTmp=true +ProtectControlGroups=true +ProtectHome=true +ProtectKernelModules=true +ProtectKernelTunables=true +ProtectSystem=strict +ReadWritePaths=/run/chrony +RemoveIPC=true +RestrictNamespaces=true +RestrictRealtime=true +SystemCallArchitectures=native + +[Install] +WantedBy=multi-user.target diff --git a/roles/monitoring/prometheus/exporter/meta/main.yml b/roles/monitoring/prometheus/exporter/meta/main.yml index 4a427770..10a251f4 100644 --- a/roles/monitoring/prometheus/exporter/meta/main.yml +++ b/roles/monitoring/prometheus/exporter/meta/main.yml @@ -23,4 +23,6 @@ dependencies: when: "'standalone-kubelet' in (prometheus_exporters_default | union(prometheus_exporters_extra))" - role: monitoring/prometheus/exporter/modbus when: "'modbus' in (prometheus_exporters_default | union(prometheus_exporters_extra))" + - role: monitoring/prometheus/exporter/chrony + when: "'chrony' in (prometheus_exporters_default | union(prometheus_exporters_extra))" - role: monitoring/prometheus/exporter/register diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml index 3b961a4f..ab4cee38 100644 --- a/roles/monitoring/prometheus/exporter/node/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml @@ -18,7 +18,6 @@ prometheus_exporter_node_install_apt_textfile_collector_script: "{{ ansible_pkg_ prometheus_exporter_node_textfile_collector_scripts: - deleted-libraries # - smartmon -# - chrony # - sensors # prometheus_exporter_node_textfile_collector__sensors: diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 deleted file mode 100644 index 95c6a5d3..00000000 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/env {{ python_basename }} -# -# Description: Extract chronyd metrics from chronyc -c. -# Author: Aanchal Malhotra <aanch...@bu.edu> -# -# Works with chrony version 2.4 and higher -# -# this is from: https://www.mail-archive.com/chrony-users@chrony.tuxfamily.org/msg02179.html - -import subprocess -import sys - -chrony_sourcestats_cmd = ['chronyc', '-n', '-c', 'sourcestats'] -chrony_source_cmd = ['chronyc', '-n', '-c', 'sources'] -chrony_tracking_cmd = ['chronyc', '-n', '-c', 'tracking'] - -metrics_fields = [ - "Name/IP Address", - "NP", - "NR", - "Span", - "Frequency", - "Freq Skew", - "Offset", - "Std Dev"] - -status_types = {'x': 0, '?': 1, '-': 2, '+': 3, '*': 4} - -metrics_source = { - "*": "synchronized (system peer)", - "+": "synchronized", - "?": "unreachable", - "x": "Falseticker", - "-": "reference clock"} - -metrics_mode = { - '^': "server", - '=': "peer", - "#": "reference clock"} - - -def get_cmdoutput(command): - proc = subprocess.Popen(command, stdout=subprocess.PIPE) - out, err = proc.communicate() - return_code = proc.poll() - if return_code: - raise RuntimeError('Call to "{}" returned error: \ - {}'.format(command, return_code)) - return out.decode("utf-8") - - -def printPrometheusformat(metric, values): - print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric)) - print("# TYPE chronyd_%s gauge" % (metric)) - for labels in values: - if labels is None: - print("chronyd_%s %f" % (metric, values[labels])) - else: - print("chronyd_%s{{ '{%' }}s} %f" % (metric, labels, values[labels])) - - -def printPrometheusscalar(metric, value): - print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric)) - print("# TYPE chronyd_%s gauge" % (metric)) - print("chronyd_%s %f" % (metric, value)) - - -def printPrometheusEnum(metric, name): - print("# HELP chronyd_%s enum for %s" % (metric, metric)) - print("# TYPE chronyd_%s gauge" % (metric)) - print("chronyd_%s{value=\"%s\"} 1" % (metric, name)) - - -def weight(value): - val_int = int(value, 8) - return bin(val_int).count('1')/8.0 - - -def main(argv): - peer_status_metrics = {} - peer_reach_metrics = {} - offset_metrics = {} - freq_skew_metrics = {} - freq_metrics = {} - std_dev_metrics = {} - chrony_sourcestats = get_cmdoutput(chrony_sourcestats_cmd) - for line in chrony_sourcestats.split('\n'): - if (len(line)) > 0: - x = line.split(',') - common_labels = "remote=\"%s\"" % (x[0]) - freq_metrics[common_labels] = float(x[4]) - freq_skew_metrics[common_labels] = float(x[5]) - std_dev_metrics[common_labels] = float(x[7]) - - printPrometheusformat('freq_skew_ppm', freq_skew_metrics) - printPrometheusformat('freq_ppm', freq_metrics) - printPrometheusformat('std_dev_seconds', std_dev_metrics) - - chrony_source = get_cmdoutput(chrony_source_cmd) - for line in chrony_source.split('\n'): - if (len(line)) > 0: - x = line.split(',') - stratum = x[3] - reach = x[5] - mode = metrics_mode[x[0]] - common_labels = "remote=\"%s\"" % (x[2]) - peer_labels = "%s,stratum=\"%s\",mode=\"%s\"" % ( - common_labels, - stratum, - mode, - ) - peer_status_metrics[peer_labels] = float(status_types[x[1]]) - peer_reach_metrics[peer_labels] = weight(reach) - offset_metrics[common_labels] = float(x[8]) - - printPrometheusformat('peer_status', peer_status_metrics) - printPrometheusformat('offset_seconds', offset_metrics) - printPrometheusformat('peer_reachable', peer_reach_metrics) - - chrony_tracking_stats = get_cmdoutput(chrony_tracking_cmd).rstrip() - fields = chrony_tracking_stats.split(",") - printPrometheusEnum("tracking_source", fields[1]) - printPrometheusscalar("tracking_stratum", float(fields[2])) - printPrometheusscalar("tracking_ref_time", float(fields[3])) - printPrometheusscalar("tracking_system_time", float(fields[4])) - printPrometheusscalar("tracking_last_offset", float(fields[5])) - printPrometheusscalar("tracking_rms_offset", float(fields[6])) - printPrometheusscalar("tracking_frequency_error", float(fields[7])) - printPrometheusscalar("tracking_frequency_residual", float(fields[8])) - printPrometheusscalar("tracking_frequency_skew", float(fields[9])) - printPrometheusscalar("tracking_root_delay", float(fields[10])) - printPrometheusscalar("tracking_root_dispersion", float(fields[11])) - printPrometheusscalar("tracking_update_interval", float(fields[12])) - printPrometheusEnum("tracking_leap_status", fields[13]) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 deleted file mode 100644 index 49b15185..00000000 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 +++ /dev/null @@ -1,33 +0,0 @@ -[Unit] -Description=Promethues node exporter textfile collector chrony - -[Service] -Type=oneshot -Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector -ExecStart=bash -o pipefail -c "/usr/local/share/prometheus-node-exporter/chrony | sponge /var/lib/prometheus-node-exporter/textfile-collector/chrony.prom" -TimeoutStartSec=30s - -# systemd hardening-options -AmbientCapabilities=CAP_DAC_OVERRIDE -CapabilityBoundingSet=CAP_DAC_OVERRIDE -DeviceAllow=/dev/null rw -DevicePolicy=strict -LockPersonality=true -MemoryDenyWriteExecute=true -NoNewPrivileges=true -PrivateDevices=true -PrivateTmp=true -ProtectControlGroups=true -ProtectHome=true -ProtectKernelModules=true -ProtectKernelTunables=true -ProtectSystem=strict -ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector /var/run/chrony -RemoveIPC=true -RestrictNamespaces=true -RestrictRealtime=true -RestrictAddressFamilies=AF_UNIX -SystemCallArchitectures=native - -[Install] -WantedBy=multi-user.target diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 deleted file mode 100644 index eecc70e2..00000000 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Promethues node exporter textfile collector chrony - -[Timer] -OnBootSec=40s -OnUnitActiveSec=2min - -[Install] -WantedBy=timers.target diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml index d778bad8..1e0dcf32 100644 --- a/roles/monitoring/prometheus/server/defaults/main/main.yml +++ b/roles/monitoring/prometheus/server/defaults/main/main.yml @@ -22,6 +22,7 @@ prometheus_server_rules: prometheus: "{{ prometheus_server_rules_prometheus + ((prometheus_server_alertmanager is defined) | ternary(prometheus_server_rules_prometheus_alertmanager, [])) + prometheus_server_rules_prometheus_extra }}" node: "{{ prometheus_server_rules_node + prometheus_server_rules_node_extra }}" openwrt: "{{ prometheus_server_rules_openwrt + prometheus_server_rules_openwrt_extra }}" + chrony: "{{ prometheus_server_rules_chrony + prometheus_server_rules_chrony_extra }}" nut: "{{ prometheus_server_rules_nut + prometheus_server_rules_nut_extra }}" nut/ups: "{{ prometheus_server_rules_nut__ups + prometheus_server_rules_nut__ups_extra }}" blackbox: "{{ prometheus_server_rules_blackbox + prometheus_server_rules_blackbox_extra }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_chrony.yml b/roles/monitoring/prometheus/server/defaults/main/rules_chrony.yml new file mode 100644 index 00000000..e845a60b --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_chrony.yml @@ -0,0 +1,14 @@ +--- +prometheus_server_rules_chrony_extra: [] +prometheus_server_rules_chrony: + - record: instance:chrony_clock_error_seconds:abs + expr: abs(chrony_tracking_last_offset_seconds) + chrony_tracking_root_dispersion_seconds + (0.5 * chrony_tracking_root_delay_seconds) + + - alert: ChronyUnreachable + expr: chrony_up == 0 + for: 0m + labels: + severity: critical + annotations: + summary: Unable to scrape chrony metrics (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "The chrony process might have crashed.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" |