summary | refs | log | tree | commit | diff
path: root/roles/monitoring
diff options
context:
space:
mode:
author Christian Pointner <equinox@spreadspace.org> 2021-10-21 00:11:29 +0200
committer Christian Pointner <equinox@spreadspace.org> 2021-10-21 00:11:29 +0200
commit 95a3e8f0728580ead42b19110bf2a4c7fb3a3c71 (patch)
tree 96c8aa40f0c87d1f5ba62bbdb5a3c55078c50e09 /roles/monitoring
parent move prometheus node-exporter text collector scripts to templates (diff)
prometheus: install textfile collector for chrony
Diffstat (limited to 'roles/monitoring')
-rw-r--r-- roles/monitoring/prometheus/exporter/node/defaults/main.yml 3
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 2
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 138
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 33
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 9
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 2
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 2
7 files changed, 188 insertions, 1 deletions
diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml
index 0e2242af..0bcea14f 100644
--- a/roles/monitoring/prometheus/exporter/node/defaults/main.yml
+++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml
@@ -2,7 +2,7 @@
_prometheus_exporter_node_time_collector_map_:
"": timex
systemd-timesyncd: timex
- chrony: ntp
+ chrony: timex
openntpd: ntp
prometheus_exporter_node_timesync_collector: "{{ _prometheus_exporter_node_time_collector_map_[ntp_variant | default('')] }}"
@@ -15,3 +15,4 @@ prometheus_exporter_node_extra_collectors:
prometheus_exporter_node_textfile_collector_scripts:
- deleted-libraries
# - smartmon
+# - chrony
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2
index 7eca94fb..e624dfc3 100644
--- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2
+++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2
@@ -5,6 +5,7 @@ Description=Promethues node exporter textfile collector apt
Type=oneshot
Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector
ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/apt | sponge /var/lib/prometheus-node-exporter/textfile-collector/apt.prom"
+TimeoutStartSec=30s
# systemd hardening-options
AmbientCapabilities=
@@ -25,6 +26,7 @@ ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector
RemoveIPC=true
RestrictNamespaces=true
RestrictRealtime=true
+RestrictAddressFamilies=AF_UNIX
SystemCallArchitectures=native
[Install]
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2
new file mode 100644
index 00000000..95c6a5d3
--- /dev/null
+++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2
@@ -0,0 +1,138 @@
+#!/usr/bin/env {{ python_basename }}
+#
+# Description: Extract chronyd metrics from chronyc -c.
+# Author: Aanchal Malhotra <aanch...@bu.edu>
+#
+# Works with chrony version 2.4 and higher
+#
+# this is from: https://www.mail-archive.com/chrony-users@chrony.tuxfamily.org/msg02179.html
+
+import subprocess
+import sys
+
+chrony_sourcestats_cmd = ['chronyc', '-n', '-c', 'sourcestats']  # argv lists handed to get_cmdoutput(); main() splits every output line on ','
+chrony_source_cmd = ['chronyc', '-n', '-c', 'sources']
+chrony_tracking_cmd = ['chronyc', '-n', '-c', 'tracking']
+
+metrics_fields = [  # sourcestats column names; list index matches the column index main() reads (x[4], x[5], x[7]). Not referenced anywhere in this hunk.
+ "Name/IP Address",
+ "NP",
+ "NR",
+ "Span",
+ "Frequency",
+ "Freq Skew",
+ "Offset",
+ "Std Dev"]
+
+status_types = {'x': 0, '?': 1, '-': 2, '+': 3, '*': 4}  # sources state character (x[1] in main) -> number exported as chronyd_peer_status; NOTE(review): chronyc appears to document a '~' state as well, which would raise KeyError in main() - confirm
+
+metrics_source = {  # descriptive names for the state characters; not referenced anywhere in this hunk
+ "*": "synchronized (system peer)",
+ "+": "synchronized",
+ "?": "unreachable",
+ "x": "Falseticker",
+ "-": "reference clock"}
+
+metrics_mode = {  # sources mode character (x[0] in main) -> value of the mode="..." label
+ '^': "server",
+ '=': "peer",
+ "#": "reference clock"}
+
+
+def get_cmdoutput(command):  # run `command` (an argv list) and return its stdout decoded as UTF-8; raises RuntimeError on a non-zero exit status
+ proc = subprocess.Popen(command, stdout=subprocess.PIPE)  # only stdout is piped, so `err` below is always None
+ out, err = proc.communicate()
+ return_code = proc.poll()  # the process has already exited after communicate(); this just reads its exit status
+ if return_code:
+ raise RuntimeError('Call to "{}" returned error: \
+ {}'.format(command, return_code))  # NOTE(review): the backslash continuation is inside the string literal, so this line's leading whitespace becomes part of the message
+ return out.decode("utf-8")
+
+
+def printPrometheusformat(metric, values):  # print HELP/TYPE headers for chronyd_<metric>, then one gauge sample per entry of the dict `values` (label string or None -> number)
+ print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric))
+ print("# TYPE chronyd_%s gauge" % (metric))
+ for labels in values:  # headers above are printed even when `values` is empty
+ if labels is None:  # a None key means "no labels": emit a bare sample
+ print("chronyd_%s %f" % (metric, values[labels]))  # NOTE(review): %f keeps six decimal places, so very small values (e.g. sub-microsecond offsets in seconds) print as 0.000000
+ else:
+ print("chronyd_%s{{ '{%' }}s} %f" % (metric, labels, values[labels]))  # {{ '{%' }} is a Jinja escape: the rendered script contains the literal text {%s}, i.e. the label list wrapped in braces
+
+
+def printPrometheusscalar(metric, value):  # print HELP/TYPE headers and a single unlabelled gauge sample chronyd_<metric> <value>
+ print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric))
+ print("# TYPE chronyd_%s gauge" % (metric))
+ print("chronyd_%s %f" % (metric, value))  # %f renders the value with six decimal places
+
+
+def printPrometheusEnum(metric, name):  # print HELP/TYPE headers and one sample chronyd_<metric>{value="<name>"} with the constant value 1
+ print("# HELP chronyd_%s enum for %s" % (metric, metric))
+ print("# TYPE chronyd_%s gauge" % (metric))
+ print("chronyd_%s{value=\"%s\"} 1" % (metric, name))  # `name` is inserted verbatim; NOTE(review): no escaping of quotes or backslashes is done here
+
+
+def weight(value):  # return (number of one-bits in `value`) / 8.0, where `value` is a string parsed as a base-8 integer
+ val_int = int(value, 8)  # raises ValueError if `value` is not a valid octal string
+ return bin(val_int).count('1')/8.0  # the 8.0 divisor presumably assumes an 8-bit register (main passes the sources "reach" column) - result is then in 0.0..1.0; it also forces float division
+
+
+def main(argv):  # query chronyc (sourcestats, sources, tracking) via get_cmdoutput() and print the results in Prometheus text format; `argv` is accepted but never read
+ peer_status_metrics = {}  # the six dicts below map a rendered label string (e.g. remote="...") to a float sample
+ peer_reach_metrics = {}
+ offset_metrics = {}
+ freq_skew_metrics = {}
+ freq_metrics = {}
+ std_dev_metrics = {}
+ chrony_sourcestats = get_cmdoutput(chrony_sourcestats_cmd)
+ for line in chrony_sourcestats.split('\n'):
+ if (len(line)) > 0:  # skip empty lines, e.g. the one produced by a trailing newline
+ x = line.split(',')
+ common_labels = "remote=\"%s\"" % (x[0])  # column 0 identifies the source ("Name/IP Address" in metrics_fields)
+ freq_metrics[common_labels] = float(x[4])  # columns 4, 5 and 7 correspond to "Frequency", "Freq Skew" and "Std Dev" in metrics_fields order
+ freq_skew_metrics[common_labels] = float(x[5])
+ std_dev_metrics[common_labels] = float(x[7])
+
+ printPrometheusformat('freq_skew_ppm', freq_skew_metrics)
+ printPrometheusformat('freq_ppm', freq_metrics)
+ printPrometheusformat('std_dev_seconds', std_dev_metrics)
+
+ chrony_source = get_cmdoutput(chrony_source_cmd)
+ for line in chrony_source.split('\n'):
+ if (len(line)) > 0:
+ x = line.split(',')
+ stratum = x[3]
+ reach = x[5]
+ mode = metrics_mode[x[0]]  # raises KeyError if the mode character is not a key of metrics_mode
+ common_labels = "remote=\"%s\"" % (x[2])  # in the sources report the source name is column 2; columns 0 and 1 are the mode and state characters
+ peer_labels = "%s,stratum=\"%s\",mode=\"%s\"" % (
+ common_labels,
+ stratum,
+ mode,
+ )
+ peer_status_metrics[peer_labels] = float(status_types[x[1]])  # state character -> number via status_types (KeyError on an unknown character)
+ peer_reach_metrics[peer_labels] = weight(reach)  # reach column -> fraction of one-bits
+ offset_metrics[common_labels] = float(x[8])  # column 8 is exported as chronyd_offset_seconds
+
+ printPrometheusformat('peer_status', peer_status_metrics)
+ printPrometheusformat('offset_seconds', offset_metrics)
+ printPrometheusformat('peer_reachable', peer_reach_metrics)
+
+ chrony_tracking_stats = get_cmdoutput(chrony_tracking_cmd).rstrip()  # drop trailing whitespace/newline so the last field is clean
+ fields = chrony_tracking_stats.split(",")  # fields[0] is not exported; fields[1]..fields[13] are mapped below. NOTE(review): empty output would raise IndexError on fields[1]
+ printPrometheusEnum("tracking_source", fields[1])
+ printPrometheusscalar("tracking_stratum", float(fields[2]))
+ printPrometheusscalar("tracking_ref_time", float(fields[3]))
+ printPrometheusscalar("tracking_system_time", float(fields[4]))
+ printPrometheusscalar("tracking_last_offset", float(fields[5]))
+ printPrometheusscalar("tracking_rms_offset", float(fields[6]))
+ printPrometheusscalar("tracking_frequency_error", float(fields[7]))
+ printPrometheusscalar("tracking_frequency_residual", float(fields[8]))
+ printPrometheusscalar("tracking_frequency_skew", float(fields[9]))
+ printPrometheusscalar("tracking_root_delay", float(fields[10]))
+ printPrometheusscalar("tracking_root_dispersion", float(fields[11]))
+ printPrometheusscalar("tracking_update_interval", float(fields[12]))
+ printPrometheusEnum("tracking_leap_status", fields[13])
+
+
+if __name__ == "__main__":  # run the collector only when executed as a script, not when imported
+ main(sys.argv[1:])  # command-line arguments are passed through, although main() does not read them
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2
new file mode 100644
index 00000000..bd96daf4
--- /dev/null
+++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2
@@ -0,0 +1,33 @@
+[Unit]
+Description=Prometheus node exporter textfile collector chrony
+
+[Service]
+Type=oneshot
+Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector
+ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/chrony | sponge /var/lib/prometheus-node-exporter/textfile-collector/chrony.prom"
+TimeoutStartSec=30s
+
+# systemd hardening-options
+AmbientCapabilities=CAP_DAC_OVERRIDE
+CapabilityBoundingSet=CAP_DAC_OVERRIDE
+DeviceAllow=/dev/null rw
+DevicePolicy=strict
+LockPersonality=true
+MemoryDenyWriteExecute=true
+NoNewPrivileges=true
+PrivateDevices=true
+PrivateTmp=true
+ProtectControlGroups=true
+ProtectHome=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectSystem=strict
+ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector /var/run/chrony
+RemoveIPC=true
+RestrictNamespaces=true
+RestrictRealtime=true
+RestrictAddressFamilies=AF_UNIX
+SystemCallArchitectures=native
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2
new file mode 100644
index 00000000..597b917a
--- /dev/null
+++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2
@@ -0,0 +1,9 @@
+[Unit]
+Description=Prometheus node exporter textfile collector chrony
+
+[Timer]
+OnBootSec=40s
+OnUnitActiveSec=15min
+
+[Install]
+WantedBy=timers.target
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2
index 7b15e558..c37936ac 100644
--- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2
+++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2
@@ -5,6 +5,7 @@ Description=Promethues node exporter textfile collector deleted-libraries
Type=oneshot
Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector
ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/deleted-libraries | sponge /var/lib/prometheus-node-exporter/textfile-collector/deleted-libraries.prom"
+TimeoutStartSec=30s
# systemd hardening-options
AmbientCapabilities=CAP_SYS_PTRACE
@@ -25,6 +26,7 @@ ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector
RemoveIPC=true
RestrictNamespaces=true
RestrictRealtime=true
+RestrictAddressFamilies=AF_UNIX
SystemCallArchitectures=native
[Install]
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2
index 0b826fc6..58792417 100644
--- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2
+++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2
@@ -6,6 +6,7 @@ Type=oneshot
Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector
Environment=LC_NUMERIC=C
ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/smartmon | sponge /var/lib/prometheus-node-exporter/textfile-collector/smartmon.prom"
+TimeoutStartSec=30s
# systemd hardening-options
AmbientCapabilities=CAP_SYS_RAWIO
@@ -23,6 +24,7 @@ ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector
RemoveIPC=true
RestrictNamespaces=true
RestrictRealtime=true
+RestrictAddressFamilies=AF_UNIX
SystemCallArchitectures=native
[Install]