diff options
author | Christian Pointner <equinox@spreadspace.org> | 2021-10-21 00:11:29 +0200 |
---|---|---|
committer | Christian Pointner <equinox@spreadspace.org> | 2021-10-21 00:11:29 +0200 |
commit | 95a3e8f0728580ead42b19110bf2a4c7fb3a3c71 (patch) | |
tree | 96c8aa40f0c87d1f5ba62bbdb5a3c55078c50e09 | |
parent | move prometheus node-exporter text collector scripts to templates (diff) |
prometheus: install textfile collector for chrony
8 files changed, 200 insertions, 1 deletion
diff --git a/inventory/host_vars/ch-testvm-prometheus.yml b/inventory/host_vars/ch-testvm-prometheus.yml index e539735f..c4ba7364 100644 --- a/inventory/host_vars/ch-testvm-prometheus.yml +++ b/inventory/host_vars/ch-testvm-prometheus.yml @@ -37,6 +37,18 @@ spreadspace_apt_repo_components: - prometheus +ntp_variant: chrony + +ntp_client: + pools: + - name: at.pool.ntp.org + options: iburst + +prometheus_exporter_node_textfile_collector_scripts: + - deleted-libraries + - chrony + + containerd_storage: type: lvm vg: "{{ host_name }}" diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml index 0e2242af..0bcea14f 100644 --- a/roles/monitoring/prometheus/exporter/node/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml @@ -2,7 +2,7 @@ _prometheus_exporter_node_time_collector_map_: "": timex systemd-timesyncd: timex - chrony: ntp + chrony: timex openntpd: ntp prometheus_exporter_node_timesync_collector: "{{ _prometheus_exporter_node_time_collector_map_[ntp_variant | default('')] }}" @@ -15,3 +15,4 @@ prometheus_exporter_node_extra_collectors: prometheus_exporter_node_textfile_collector_scripts: - deleted-libraries # - smartmon +# - chrony diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 index 7eca94fb..e624dfc3 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 @@ -5,6 +5,7 @@ Description=Promethues node exporter textfile collector apt Type=oneshot Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/apt | sponge /var/lib/prometheus-node-exporter/textfile-collector/apt.prom" 
+TimeoutStartSec=30s # systemd hardening-options AmbientCapabilities= @@ -25,6 +26,7 @@ ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector RemoveIPC=true RestrictNamespaces=true RestrictRealtime=true +RestrictAddressFamilies=AF_UNIX SystemCallArchitectures=native [Install] diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 new file mode 100644 index 00000000..95c6a5d3 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.j2 @@ -0,0 +1,138 @@ +#!/usr/bin/env {{ python_basename }} +# +# Description: Extract chronyd metrics from chronyc -c. +# Author: Aanchal Malhotra <aanch...@bu.edu> +# +# Works with chrony version 2.4 and higher +# +# this is from: https://www.mail-archive.com/chrony-users@chrony.tuxfamily.org/msg02179.html + +import subprocess +import sys + +chrony_sourcestats_cmd = ['chronyc', '-n', '-c', 'sourcestats'] +chrony_source_cmd = ['chronyc', '-n', '-c', 'sources'] +chrony_tracking_cmd = ['chronyc', '-n', '-c', 'tracking'] + +metrics_fields = [ + "Name/IP Address", + "NP", + "NR", + "Span", + "Frequency", + "Freq Skew", + "Offset", + "Std Dev"] + +status_types = {'x': 0, '?': 1, '-': 2, '+': 3, '*': 4} + +metrics_source = { + "*": "synchronized (system peer)", + "+": "synchronized", + "?": "unreachable", + "x": "Falseticker", + "-": "reference clock"} + +metrics_mode = { + '^': "server", + '=': "peer", + "#": "reference clock"} + + +def get_cmdoutput(command): + proc = subprocess.Popen(command, stdout=subprocess.PIPE) + out, err = proc.communicate() + return_code = proc.poll() + if return_code: + raise RuntimeError('Call to "{}" returned error: \ + {}'.format(command, return_code)) + return out.decode("utf-8") + + +def printPrometheusformat(metric, values): + print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric)) + print("# TYPE 
chronyd_%s gauge" % (metric)) + for labels in values: + if labels is None: + print("chronyd_%s %f" % (metric, values[labels])) + else: + print("chronyd_%s{{ '{%' }}s} %f" % (metric, labels, values[labels])) + + +def printPrometheusscalar(metric, value): + print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric)) + print("# TYPE chronyd_%s gauge" % (metric)) + print("chronyd_%s %f" % (metric, value)) + + +def printPrometheusEnum(metric, name): + print("# HELP chronyd_%s enum for %s" % (metric, metric)) + print("# TYPE chronyd_%s gauge" % (metric)) + print("chronyd_%s{value=\"%s\"} 1" % (metric, name)) + + +def weight(value): + val_int = int(value, 8) + return bin(val_int).count('1')/8.0 + + +def main(argv): + peer_status_metrics = {} + peer_reach_metrics = {} + offset_metrics = {} + freq_skew_metrics = {} + freq_metrics = {} + std_dev_metrics = {} + chrony_sourcestats = get_cmdoutput(chrony_sourcestats_cmd) + for line in chrony_sourcestats.split('\n'): + if (len(line)) > 0: + x = line.split(',') + common_labels = "remote=\"%s\"" % (x[0]) + freq_metrics[common_labels] = float(x[4]) + freq_skew_metrics[common_labels] = float(x[5]) + std_dev_metrics[common_labels] = float(x[7]) + + printPrometheusformat('freq_skew_ppm', freq_skew_metrics) + printPrometheusformat('freq_ppm', freq_metrics) + printPrometheusformat('std_dev_seconds', std_dev_metrics) + + chrony_source = get_cmdoutput(chrony_source_cmd) + for line in chrony_source.split('\n'): + if (len(line)) > 0: + x = line.split(',') + stratum = x[3] + reach = x[5] + mode = metrics_mode[x[0]] + common_labels = "remote=\"%s\"" % (x[2]) + peer_labels = "%s,stratum=\"%s\",mode=\"%s\"" % ( + common_labels, + stratum, + mode, + ) + peer_status_metrics[peer_labels] = float(status_types[x[1]]) + peer_reach_metrics[peer_labels] = weight(reach) + offset_metrics[common_labels] = float(x[8]) + + printPrometheusformat('peer_status', peer_status_metrics) + printPrometheusformat('offset_seconds', offset_metrics) + 
printPrometheusformat('peer_reachable', peer_reach_metrics) + + chrony_tracking_stats = get_cmdoutput(chrony_tracking_cmd).rstrip() + fields = chrony_tracking_stats.split(",") + printPrometheusEnum("tracking_source", fields[1]) + printPrometheusscalar("tracking_stratum", float(fields[2])) + printPrometheusscalar("tracking_ref_time", float(fields[3])) + printPrometheusscalar("tracking_system_time", float(fields[4])) + printPrometheusscalar("tracking_last_offset", float(fields[5])) + printPrometheusscalar("tracking_rms_offset", float(fields[6])) + printPrometheusscalar("tracking_frequency_error", float(fields[7])) + printPrometheusscalar("tracking_frequency_residual", float(fields[8])) + printPrometheusscalar("tracking_frequency_skew", float(fields[9])) + printPrometheusscalar("tracking_root_delay", float(fields[10])) + printPrometheusscalar("tracking_root_dispersion", float(fields[11])) + printPrometheusscalar("tracking_update_interval", float(fields[12])) + printPrometheusEnum("tracking_leap_status", fields[13]) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 new file mode 100644 index 00000000..bd96daf4 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.service.j2 @@ -0,0 +1,33 @@ +[Unit] +Description=Promethues node exporter textfile collector chrony + +[Service] +Type=oneshot +Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector +ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/chrony | sponge /var/lib/prometheus-node-exporter/textfile-collector/chrony.prom" +TimeoutStartSec=30s + +# systemd hardening-options +AmbientCapabilities=CAP_DAC_OVERRIDE +CapabilityBoundingSet=CAP_DAC_OVERRIDE +DeviceAllow=/dev/null rw +DevicePolicy=strict +LockPersonality=true 
+MemoryDenyWriteExecute=true +NoNewPrivileges=true +PrivateDevices=true +PrivateTmp=true +ProtectControlGroups=true +ProtectHome=true +ProtectKernelModules=true +ProtectKernelTunables=true +ProtectSystem=strict +ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector /var/run/chrony +RemoveIPC=true +RestrictNamespaces=true +RestrictRealtime=true +RestrictAddressFamilies=AF_UNIX +SystemCallArchitectures=native + +[Install] +WantedBy=multi-user.target diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 new file mode 100644 index 00000000..597b917a --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/chrony.timer.j2 @@ -0,0 +1,9 @@ +[Unit] +Description=Promethues node exporter textfile collector chrony + +[Timer] +OnBootSec=40s +OnUnitActiveSec=15min + +[Install] +WantedBy=timers.target diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 index 7b15e558..c37936ac 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 @@ -5,6 +5,7 @@ Description=Promethues node exporter textfile collector deleted-libraries Type=oneshot Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/deleted-libraries | sponge /var/lib/prometheus-node-exporter/textfile-collector/deleted-libraries.prom" +TimeoutStartSec=30s # systemd hardening-options AmbientCapabilities=CAP_SYS_PTRACE @@ -25,6 +26,7 @@ 
ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector RemoveIPC=true RestrictNamespaces=true RestrictRealtime=true +RestrictAddressFamilies=AF_UNIX SystemCallArchitectures=native [Install] diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 index 0b826fc6..58792417 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 @@ -6,6 +6,7 @@ Type=oneshot Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector Environment=LC_NUMERIC=C ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/smartmon | sponge /var/lib/prometheus-node-exporter/textfile-collector/smartmon.prom" +TimeoutStartSec=30s # systemd hardening-options AmbientCapabilities=CAP_SYS_RAWIO @@ -23,6 +24,7 @@ ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector RemoveIPC=true RestrictNamespaces=true RestrictRealtime=true +RestrictAddressFamilies=AF_UNIX SystemCallArchitectures=native [Install] |