diff options
Diffstat (limited to 'roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts')
-rw-r--r-- | roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.j2 | 40 | ||||
-rw-r--r-- | roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.j2 | 104 | ||||
-rw-r--r-- | roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.service.j2 (renamed from roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2) | 4 | ||||
-rw-r--r-- | roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.timer.j2 (renamed from roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2) | 2 |
4 files changed, 107 insertions, 43 deletions
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.j2 deleted file mode 100644 index 015addb0..00000000 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.j2 +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -# -# Description: Expose metrics from apt updates. -# -# Author: Ben Kochie <superq@gmail.com> - -upgrades="$(/usr/bin/apt-get --just-print dist-upgrade \ - | /usr/bin/awk -F'[()]' \ - '/^Inst/ { sub("^[^ ]+ ", "", $2); gsub(" ","",$2); - sub("\\[", " ", $2); sub("\\]", "", $2); print $2 }' \ - | /usr/bin/sort \ - | /usr/bin/uniq -c \ - | awk '{ gsub(/\\\\/, "\\\\", $2); gsub(/"/, "\\\"", $2); - gsub(/\[/, "", $3); gsub(/\]/, "", $3); - print "apt_upgrades_pending{origin=\"" $2 "\",arch=\"" $NF "\"} " $1}' -)" - -autoremove="$(/usr/bin/apt-get --just-print autoremove \ - | /usr/bin/awk '/^Remv/{a++}END{printf "apt_autoremove_pending %d", a}' -)" - -echo '# HELP apt_upgrades_pending Apt package pending updates by origin.' -echo '# TYPE apt_upgrades_pending gauge' -if [[ -n "${upgrades}" ]] ; then - echo "${upgrades}" -else - echo 'apt_upgrades_pending{origin="",arch=""} 0' -fi - -echo '# HELP apt_autoremove_pending Apt package pending autoremove.' -echo '# TYPE apt_autoremove_pending gauge' -echo "${autoremove}" - -echo '# HELP node_reboot_required Node reboot is required for software updates.' 
-echo '# TYPE node_reboot_required gauge' -if [[ -f '/run/reboot-required' ]] ; then - echo 'node_reboot_required 1' -else - echo 'node_reboot_required 0' -fi diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.j2 new file mode 100644 index 00000000..20cfdf4f --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.j2 @@ -0,0 +1,104 @@ +#!/usr/bin/env {{ python_basename }} +# +# Description: Expose metrics from apt. This is inspired by and +# intended to be a replacement for the original apt.sh. +# +# Dependencies: python3-apt, python3-prometheus-client +# +# Authors: Kyle Fazzari <kyrofa@ubuntu.com> +# Daniel Swarbrick <dswarbrick@debian.org> + +import apt +import collections +import contextlib +import os +from prometheus_client import CollectorRegistry, Gauge, generate_latest + +_UpgradeInfo = collections.namedtuple("_UpgradeInfo", ["labels", "count"]) + + +def _convert_candidates_to_upgrade_infos(candidates): + changes_dict = collections.defaultdict(lambda: collections.defaultdict(int)) + + for candidate in candidates: + origins = sorted( + {f"{o.origin}:{o.codename}/{o.archive}" for o in candidate.origins} + ) + changes_dict[",".join(origins)][candidate.architecture] += 1 + + changes_list = list() + for origin in sorted(changes_dict.keys()): + for arch in sorted(changes_dict[origin].keys()): + changes_list.append( + _UpgradeInfo( + labels=dict(origin=origin, arch=arch), + count=changes_dict[origin][arch], + ) + ) + + return changes_list + + +def _write_pending_upgrades(registry, cache): + # Discount any changes that apply to packages that aren't installed (e.g. + # count an upgrade to package A that adds a new dependency on package B as + # only one upgrade, not two). 
See the following issue for more details: + # https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/issues/85 + candidates = { + p.candidate for p in cache.get_changes() if p.is_installed and p.marked_upgrade + } + upgrade_list = _convert_candidates_to_upgrade_infos(candidates) + + if upgrade_list: + g = Gauge('apt_upgrades_pending', "Apt packages pending updates by origin", + ['origin', 'arch'], registry=registry) + for change in upgrade_list: + g.labels(change.labels['origin'], change.labels['arch']).set(change.count) + + +def _write_held_upgrades(registry, cache): + held_candidates = {p.candidate for p in cache if p.is_upgradable and p.marked_keep} + upgrade_list = _convert_candidates_to_upgrade_infos(held_candidates) + + if upgrade_list: + g = Gauge('apt_upgrades_held', "Apt packages pending updates but held back.", + ['origin', 'arch'], registry=registry) + for change in upgrade_list: + g.labels(change.labels['origin'], change.labels['arch']).set(change.count) + + +def _write_autoremove_pending(registry, cache): + autoremovable_packages = {p for p in cache if p.is_auto_removable} + g = Gauge('apt_autoremove_pending', "Apt packages pending autoremoval.", + registry=registry) + g.set(len(autoremovable_packages)) + + +def _write_reboot_required(registry): + g = Gauge('node_reboot_required', "Node reboot is required for software updates.", + registry=registry) + g.set(int(os.path.isfile('/run/reboot-required'))) + + +def _main(): + cache = apt.cache.Cache() + + # First of all, attempt to update the index. If we don't have permission + # to do so (or it fails for some reason), it's not the end of the world, + # we'll operate on the old index. 
+ with contextlib.suppress(apt.cache.LockFailedException, apt.cache.FetchFailedException): + cache.update() + + cache.open() + cache.upgrade(True) + + registry = CollectorRegistry() + _write_pending_upgrades(registry, cache) + _write_held_upgrades(registry, cache) + _write_autoremove_pending(registry, cache) + _write_reboot_required(registry) + print(generate_latest(registry).decode(), end='') + + +if __name__ == "__main__": + _main() diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.service.j2 index c60439c4..70211c1a 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.service.j2 @@ -1,10 +1,10 @@ [Unit] -Description=Promethues node exporter textfile collector apt +Description=Prometheus node exporter textfile collector apt_info [Service] Type=oneshot Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector -ExecStart=bash -o pipefail -c "/usr/local/share/prometheus-node-exporter/apt | sponge /var/lib/prometheus-node-exporter/textfile-collector/apt.prom" +ExecStart=bash -o pipefail -c "/usr/local/share/prometheus-node-exporter/apt_info | sponge /var/lib/prometheus-node-exporter/textfile-collector/apt_info.prom" TimeoutStartSec=30s # systemd hardening-options diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.timer.j2 index dc473749..d7881d38 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt_info.timer.j2 @@ -1,5 +1,5 @@ [Unit] -Description=Promethues node exporter textfile
collector apt +Description=Prometheus node exporter textfile collector apt_info [Timer] OnBootSec=10s |