summary | refs | log | tree | commit | diff
path: root/roles/monitoring/prometheus
diff options
context:
space:
mode:
author Christian Pointner <equinox@spreadspace.org> 2023-07-19 00:36:13 +0200
committer Christian Pointner <equinox@spreadspace.org> 2023-07-19 00:36:13 +0200
commit 5fe9978585f01bdf50ebaf761f96da90e31b8516 (patch)
tree aa4ab403acdb7e3bcf26e68b5b2067e9a11fd629 /roles/monitoring/prometheus
parent prometheus/node: update textfile collector deleted-libs (diff)
prometheus/node: satisfy linter...
Diffstat (limited to 'roles/monitoring/prometheus')
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.j2 2
-rw-r--r-- roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.j2 44
2 files changed, 33 insertions, 13 deletions
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.j2
index 21f0b5bd..66809a6c 100644
--- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.j2
+++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.j2
@@ -7,10 +7,8 @@ The aim is to discover processes that are still using libraries that have since
been updated, perhaps due security vulnerabilities.
"""
-import errno
import glob
import os
-import sys
from prometheus_client import CollectorRegistry, Gauge, generate_latest
diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.j2
index 829383e2..c6bfba28 100644
--- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.j2
+++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.j2
@@ -492,27 +492,49 @@ def collect_nvme_metrics(device):
for line in attribute_lines:
label, value = line.split(':')
if label == 'Available Spare':
- metrics['available_spare_ratio'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value[0:-1])
+ metrics['available_spare_ratio'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value[0:-1])
elif label == 'Available Spare Threshold':
- metrics['available_spare_threshold_ratio'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value[0:-1])
+ metrics['available_spare_threshold_ratio'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value[0:-1])
elif label == 'Percentage Used':
- metrics['percentage_used_ratio'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value[0:-1])
+ metrics['percentage_used_ratio'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value[0:-1])
elif label == 'Power Cycle':
- metrics['power_cycles_total'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value)
+ metrics['power_cycles_total'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value)
elif label == 'Power On Hours':
- metrics['power_on_hours_total'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value.replace(',', ''))
+ metrics['power_on_hours_total'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value.replace(',', ''))
elif label == 'Temperature':
- metrics['temperature_celcius'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value.replace(' Celsius', ''))
+ metrics['temperature_celcius'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value.replace(' Celsius', ''))
elif label == 'Unsafe Shutdowns':
- metrics['unsafe_shutdowns_total'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value)
+ metrics['unsafe_shutdowns_total'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value)
elif label == 'Media and Data Integrity Errors':
- metrics['media_errors_total'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value)
+ metrics['media_errors_total'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value)
elif label == 'Error Information Log Entries':
- metrics['num_err_log_entries_total'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value)
+ metrics['num_err_log_entries_total'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value)
elif label == 'Warning Comp. Temperature Time':
- metrics['warning_temperature_time_total'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value)
+ metrics['warning_temperature_time_total'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value)
elif label == 'Critical Comp. Temperature Time':
- metrics['critical_temperature_time_total'].labels(device.base_labels["device"], device.base_labels["disk"]).set(value)
+ metrics['critical_temperature_time_total'].labels(
+ device.base_labels["device"], device.base_labels["disk"]
+ ).set(value)
def collect_disks_smart_metrics(wakeup_disks, by_id, include_nvme):