From 31e3ea00dc1c8f22af13a3205835a678d269ca77 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Tue, 22 Mar 2022 00:20:38 +0100 Subject: prometheus: minor fixes and improvements --- .../prometheus/server/defaults/main/rules_node.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'roles/monitoring/prometheus/server/defaults') diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml index 7de36758..2278c70a 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml @@ -2,6 +2,15 @@ ## https://awesome-prometheus-alerts.grep.to/rules#host-and-hardware prometheus_server_rules_node_extra: [] prometheus_server_rules_node: + - alert: NodeTextfileScrapeFailed + expr: node_textfile_scrape_error != 0 + for: 0m + labels: + severity: warning + annotations: + summary: Scraping metrics from textfiles failed (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "The textfile collector failed to scrape at least one file\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + - alert: HostOutOfMemory expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10 for: 2m @@ -47,7 +56,7 @@ prometheus_server_rules_node: description: "Filesystem is predicted to run out of space within the next 24 hours at current write rate\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - alert: HostOutOfInodes - expr: node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0 + expr: node_filesystem_files_free{mountpoint="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0 for: 2m labels: severity: warning @@ -56,7 +65,7 @@ prometheus_server_rules_node: description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - alert: HostInodesWillFillIn24Hours - expr: node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{mountpoint="/rootfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0 + expr: node_filesystem_files_free{mountpoint="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{mountpoint="/rootfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0 for: 2m labels: severity: warning -- cgit v1.2.3