From bdcafe51f1b40d2dab2d52136e1ce60ab95e2ed5 Mon Sep 17 00:00:00 2001
From: Christian Pointner
Date: Fri, 15 Oct 2021 23:05:20 +0200
Subject: add alerts for zpool state

---
Review notes (placed below the "---" marker, so not part of the commit message):

  * Adds two alerting rules to the prometheus_server_rules_node list:
      - ZpoolStateDegraded: severity "warning",
        expr node_zfs_zpool_state{state="degraded"} == 1
      - ZpoolStateFaulted: severity "critical",
        expr node_zfs_zpool_state{state="faulted"} == 1
    Both use "for: 0m", the same value as the AptUpgradesPending rule that
    follows them in the hunk context.
  * The {{ '{{' }} ... {{ '}}' }} sequences look like Jinja escapes that leave
    literal {{ ... }} for Prometheus to expand - presumably because this file
    is rendered by Ansible first; confirm against the role's templating.
  * NOTE(review): line breaks and indentation of this patch were reconstructed
    from a copy that had been collapsed onto a single line. The line count
    matches the hunk header (3 + 18 + 3 = 24 lines), but the YAML nesting
    depth (2-space indent, list items indented under the top-level key) and
    the whitespace inside the quoted description strings are assumptions -
    verify against the target file before applying.

 .../prometheus/server/defaults/main/rules_node.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 (limited to 'roles/monitoring')

diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml
index 79a474e8..ffe616b7 100644
--- a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml
@@ -227,6 +227,24 @@ prometheus_server_rules_node:
       summary: Host clock not synchronising (instance {{ '{{' }} $labels.instance {{ '}}' }})
       description: "Clock not synchronising.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
 
+  - alert: ZpoolStateDegraded
+    expr: node_zfs_zpool_state{state="degraded"} == 1
+    for: 0m
+    labels:
+      severity: warning
+    annotations:
+      summary: ZFS zpool is degraded (instance {{ '{{' }} $labels.instance {{ '}}' }})
+      description: "The ZFS zpool {{ '{{' }} $labels.zpool {{ '}}' }} is degraded.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+  - alert: ZpoolStateFaulted
+    expr: node_zfs_zpool_state{state="faulted"} == 1
+    for: 0m
+    labels:
+      severity: critical
+    annotations:
+      summary: ZFS zpool is faulted (instance {{ '{{' }} $labels.instance {{ '}}' }})
+      description: "The ZFS zpool {{ '{{' }} $labels.zpool {{ '}}' }} is faulted.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
   - alert: AptUpgradesPending
     expr: sum by (instance) (apt_upgrades_pending) > 0
     for: 0m
-- 
cgit v1.2.3