summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- inventory/group_vars/ups/vars.yml 2
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml 1
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml 56
3 files changed, 54 insertions, 5 deletions
diff --git a/inventory/group_vars/ups/vars.yml b/inventory/group_vars/ups/vars.yml
index eaa36666..9b4f15d2 100644
--- a/inventory/group_vars/ups/vars.yml
+++ b/inventory/group_vars/ups/vars.yml
@@ -12,8 +12,6 @@ openwrt_target: mt7620
openwrt_profile: ravpower_rp-wd03
openwrt_output_image_suffixes:
- "{{ openwrt_profile }}-squashfs-sysupgrade.bin"
- - "{{ openwrt_profile }}-squashfs-kernel.bin" ## TODO: remove once all devices have been upgraded to 21.02.0++
- - "{{ openwrt_profile }}-squashfs-rootfs.bin" ## TODO: remove once all devices have been upgraded to 21.02.0++
openwrt_packages_remove:
- ppp
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml b/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml
index 1f9338ea..b6163981 100644
--- a/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml
@@ -1,4 +1,3 @@
---
prometheus_server_rules_ipmi__remote_extra: []
prometheus_server_rules_ipmi__remote: []
-## TODO: add remote-IPMI specific alert rules
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml b/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml
index 150a507e..274133e5 100644
--- a/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml
@@ -1,4 +1,56 @@
---
prometheus_server_rules_nut__ups_extra: []
-prometheus_server_rules_nut__ups: []
-## TODO: add NUT/UPS alert rules
+prometheus_server_rules_nut__ups:
+ - alert: UPSLoadHigh
+ expr: network_ups_tools_ups_load > 82
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: UPS load is high (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "The load of UPS {{ '{{' }} $labels.instance {{ '}}' }} is > 82 %.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: UPSLoadVeryHigh
+ expr: network_ups_tools_ups_load > 92
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ summary: UPS load is very high (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "The load of UPS {{ '{{' }} $labels.instance {{ '}}' }} is > 92 %.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: UPSStatusOverload
+ expr: network_ups_tools_ups_status{flag="OVER"} == 1
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ summary: UPS is overloaded (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "UPS {{ '{{' }} $labels.instance {{ '}}' }} is overloaded.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: UPSStatusOnBattery
+ expr: network_ups_tools_ups_status{flag="OB"} == 1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: UPS is running on battery (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "UPS {{ '{{' }} $labels.instance {{ '}}' }} lost wall power and is running on battery.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: UPSStatusLowBattery
+ expr: network_ups_tools_ups_status{flag="LB"} == 1
+ for: 0m
+ labels:
+ severity: critical
+ annotations:
+ summary: UPS battery is low (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "UPS {{ '{{' }} $labels.instance {{ '}}' }} reports low battery.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: UPSStatusReplaceBattery
+ expr: network_ups_tools_ups_status{flag="RB"} == 1
+ for: 2m
+ labels:
+ severity: warning
+ annotations:
+ summary: UPS battery needs to be replaced (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "The battery of UPS {{ '{{' }} $labels.instance {{ '}}' }} needs to be replaced.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"