diff options
author | Christian Pointner <equinox@spreadspace.org> | 2021-09-27 23:40:51 +0200 |
---|---|---|
committer | Christian Pointner <equinox@spreadspace.org> | 2021-09-27 23:40:51 +0200 |
commit | 49114bef214614a636b7d529e41566fdabb1f2c6 (patch) | |
tree | 6136d6708561b3708d8fe6e51508e54847f9b422 | |
parent | upgrade openwrt for ups monitors (diff) |
finalize NUT monitoring
3 files changed, 54 insertions, 5 deletions
```diff
diff --git a/inventory/group_vars/ups/vars.yml b/inventory/group_vars/ups/vars.yml
index eaa36666..9b4f15d2 100644
--- a/inventory/group_vars/ups/vars.yml
+++ b/inventory/group_vars/ups/vars.yml
@@ -12,8 +12,6 @@ openwrt_target: mt7620
 openwrt_profile: ravpower_rp-wd03
 openwrt_output_image_suffixes:
   - "{{ openwrt_profile }}-squashfs-sysupgrade.bin"
-  - "{{ openwrt_profile }}-squashfs-kernel.bin" ## TODO: remove once all devices have been upgraded to 21.02.0++
-  - "{{ openwrt_profile }}-squashfs-rootfs.bin" ## TODO: remove once all devices have been upgraded to 21.02.0++
 
 openwrt_packages_remove:
   - ppp
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml b/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml
index 1f9338ea..b6163981 100644
--- a/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml
@@ -1,4 +1,3 @@
 ---
 prometheus_server_rules_ipmi__remote_extra: []
 prometheus_server_rules_ipmi__remote: []
-## TODO: add remote-IPMI specific alert rules
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml b/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml
index 150a507e..274133e5 100644
--- a/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml
@@ -1,4 +1,56 @@
 ---
 prometheus_server_rules_nut__ups_extra: []
-prometheus_server_rules_nut__ups: []
-## TODO: add NUT/UPS alert rules
+prometheus_server_rules_nut__ups:
+  - alert: UPSLoadHigh
+    expr: network_ups_tools_ups_load > 82
+    for: 1m
+    labels:
+      severity: warning
+    annotations:
+      summary: UPS load is high (instance {{ '{{' }} $labels.instance {{ '}}' }})
+      description: "The load of UPS {{ '{{' }} $labels.instance {{ '}}' }} is > 82 %.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+  - alert: UPSLoadVeryHigh
+    expr: network_ups_tools_ups_load > 92
+    for: 1m
+    labels:
+      severity: critical
+    annotations:
+      summary: UPS load is very high (instance {{ '{{' }} $labels.instance {{ '}}' }})
+      description: "The load of UPS {{ '{{' }} $labels.instance {{ '}}' }} is > 92 %.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+  - alert: UPSStatusOverload
+    expr: network_ups_tools_ups_status{flag="OVER"} == 1
+    for: 1m
+    labels:
+      severity: critical
+    annotations:
+      summary: UPS is overloaded (instance {{ '{{' }} $labels.instance {{ '}}' }})
+      description: "UPS {{ '{{' }} $labels.instance {{ '}}' }} is overloaded.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+  - alert: UPSStatusOnBattery
+    expr: network_ups_tools_ups_status{flag="OB"} == 1
+    for: 1m
+    labels:
+      severity: warning
+    annotations:
+      summary: UPS is running on battery (instance {{ '{{' }} $labels.instance {{ '}}' }})
+      description: "UPS {{ '{{' }} $labels.instance {{ '}}' }} lost wall power and is running on battery.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+  - alert: UPSStatusLowBattery
+    expr: network_ups_tools_ups_status{flag="LB"} == 1
+    for: 0m
+    labels:
+      severity: critical
+    annotations:
+      summary: UPS battery is low (instance {{ '{{' }} $labels.instance {{ '}}' }})
+      description: "UPS {{ '{{' }} $labels.instance {{ '}}' }} reports low battery.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+  - alert: UPSStatusReplaceBattery
+    expr: network_ups_tools_ups_status{flag="RB"} == 1
+    for: 2m
+    labels:
+      severity: warning
+    annotations:
+      summary: UPS battery needs to be replaced (instance {{ '{{' }} $labels.instance {{ '}}' }})
+      description: "The battery of UPS {{ '{{' }} $labels.instance {{ '}}' }} needs to be replaced.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
```