summary | refs | log | tree | commit | diff
path: root/roles
diff options
context:
space:
mode:
Diffstat (limited to 'roles')
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_node.yml | 8
1 files changed, 8 insertions, 0 deletions
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml
index 8a02e67b..525355d5 100644
--- a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml
@@ -20,6 +20,14 @@ prometheus_server_rules_node:
summary: Host memory under memory pressure (instance {{ '{{' }} $labels.instance {{ '}}' }})
description: "The node is under heavy memory pressure. High rate of major page faults\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+  - alert: HostMemoryHardwareCorrupted  # no `for:` clause is set on this rule
+    expr: node_memory_HardwareCorrupted_bytes > 0  # true whenever the gauge is above zero
+    labels:
+      severity: warning
+    annotations:  # {{ '{{' }} / {{ '}}' }} emit literal braces so $value / $labels reach Prometheus unrendered
+      summary: Host memory is corrupted (instance {{ '{{' }} $labels.instance {{ '}}' }})
+      description: "The node reports {{ '{{' }} $value {{ '}}' }} bytes of corrupted memory.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
- alert: HostOutOfDiskSpace
expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
for: 2m