blob: 31c15d51edccf43c506c307a8e4f1ad9418a2ed4 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
---
# Extra IPMI alert rules; an empty list by default.
# NOTE(review): presumably combined with prometheus_server_rules_ipmi by the
# consuming role/template - confirm where the two lists are merged.
prometheus_server_rules_ipmi_extra: []
# Built-in IPMI alert rules, one list entry per alert (alert/expr/for/labels/annotations).
# The annotation strings wrap braces as {{ '{{' }} and {{ '}}' }}: these look like
# Jinja escapes that render literal double braces, so the $labels/$value
# placeholders reach Prometheus untouched - TODO confirm the file is Jinja-templated.
prometheus_server_rules_ipmi:
# Warning once ipmi_up has been 0 for 2 minutes; the description names the
# affected collector via the `collector` label.
- alert: IpmiFailedToScrapeCollector
  expr: ipmi_up == 0
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "IPMI collector failed to scrape (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI collector {{ '{{' }} $labels.collector {{ '}}' }} could not be scraped.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Warning once ipmi_sensor_state has been 1 (reported here as "warning")
# for 2 minutes.
- alert: IpmiSensorStateWarning
  expr: ipmi_sensor_state == 1
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "IPMI Sensor state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI sensor {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Critical once ipmi_sensor_state has been 2 (reported here as "critical")
# for 2 minutes.
- alert: IpmiSensorStateCritical
  expr: ipmi_sensor_state == 2
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: "IPMI Sensor state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI sensor {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Warning once ipmi_fan_speed_state has been 1 (reported here as "warning")
# for 2 minutes.
- alert: IpmiFanSpeedStateWarning
  expr: ipmi_fan_speed_state == 1
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "IPMI fan-speed state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    # Wording fixed from "fanspeed" to "fan speed" so the message matches the
    # critical fan-speed rule's description.
    description: "The IPMI fan speed {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Critical once ipmi_fan_speed_state has been 2 (reported here as "critical")
# for 2 minutes.
- alert: IpmiFanSpeedStateCritical
  expr: ipmi_fan_speed_state == 2
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: "IPMI fan-speed state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI fan speed {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Warning once ipmi_temperature_state has been 1 (reported here as "warning")
# for 2 minutes.
- alert: IpmiTemperatureStateWarning
  expr: ipmi_temperature_state == 1
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "IPMI Temperature state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI temperature {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Critical once ipmi_temperature_state has been 2 (reported here as
# "critical") for 2 minutes.
- alert: IpmiTemperatureStateCritical
  expr: ipmi_temperature_state == 2
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: "IPMI Temperature state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI temperature {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Warning once ipmi_voltage_state has been 1 (reported here as "warning")
# for 2 minutes.
- alert: IpmiVoltageStateWarning
  expr: ipmi_voltage_state == 1
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "IPMI Voltage state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI voltage {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Critical once ipmi_voltage_state has been 2 (reported here as "critical")
# for 2 minutes.
- alert: IpmiVoltageStateCritical
  expr: ipmi_voltage_state == 2
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: "IPMI Voltage state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI voltage {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Warning once ipmi_current_state has been 1 (reported here as "warning")
# for 2 minutes.
- alert: IpmiCurrentStateWarning
  expr: ipmi_current_state == 1
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "IPMI Current state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI current {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Critical once ipmi_current_state has been 2 (reported here as "critical")
# for 2 minutes.
- alert: IpmiCurrentStateCritical
  expr: ipmi_current_state == 2
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: "IPMI Current state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI current {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Warning once ipmi_power_state has been 1 (reported here as "warning")
# for 2 minutes.
- alert: IpmiPowerStateWarning
  expr: ipmi_power_state == 1
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "IPMI Power state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI power {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Critical once ipmi_power_state has been 2 (reported here as "critical")
# for 2 minutes.
- alert: IpmiPowerStateCritical
  expr: ipmi_power_state == 2
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: "IPMI Power state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI power {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Warning once the 2-minute rate of ipmi_sel_logs_count has stayed above 0.1
# for 1 minute.
# NOTE(review): rate() is meant for counters and treats any decrease as a
# reset; confirm the metric type of ipmi_sel_logs_count and that this is intended.
- alert: IpmiSystemEventLogManyEvents
  expr: rate(ipmi_sel_logs_count[2m]) > 0.1
  for: 1m
  labels:
    severity: warning
  annotations:
    summary: "IPMI SEL grows quickly (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI system log contains too many new events.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
# Warning as soon as ipmi_sel_free_space_bytes is below 100; `for: 0m` means
# there is no hold period before the alert fires.
- alert: IpmiSystemEventLogFull
  expr: ipmi_sel_free_space_bytes < 100
  for: 0m
  labels:
    severity: warning
  annotations:
    summary: "IPMI SEL is full (instance {{ '{{' }} $labels.instance {{ '}}' }})"
    description: "The IPMI system log ran out of space.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
|