---
# Prometheus alerting rules for IPMI metrics, exposed as Ansible variables.
#
# Every `{{ '{{' }}` / `{{ '}}' }}` pair below is a Jinja expression that
# renders a literal `{{` / `}}`. Presumably this lets the `$labels` / `$value`
# placeholders reach Prometheus unexpanded - confirm against the template or
# task that consumes these variables (not visible in this file).

# Empty by default. Presumably a hook for additional, user-supplied IPMI rules;
# the consumer is not visible here - TODO confirm.
prometheus_server_rules_ipmi_extra: []

# Rule list. For the `ipmi_*_state` metrics, the alert texts below treat the
# value 1 as "warning" and the value 2 as "critical".
prometheus_server_rules_ipmi:
  # --- Exporter health -------------------------------------------------------
  - alert: IpmiFailedToScrapeCollector
    expr: ipmi_up == 0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "IPMI collector failed to scrape (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI collector {{ '{{' }} $labels.collector {{ '}}' }} could not be scraped.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # --- Generic sensor state --------------------------------------------------
  - alert: IpmiSensorStateWarning
    expr: ipmi_sensor_state == 1
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "IPMI Sensor state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI sensor {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  - alert: IpmiSensorStateCritical
    expr: ipmi_sensor_state == 2
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "IPMI Sensor state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI sensor {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # --- Fan speed -------------------------------------------------------------
  - alert: IpmiFanSpeedStateWarning
    expr: ipmi_fan_speed_state == 1
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "IPMI fan-speed state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI fanspeed {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  - alert: IpmiFanSpeedStateCritical
    expr: ipmi_fan_speed_state == 2
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "IPMI fan-speed state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI fan speed {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # --- Temperature -----------------------------------------------------------
  - alert: IpmiTemperatureStateWarning
    expr: ipmi_temperature_state == 1
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "IPMI Temperature state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI temperature {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  - alert: IpmiTemperatureStateCritical
    expr: ipmi_temperature_state == 2
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "IPMI Temperature state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI temperature {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # --- Voltage ---------------------------------------------------------------
  - alert: IpmiVoltageStateWarning
    expr: ipmi_voltage_state == 1
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "IPMI Voltage state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI voltage {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  - alert: IpmiVoltageStateCritical
    expr: ipmi_voltage_state == 2
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "IPMI Voltage state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI voltage {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # --- Current ---------------------------------------------------------------
  - alert: IpmiCurrentStateWarning
    expr: ipmi_current_state == 1
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "IPMI Current state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI current {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  - alert: IpmiCurrentStateCritical
    expr: ipmi_current_state == 2
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "IPMI Current state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI current {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # --- Power -----------------------------------------------------------------
  - alert: IpmiPowerStateWarning
    expr: ipmi_power_state == 1
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "IPMI Power state is warning (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI power {{ '{{' }} $labels.name {{ '}}' }} has state: warning.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  - alert: IpmiPowerStateCritical
    expr: ipmi_power_state == 2
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "IPMI Power state is critical (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI power {{ '{{' }} $labels.name {{ '}}' }} has state: critical.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # --- System event log (SEL) ------------------------------------------------
  # NOTE(review): rate() is documented for counter metrics; confirm the metric
  # type of ipmi_sel_logs_count in the exporter before relying on this
  # threshold.
  - alert: IpmiSystemEventLogManyEvents
    expr: rate(ipmi_sel_logs_count[2m]) > 0.1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "IPMI SEL grows quickly (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI system log contains too many new events.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # No hold period (`for: 0m`): alerts once free space drops below 100 bytes.
  - alert: IpmiSystemEventLogFull
    expr: ipmi_sel_free_space_bytes < 100
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: "IPMI SEL is full (instance {{ '{{' }} $labels.instance {{ '}}' }})"
      description: "The IPMI system log ran out of space.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"