From 6cf380956bdd31292b4ccf51b1bbc217b93bf45f Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Wed, 23 Jun 2021 23:06:40 +0200 Subject: prometheus: connect server to alertmanager if configured --- .../prometheus/server/defaults/main/main.yml | 5 ++- .../server/defaults/main/rules_prometheus.yml | 47 ++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) (limited to 'roles/monitoring/prometheus/server/defaults') diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml index b10d6f17..8e7fea4b 100644 --- a/roles/monitoring/prometheus/server/defaults/main/main.yml +++ b/roles/monitoring/prometheus/server/defaults/main/main.yml @@ -9,5 +9,8 @@ prometheus_server_jobs: - node prometheus_server_rules: - prometheus: "{{ prometheus_server_rules_prometheus + prometheus_server_rules_prometheus_extra }}" + prometheus: "{{ prometheus_server_rules_prometheus + ((prometheus_server_alertmanager is defined) | ternary(prometheus_server_rules_prometheus_alertmanager, [])) + prometheus_server_rules_prometheus_extra }}" node: "{{ prometheus_server_rules_node + prometheus_server_rules_prometheus_extra }}" + +# prometheus_server_alertmanager: +# url: "127.0.0.1:9093" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_prometheus.yml b/roles/monitoring/prometheus/server/defaults/main/rules_prometheus.yml index 6d84efa4..8d4672b1 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_prometheus.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_prometheus.yml @@ -190,3 +190,50 @@ prometheus_server_rules_prometheus: annotations: summary: Prometheus TSDB WAL truncations failed (instance {{ '{{' }} $labels.instance {{ '}}' }}) description: "Prometheus encountered {{ '{{' }} $value {{ '}}' }} TSDB WAL truncation failures\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + +prometheus_server_rules_prometheus_alertmanager: + - alert: PrometheusAlertmanagerConfigurationReloadFailure + expr: alertmanager_config_last_reload_successful != 1 + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus AlertManager configuration reload failure (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "AlertManager configuration reload error\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: PrometheusAlertmanagerConfigNotSynced + expr: count(count_values("config_hash", alertmanager_config_hash)) > 1 + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus AlertManager config not synced (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Configurations of AlertManager cluster instances are out of sync\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: PrometheusAlertmanagerE2eDeadManSwitch + expr: vector(1) + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus AlertManager E2E dead man switch (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Prometheus DeadManSwitch is an always-firing alert. It's used as an end-to-end test of Prometheus through the Alertmanager.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: PrometheusNotConnectedToAlertmanager + expr: prometheus_notifications_alertmanagers_discovered < 1 + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus not connected to alertmanager (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Prometheus cannot connect the alertmanager\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: PrometheusAlertmanagerNotificationFailing + expr: rate(alertmanager_notifications_failed_total[1m]) > 0 + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus AlertManager notification failing (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Alertmanager is failing sending notifications\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" -- cgit v1.2.3