summary refs log tree commit diff
path: root/roles/monitoring/prometheus/server/defaults
diff options
context:
space:
mode:
Diffstat (limited to 'roles/monitoring/prometheus/server/defaults')
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/main.yml 4
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml 46
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml 20
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml 11
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_blackbox__probe.yml 74
-rw-r--r-- roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml 3
6 files changed, 76 insertions, 82 deletions
diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml
index 09cd150c..7781fd69 100644
--- a/roles/monitoring/prometheus/server/defaults/main/main.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/main.yml
@@ -17,9 +17,7 @@ prometheus_server_rules:
nut: "{{ prometheus_server_rules_nut + prometheus_server_rules_nut_extra }}"
nut/ups: "{{ prometheus_server_rules_nut__ups + prometheus_server_rules_nut__ups_extra }}"
blackbox: "{{ prometheus_server_rules_blackbox + prometheus_server_rules_blackbox_extra }}"
- blackbox/ping: "{{ prometheus_server_rules_blackbox__ping + prometheus_server_rules_blackbox__ping_extra }}"
- blackbox/https: "{{ prometheus_server_rules_blackbox__https + prometheus_server_rules_blackbox__https_extra }}"
- blackbox/ssh: "{{ prometheus_server_rules_blackbox__ssh + prometheus_server_rules_blackbox__ssh_extra }}"
+ blackbox/probe: "{{ prometheus_server_rules_blackbox__probe + prometheus_server_rules_blackbox__probe_extra }}"
# prometheus_server_alertmanager:
# url: "127.0.0.1:9093"
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml
index d5c1fd42..99f2e83c 100644
--- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml
@@ -1,47 +1,3 @@
---
prometheus_server_rules_blackbox_extra: []
-prometheus_server_rules_blackbox:
- - alert: BlackboxProbeFailed
- expr: probe_success == 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: Blackbox probe failed (instance {{ '{{' }} $labels.instance {{ '}}' }})
- description: "Probe failed\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
-
- - alert: BlackboxSlowProbe
- expr: avg_over_time(probe_duration_seconds[1m]) > 1
- for: 1m
- labels:
- severity: warning
- annotations:
- summary: Blackbox slow probe (instance {{ '{{' }} $labels.instance {{ '}}' }})
- description: "Blackbox probe took more than 1s to complete\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
-
- - alert: BlackboxSslCertificateWillExpireSoon
- expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30
- for: 0m
- labels:
- severity: warning
- annotations:
- summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }})
- description: "SSL certificate expires in 30 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
-
- - alert: BlackboxSslCertificateWillExpireSoon
- expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 3
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }})
- description: "SSL certificate expires in 3 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
-
- - alert: BlackboxSslCertificateExpired
- expr: probe_ssl_earliest_cert_expiry - time() <= 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: Blackbox SSL certificate expired (instance {{ '{{' }} $labels.instance {{ '}}' }})
- description: "SSL certificate has expired already\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+prometheus_server_rules_blackbox: []
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml
deleted file mode 100644
index 140e3b4f..00000000
--- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml
+++ /dev/null
@@ -1,20 +0,0 @@
----
-prometheus_server_rules_blackbox__https_extra: []
-prometheus_server_rules_blackbox__https:
- - alert: BlackboxProbeHttpFailure
- expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: Blackbox probe HTTP failure (instance {{ '{{' }} $labels.instance {{ '}}' }})
- description: "HTTP status code is not 200-399\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
-
- - alert: BlackboxProbeSlowHttp
- expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
- for: 1m
- labels:
- severity: warning
- annotations:
- summary: Blackbox probe slow HTTP (instance {{ '{{' }} $labels.instance {{ '}}' }})
- description: "HTTP request took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml
deleted file mode 100644
index cc87b6b1..00000000
--- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml
+++ /dev/null
@@ -1,11 +0,0 @@
----
-prometheus_server_rules_blackbox__ping_extra: []
-prometheus_server_rules_blackbox__ping:
- - alert: BlackboxProbeSlowPing
- expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
- for: 1m
- labels:
- severity: warning
- annotations:
- summary: Blackbox probe slow ping (instance {{ '{{' }} $labels.instance {{ '}}' }})
- description: "Blackbox ping took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__probe.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__probe.yml
new file mode 100644
index 00000000..9f9d2292
--- /dev/null
+++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__probe.yml
@@ -0,0 +1,74 @@
+---
+prometheus_server_rules_blackbox__probe_extra: []
+prometheus_server_rules_blackbox__probe:
+ - alert: BlackboxProbeFailed
+ expr: probe_success == 0
+ for: 0m
+ labels:
+ severity: critical
+ annotations:
+ summary: Blackbox probe failed (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "Probe failed\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: BlackboxSlowProbe
+ expr: avg_over_time(probe_duration_seconds[1m]) > 1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: Blackbox slow probe (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "Blackbox probe took more than 1s to complete\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: BlackboxSslCertificateWillExpireSoon
+ expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30
+ for: 0m
+ labels:
+ severity: warning
+ annotations:
+ summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "SSL certificate expires in 30 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: BlackboxSslCertificateWillExpireSoon
+ expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 3
+ for: 0m
+ labels:
+ severity: critical
+ annotations:
+ summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "SSL certificate expires in 3 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: BlackboxSslCertificateExpired
+ expr: probe_ssl_earliest_cert_expiry - time() <= 0
+ for: 0m
+ labels:
+ severity: critical
+ annotations:
+ summary: Blackbox SSL certificate expired (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "SSL certificate has expired already\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: BlackboxProbeHttpFailure
+ expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
+ for: 0m
+ labels:
+ severity: critical
+ annotations:
+ summary: Blackbox probe HTTP failure (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "HTTP status code is not 200-399\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: BlackboxProbeSlowHttp
+ expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: Blackbox probe slow HTTP (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "HTTP request took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
+
+ - alert: BlackboxProbeSlowPing
+ expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: Blackbox probe slow ping (instance {{ '{{' }} $labels.instance {{ '}}' }})
+ description: "Blackbox ping took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml
deleted file mode 100644
index 8e717c41..00000000
--- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml
+++ /dev/null
@@ -1,3 +0,0 @@
----
-prometheus_server_rules_blackbox__ssh_extra: []
-prometheus_server_rules_blackbox__ssh: []