---
# Alerting rules built on Prometheus blackbox-exporter probe metrics.
# Rule set reference: https://awesome-prometheus-alerts.grep.to/rules#blackbox
#
# Templating note: the `{{ '{{' }}` and `{{ '}}' }}` fragments are Jinja
# expressions that evaluate to literal braces. Assuming this file is rendered
# through Jinja2 (e.g. as Ansible variables -- TODO confirm), the resulting
# annotations contain Prometheus-style templates such as `$labels.instance`
# wrapped in double braces, left for Prometheus to expand at alert time.

# Extra blackbox rules; empty list by default.
# NOTE(review): presumably combined with the list below by the consuming
# template -- verify against the role that reads these variables.
prometheus_server_rules_blackbox__probe_extra: []

# Default blackbox rules, one list entry per alert.
prometheus_server_rules_blackbox__probe:
  # Probe reported failure (probe_success is 0); no pending period.
  - alert: BlackboxProbeFailed
    expr: probe_success == 0
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Blackbox probe failed (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "Probe failed\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # Successful probe whose 1-minute average duration is above 1 second,
  # sustained for 1 minute.
  - alert: BlackboxSlowProbe
    expr: avg_over_time(probe_duration_seconds[1m]) > 1 and probe_success == 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: Blackbox slow probe (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "Blackbox probe took more than 1s to complete\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # Earliest certificate expiry is less than 28 days (86400 s * 28) away.
  # This condition also holds for certificates matched by the two stricter
  # SSL rules below.
  - alert: BlackboxSslCertificateWillExpireSoon
    expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 28
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "SSL certificate expires in less than 28 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # Earliest certificate expiry is less than 7 days (86400 s * 7) away.
  - alert: BlackboxSslCertificateWillExpireVerySoon
    expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 7
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Blackbox SSL certificate will expire very soon (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "SSL certificate expires in less than 7 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # Earliest certificate expiry timestamp is at or before the current time.
  - alert: BlackboxSslCertificateExpired
    expr: probe_ssl_earliest_cert_expiry - time() <= 0
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Blackbox SSL certificate expired (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "SSL certificate has expired already\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # HTTP status code outside the 200-399 range (<= 199 or >= 400).
  - alert: BlackboxProbeHttpFailure
    expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Blackbox probe HTTP failure (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "HTTP status code is not 200-399\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # 1-minute average of probe_http_duration_seconds above 1 second,
  # sustained for 1 minute.
  - alert: BlackboxProbeSlowHttp
    expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: Blackbox probe slow HTTP (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "HTTP request took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

  # 1-minute average of probe_icmp_duration_seconds above 1 second,
  # sustained for 1 minute.
  - alert: BlackboxProbeSlowPing
    expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: Blackbox probe slow ping (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "Blackbox ping took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"