From c92175810eb36514f6aa16e641551ef4d4f6b776 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sat, 5 Mar 2022 03:59:41 +0100 Subject: prometheus: cleanups and fixes in prep for alerta --- inventory/host_vars/ch-mon.yml | 8 ++++++-- inventory/host_vars/ele-mon.yml | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'inventory/host_vars') diff --git a/inventory/host_vars/ch-mon.yml b/inventory/host_vars/ch-mon.yml index 0c27a522..c16398bc 100644 --- a/inventory/host_vars/ch-mon.yml +++ b/inventory/host_vars/ch-mon.yml @@ -61,6 +61,10 @@ prometheus_server_storage: size: 30G fs: ext4 +prometheus_server_external_labels: + environment: chaos-at-home + monitor: "{{ inventory_hostname }}" + prometheus_server_alertmanager: url: "127.0.0.1:9093" path_prefix: "/alertmanager/" @@ -68,7 +72,7 @@ prometheus_server_alertmanager: username: server password: "{{ vault_prometheus_alertmanager_auth_user_passwords['server'] }}" -prometheus_server_web_external_url: /prometheus/ +prometheus_server_web_external_url: "http://{{ network.primary.address | ipaddr('address') }}/prometheus/" prometheus_server_auth_users: server: "{{ vault_prometheus_server_auth_user_passwords['server'] }}" @@ -136,7 +140,7 @@ prometheus_alertmanager_smtp: from: "noreply@chaos-at-home.org" require_tls: no -prometheus_alertmanager_web_route_prefix: /alertmanager/ +prometheus_alertmanager_web_external_url: "http://{{ network.primary.address | ipaddr('address') }}/alertmanager/" prometheus_alertmanager_auth_users: server: "{{ vault_prometheus_alertmanager_auth_user_passwords['server'] }}" diff --git a/inventory/host_vars/ele-mon.yml b/inventory/host_vars/ele-mon.yml index 4ad37bce..5d03837c 100644 --- a/inventory/host_vars/ele-mon.yml +++ b/inventory/host_vars/ele-mon.yml @@ -70,7 +70,7 @@ prometheus_server_alertmanager: url: "127.0.0.1:9093" path_prefix: "/alertmanager/" -prometheus_server_web_external_url: /prometheus/ +prometheus_server_web_external_url: "http://{{ network.primary.address | ipaddr('address') }}/prometheus/" prometheus_exporters_extra: @@ -101,7 +101,7 @@ prometheus_exporter_mikrotik_devices: user: prometheus password: "{{ vault_prometheus_exporter_mikrotik_api_password }}" -prometheus_alertmanager_web_route_prefix: /alertmanager/ +prometheus_alertmanager_web_external_url: "http://{{ network.primary.address | ipaddr('address') }}/alertmanager/" prometheus_alertmanager_smtp: smarthost: "mailrelay.chaos-at-home.org:587" from: "noreply@elev8.at" -- cgit v1.2.3 From 9117d620ff6908ce92318db216403dab68c496ed Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sat, 5 Mar 2022 21:55:14 +0100 Subject: initial commit for alerta role --- chaos-at-home/ch-apps.yml | 1 + inventory/host_vars/ch-apps.yml | 3 ++ inventory/host_vars/ch-mon.yml | 17 ++++++++ roles/monitoring/alerta/defaults/main.yml | 2 + roles/monitoring/alerta/tasks/main.yml | 32 +++++++++++++++ roles/monitoring/alerta/templates/pod-spec.yml.j2 | 48 +++++++++++++++++++++++ 6 files changed, 103 insertions(+) create mode 100644 roles/monitoring/alerta/defaults/main.yml create mode 100644 roles/monitoring/alerta/tasks/main.yml create mode 100644 roles/monitoring/alerta/templates/pod-spec.yml.j2 (limited to 'inventory/host_vars') diff --git a/chaos-at-home/ch-apps.yml b/chaos-at-home/ch-apps.yml index d264ffc2..f0347216 100644 --- a/chaos-at-home/ch-apps.yml +++ b/chaos-at-home/ch-apps.yml @@ -12,3 +12,4 @@ - role: apt-repo/spreadspace - role: kubernetes/base - role: kubernetes/standalone/base + - role: monitoring/alerta diff --git a/inventory/host_vars/ch-apps.yml b/inventory/host_vars/ch-apps.yml index bdbac832..ed2aeb70 100644 --- a/inventory/host_vars/ch-apps.yml +++ b/inventory/host_vars/ch-apps.yml @@ -113,3 +113,6 @@ kubernetes_version: 1.23.2 kubernetes_container_runtime: docker kubernetes_standalone_max_pods: 42 kubernetes_standalone_cni_variant: with-portmap + + +alerta_base_path: /srv/storage/alerta diff --git a/inventory/host_vars/ch-mon.yml b/inventory/host_vars/ch-mon.yml index c16398bc..bd952fc8 100644 --- a/inventory/host_vars/ch-mon.yml +++ b/inventory/host_vars/ch-mon.yml @@ -146,6 +146,23 @@ prometheus_alertmanager_auth_users: server: "{{ vault_prometheus_alertmanager_auth_user_passwords['server'] }}" admin: "{{ vault_prometheus_alertmanager_auth_user_passwords['admin'] }}" +prometheus_alertmanager_route: + receiver: alerta + group_by: ['...'] + group_wait: 0 + group_interval: 10s + repeat_interval: 5m + +prometheus_alertmanager_receivers: + - name: alerta + webhook_configs: + - url: http://192.168.32.1:8080/api/webhooks/prometheus + send_resolved: true + http_config: + basic_auth: + username: admin + password: alerta + grafana_secret_key: "{{ vault_grafana_secret_key }}" diff --git a/roles/monitoring/alerta/defaults/main.yml b/roles/monitoring/alerta/defaults/main.yml new file mode 100644 index 00000000..034c8268 --- /dev/null +++ b/roles/monitoring/alerta/defaults/main.yml @@ -0,0 +1,2 @@ +--- +# alerta_base_path: /srv/alerta diff --git a/roles/monitoring/alerta/tasks/main.yml b/roles/monitoring/alerta/tasks/main.yml new file mode 100644 index 00000000..af7530d8 --- /dev/null +++ b/roles/monitoring/alerta/tasks/main.yml @@ -0,0 +1,32 @@ +--- +- name: create alerta subdirectories + loop: + - config + - postgres + file: + path: "{{ alerta_base_path }}/{{ item }}" + state: directory + +- name: install alertad config template + copy: + content: | + DEBUG = {{ '{{' }} 'True' if env.DEBUG else 'False' {{ '}}' }} + SECRET = "{{ '{{' }} env.SECRET_KEY {{ '}}' }}" + PLUGINS = ['reject', 'blackout', 'heartbeat'] + DEFAULT_ENVIRONMENT = 'unknown' + ALLOWED_ENVIRONMENTS = ['unknown', 'chaos-at-.*'] + HEARTBEAT_EVENTS = ['PrometheusAlertmanagerE2eDeadManSwitch'] + dest: "{{ alerta_base_path }}/config/alertad.conf.j2" + +- name: install pod manifest + vars: + kubernetes_standalone_pod: + name: "alerta" + spec: "{{ lookup('template', 'pod-spec.yml.j2') }}" + mode: "0600" + config_hash_items: + - path: "{{ alerta_base_path }}/config/alertad.conf.j2" + properties: + - checksum + include_role: + name: kubernetes/standalone/pod diff --git a/roles/monitoring/alerta/templates/pod-spec.yml.j2 b/roles/monitoring/alerta/templates/pod-spec.yml.j2 new file mode 100644 index 00000000..e31686e3 --- /dev/null +++ b/roles/monitoring/alerta/templates/pod-spec.yml.j2 @@ -0,0 +1,48 @@ +containers: +- name: alerta + image: "alerta/alerta-web:8.7.0" + resources: + limits: + memory: "1Gi" + env: + - name: "DATABASE_URL" + value: "postgres://alerta:secret@127.0.0.1:5432/alerta" + - name: "AUTH_REQUIRED" + value: "True" + - name: "ADMIN_USERS" + value: "admin" + volumeMounts: + - name: config + mountPath: /app/alertad.conf.j2 + subPath: alertad.conf.j2 + readOnly: true + ports: + - containerPort: 8080 + hostPort: 8080 + +- name: postgresql + image: "postgres:14.2" + args: + - postgres + - -c + - listen_addresses=127.0.0.1 + env: + - name: "POSTGRES_DB" + value: "alerta" + - name: "POSTGRES_USER" + value: "alerta" + - name: "POSTGRES_PASSWORD" + value: "secret" + volumeMounts: + - name: postgres + mountPath: /var/lib/postgresql/data + +volumes: +- name: config + hostPath: + path: "{{ alerta_base_path }}/config" + type: Directory +- name: postgres + hostPath: + path: "{{ alerta_base_path }}/postgres" + type: Directory -- cgit v1.2.3 From 1bbe8582d9650eda816f9c596d8cd3b3fe9998e7 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sun, 6 Mar 2022 00:31:22 +0100 Subject: alerta: test alertmanager silence integration --- inventory/host_vars/ch-mon.yml | 3 ++- roles/monitoring/alerta/tasks/main.yml | 6 +++++- roles/monitoring/prometheus/server/defaults/main/rules_node.yml | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'inventory/host_vars') diff --git a/inventory/host_vars/ch-mon.yml b/inventory/host_vars/ch-mon.yml index bd952fc8..684cc722 100644 --- a/inventory/host_vars/ch-mon.yml +++ b/inventory/host_vars/ch-mon.yml @@ -145,6 +145,7 @@ prometheus_alertmanager_web_external_url: "http://{{ network.primary.address | i prometheus_alertmanager_auth_users: server: "{{ vault_prometheus_alertmanager_auth_user_passwords['server'] }}" admin: "{{ vault_prometheus_alertmanager_auth_user_passwords['admin'] }}" + alerta: "alerta" ## TODO: move this to vault prometheus_alertmanager_route: receiver: alerta @@ -161,7 +162,7 @@ prometheus_alertmanager_receivers: http_config: basic_auth: username: admin - password: alerta + password: alerta ## TODO: move this to vault grafana_secret_key: "{{ vault_grafana_secret_key }}" diff --git a/roles/monitoring/alerta/tasks/main.yml b/roles/monitoring/alerta/tasks/main.yml index af7530d8..a4fd3df9 100644 --- a/roles/monitoring/alerta/tasks/main.yml +++ b/roles/monitoring/alerta/tasks/main.yml @@ -12,10 +12,14 @@ content: | DEBUG = {{ '{{' }} 'True' if env.DEBUG else 'False' {{ '}}' }} SECRET = "{{ '{{' }} env.SECRET_KEY {{ '}}' }}" - PLUGINS = ['reject', 'blackout', 'heartbeat'] + PLUGINS = ['reject', 'blackout', 'heartbeat', 'prometheus'] DEFAULT_ENVIRONMENT = 'unknown' ALLOWED_ENVIRONMENTS = ['unknown', 'chaos-at-.*'] HEARTBEAT_EVENTS = ['PrometheusAlertmanagerE2eDeadManSwitch'] + ALERTMANAGER_USERNAME = 'alerta' + ALERTMANAGER_PASSWORD = 'alerta' + ALERTMANAGER_SILENCE_FROM_ACK = True + ALERTMANAGER_USE_EXTERNALURL_FOR_SILENCES = True dest: "{{ alerta_base_path }}/config/alertad.conf.j2" - name: install pod manifest diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml index 525355d5..75e96bca 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml @@ -206,7 +206,7 @@ prometheus_server_rules_node: severity: warning annotations: summary: Host Network Interface Saturated (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "The network interface \"{{ '{{' }} $labels.interface {{ '}}' }}\" on \"{{ '{{' }} $labels.instance {{ '}}' }}\" is getting overloaded.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + description: "The network interface \"{{ '{{' }} $labels.device {{ '}}' }}\" on \"{{ '{{' }} $labels.instance {{ '}}' }}\" is getting overloaded.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - alert: HostConntrackLimit expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8 -- cgit v1.2.3