From a8e8cb2ed3d5e68d89edd8785ed59f0ee45f81bf Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Tue, 21 Sep 2021 19:34:25 +0200 Subject: prometheus: simplify job config --- .../prometheus/alertmanager/tasks/main.yml | 1 + .../prometheus/exporter/base/tasks/main.yml | 1 + .../prometheus/exporter/blackbox/tasks/main.yml | 1 + .../prometheus/exporter/mikrotik/tasks/main.yml | 1 + .../prometheus/exporter/node/tasks/main.yml | 1 + .../prometheus/exporter/nut/tasks/main.yml | 1 + .../prometheus/server/defaults/main/main.yml | 5 +- .../server/defaults/main/rules_nut-ups.yml | 3 ++ .../prometheus/server/defaults/main/rules_nut.yml | 3 -- .../prometheus/server/filter_plugins/prometheus.py | 31 ++++------- roles/monitoring/prometheus/server/tasks/main.yml | 61 ++++++++++------------ .../templates/job-snippets/blackbox-https.j2 | 2 +- .../server/templates/job-snippets/blackbox-ping.j2 | 2 +- .../server/templates/job-snippets/blackbox-ssh.j2 | 2 +- .../server/templates/job-snippets/generic.j2 | 2 +- .../server/templates/job-snippets/nut-ups.j2 | 13 +++++ .../server/templates/job-snippets/openwrt.j2 | 2 +- .../prometheus/server/templates/prometheus.yml.j2 | 2 +- .../server/templates/targets/generic.yml.j2 | 4 +- .../server/templates/targets/nut-ups.yml.j2 | 17 ++++++ .../prometheus/server/templates/targets/nut.yml.j2 | 17 ------ 21 files changed, 89 insertions(+), 83 deletions(-) create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_nut-ups.yml delete mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_nut.yml create mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/nut-ups.j2 create mode 100644 roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/targets/nut.yml.j2 (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/alertmanager/tasks/main.yml b/roles/monitoring/prometheus/alertmanager/tasks/main.yml index 
fe8ce9ca..0dce6ef4 100644 --- a/roles/monitoring/prometheus/alertmanager/tasks/main.yml +++ b/roles/monitoring/prometheus/alertmanager/tasks/main.yml @@ -6,6 +6,7 @@ - spreadspace_apt_repo_components is defined - "'prometheus' in spreadspace_apt_repo_components" + ## TODO: pin version - name: install apt packages apt: name: prom-alertmanager diff --git a/roles/monitoring/prometheus/exporter/base/tasks/main.yml b/roles/monitoring/prometheus/exporter/base/tasks/main.yml index 9a214f39..9e35c48d 100644 --- a/roles/monitoring/prometheus/exporter/base/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/base/tasks/main.yml @@ -6,6 +6,7 @@ - spreadspace_apt_repo_components is defined - "'prometheus' in spreadspace_apt_repo_components" + ## TODO: pin version - name: install apt packages apt: name: prom-exporter-exporter diff --git a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml index 7ecd8113..96c247ec 100644 --- a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml @@ -1,4 +1,5 @@ --- + ## TODO: pin version - name: install apt packages apt: name: prom-exporter-blackbox diff --git a/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml b/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml index c3ffe31b..dda33e9f 100644 --- a/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml @@ -1,4 +1,5 @@ --- + ## TODO: pin version - name: install apt packages apt: name: prom-exporter-mikrotik diff --git a/roles/monitoring/prometheus/exporter/node/tasks/main.yml b/roles/monitoring/prometheus/exporter/node/tasks/main.yml index 8392e580..00a4ab3f 100644 --- a/roles/monitoring/prometheus/exporter/node/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/node/tasks/main.yml @@ -1,4 +1,5 @@ --- + ## TODO: pin version - name: install apt 
packages apt: name: prom-exporter-node diff --git a/roles/monitoring/prometheus/exporter/nut/tasks/main.yml b/roles/monitoring/prometheus/exporter/nut/tasks/main.yml index 519ac7a0..78a8e817 100644 --- a/roles/monitoring/prometheus/exporter/nut/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/nut/tasks/main.yml @@ -1,4 +1,5 @@ --- + ## TODO: pin version - name: install apt packages apt: name: prom-exporter-nut diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml index 95b9da6d..5be3ecd3 100644 --- a/roles/monitoring/prometheus/server/defaults/main/main.yml +++ b/roles/monitoring/prometheus/server/defaults/main/main.yml @@ -5,9 +5,8 @@ prometheus_server_retention: "15d" -prometheus_server_jobs_generic: +prometheus_server_jobs: - node -prometheus_server_jobs_special: [] #prometheus_server_jobs_extra: | # - job_name: ... @@ -15,7 +14,7 @@ prometheus_server_rules: prometheus: "{{ prometheus_server_rules_prometheus + ((prometheus_server_alertmanager is defined) | ternary(prometheus_server_rules_prometheus_alertmanager, [])) + prometheus_server_rules_prometheus_extra }}" node: "{{ prometheus_server_rules_node + prometheus_server_rules_node_extra }}" openwrt: "{{ prometheus_server_rules_openwrt + prometheus_server_rules_node_extra }}" - nut: "{{ prometheus_server_rules_nut + prometheus_server_rules_nut_extra }}" + "nut-ups": "{{ prometheus_server_rules_nut_ups + prometheus_server_rules_nut_ups_extra }}" "blackbox-ping": "{{ prometheus_server_rules_blackbox_ping + prometheus_server_rules_blackbox_ping_extra }}" "blackbox-https": "{{ prometheus_server_rules_blackbox_https + prometheus_server_rules_blackbox_https_extra }}" "blackbox-ssh": "{{ prometheus_server_rules_blackbox_ssh + prometheus_server_rules_blackbox_ssh_extra }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_nut-ups.yml b/roles/monitoring/prometheus/server/defaults/main/rules_nut-ups.yml new file mode 
100644 index 00000000..842007b4 --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_nut-ups.yml @@ -0,0 +1,3 @@ +--- +prometheus_server_rules_nut_ups_extra: [] +prometheus_server_rules_nut_ups: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_nut.yml b/roles/monitoring/prometheus/server/defaults/main/rules_nut.yml deleted file mode 100644 index d8d64f64..00000000 --- a/roles/monitoring/prometheus/server/defaults/main/rules_nut.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -prometheus_server_rules_nut_extra: [] -prometheus_server_rules_nut: [] diff --git a/roles/monitoring/prometheus/server/filter_plugins/prometheus.py b/roles/monitoring/prometheus/server/filter_plugins/prometheus.py index 5a8722c2..ab865f93 100644 --- a/roles/monitoring/prometheus/server/filter_plugins/prometheus.py +++ b/roles/monitoring/prometheus/server/filter_plugins/prometheus.py @@ -6,38 +6,29 @@ from functools import partial from ansible import errors -def prometheus_generic_job_targets(hostvars, jobs, targets): +def prometheus_job_targets(hostvars, jobs, targets): try: result = [] for job in jobs: for target in targets: - enabled = job in hostvars[target]['prometheus_exporters_default'] or job in hostvars[target]['prometheus_exporters_extra'] - result.append({'job': job, 'instance': target, 'enabled': enabled}) + special_config_varname = 'prometheus_special_job_' + job.replace('-', '_') + if special_config_varname in hostvars[target]: + for config in hostvars[target][special_config_varname]: + result.append({'job': job, 'instance': config['instance'], 'config': config, 'enabled': True}) + + else: + enabled = job in hostvars[target]['prometheus_exporters_default'] or job in hostvars[target]['prometheus_exporters_extra'] + result.append({'job': job, 'instance': target, 'enabled': enabled}) return result except Exception as e: - raise errors.AnsibleFilterError("prometheus_generic_job_targets(): %s" % str(e)) - - -def 
prometheus_special_job_targets(hostvars, jobs, targets): - try: - result = [] - for job in jobs: - for target in targets: - config_varname = 'prometheus_special_job_' + job.replace('-', '_') - if config_varname in hostvars[target]: - for config in hostvars[target][config_varname]: - result.append({'job': job, 'instance': config['instance'], 'config': config}) - return result - except Exception as e: - raise errors.AnsibleFilterError("prometheus_special_job_targets(): %s" % str(e)) + raise errors.AnsibleFilterError("prometheus_job_targets(): %s" % str(e)) class FilterModule(object): ''' prometheus filters ''' filter_map = { - 'prometheus_generic_job_targets': prometheus_generic_job_targets, - 'prometheus_special_job_targets': prometheus_special_job_targets, + 'prometheus_job_targets': prometheus_job_targets, } def filters(self): diff --git a/roles/monitoring/prometheus/server/tasks/main.yml b/roles/monitoring/prometheus/server/tasks/main.yml index d0ccd8af..4bcaa2d5 100644 --- a/roles/monitoring/prometheus/server/tasks/main.yml +++ b/roles/monitoring/prometheus/server/tasks/main.yml @@ -13,6 +13,7 @@ include_role: name: "storage/{{ prometheus_server_storage.type }}/volume" + ## TODO: pin version - name: install apt packages apt: name: prom-server @@ -37,50 +38,46 @@ - name: create configuration directories loop: - - jobs - rules - targets file: path: "/etc/prometheus/{{ item }}" state: directory -- name: create sub-directroy for all exporter types in jobs directory - loop: "{{ prometheus_server_jobs_generic + prometheus_server_jobs_special }}" +- name: create sub-directroy for all jobs in targets directory + loop: "{{ prometheus_server_jobs }}" file: - path: "/etc/prometheus/jobs/{{ item }}" + path: "/etc/prometheus/targets/{{ item }}" state: directory -- name: generate generic targets config - loop: "{{ prometheus_zone_targets }}" - loop_control: - loop_var: target - template: - src: targets/generic.yml.j2 - dest: "/etc/prometheus/targets/{{ target }}.yml" - 
notify: reload prometheus - -- name: enable targets for generic jobs - loop: "{{ hostvars | prometheus_generic_job_targets(prometheus_server_jobs_generic, prometheus_zone_targets) }}" - loop_control: - label: "{{ item.job }} -> {{ item.instance }}" - file: - src: "{{ item.enabled | ternary('/etc/prometheus/targets/' + item.instance + '.yml', omit) }}" - path: "/etc/prometheus/jobs/{{ item.job }}/{{ item.instance }}.yml" - state: "{{ item.enabled | ternary('link', 'absent') }}" - notify: reload prometheus +- name: enable/disable job targets + vars: + job_targets: "{{ hostvars | prometheus_job_targets(prometheus_server_jobs, prometheus_zone_targets) }}" + block: + - name: install files for enabled targets + loop: "{{ job_targets }}" + loop_control: + loop_var: target + label: "{{ target.job }} -> {{ target.instance }}" + when: target.enabled + template: + src: "{{ lookup('first_found', {'paths': ['templates/targets'], 'files': [target.job + '.yml.j2', 'generic.yml.j2']}) }}" + dest: "/etc/prometheus/targets/{{ target.job }}/{{ target.instance }}.yml" + notify: reload prometheus -- name: enable targets for special jobs - loop: "{{ hostvars | prometheus_special_job_targets(prometheus_server_jobs_special, prometheus_zone_targets) }}" - loop_control: - loop_var: target - label: "{{ target.job }} -> {{ target.instance }}" - template: - src: "targets/{{ target.job }}.yml.j2" - dest: "/etc/prometheus/jobs/{{ target.job }}/{{ target.instance }}.yml" - notify: reload prometheus + - name: remove files for disabled targets + loop: "{{ job_targets }}" + loop_control: + loop_var: target + label: "{{ target.job }} -> {{ target.instance }}" + when: not target.enabled + file: + path: "/etc/prometheus/targets/{{ target.job }}/{{ target.instance }}.yml" + state: absent + notify: reload prometheus - name: generate rules files for all jobs - loop: "{{ (prometheus_server_jobs_generic + prometheus_server_jobs_special) | union(['prometheus']) }}" + loop: "{{ prometheus_server_jobs | 
union(['prometheus']) }}" template: src: rules.yml.j2 dest: "/etc/prometheus/rules/{{ item }}.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-https.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-https.j2 index 0a6d2dfa..98a64121 100644 --- a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-https.j2 +++ b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-https.j2 @@ -11,4 +11,4 @@ key_file: /etc/ssl/prometheus/server/scrape-key.pem file_sd_configs: - files: - - "/etc/prometheus/jobs/{{ job }}/*.yml" + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ping.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ping.j2 index 7f4f12df..736ffec1 100644 --- a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ping.j2 +++ b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ping.j2 @@ -11,4 +11,4 @@ key_file: /etc/ssl/prometheus/server/scrape-key.pem file_sd_configs: - files: - - "/etc/prometheus/jobs/{{ job }}/*.yml" + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ssh.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ssh.j2 index 18381e32..166f37ad 100644 --- a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ssh.j2 +++ b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ssh.j2 @@ -11,4 +11,4 @@ key_file: /etc/ssl/prometheus/server/scrape-key.pem file_sd_configs: - files: - - "/etc/prometheus/jobs/{{ job }}/*.yml" + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/generic.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/generic.j2 index 87992eeb..b155c5f7 100644 --- a/roles/monitoring/prometheus/server/templates/job-snippets/generic.j2 +++ 
b/roles/monitoring/prometheus/server/templates/job-snippets/generic.j2 @@ -10,4 +10,4 @@ key_file: /etc/ssl/prometheus/server/scrape-key.pem file_sd_configs: - files: - - "/etc/prometheus/jobs/{{ job }}/*.yml" + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/nut-ups.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/nut-ups.j2 new file mode 100644 index 00000000..3a2c5c62 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/job-snippets/nut-ups.j2 @@ -0,0 +1,13 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - nut + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/openwrt.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/openwrt.j2 index 493a4fdb..e93f8be7 100644 --- a/roles/monitoring/prometheus/server/templates/job-snippets/openwrt.j2 +++ b/roles/monitoring/prometheus/server/templates/job-snippets/openwrt.j2 @@ -2,4 +2,4 @@ scheme: http file_sd_configs: - files: - - "/etc/prometheus/jobs/{{ job }}/*.yml" + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 index 4cfcc498..3286bb82 100644 --- a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 @@ -35,7 +35,7 @@ scrape_configs: static_configs: - targets: ['{{ prometheus_server_alertmanager.url }}'] {% endif %} -{% for job in (prometheus_server_jobs_generic + prometheus_server_jobs_special) %} +{% for job in (prometheus_server_jobs) %} {% include 'job-snippets/' + (lookup('first_found', {'paths': 
['templates/job-snippets'], 'files': [job + '.j2', 'generic.j2']}) | basename) %}{{ '' }} {% endfor %} diff --git a/roles/monitoring/prometheus/server/templates/targets/generic.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/generic.yml.j2 index e83b6bf4..45af10c0 100644 --- a/roles/monitoring/prometheus/server/templates/targets/generic.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/targets/generic.yml.j2 @@ -1,3 +1,3 @@ -- targets: [ "{{ hostvars[target].prometheus_scrape_endpoint }}" ] +- targets: [ "{{ hostvars[target.instance].prometheus_scrape_endpoint }}" ] labels: - instance: "{{ target }}" + instance: "{{ target.instance }}" diff --git a/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 new file mode 100644 index 00000000..da3de3d7 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 @@ -0,0 +1,17 @@ +- targets: [ "{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}" ] + labels: + instance: "{{ target.instance }}" + __param_ups: {{ target.config.ups }} + __param_server: {{ target.config.server | default('127.0.0.1') }} +{% if 'username' in target.config %} + __param_username: {{ target.config.username }} +{% endif %} +{% if 'password' in target.config %} + __param_password: {{ target.config.password }} +{% endif %} +{% if 'variables' in target.config %} + __param_variables: {{ target.config.variables }} +{% endif %} +{% if 'statuses' in target.config %} + __param_statuses: {{ target.config.statuses }} +{% endif %} diff --git a/roles/monitoring/prometheus/server/templates/targets/nut.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/nut.yml.j2 deleted file mode 100644 index da3de3d7..00000000 --- a/roles/monitoring/prometheus/server/templates/targets/nut.yml.j2 +++ /dev/null @@ -1,17 +0,0 @@ -- targets: [ "{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint 
}}" ] - labels: - instance: "{{ target.instance }}" - __param_ups: {{ target.config.ups }} - __param_server: {{ target.config.server | default('127.0.0.1') }} -{% if 'username' in target.config %} - __param_username: {{ target.config.username }} -{% endif %} -{% if 'password' in target.config %} - __param_password: {{ target.config.password }} -{% endif %} -{% if 'variables' in target.config %} - __param_variables: {{ target.config.variables }} -{% endif %} -{% if 'statuses' in target.config %} - __param_statuses: {{ target.config.statuses }} -{% endif %} -- cgit v1.2.3 From c4e17105ac2086033378b865c600d09d112cc6d3 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Wed, 22 Sep 2021 01:10:47 +0200 Subject: cosmetic changes --- roles/monitoring/prometheus/exporter/blackbox/templates/config.yml.j2 | 2 +- roles/monitoring/prometheus/exporter/mikrotik/templates/config.yml.j2 | 4 ++-- roles/monitoring/prometheus/server/templates/prometheus.yml.j2 | 2 +- .../prometheus/server/templates/targets/blackbox-https.yml.j2 | 4 ++-- .../prometheus/server/templates/targets/blackbox-ping.yml.j2 | 4 ++-- .../prometheus/server/templates/targets/blackbox-ssh.yml.j2 | 4 ++-- roles/monitoring/prometheus/server/templates/targets/generic.yml.j2 | 4 ++-- roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 | 4 ++-- 8 files changed, 14 insertions(+), 14 deletions(-) (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/exporter/blackbox/templates/config.yml.j2 b/roles/monitoring/prometheus/exporter/blackbox/templates/config.yml.j2 index 01e3f7a0..0ff9db13 100644 --- a/roles/monitoring/prometheus/exporter/blackbox/templates/config.yml.j2 +++ b/roles/monitoring/prometheus/exporter/blackbox/templates/config.yml.j2 @@ -1,4 +1,4 @@ # {{ ansible_managed }} modules: - {{ prometheus_exporter_blackbox_modules | combine(prometheus_exporter_blackbox_modules_extra) | to_nice_yaml(indent=2) | indent(2)}} + {{ prometheus_exporter_blackbox_modules | 
combine(prometheus_exporter_blackbox_modules_extra) | to_nice_yaml(indent=2) | indent(2) }} diff --git a/roles/monitoring/prometheus/exporter/mikrotik/templates/config.yml.j2 b/roles/monitoring/prometheus/exporter/mikrotik/templates/config.yml.j2 index a2dc1c71..576ee12f 100644 --- a/roles/monitoring/prometheus/exporter/mikrotik/templates/config.yml.j2 +++ b/roles/monitoring/prometheus/exporter/mikrotik/templates/config.yml.j2 @@ -1,7 +1,7 @@ # {{ ansible_managed }} devices: - {{ prometheus_exporter_mikrotik_devices | to_nice_yaml(indent=2) | indent(2)}} + {{ prometheus_exporter_mikrotik_devices | to_nice_yaml(indent=2) | indent(2) }} features: - {{ prometheus_exporter_mikrotik_features | to_nice_yaml(indent=2) | indent(2)}} + {{ prometheus_exporter_mikrotik_features | to_nice_yaml(indent=2) | indent(2) }} diff --git a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 index 3286bb82..4a079896 100644 --- a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 @@ -25,7 +25,7 @@ scrape_configs: static_configs: - targets: ['localhost:9090'] labels: - instance: "{{ inventory_hostname }}" + instance: '{{ inventory_hostname }}' {% if prometheus_server_alertmanager is defined %} - job_name: 'alertmanager' diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox-https.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox-https.yml.j2 index e843de36..b1a33df3 100644 --- a/roles/monitoring/prometheus/server/templates/targets/blackbox-https.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/targets/blackbox-https.yml.j2 @@ -1,4 +1,4 @@ -- targets: [ "{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}" ] +- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] labels: - instance: "{{ target.instance }}" + instance: '{{ 
target.instance }}' __param_target: {{ target.config.address }} diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox-ping.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox-ping.yml.j2 index e843de36..b1a33df3 100644 --- a/roles/monitoring/prometheus/server/templates/targets/blackbox-ping.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/targets/blackbox-ping.yml.j2 @@ -1,4 +1,4 @@ -- targets: [ "{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}" ] +- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] labels: - instance: "{{ target.instance }}" + instance: '{{ target.instance }}' __param_target: {{ target.config.address }} diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox-ssh.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox-ssh.yml.j2 index e843de36..b1a33df3 100644 --- a/roles/monitoring/prometheus/server/templates/targets/blackbox-ssh.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/targets/blackbox-ssh.yml.j2 @@ -1,4 +1,4 @@ -- targets: [ "{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}" ] +- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] labels: - instance: "{{ target.instance }}" + instance: '{{ target.instance }}' __param_target: {{ target.config.address }} diff --git a/roles/monitoring/prometheus/server/templates/targets/generic.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/generic.yml.j2 index 45af10c0..6591362b 100644 --- a/roles/monitoring/prometheus/server/templates/targets/generic.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/targets/generic.yml.j2 @@ -1,3 +1,3 @@ -- targets: [ "{{ hostvars[target.instance].prometheus_scrape_endpoint }}" ] +- targets: [ '{{ hostvars[target.instance].prometheus_scrape_endpoint }}' ] labels: - instance: "{{ target.instance }}" + instance: '{{ target.instance 
}}' diff --git a/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 index da3de3d7..d63d79a7 100644 --- a/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 @@ -1,6 +1,6 @@ -- targets: [ "{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}" ] +- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] labels: - instance: "{{ target.instance }}" + instance: '{{ target.instance }}' __param_ups: {{ target.config.ups }} __param_server: {{ target.config.server | default('127.0.0.1') }} {% if 'username' in target.config %} -- cgit v1.2.3 From ad08b01391c404d4e0356467fc627d711ece8916 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Wed, 22 Sep 2021 01:25:59 +0200 Subject: prometheus: no more special jobs --- inventory/group_vars/chaos-at-home-ups/vars.yml | 2 +- inventory/group_vars/ele-ups/vars.yml | 2 +- inventory/host_vars/ch-mon.yml | 6 +++--- roles/monitoring/prometheus/server/filter_plugins/prometheus.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'roles/monitoring') diff --git a/inventory/group_vars/chaos-at-home-ups/vars.yml b/inventory/group_vars/chaos-at-home-ups/vars.yml index 4f003a7a..99868165 100644 --- a/inventory/group_vars/chaos-at-home-ups/vars.yml +++ b/inventory/group_vars/chaos-at-home-ups/vars.yml @@ -11,7 +11,7 @@ prometheus_scrape_endpoint: "{{ network_mgmt_zone.prefix | ipaddr(network_mgmt_z prometheus_exporters_default: - openwrt -prometheus_special_job_nut_ups: +prometheus_job_nut_ups: - exporter_hostname: ch-mon instance: "ups-{{ ups_name }}" ups: "{{ ups_name }}" diff --git a/inventory/group_vars/ele-ups/vars.yml b/inventory/group_vars/ele-ups/vars.yml index a57382ff..0d22f770 100644 --- a/inventory/group_vars/ele-ups/vars.yml +++ b/inventory/group_vars/ele-ups/vars.yml @@ -14,7 
+14,7 @@ prometheus_scrape_endpoint: "{{ network_mgmt_zone.prefix | ipaddr(network_mgmt_z prometheus_exporters_default: - openwrt -prometheus_special_job_nut_ups: +prometheus_job_nut_ups: exporter_hostname: ele-mon instance: "ups-{{ ups_name }}" ups: "{{ ups_name }}" diff --git a/inventory/host_vars/ch-mon.yml b/inventory/host_vars/ch-mon.yml index 7d8e334b..a211d4bb 100644 --- a/inventory/host_vars/ch-mon.yml +++ b/inventory/host_vars/ch-mon.yml @@ -76,7 +76,7 @@ prometheus_exporter_blackbox_modules_extra: icmp: prober: icmp -prometheus_special_job_blackbox_ping: +prometheus_job_blackbox_ping: - exporter_hostname: ch-mon instance: "ping-magentagw" address: 62.99.185.129 @@ -84,12 +84,12 @@ prometheus_special_job_blackbox_ping: instance: "ping-quad9" address: 9.9.9.9 -prometheus_special_job_blackbox_https: +prometheus_job_blackbox_https: - exporter_hostname: ch-mon instance: "https-web.chaos-at-home.org" address: web.chaos-at-home.org -prometheus_special_job_blackbox_ssh: +prometheus_job_blackbox_ssh: - exporter_hostname: ch-mon instance: "ssh-{{ inventory_hostname }}" address: "{{ network_zones.svc.prefix | ipaddr(network_zones.svc.offsets[inventory_hostname]) | ipaddr('address') }}:{{ ansible_port | default(22) }}" diff --git a/roles/monitoring/prometheus/server/filter_plugins/prometheus.py b/roles/monitoring/prometheus/server/filter_plugins/prometheus.py index ab865f93..056d216f 100644 --- a/roles/monitoring/prometheus/server/filter_plugins/prometheus.py +++ b/roles/monitoring/prometheus/server/filter_plugins/prometheus.py @@ -11,7 +11,7 @@ def prometheus_job_targets(hostvars, jobs, targets): result = [] for job in jobs: for target in targets: - special_config_varname = 'prometheus_special_job_' + job.replace('-', '_') + special_config_varname = 'prometheus_job_' + job.replace('-', '_') if special_config_varname in hostvars[target]: for config in hostvars[target][special_config_varname]: result.append({'job': job, 'instance': config['instance'], 'config': 
config, 'enabled': True}) -- cgit v1.2.3 From d4a5276b2813f95d56e8fadb0e6d8ff169b8eecb Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Thu, 23 Sep 2021 14:40:38 +0200 Subject: move exporter_exporter configuration into canonical config dir --- roles/monitoring/prometheus/exporter/base/tasks/main.yml | 2 +- roles/monitoring/prometheus/exporter/base/templates/service.j2 | 2 +- roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml | 2 +- roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml | 2 +- roles/monitoring/prometheus/exporter/node/tasks/main.yml | 2 +- roles/monitoring/prometheus/exporter/nut/tasks/main.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/exporter/base/tasks/main.yml b/roles/monitoring/prometheus/exporter/base/tasks/main.yml index 9e35c48d..eeb2a23d 100644 --- a/roles/monitoring/prometheus/exporter/base/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/base/tasks/main.yml @@ -14,7 +14,7 @@ - name: create configuration directories file: - path: /etc/prometheus/exporter/enabled + path: /etc/prometheus/exporter/exporter state: directory - name: add user for prometheus-exporter diff --git a/roles/monitoring/prometheus/exporter/base/templates/service.j2 b/roles/monitoring/prometheus/exporter/base/templates/service.j2 index c24baf43..3d44744a 100644 --- a/roles/monitoring/prometheus/exporter/base/templates/service.j2 +++ b/roles/monitoring/prometheus/exporter/base/templates/service.j2 @@ -4,7 +4,7 @@ Description=Prometheus exporter proxy [Service] Restart=always User=prometheus-exporter -ExecStart=/usr/bin/prometheus-exporter-exporter -config.dirs=/etc/prometheus/exporter/enabled -config.file="" -web.listen-address="" -web.tls.listen-address="{{ prometheus_exporter_listen }}" -web.tls.cert="/etc/ssl/prometheus/exporter/crt.pem" -web.tls.key="/etc/ssl/prometheus/exporter/key.pem" --web.tls.ca="/etc/ssl/prometheus/ca-crt.pem" -web.tls.verify 
+ExecStart=/usr/bin/prometheus-exporter-exporter -config.dirs=/etc/prometheus/exporter/exporter -config.file="" -web.listen-address="" -web.tls.listen-address="{{ prometheus_exporter_listen }}" -web.tls.cert="/etc/ssl/prometheus/exporter/crt.pem" -web.tls.key="/etc/ssl/prometheus/exporter/key.pem" --web.tls.ca="/etc/ssl/prometheus/ca-crt.pem" -web.tls.verify {# TODO: implement reloading once the exporter_exporter supports this #} # systemd hardening-options diff --git a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml index 96c247ec..cab521cc 100644 --- a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml @@ -36,5 +36,5 @@ http: port: 9115 path: '/probe' - dest: /etc/prometheus/exporter/enabled/blackbox.yml + dest: /etc/prometheus/exporter/exporter/blackbox.yml notify: reload prometheus-exporter-exporter diff --git a/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml b/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml index dda33e9f..07219c68 100644 --- a/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml @@ -38,5 +38,5 @@ method: http http: port: 9436 - dest: /etc/prometheus/exporter/enabled/mikrotik.yml + dest: /etc/prometheus/exporter/exporter/mikrotik.yml notify: reload prometheus-exporter-exporter diff --git a/roles/monitoring/prometheus/exporter/node/tasks/main.yml b/roles/monitoring/prometheus/exporter/node/tasks/main.yml index 00a4ab3f..3fa0a1ec 100644 --- a/roles/monitoring/prometheus/exporter/node/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/node/tasks/main.yml @@ -29,5 +29,5 @@ method: http http: port: 9100 - dest: /etc/prometheus/exporter/enabled/node.yml + dest: /etc/prometheus/exporter/exporter/node.yml notify: reload prometheus-exporter-exporter diff --git 
a/roles/monitoring/prometheus/exporter/nut/tasks/main.yml b/roles/monitoring/prometheus/exporter/nut/tasks/main.yml index 78a8e817..8245feae 100644 --- a/roles/monitoring/prometheus/exporter/nut/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/nut/tasks/main.yml @@ -25,5 +25,5 @@ http: port: 9199 path: /ups_metrics - dest: /etc/prometheus/exporter/enabled/nut.yml + dest: /etc/prometheus/exporter/exporter/nut.yml notify: reload prometheus-exporter-exporter -- cgit v1.2.3 From ef4432c51bacb5b92c03a42cb1ea7f9d837ec8b6 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Thu, 23 Sep 2021 16:50:24 +0200 Subject: use / as separator for jobs formerly known as special --- inventory/group_vars/chaos-at-home-ups/vars.yml | 2 +- inventory/group_vars/ele-ups/vars.yml | 2 +- inventory/group_vars/promzone-chaos-at-home/vars.yml | 8 ++++---- inventory/group_vars/promzone-elevate-festival/vars.yml | 2 +- inventory/host_vars/ch-mon.yml | 6 +++--- .../monitoring/prometheus/server/defaults/main/main.yml | 8 ++++---- .../server/defaults/main/rules_blackbox-https.yml | 3 --- .../server/defaults/main/rules_blackbox-ping.yml | 3 --- .../server/defaults/main/rules_blackbox-ssh.yml | 3 --- .../server/defaults/main/rules_blackbox__https.yml | 3 +++ .../server/defaults/main/rules_blackbox__ping.yml | 3 +++ .../server/defaults/main/rules_blackbox__ssh.yml | 3 +++ .../prometheus/server/defaults/main/rules_nut-ups.yml | 3 --- .../prometheus/server/defaults/main/rules_nut__ups.yml | 3 +++ .../prometheus/server/filter_plugins/prometheus.py | 2 +- roles/monitoring/prometheus/server/tasks/main.yml | 8 +++++++- .../server/templates/job-snippets/blackbox-https.j2 | 14 -------------- .../server/templates/job-snippets/blackbox-ping.j2 | 14 -------------- .../server/templates/job-snippets/blackbox-ssh.j2 | 14 -------------- .../server/templates/job-snippets/blackbox/https.j2 | 14 ++++++++++++++ .../server/templates/job-snippets/blackbox/ping.j2 | 14 ++++++++++++++
.../server/templates/job-snippets/blackbox/ssh.j2 | 14 ++++++++++++++ .../prometheus/server/templates/job-snippets/nut-ups.j2 | 13 ------------- .../prometheus/server/templates/job-snippets/nut/ups.j2 | 13 +++++++++++++ .../prometheus/server/templates/prometheus.yml.j2 | 5 ++++- .../server/templates/targets/blackbox-https.yml.j2 | 4 ---- .../server/templates/targets/blackbox-ping.yml.j2 | 4 ---- .../server/templates/targets/blackbox-ssh.yml.j2 | 4 ---- .../server/templates/targets/blackbox/https.yml.j2 | 4 ++++ .../server/templates/targets/blackbox/ping.yml.j2 | 4 ++++ .../server/templates/targets/blackbox/ssh.yml.j2 | 4 ++++ .../prometheus/server/templates/targets/nut-ups.yml.j2 | 17 ----------------- .../prometheus/server/templates/targets/nut/ups.yml.j2 | 17 +++++++++++++++++ 33 files changed, 122 insertions(+), 113 deletions(-) delete mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox-https.yml delete mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox-ping.yml delete mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox-ssh.yml create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml delete mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_nut-ups.yml create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml delete mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/blackbox-https.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ping.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ssh.j2 create mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/blackbox/https.j2 create mode 100644 
roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ping.j2 create mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ssh.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/nut-ups.j2 create mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/nut/ups.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/targets/blackbox-https.yml.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/targets/blackbox-ping.yml.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/targets/blackbox-ssh.yml.j2 create mode 100644 roles/monitoring/prometheus/server/templates/targets/blackbox/https.yml.j2 create mode 100644 roles/monitoring/prometheus/server/templates/targets/blackbox/ping.yml.j2 create mode 100644 roles/monitoring/prometheus/server/templates/targets/blackbox/ssh.yml.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 create mode 100644 roles/monitoring/prometheus/server/templates/targets/nut/ups.yml.j2 (limited to 'roles/monitoring') diff --git a/inventory/group_vars/chaos-at-home-ups/vars.yml b/inventory/group_vars/chaos-at-home-ups/vars.yml index 99868165..f8c1bdf1 100644 --- a/inventory/group_vars/chaos-at-home-ups/vars.yml +++ b/inventory/group_vars/chaos-at-home-ups/vars.yml @@ -11,7 +11,7 @@ prometheus_scrape_endpoint: "{{ network_mgmt_zone.prefix | ipaddr(network_mgmt_z prometheus_exporters_default: - openwrt -prometheus_job_nut_ups: +prometheus_job_nut__ups: - exporter_hostname: ch-mon instance: "ups-{{ ups_name }}" ups: "{{ ups_name }}" diff --git a/inventory/group_vars/ele-ups/vars.yml b/inventory/group_vars/ele-ups/vars.yml index 0d22f770..1c4613a3 100644 --- a/inventory/group_vars/ele-ups/vars.yml +++ b/inventory/group_vars/ele-ups/vars.yml @@ -14,7 +14,7 @@ prometheus_scrape_endpoint: "{{ network_mgmt_zone.prefix | ipaddr(network_mgmt_z prometheus_exporters_default: - openwrt 
-prometheus_job_nut_ups: +prometheus_job_nut__ups: exporter_hostname: ele-mon instance: "ups-{{ ups_name }}" ups: "{{ ups_name }}" diff --git a/inventory/group_vars/promzone-chaos-at-home/vars.yml b/inventory/group_vars/promzone-chaos-at-home/vars.yml index d1958d47..84ed1263 100644 --- a/inventory/group_vars/promzone-chaos-at-home/vars.yml +++ b/inventory/group_vars/promzone-chaos-at-home/vars.yml @@ -9,10 +9,10 @@ prometheus_server: ch-mon prometheus_server_jobs: - node - openwrt - - nut-ups - - blackbox-ping - - blackbox-https - - blackbox-ssh + - nut/ups + - blackbox/ping + - blackbox/https + - blackbox/ssh prometheus_zone_name: chaos@home prometheus_zone_targets: "{{ groups['promzone-chaos-at-home'] }}" diff --git a/inventory/group_vars/promzone-elevate-festival/vars.yml b/inventory/group_vars/promzone-elevate-festival/vars.yml index a65a0cb7..43115dc4 100644 --- a/inventory/group_vars/promzone-elevate-festival/vars.yml +++ b/inventory/group_vars/promzone-elevate-festival/vars.yml @@ -9,7 +9,7 @@ prometheus_server: ele-mon prometheus_server_jobs: - node - openwrt - - nut-ups + - nut/ups prometheus_zone_name: Elevate Festival prometheus_zone_targets: "{{ groups['promzone-elevate-festival'] }}" diff --git a/inventory/host_vars/ch-mon.yml b/inventory/host_vars/ch-mon.yml index a211d4bb..242c4835 100644 --- a/inventory/host_vars/ch-mon.yml +++ b/inventory/host_vars/ch-mon.yml @@ -76,7 +76,7 @@ prometheus_exporter_blackbox_modules_extra: icmp: prober: icmp -prometheus_job_blackbox_ping: +prometheus_job_blackbox__ping: - exporter_hostname: ch-mon instance: "ping-magentagw" address: 62.99.185.129 @@ -84,12 +84,12 @@ prometheus_job_blackbox_ping: instance: "ping-quad9" address: 9.9.9.9 -prometheus_job_blackbox_https: +prometheus_job_blackbox__https: - exporter_hostname: ch-mon instance: "https-web.chaos-at-home.org" address: web.chaos-at-home.org -prometheus_job_blackbox_ssh: +prometheus_job_blackbox__ssh: - exporter_hostname: ch-mon instance: "ssh-{{ 
inventory_hostname }}" address: "{{ network_zones.svc.prefix | ipaddr(network_zones.svc.offsets[inventory_hostname]) | ipaddr('address') }}:{{ ansible_port | default(22) }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml index 5be3ecd3..bae0cdba 100644 --- a/roles/monitoring/prometheus/server/defaults/main/main.yml +++ b/roles/monitoring/prometheus/server/defaults/main/main.yml @@ -14,10 +14,10 @@ prometheus_server_rules: prometheus: "{{ prometheus_server_rules_prometheus + ((prometheus_server_alertmanager is defined) | ternary(prometheus_server_rules_prometheus_alertmanager, [])) + prometheus_server_rules_prometheus_extra }}" node: "{{ prometheus_server_rules_node + prometheus_server_rules_node_extra }}" openwrt: "{{ prometheus_server_rules_openwrt + prometheus_server_rules_node_extra }}" - "nut-ups": "{{ prometheus_server_rules_nut_ups + prometheus_server_rules_nut_ups_extra }}" - "blackbox-ping": "{{ prometheus_server_rules_blackbox_ping + prometheus_server_rules_blackbox_ping_extra }}" - "blackbox-https": "{{ prometheus_server_rules_blackbox_https + prometheus_server_rules_blackbox_https_extra }}" - "blackbox-ssh": "{{ prometheus_server_rules_blackbox_ssh + prometheus_server_rules_blackbox_ssh_extra }}" + nut/ups: "{{ prometheus_server_rules_nut__ups + prometheus_server_rules_nut__ups_extra }}" + blackbox/ping: "{{ prometheus_server_rules_blackbox__ping + prometheus_server_rules_blackbox__ping_extra }}" + blackbox/https: "{{ prometheus_server_rules_blackbox__https + prometheus_server_rules_blackbox__https_extra }}" + blackbox/ssh: "{{ prometheus_server_rules_blackbox__ssh + prometheus_server_rules_blackbox__ssh_extra }}" # prometheus_server_alertmanager: # url: "127.0.0.1:9093" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox-https.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox-https.yml deleted file mode 100644 index 
bb806075..00000000 --- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox-https.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -prometheus_server_rules_blackbox_https_extra: [] -prometheus_server_rules_blackbox_https: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox-ping.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox-ping.yml deleted file mode 100644 index 56c122f5..00000000 --- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox-ping.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -prometheus_server_rules_blackbox_ping_extra: [] -prometheus_server_rules_blackbox_ping: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox-ssh.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox-ssh.yml deleted file mode 100644 index 727d2292..00000000 --- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox-ssh.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -prometheus_server_rules_blackbox_ssh_extra: [] -prometheus_server_rules_blackbox_ssh: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml new file mode 100644 index 00000000..cfdc10bd --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml @@ -0,0 +1,3 @@ +--- +prometheus_server_rules_blackbox__https_extra: [] +prometheus_server_rules_blackbox__https: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml new file mode 100644 index 00000000..06ce8607 --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml @@ -0,0 +1,3 @@ +--- +prometheus_server_rules_blackbox__ping_extra: [] +prometheus_server_rules_blackbox__ping: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml 
b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml new file mode 100644 index 00000000..8e717c41 --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml @@ -0,0 +1,3 @@ +--- +prometheus_server_rules_blackbox__ssh_extra: [] +prometheus_server_rules_blackbox__ssh: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_nut-ups.yml b/roles/monitoring/prometheus/server/defaults/main/rules_nut-ups.yml deleted file mode 100644 index 842007b4..00000000 --- a/roles/monitoring/prometheus/server/defaults/main/rules_nut-ups.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -prometheus_server_rules_nut_ups_extra: [] -prometheus_server_rules_nut_ups: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml b/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml new file mode 100644 index 00000000..bccb0ca8 --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml @@ -0,0 +1,3 @@ +--- +prometheus_server_rules_nut__ups_extra: [] +prometheus_server_rules_nut__ups: [] diff --git a/roles/monitoring/prometheus/server/filter_plugins/prometheus.py b/roles/monitoring/prometheus/server/filter_plugins/prometheus.py index 056d216f..1443e837 100644 --- a/roles/monitoring/prometheus/server/filter_plugins/prometheus.py +++ b/roles/monitoring/prometheus/server/filter_plugins/prometheus.py @@ -11,7 +11,7 @@ def prometheus_job_targets(hostvars, jobs, targets): result = [] for job in jobs: for target in targets: - special_config_varname = 'prometheus_job_' + job.replace('-', '_') + special_config_varname = 'prometheus_job_' + job.replace('-', '_').replace('/', '__') if special_config_varname in hostvars[target]: for config in hostvars[target][special_config_varname]: result.append({'job': job, 'instance': config['instance'], 'config': config, 'enabled': True}) diff --git a/roles/monitoring/prometheus/server/tasks/main.yml 
b/roles/monitoring/prometheus/server/tasks/main.yml index 4bcaa2d5..16167c9c 100644 --- a/roles/monitoring/prometheus/server/tasks/main.yml +++ b/roles/monitoring/prometheus/server/tasks/main.yml @@ -44,7 +44,7 @@ path: "/etc/prometheus/{{ item }}" state: directory -- name: create sub-directroy for all jobs in targets directory +- name: create sub-directories for all jobs in targets directory loop: "{{ prometheus_server_jobs }}" file: path: "/etc/prometheus/targets/{{ item }}" @@ -76,6 +76,12 @@ state: absent notify: reload prometheus +- name: create sub-directories for all jobs in rules directory + loop: "{{ prometheus_server_jobs | select('match', '.*/.*') | map('dirname') | unique }}" + file: + path: "/etc/prometheus/rules/{{ item }}" + state: directory + - name: generate rules files for all jobs loop: "{{ prometheus_server_jobs | union(['prometheus']) }}" template: diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-https.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-https.j2 deleted file mode 100644 index 98a64121..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-https.j2 +++ /dev/null @@ -1,14 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - blackbox - - http_tls_2xx - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ping.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ping.j2 deleted file mode 100644 index 736ffec1..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ping.j2 +++ /dev/null @@ -1,14 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - blackbox - - icmp - scheme: 
https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ssh.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ssh.j2 deleted file mode 100644 index 166f37ad..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox-ssh.j2 +++ /dev/null @@ -1,14 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - blackbox - - ssh_banner - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/https.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/https.j2 new file mode 100644 index 00000000..98a64121 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/https.j2 @@ -0,0 +1,14 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - blackbox + - http_tls_2xx + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ping.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ping.j2 new file mode 100644 index 00000000..736ffec1 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ping.j2 @@ -0,0 +1,14 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: 
+ - blackbox + - icmp + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ssh.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ssh.j2 new file mode 100644 index 00000000..166f37ad --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ssh.j2 @@ -0,0 +1,14 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - blackbox + - ssh_banner + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/nut-ups.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/nut-ups.j2 deleted file mode 100644 index 3a2c5c62..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/nut-ups.j2 +++ /dev/null @@ -1,13 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - nut - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/nut/ups.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/nut/ups.j2 new file mode 100644 index 00000000..3a2c5c62 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/job-snippets/nut/ups.j2 @@ -0,0 +1,13 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - nut + scheme: https + 
tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 index 4a079896..8156341d 100644 --- a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 @@ -6,6 +6,9 @@ global: rule_files: - /etc/prometheus/rules/*.yml +{% for subdir in (prometheus_server_jobs | select('match', '.*/.*') | map('dirname') | unique) %} + - /etc/prometheus/rules/{{ subdir }}/*.yml +{% endfor %} {% if prometheus_server_alertmanager is defined %} alerting: @@ -37,7 +40,7 @@ scrape_configs: {% endif %} {% for job in (prometheus_server_jobs) %} -{% include 'job-snippets/' + (lookup('first_found', {'paths': ['templates/job-snippets'], 'files': [job + '.j2', 'generic.j2']}) | basename) %}{{ '' }} +{% include lookup('first_found', {'paths': ['templates/job-snippets'], 'files': [job + '.j2', 'generic.j2']}) | relpath(template_fullpath | dirname) %}{{ '' }} {% endfor %} {% if prometheus_server_jobs_extra is defined %} diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox-https.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox-https.yml.j2 deleted file mode 100644 index b1a33df3..00000000 --- a/roles/monitoring/prometheus/server/templates/targets/blackbox-https.yml.j2 +++ /dev/null @@ -1,4 +0,0 @@ -- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] - labels: - instance: '{{ target.instance }}' - __param_target: {{ target.config.address }} diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox-ping.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox-ping.yml.j2 deleted file mode 100644 
index b1a33df3..00000000 --- a/roles/monitoring/prometheus/server/templates/targets/blackbox-ping.yml.j2 +++ /dev/null @@ -1,4 +0,0 @@ -- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] - labels: - instance: '{{ target.instance }}' - __param_target: {{ target.config.address }} diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox-ssh.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox-ssh.yml.j2 deleted file mode 100644 index b1a33df3..00000000 --- a/roles/monitoring/prometheus/server/templates/targets/blackbox-ssh.yml.j2 +++ /dev/null @@ -1,4 +0,0 @@ -- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] - labels: - instance: '{{ target.instance }}' - __param_target: {{ target.config.address }} diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox/https.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox/https.yml.j2 new file mode 100644 index 00000000..29c89590 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/targets/blackbox/https.yml.j2 @@ -0,0 +1,4 @@ +- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] + labels: + instance: '{{ target.instance }}' + __param_target: '{{ target.config.address }}' diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox/ping.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox/ping.yml.j2 new file mode 100644 index 00000000..29c89590 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/targets/blackbox/ping.yml.j2 @@ -0,0 +1,4 @@ +- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] + labels: + instance: '{{ target.instance }}' + __param_target: '{{ target.config.address }}' diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox/ssh.yml.j2 
b/roles/monitoring/prometheus/server/templates/targets/blackbox/ssh.yml.j2 new file mode 100644 index 00000000..29c89590 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/targets/blackbox/ssh.yml.j2 @@ -0,0 +1,4 @@ +- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] + labels: + instance: '{{ target.instance }}' + __param_target: '{{ target.config.address }}' diff --git a/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 deleted file mode 100644 index d63d79a7..00000000 --- a/roles/monitoring/prometheus/server/templates/targets/nut-ups.yml.j2 +++ /dev/null @@ -1,17 +0,0 @@ -- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] - labels: - instance: '{{ target.instance }}' - __param_ups: {{ target.config.ups }} - __param_server: {{ target.config.server | default('127.0.0.1') }} -{% if 'username' in target.config %} - __param_username: {{ target.config.username }} -{% endif %} -{% if 'password' in target.config %} - __param_password: {{ target.config.password }} -{% endif %} -{% if 'variables' in target.config %} - __param_variables: {{ target.config.variables }} -{% endif %} -{% if 'statuses' in target.config %} - __param_statuses: {{ target.config.statuses }} -{% endif %} diff --git a/roles/monitoring/prometheus/server/templates/targets/nut/ups.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/nut/ups.yml.j2 new file mode 100644 index 00000000..6003cd46 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/targets/nut/ups.yml.j2 @@ -0,0 +1,17 @@ +- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] + labels: + instance: '{{ target.instance }}' + __param_ups: '{{ target.config.ups }}' + __param_server: '{{ target.config.server | default('127.0.0.1') }}' +{% if 'username' in target.config %} + __param_username: '{{ 
target.config.username }}' +{% endif %} +{% if 'password' in target.config %} + __param_password: '{{ target.config.password }}' +{% endif %} +{% if 'variables' in target.config %} + __param_variables: '{{ target.config.variables }}' +{% endif %} +{% if 'statuses' in target.config %} + __param_statuses: '{{ target.config.statuses }}' +{% endif %} -- cgit v1.2.3 From 2e2087393521703bbe1394afc6c0dcc4943e2b35 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Thu, 23 Sep 2021 17:03:06 +0200 Subject: rename prometheus job-snippets to jobs --- .../server/templates/job-snippets/blackbox/https.j2 | 14 -------------- .../server/templates/job-snippets/blackbox/ping.j2 | 14 -------------- .../server/templates/job-snippets/blackbox/ssh.j2 | 14 -------------- .../prometheus/server/templates/job-snippets/generic.j2 | 13 ------------- .../prometheus/server/templates/job-snippets/nut/ups.j2 | 13 ------------- .../prometheus/server/templates/job-snippets/openwrt.j2 | 5 ----- .../prometheus/server/templates/jobs/blackbox/https.j2 | 14 ++++++++++++++ .../prometheus/server/templates/jobs/blackbox/ping.j2 | 14 ++++++++++++++ .../prometheus/server/templates/jobs/blackbox/ssh.j2 | 14 ++++++++++++++ .../monitoring/prometheus/server/templates/jobs/generic.j2 | 13 +++++++++++++ .../monitoring/prometheus/server/templates/jobs/nut/ups.j2 | 13 +++++++++++++ .../monitoring/prometheus/server/templates/jobs/openwrt.j2 | 5 +++++ .../prometheus/server/templates/prometheus.yml.j2 | 2 +- 13 files changed, 74 insertions(+), 74 deletions(-) delete mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/blackbox/https.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ping.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ssh.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/generic.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/nut/ups.j2 delete 
mode 100644 roles/monitoring/prometheus/server/templates/job-snippets/openwrt.j2 create mode 100644 roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 create mode 100644 roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 create mode 100644 roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 create mode 100644 roles/monitoring/prometheus/server/templates/jobs/generic.j2 create mode 100644 roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 create mode 100644 roles/monitoring/prometheus/server/templates/jobs/openwrt.j2 (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/https.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/https.j2 deleted file mode 100644 index 98a64121..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/https.j2 +++ /dev/null @@ -1,14 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - blackbox - - http_tls_2xx - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ping.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ping.j2 deleted file mode 100644 index 736ffec1..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ping.j2 +++ /dev/null @@ -1,14 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - blackbox - - icmp - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git 
a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ssh.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ssh.j2 deleted file mode 100644 index 166f37ad..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/blackbox/ssh.j2 +++ /dev/null @@ -1,14 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - blackbox - - ssh_banner - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/generic.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/generic.j2 deleted file mode 100644 index b155c5f7..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/generic.j2 +++ /dev/null @@ -1,13 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - {{ job }} - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/job-snippets/nut/ups.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/nut/ups.j2 deleted file mode 100644 index 3a2c5c62..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/nut/ups.j2 +++ /dev/null @@ -1,13 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - nut - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git 
a/roles/monitoring/prometheus/server/templates/job-snippets/openwrt.j2 b/roles/monitoring/prometheus/server/templates/job-snippets/openwrt.j2 deleted file mode 100644 index e93f8be7..00000000 --- a/roles/monitoring/prometheus/server/templates/job-snippets/openwrt.j2 +++ /dev/null @@ -1,5 +0,0 @@ - - job_name: '{{ job }}' - scheme: http - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 b/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 new file mode 100644 index 00000000..98a64121 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 @@ -0,0 +1,14 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - blackbox + - http_tls_2xx + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 new file mode 100644 index 00000000..736ffec1 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 @@ -0,0 +1,14 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - blackbox + - icmp + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 new file mode 100644 index 00000000..166f37ad --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 
@@ -0,0 +1,14 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - blackbox + - ssh_banner + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/generic.j2 b/roles/monitoring/prometheus/server/templates/jobs/generic.j2 new file mode 100644 index 00000000..b155c5f7 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/jobs/generic.j2 @@ -0,0 +1,13 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - {{ job }} + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 b/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 new file mode 100644 index 00000000..3a2c5c62 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 @@ -0,0 +1,13 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - nut + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/openwrt.j2 b/roles/monitoring/prometheus/server/templates/jobs/openwrt.j2 new file mode 100644 index 00000000..e93f8be7 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/jobs/openwrt.j2 @@ -0,0 +1,5 @@ + - job_name: '{{ job }}' + scheme: http + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job 
}}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 index 8156341d..e73ca354 100644 --- a/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/prometheus.yml.j2 @@ -40,7 +40,7 @@ scrape_configs: {% endif %} {% for job in (prometheus_server_jobs) %} -{% include lookup('first_found', {'paths': ['templates/job-snippets'], 'files': [job + '.j2', 'generic.j2']}) | relpath(template_fullpath | dirname) %}{{ '' }} +{% include lookup('first_found', {'paths': ['templates/jobs'], 'files': [job + '.j2', 'generic.j2']}) | relpath(template_fullpath | dirname) %}{{ '' }} {% endfor %} {% if prometheus_server_jobs_extra is defined %} -- cgit v1.2.3 From ec55b1572702b91184d99fec89fec537cfe2ea1f Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Thu, 23 Sep 2021 20:18:08 +0200 Subject: force blackbox exporter to ipv4 by default --- inventory/host_vars/ch-mon.yml | 7 +++++-- roles/monitoring/prometheus/exporter/blackbox/defaults/main.yml | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'roles/monitoring') diff --git a/inventory/host_vars/ch-mon.yml b/inventory/host_vars/ch-mon.yml index 242c4835..d1a710b9 100644 --- a/inventory/host_vars/ch-mon.yml +++ b/inventory/host_vars/ch-mon.yml @@ -86,8 +86,11 @@ prometheus_job_blackbox__ping: prometheus_job_blackbox__https: - exporter_hostname: ch-mon - instance: "https-web.chaos-at-home.org" - address: web.chaos-at-home.org + instance: "https-pan.chaos-at-home.org" + address: "https://pan.chaos-at-home.org" + - exporter_hostname: ch-mon + instance: "https-mimas.chaos-at-home.org" + address: "https://mimas.chaos-at-home.org" prometheus_job_blackbox__ssh: - exporter_hostname: ch-mon diff --git a/roles/monitoring/prometheus/exporter/blackbox/defaults/main.yml b/roles/monitoring/prometheus/exporter/blackbox/defaults/main.yml index 4e7d8d9a..73b9fde1 100644 --- 
a/roles/monitoring/prometheus/exporter/blackbox/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/blackbox/defaults/main.yml @@ -2,23 +2,30 @@ prometheus_exporter_blackbox_modules: tcp_connect: prober: tcp + tcp: + preferred_ip_protocol: "ip4" tcp_tls_connect: prober: tcp tcp: + preferred_ip_protocol: "ip4" tls: true tls_config: insecure_skip_verify: true http_2xx: prober: http + http: + preferred_ip_protocol: "ip4" http_tls_2xx: prober: http http: + preferred_ip_protocol: "ip4" fail_if_not_ssl: true tls_config: insecure_skip_verify: true ssh_banner: prober: tcp tcp: + preferred_ip_protocol: "ip4" query_response: - expect: "^SSH-2.0-" - send: "SSH-2.0-blackbox-ssh-check" -- cgit v1.2.3 From 486c84d53244e44ff72a3c2db42ee12afdb083e8 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Thu, 23 Sep 2021 20:37:20 +0200 Subject: add some more prometheus rules for blackbox exporter --- .../prometheus/server/defaults/main/main.yml | 2 + .../server/defaults/main/rules_blackbox.yml | 47 ++++++++++++++++++++++ .../server/defaults/main/rules_blackbox__https.yml | 19 ++++++++- .../server/defaults/main/rules_blackbox__ping.yml | 10 ++++- .../prometheus/server/defaults/main/rules_nut.yml | 3 ++ roles/monitoring/prometheus/server/tasks/main.yml | 2 +- 6 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_nut.yml (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml index bae0cdba..09cd150c 100644 --- a/roles/monitoring/prometheus/server/defaults/main/main.yml +++ b/roles/monitoring/prometheus/server/defaults/main/main.yml @@ -14,7 +14,9 @@ prometheus_server_rules: prometheus: "{{ prometheus_server_rules_prometheus + ((prometheus_server_alertmanager is defined) | 
ternary(prometheus_server_rules_prometheus_alertmanager, [])) + prometheus_server_rules_prometheus_extra }}" node: "{{ prometheus_server_rules_node + prometheus_server_rules_node_extra }}" openwrt: "{{ prometheus_server_rules_openwrt + prometheus_server_rules_node_extra }}" + nut: "{{ prometheus_server_rules_nut + prometheus_server_rules_nut_extra }}" nut/ups: "{{ prometheus_server_rules_nut__ups + prometheus_server_rules_nut__ups_extra }}" + blackbox: "{{ prometheus_server_rules_blackbox + prometheus_server_rules_blackbox_extra }}" blackbox/ping: "{{ prometheus_server_rules_blackbox__ping + prometheus_server_rules_blackbox__ping_extra }}" blackbox/https: "{{ prometheus_server_rules_blackbox__https + prometheus_server_rules_blackbox__https_extra }}" blackbox/ssh: "{{ prometheus_server_rules_blackbox__ssh + prometheus_server_rules_blackbox__ssh_extra }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml new file mode 100644 index 00000000..d5c1fd42 --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml @@ -0,0 +1,47 @@ +--- +prometheus_server_rules_blackbox_extra: [] +prometheus_server_rules_blackbox: + - alert: BlackboxProbeFailed + expr: probe_success == 0 + for: 0m + labels: + severity: critical + annotations: + summary: Blackbox probe failed (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Probe failed\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxSlowProbe + expr: avg_over_time(probe_duration_seconds[1m]) > 1 + for: 1m + labels: + severity: warning + annotations: + summary: Blackbox slow probe (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Blackbox probe took more than 1s to complete\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxSslCertificateWillExpireSoon + expr: 
probe_ssl_earliest_cert_expiry - time() < 86400 * 30 + for: 0m + labels: + severity: warning + annotations: + summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "SSL certificate expires in 30 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxSslCertificateWillExpireSoon + expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 3 + for: 0m + labels: + severity: critical + annotations: + summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "SSL certificate expires in 3 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxSslCertificateExpired + expr: probe_ssl_earliest_cert_expiry - time() <= 0 + for: 0m + labels: + severity: critical + annotations: + summary: Blackbox SSL certificate expired (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "SSL certificate has expired already\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml index cfdc10bd..140e3b4f 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml @@ -1,3 +1,20 @@ --- prometheus_server_rules_blackbox__https_extra: [] -prometheus_server_rules_blackbox__https: [] +prometheus_server_rules_blackbox__https: + - alert: BlackboxProbeHttpFailure + expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400 + for: 0m + labels: + severity: critical + annotations: + summary: Blackbox probe HTTP failure (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "HTTP status code is not 200-399\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} 
$labels {{ '}}' }}" + + - alert: BlackboxProbeSlowHttp + expr: avg_over_time(probe_http_duration_seconds[1m]) > 1 + for: 1m + labels: + severity: warning + annotations: + summary: Blackbox probe slow HTTP (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "HTTP request took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml index 06ce8607..cc87b6b1 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml @@ -1,3 +1,11 @@ --- prometheus_server_rules_blackbox__ping_extra: [] -prometheus_server_rules_blackbox__ping: [] +prometheus_server_rules_blackbox__ping: + - alert: BlackboxProbeSlowPing + expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1 + for: 1m + labels: + severity: warning + annotations: + summary: Blackbox probe slow ping (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Blackbox ping took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_nut.yml b/roles/monitoring/prometheus/server/defaults/main/rules_nut.yml new file mode 100644 index 00000000..d8d64f64 --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_nut.yml @@ -0,0 +1,3 @@ +--- +prometheus_server_rules_nut_extra: [] +prometheus_server_rules_nut: [] diff --git a/roles/monitoring/prometheus/server/tasks/main.yml b/roles/monitoring/prometheus/server/tasks/main.yml index 16167c9c..c0928cc3 100644 --- a/roles/monitoring/prometheus/server/tasks/main.yml +++ b/roles/monitoring/prometheus/server/tasks/main.yml @@ -83,7 +83,7 @@ state: directory - name: generate rules files for all jobs - loop: "{{ prometheus_server_jobs | 
union(['prometheus']) }}" + loop: "{{ prometheus_server_jobs | union(['prometheus']) | union(prometheus_server_jobs | select('match', '.*/.*') | map('dirname') | unique) }}" template: src: rules.yml.j2 dest: "/etc/prometheus/rules/{{ item }}.yml" -- cgit v1.2.3 From 6320da1262c1f44ac773c6b6578a59ba286ce973 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 24 Sep 2021 00:57:01 +0200 Subject: add some basic prometheus node exporter textfile collector scripts --- .../prometheus/exporter/node/defaults/main.yml | 5 +- .../monitoring/prometheus/exporter/node/files/apt | 40 ++++++++++++ .../exporter/node/files/deleted-libraries | 75 ++++++++++++++++++++++ .../prometheus/exporter/node/tasks/main.yml | 35 ++++++++++ .../node/tasks/textfile_collector_script.yml | 21 ++++++ .../textfile-collector-scripts/apt.service.j2 | 30 +++++++++ .../textfile-collector-scripts/apt.timer.j2 | 9 +++ .../deleted-libraries.service.j2 | 30 +++++++++ .../deleted-libraries.timer.j2 | 9 +++ 9 files changed, 253 insertions(+), 1 deletion(-) create mode 100755 roles/monitoring/prometheus/exporter/node/files/apt create mode 100755 roles/monitoring/prometheus/exporter/node/files/deleted-libraries create mode 100644 roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml create mode 100644 roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 create mode 100644 roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 create mode 100644 roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 create mode 100644 roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml index 56227fbb..4a9b40cd 100644 --- 
a/roles/monitoring/prometheus/exporter/node/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml @@ -9,4 +9,7 @@ prometheus_exporter_node_timesync_collector: "{{ _prometheus_exporter_node_time_ prometheus_exporter_node_disable_collectors: [] prometheus_exporter_node_extra_collectors: -- "{{ prometheus_exporter_node_timesync_collector }}" + - "{{ prometheus_exporter_node_timesync_collector }}" + +prometheus_exporter_node_textfile_collector_scripts: + - deleted-libraries diff --git a/roles/monitoring/prometheus/exporter/node/files/apt b/roles/monitoring/prometheus/exporter/node/files/apt new file mode 100755 index 00000000..015addb0 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/files/apt @@ -0,0 +1,40 @@ +#!/bin/bash +# +# Description: Expose metrics from apt updates. +# +# Author: Ben Kochie + +upgrades="$(/usr/bin/apt-get --just-print dist-upgrade \ + | /usr/bin/awk -F'[()]' \ + '/^Inst/ { sub("^[^ ]+ ", "", $2); gsub(" ","",$2); + sub("\\[", " ", $2); sub("\\]", "", $2); print $2 }' \ + | /usr/bin/sort \ + | /usr/bin/uniq -c \ + | awk '{ gsub(/\\\\/, "\\\\", $2); gsub(/"/, "\\\"", $2); + gsub(/\[/, "", $3); gsub(/\]/, "", $3); + print "apt_upgrades_pending{origin=\"" $2 "\",arch=\"" $NF "\"} " $1}' +)" + +autoremove="$(/usr/bin/apt-get --just-print autoremove \ + | /usr/bin/awk '/^Remv/{a++}END{printf "apt_autoremove_pending %d", a}' +)" + +echo '# HELP apt_upgrades_pending Apt package pending updates by origin.' +echo '# TYPE apt_upgrades_pending gauge' +if [[ -n "${upgrades}" ]] ; then + echo "${upgrades}" +else + echo 'apt_upgrades_pending{origin="",arch=""} 0' +fi + +echo '# HELP apt_autoremove_pending Apt package pending autoremove.' +echo '# TYPE apt_autoremove_pending gauge' +echo "${autoremove}" + +echo '# HELP node_reboot_required Node reboot is required for software updates.' 
+echo '# TYPE node_reboot_required gauge' +if [[ -f '/run/reboot-required' ]] ; then + echo 'node_reboot_required 1' +else + echo 'node_reboot_required 0' +fi diff --git a/roles/monitoring/prometheus/exporter/node/files/deleted-libraries b/roles/monitoring/prometheus/exporter/node/files/deleted-libraries new file mode 100755 index 00000000..e3e19cbd --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/files/deleted-libraries @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +""" +Script to count the number of deleted libraries that are linked by running +processes and expose a summary as Prometheus metrics. + +The aim is to discover processes that are still using libraries that have since +been updated, perhaps due security vulnerabilities. +""" + +import errno +import glob +import os +import sys + + +def main(): + processes_linking_deleted_libraries = {} + + for path in glob.glob('/proc/*/maps'): + try: + with open(path, 'rb') as file: + for line in file: + part = line.decode().strip().split() + + if len(part) == 7: + library = part[5] + comment = part[6] + + if '/lib/' in library and '(deleted)' in comment: + if path not in processes_linking_deleted_libraries: + processes_linking_deleted_libraries[path] = {} + + if library in processes_linking_deleted_libraries[path]: + processes_linking_deleted_libraries[path][library] += 1 + else: + processes_linking_deleted_libraries[path][library] = 1 + except EnvironmentError as e: + # Ignore non-existent files, since the files may have changed since + # we globbed. 
+ if e.errno != errno.ENOENT: + sys.exit('Failed to open file: {0}'.format(path)) + + num_processes_per_library = {} + + for process, library_count in processes_linking_deleted_libraries.items(): + libraries_seen = set() + for library, count in library_count.items(): + if library in libraries_seen: + continue + + libraries_seen.add(library) + if library in num_processes_per_library: + num_processes_per_library[library] += 1 + else: + num_processes_per_library[library] = 1 + + metric_name = 'node_processes_linking_deleted_libraries' + description = 'Count of running processes that link a deleted library' + print('# HELP {0} {1}'.format(metric_name, description)) + print('# TYPE {0} gauge'.format(metric_name)) + + for library, count in num_processes_per_library.items(): + dir_path, basename = os.path.split(library) + basename = basename.replace('"', '\\"') + dir_path = dir_path.replace('"', '\\"') + print('{0}{{library_path="{1}", library_name="{2}"}} {3}'.format( + metric_name, + dir_path, + basename, + count) + ) + + +if __name__ == "__main__": + main() diff --git a/roles/monitoring/prometheus/exporter/node/tasks/main.yml b/roles/monitoring/prometheus/exporter/node/tasks/main.yml index 3fa0a1ec..61e385f7 100644 --- a/roles/monitoring/prometheus/exporter/node/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/node/tasks/main.yml @@ -31,3 +31,38 @@ port: 9100 dest: /etc/prometheus/exporter/exporter/node.yml notify: reload prometheus-exporter-exporter + +- name: create directory for textfile collector scripts + file: + path: /usr/local/lib/prometheus-node-exporter + state: directory + +- name: install textfile collector script wrapper + copy: + content: | + #!/bin/bash + + if [ -z "$1" ]; then + echo "Please specify which collector script to call!" 
+ exit 1 + fi + collector="$1" + + set -e + rm -f "/var/lib/prometheus-node-exporter/textfile-collector/$collector.prom".* + "/usr/local/lib/prometheus-node-exporter/$collector" > "/var/lib/prometheus-node-exporter/textfile-collector/$collector.prom.$$" + mv "/var/lib/prometheus-node-exporter/textfile-collector/$collector.prom.$$" "/var/lib/prometheus-node-exporter/textfile-collector/$collector.prom" + dest: /usr/local/lib/prometheus-node-exporter/run-collector + mode: 0755 + +- name: install the apt textfile collector script + when: ansible_pkg_mgr == "apt" + vars: + textfile_collector_name: "apt" + include_tasks: textfile_collector_script.yml + +- name: install all other textfile collector scripts + loop: "{{ prometheus_exporter_node_textfile_collector_scripts }}" + loop_control: + loop_var: textfile_collector_name + include_tasks: textfile_collector_script.yml diff --git a/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml b/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml new file mode 100644 index 00000000..1a39bb4c --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml @@ -0,0 +1,21 @@ +--- +- name: install the collector script + copy: + src: "{{ textfile_collector_name }}" + dest: "/usr/local/lib/prometheus-node-exporter/{{ textfile_collector_name }}" + mode: 0755 + +- name: install systemd service units + loop: + - service + - timer + template: + src: "textfile-collector-scripts/{{ textfile_collector_name }}.{{ item }}.j2" + dest: "/etc/systemd/system/prometheus-node-exporter_{{ textfile_collector_name }}.{{ item }}" + +- name: make sure the systemd timer is enabled and started + systemd: + daemon_reload: yes + name: "prometheus-node-exporter_{{ textfile_collector_name }}.timer" + state: started + enabled: yes diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 
b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 new file mode 100644 index 00000000..b0e9d167 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 @@ -0,0 +1,30 @@ +[Unit] +Description=Promethues node exporter textfile collector apt + +[Service] +Type=oneshot +ExecStart=/usr/local/lib/prometheus-node-exporter/run-collector apt + +# systemd hardening-options +AmbientCapabilities= +CapabilityBoundingSet= +DeviceAllow=/dev/null rw +DevicePolicy=strict +LockPersonality=true +MemoryDenyWriteExecute=true +NoNewPrivileges=true +PrivateDevices=true +PrivateTmp=true +ProtectControlGroups=true +ProtectHome=true +ProtectKernelModules=true +ProtectKernelTunables=true +ProtectSystem=strict +ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector +RemoveIPC=true +RestrictNamespaces=true +RestrictRealtime=true +SystemCallArchitectures=native + +[Install] +WantedBy=multi-user.target diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 new file mode 100644 index 00000000..5e7d3062 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 @@ -0,0 +1,9 @@ +[Unit] +Description=Promethues node exporter textfile collector apt + +[Timer] +OnCalendar=*-*-* *:1/30:17 +AccuracySec=10s + +[Install] +WantedBy=timers.target diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 new file mode 100644 index 00000000..9dbc822f --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 @@ -0,0 +1,30 @@ +[Unit] +Description=Promethues node 
exporter textfile collector deleted-libraries + +[Service] +Type=oneshot +ExecStart=/usr/local/lib/prometheus-node-exporter/run-collector deleted-libraries + +# systemd hardening-options +AmbientCapabilities=CAP_SYS_PTRACE +CapabilityBoundingSet=CAP_SYS_PTRACE +DeviceAllow=/dev/null rw +DevicePolicy=strict +LockPersonality=true +MemoryDenyWriteExecute=true +NoNewPrivileges=true +PrivateDevices=true +PrivateTmp=true +ProtectControlGroups=true +ProtectHome=true +ProtectKernelModules=true +ProtectKernelTunables=true +ProtectSystem=strict +ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector +RemoveIPC=true +RestrictNamespaces=true +RestrictRealtime=true +SystemCallArchitectures=native + +[Install] +WantedBy=multi-user.target diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 new file mode 100644 index 00000000..8f38050a --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 @@ -0,0 +1,9 @@ +[Unit] +Description=Promethues node exporter textfile collector deleted-libraries + +[Timer] +OnCalendar=*-*-* *:2/30:22 +AccuracySec=10s + +[Install] +WantedBy=timers.target -- cgit v1.2.3 From c36e7b7a8f2dfe1c54e537b737340e025fa81467 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 24 Sep 2021 02:04:31 +0200 Subject: add some alert rule for newly added node exporter metrics --- .../prometheus/server/defaults/main/rules_node.yml | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml index ab7317ac..2c7f9319 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml +++ 
b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml @@ -217,3 +217,30 @@ prometheus_server_rules_node: annotations: summary: Host clock not synchronising (instance {{ '{{' }} $labels.instance {{ '}}' }}) description: "Clock not synchronising.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: AptUpgradesPending + expr: sum by (instance) (apt_upgrades_pending) > 0 + for: 0m + labels: + severity: warning + annotations: + summary: Host has upgradeable packages (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Host {{ '{{' }} $labels.instance {{ '}}' }} has {{ '{{' }} $value {{ '}}' }} upgradable packages.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: AptAutoremovePending + expr: sum by (instance) (apt_autoremove_pending) > 0 + for: 0m + labels: + severity: warning + annotations: + summary: Host has packages that can be autoremoved (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Host {{ '{{' }} $labels.instance {{ '}}' }} has {{ '{{' }} $value {{ '}}' }} that can be autoremoved.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: HostNeedsRebooting + expr: node_reboot_required > 0 + for: 0m + labels: + severity: warning + annotations: + summary: Host must be rebootet (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Host {{ '{{' }} $labels.instance {{ '}}' }} must be rebootet for security uppdates to take effect.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" -- cgit v1.2.3 From bb9f5e0b165895e748ca1e6d83c1b3404c7cef71 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 24 Sep 2021 12:56:42 +0200 Subject: also run new textcollector scripts after reboot --- roles/monitoring/prometheus/exporter/node/defaults/main.yml | 2 ++ .../exporter/node/templates/textfile-collector-scripts/apt.timer.j2 | 1 + 
.../templates/textfile-collector-scripts/deleted-libraries.timer.j2 | 1 + 3 files changed, 4 insertions(+) (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml index 4a9b40cd..2714a7fe 100644 --- a/roles/monitoring/prometheus/exporter/node/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml @@ -7,6 +7,8 @@ _prometheus_exporter_node_time_collector_map_: prometheus_exporter_node_timesync_collector: "{{ _prometheus_exporter_node_time_collector_map_[ntp_variant | default('')] }}" +## TODO: systemd state collector??? + prometheus_exporter_node_disable_collectors: [] prometheus_exporter_node_extra_collectors: - "{{ prometheus_exporter_node_timesync_collector }}" diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 index 5e7d3062..b8a9c34e 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 @@ -2,6 +2,7 @@ Description=Promethues node exporter textfile collector apt [Timer] +OnBootSec=50s OnCalendar=*-*-* *:1/30:17 AccuracySec=10s diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 index 8f38050a..1646ac73 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 @@ -2,6 +2,7 @@ Description=Promethues node exporter textfile collector deleted-libraries [Timer] +OnBootSec=60s OnCalendar=*-*-* 
*:2/30:22 AccuracySec=10s -- cgit v1.2.3 From 4ec26b272ab6090498a9eefa4a0efb06248b1ef4 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 24 Sep 2021 14:11:35 +0200 Subject: prometheus: add monitoring for systemd units --- .../prometheus/exporter/node/defaults/main.yml | 1 + .../prometheus/server/defaults/main/rules_node.yml | 11 ++++++++++- .../prometheus/server/templates/jobs/node.j2 | 20 ++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 roles/monitoring/prometheus/server/templates/jobs/node.j2 (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml index 2714a7fe..491e70f6 100644 --- a/roles/monitoring/prometheus/exporter/node/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml @@ -12,6 +12,7 @@ prometheus_exporter_node_timesync_collector: "{{ _prometheus_exporter_node_time_ prometheus_exporter_node_disable_collectors: [] prometheus_exporter_node_extra_collectors: - "{{ prometheus_exporter_node_timesync_collector }}" + - systemd prometheus_exporter_node_textfile_collector_scripts: - deleted-libraries diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml index 2c7f9319..64a7d562 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml @@ -92,6 +92,15 @@ prometheus_server_rules_node: summary: Host CPU steal noisy neighbor (instance {{ '{{' }} $labels.instance {{ '}}' }}) description: "CPU steal is > 10%. 
A noisy neighbor is killing VM performances or a spot instance may be out of credit.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + - alert: HostSystemdNotRunning + expr: node_systemd_system_running == 0 + for: 0m + labels: + severity: warning + annotations: + summary: Host systemd is not in running state (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "systemd is not in running state.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + - alert: HostSystemdServiceCrashed expr: node_systemd_unit_state{state="failed"} == 1 for: 0m @@ -99,7 +108,7 @@ prometheus_server_rules_node: severity: warning annotations: summary: Host systemd service crashed (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "systemd service crashed\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + description: "The systemd service unit {{ '{{' }} $labels.name {{ '}}' }} is in failed state.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - alert: HostPhysicalComponentTooHot expr: node_hwmon_temp_celsius > 75 diff --git a/roles/monitoring/prometheus/server/templates/jobs/node.j2 b/roles/monitoring/prometheus/server/templates/jobs/node.j2 new file mode 100644 index 00000000..ba9eab31 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/jobs/node.j2 @@ -0,0 +1,20 @@ + - job_name: '{{ job }}' + metrics_path: /proxy + params: + module: + - {{ job }} + scheme: https + tls_config: + ca_file: /etc/ssl/prometheus/ca-crt.pem + cert_file: /etc/ssl/prometheus/server/scrape-crt.pem + key_file: /etc/ssl/prometheus/server/scrape-key.pem + file_sd_configs: + - files: + - "/etc/prometheus/targets/{{ job }}/*.yml" + metric_relabel_configs: + - source_labels: [ "mountpoint" ] + regex: ".*/\\.snapshot/.*" + action: drop + - source_labels: [ "__name__", "state" ] + regex: "node_systemd_unit_state;(activating|deactivating|inactive)" + action: 
drop -- cgit v1.2.3 From 9da269b334fc9a1949c787ea37a3d5879bc2b865 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 24 Sep 2021 14:13:51 +0200 Subject: fix some todos --- roles/monitoring/prometheus/exporter/node/defaults/main.yml | 2 -- roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml | 1 + roles/monitoring/prometheus/server/defaults/main/rules_openwrt.yml | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml index 491e70f6..9309562f 100644 --- a/roles/monitoring/prometheus/exporter/node/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml @@ -7,8 +7,6 @@ _prometheus_exporter_node_time_collector_map_: prometheus_exporter_node_timesync_collector: "{{ _prometheus_exporter_node_time_collector_map_[ntp_variant | default('')] }}" -## TODO: systemd state collector??? 
- prometheus_exporter_node_disable_collectors: [] prometheus_exporter_node_extra_collectors: - "{{ prometheus_exporter_node_timesync_collector }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml b/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml index bccb0ca8..150a507e 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_nut__ups.yml @@ -1,3 +1,4 @@ --- prometheus_server_rules_nut__ups_extra: [] prometheus_server_rules_nut__ups: [] +## TODO: add NUT/UPS alert rules diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_openwrt.yml b/roles/monitoring/prometheus/server/defaults/main/rules_openwrt.yml index 88d84f31..04b178f1 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_openwrt.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_openwrt.yml @@ -1,3 +1,4 @@ --- prometheus_server_rules_openwrt_extra: [] prometheus_server_rules_openwrt: [] +## TODO: add openwrt specific alert rules -- cgit v1.2.3 From 9a47d5c3ef94cb09338a1b64d4dc9365d526bb54 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 24 Sep 2021 15:05:48 +0200 Subject: refactor textfile collector script handling --- roles/monitoring/prometheus/exporter/TODO | 4 - .../prometheus/exporter/node/defaults/main.yml | 1 + .../prometheus/exporter/node/files/smartmon | 391 +++++++++++++++++++++ .../prometheus/exporter/node/tasks/main.yml | 24 +- .../node/tasks/textfile_collector_script.yml | 2 +- .../textfile-collector-scripts/apt.service.j2 | 3 +- .../textfile-collector-scripts/apt.timer.j2 | 5 +- .../deleted-libraries.service.j2 | 3 +- .../deleted-libraries.timer.j2 | 5 +- .../textfile-collector-scripts/smartmon.service.j2 | 29 ++ .../textfile-collector-scripts/smartmon.timer.j2 | 13 + 11 files changed, 447 insertions(+), 33 deletions(-) create mode 100644 roles/monitoring/prometheus/exporter/node/files/smartmon create 
mode 100644 roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 create mode 100644 roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.timer.j2 (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/exporter/TODO b/roles/monitoring/prometheus/exporter/TODO index c02e5699..79ff8721 100644 --- a/roles/monitoring/prometheus/exporter/TODO +++ b/roles/monitoring/prometheus/exporter/TODO @@ -1,7 +1,3 @@ -Node Exporter - Text Collector Scripts: - - https://github.com/prometheus-community/node-exporter-textfile-collector-scripts - - https://packages.debian.org/bullseye/prometheus-node-exporter-collectors - IPMI Exporter: - https://github.com/soundcloud/ipmi_exporter - https://packages.debian.org/bullseye/prometheus-ipmi-exporter diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml index 9309562f..870753c3 100644 --- a/roles/monitoring/prometheus/exporter/node/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml @@ -14,3 +14,4 @@ prometheus_exporter_node_extra_collectors: prometheus_exporter_node_textfile_collector_scripts: - deleted-libraries + - smartmon diff --git a/roles/monitoring/prometheus/exporter/node/files/smartmon b/roles/monitoring/prometheus/exporter/node/files/smartmon new file mode 100644 index 00000000..1c39b492 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/files/smartmon @@ -0,0 +1,391 @@ +#!/usr/bin/env python3 +import argparse +import collections +import csv +import datetime +import decimal +import re +import shlex +import subprocess +import sys + +device_info_re = re.compile(r'^(?P<k>[^:]+?)(?:(?:\sis|):)\s*(?P<v>.*)$') + +ata_error_count_re = re.compile( + r'^Error (\d+) \[\d+\] occurred', re.MULTILINE) + +self_test_re = re.compile(r'^SMART.*(PASSED|OK)$', re.MULTILINE) + +device_info_map = { + 'Vendor': 'vendor', + 'Product': 
'product', + 'Revision': 'revision', + 'Logical Unit id': 'lun_id', + 'Model Family': 'model_family', + 'Device Model': 'device_model', + 'Serial Number': 'serial_number', + 'Firmware Version': 'firmware_version', +} + +smart_attributes_whitelist = { + 'airflow_temperature_cel', + 'command_timeout', + 'current_pending_sector', + 'end_to_end_error', + 'erase_fail_count_total', + 'g_sense_error_rate', + 'hardware_ecc_recovered', + 'host_reads_mib', + 'host_reads_32mib', + 'host_writes_mib', + 'host_writes_32mib', + 'load_cycle_count', + 'media_wearout_indicator', + 'wear_leveling_count', + 'nand_writes_1gib', + 'offline_uncorrectable', + 'power_cycle_count', + 'power_on_hours', + 'program_fail_count', + 'raw_read_error_rate', + 'reallocated_event_count', + 'reallocated_sector_ct', + 'reported_uncorrect', + 'sata_downshift_count', + 'seek_error_rate', + 'spin_retry_count', + 'spin_up_time', + 'start_stop_count', + 'temperature_case', + 'temperature_celsius', + 'temperature_internal', + 'total_lbas_read', + 'total_lbas_written', + 'udma_crc_error_count', + 'unsafe_shutdown_count', + 'workld_host_reads_perc', + 'workld_media_wear_indic', + 'workload_minutes', +} + +Metric = collections.namedtuple('Metric', 'name labels value') + +SmartAttribute = collections.namedtuple('SmartAttribute', [ + 'id', 'name', 'flag', 'value', 'worst', 'threshold', 'type', 'updated', + 'when_failed', 'raw_value', +]) + + +class Device(collections.namedtuple('DeviceBase', 'path opts')): + """Representation of a device as found by smartctl --scan output.""" + + @property + def type(self): + return self.opts.type + + @property + def base_labels(self): + return {'device': self.path, 'disk': self.type.partition('+')[2] or '0'} + + def smartctl_select(self): + return ['--device', self.type, self.path] + + +def metric_key(metric, prefix=''): + return '{prefix}{metric.name}'.format(prefix=prefix, metric=metric) + + +def metric_format(metric, prefix=''): + key = metric_key(metric, prefix) + labels = 
','.join( + '{k}="{v}"'.format(k=k, v=v.replace('"', '\\"')) for k, v in metric.labels.items()) + value = decimal.Decimal(metric.value) + + return '{key}{{{labels}}} {value}'.format( + key=key, labels=labels, value=value) + + +def metric_print_meta(metric, prefix=''): + key = metric_key(metric, prefix) + print('# HELP {key} SMART metric {metric.name}'.format( + key=key, metric=metric)) + print('# TYPE {key} gauge'.format(key=key)) + + +def metric_print(metric, prefix=''): + print(metric_format(metric, prefix)) + + +def smart_ctl(*args, check=True): + """Wrapper around invoking the smartctl binary. + + Returns: + (str) Data piped to stdout by the smartctl subprocess. + """ + return subprocess.run( + ['smartctl', *args], stdout=subprocess.PIPE, check=check + ).stdout.decode('utf-8') + + +def smart_ctl_version(): + return smart_ctl('-V').split('\n')[0].split()[1] + + +def find_devices(): + """Find SMART devices. + + Yields: + (Device) Single device found by smartctl. + """ + parser = argparse.ArgumentParser() + parser.add_argument('-d', '--device', dest='type') + + devices = smart_ctl('--scan-open') + + for device in devices.split('\n'): + device = device.strip() + if not device: + continue + + tokens = shlex.split(device, comments=True) + if not tokens: + continue + + yield Device(tokens[0], parser.parse_args(tokens[1:])) + + +def device_is_active(device): + """Returns whenever the given device is currently active or not. + + Args: + device: (Device) Device in question. + + Returns: + (bool) True if the device is active and False otherwise. + """ + try: + smart_ctl('--nocheck', 'standby', *device.smartctl_select()) + except subprocess.CalledProcessError: + return False + + return True + + +def device_info(device): + """Query device for basic model information. + + Args: + device: (Device) Device in question. + + Returns: + (generator): Generator yielding: + + key (str): Key describing the value. + value (str): Actual value. 
+ """ + info_lines = smart_ctl( + '--info', *device.smartctl_select() + ).strip().split('\n')[3:] + + matches = (device_info_re.match(line) for line in info_lines) + return (m.groups() for m in matches if m is not None) + + +def device_smart_capabilities(device): + """Returns SMART capabilities of the given device. + + Args: + device: (Device) Device in question. + + Returns: + (tuple): tuple containing: + + (bool): True whenever SMART is available, False otherwise. + (bool): True whenever SMART is enabled, False otherwise. + """ + groups = device_info(device) + + state = { + g[1].split(' ', 1)[0] + for g in groups if g[0] == 'SMART support'} + + smart_available = 'Available' in state + smart_enabled = 'Enabled' in state + + return smart_available, smart_enabled + + +def collect_device_info(device): + """Collect basic device information. + + Args: + device: (Device) Device in question. + + Yields: + (Metric) metrics describing general device information. + """ + values = dict(device_info(device)) + yield Metric('device_info', { + **device.base_labels, + **{v: values[k] for k, v in device_info_map.items() if k in values} + }, True) + + +def collect_device_health_self_assessment(device): + """Collect metric about the device health self assessment. + + Args: + device: (Device) Device in question. + + Yields: + (Metric) Device health self assessment. + """ + out = smart_ctl('--health', *device.smartctl_select(), check=False) + + self_assessment_passed = bool(self_test_re.search(out)) + + yield Metric( + 'device_smart_healthy', device.base_labels, self_assessment_passed) + + +def collect_ata_metrics(device): + # Fetch SMART attributes for the given device. + attributes = smart_ctl( + '--attributes', *device.smartctl_select() + ) + + # replace multiple occurrences of whitespace with a single whitespace + # so that the CSV Parser recognizes individual columns properly. 
+ attributes = re.sub(r'[\t\x20]+', ' ', attributes) + + # Turn smartctl output into a list of lines and skip to the table of + # SMART attributes. + attribute_lines = attributes.strip().split('\n')[7:] + + # Some attributes have multiple IDs but have the same name. Don't + # yield attributes that already have been reported before. + seen = set() + + reader = csv.DictReader( + (line.strip() for line in attribute_lines), + fieldnames=SmartAttribute._fields[:-1], + restkey=SmartAttribute._fields[-1], delimiter=' ') + for entry in reader: + # We're only interested in the SMART attributes that are + # whitelisted here. + entry['name'] = entry['name'].lower() + if entry['name'] not in smart_attributes_whitelist: + continue + + # Ensure that only the numeric parts are fetched from the raw_value. + # Attributes such as 194 Temperature_Celsius reported by my SSD + # are in the format of "36 (Min/Max 24/40)" which can't be expressed + # properly as a prometheus metric. + m = re.match(r'^(\d+)', ' '.join(entry['raw_value'])) + if not m: + continue + entry['raw_value'] = m.group(1) + + # Some device models report "---" in the threshold value where most + # devices would report "000". We do the substitution here because + # downstream code expects values to be convertable to integer. + if entry['threshold'] == '---': + entry['threshold'] = '0' + + if entry['name'] in smart_attributes_whitelist and entry['name'] not in seen: + labels = { + 'name': entry['name'], + **device.base_labels, + } + + for col in 'value', 'worst', 'threshold', 'raw_value': + yield Metric( + 'attr_{col}'.format(col=col), + labels, entry[col]) + + seen.add(entry['name']) + + +def collect_ata_error_count(device): + """Inspect the device error log and report the amount of entries. + + Args: + device: (Device) Device in question. + + Yields: + (Metric) Device error count. 
+ """ + error_log = smart_ctl( + '-l', 'xerror,1', *device.smartctl_select(), check=False) + + m = ata_error_count_re.search(error_log) + + error_count = m.group(1) if m is not None else 0 + + yield Metric('device_errors', device.base_labels, error_count) + + +def collect_disks_smart_metrics(wakeup_disks): + now = int(datetime.datetime.utcnow().timestamp()) + + for device in find_devices(): + yield Metric('smartctl_run', device.base_labels, now) + + is_active = device_is_active(device) + + yield Metric('device_active', device.base_labels, is_active) + + # Skip further metrics collection to prevent the disk from + # spinning up. + if not is_active and not wakeup_disks: + continue + + yield from collect_device_info(device) + + smart_available, smart_enabled = device_smart_capabilities(device) + + yield Metric( + 'device_smart_available', device.base_labels, smart_available) + yield Metric( + 'device_smart_enabled', device.base_labels, smart_enabled) + + # Skip further metrics collection here if SMART is disabled + # on the device. Further smartctl invocations would fail + # anyways. 
+ if not smart_available: + continue + + yield from collect_device_health_self_assessment(device) + + if device.type.startswith('sat'): + yield from collect_ata_metrics(device) + + yield from collect_ata_error_count(device) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-s', '--wakeup-disks', dest='wakeup_disks', action='store_true') + args = parser.parse_args(sys.argv[1:]) + + version_metric = Metric('smartctl_version', { + 'version': smart_ctl_version() + }, True) + metric_print_meta(version_metric, 'smartmon_') + metric_print(version_metric, 'smartmon_') + + metrics = list(collect_disks_smart_metrics(args.wakeup_disks)) + metrics.sort(key=lambda i: i.name) + + previous_name = None + for m in metrics: + if m.name != previous_name: + metric_print_meta(m, 'smartmon_') + + previous_name = m.name + + metric_print(m, 'smartmon_') + + +if __name__ == '__main__': + main() diff --git a/roles/monitoring/prometheus/exporter/node/tasks/main.yml b/roles/monitoring/prometheus/exporter/node/tasks/main.yml index 61e385f7..56903a33 100644 --- a/roles/monitoring/prometheus/exporter/node/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/node/tasks/main.yml @@ -2,7 +2,9 @@ ## TODO: pin version - name: install apt packages apt: - name: prom-exporter-node + name: + - prom-exporter-node + - moreutils state: present - name: create directory for textfile collector @@ -34,27 +36,9 @@ - name: create directory for textfile collector scripts file: - path: /usr/local/lib/prometheus-node-exporter + path: /usr/local/share/prometheus-node-exporter state: directory -- name: install textfile collector script wrapper - copy: - content: | - #!/bin/bash - - if [ -z "$1" ]; then - echo "Please specify which collector script to call!" 
- exit 1 - fi - collector="$1" - - set -e - rm -f "/var/lib/prometheus-node-exporter/textfile-collector/$collector.prom".* - "/usr/local/lib/prometheus-node-exporter/$collector" > "/var/lib/prometheus-node-exporter/textfile-collector/$collector.prom.$$" - mv "/var/lib/prometheus-node-exporter/textfile-collector/$collector.prom.$$" "/var/lib/prometheus-node-exporter/textfile-collector/$collector.prom" - dest: /usr/local/lib/prometheus-node-exporter/run-collector - mode: 0755 - - name: install the apt textfile collector script when: ansible_pkg_mgr == "apt" vars: diff --git a/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml b/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml index 1a39bb4c..5c068fe7 100644 --- a/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml +++ b/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml @@ -2,7 +2,7 @@ - name: install the collector script copy: src: "{{ textfile_collector_name }}" - dest: "/usr/local/lib/prometheus-node-exporter/{{ textfile_collector_name }}" + dest: "/usr/local/share/prometheus-node-exporter/{{ textfile_collector_name }}" mode: 0755 - name: install systemd service units diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 index b0e9d167..7eca94fb 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.service.j2 @@ -3,7 +3,8 @@ Description=Promethues node exporter textfile collector apt [Service] Type=oneshot -ExecStart=/usr/local/lib/prometheus-node-exporter/run-collector apt +Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector +ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/apt | sponge 
/var/lib/prometheus-node-exporter/textfile-collector/apt.prom" # systemd hardening-options AmbientCapabilities= diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 index b8a9c34e..dc473749 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/apt.timer.j2 @@ -2,9 +2,8 @@ Description=Promethues node exporter textfile collector apt [Timer] -OnBootSec=50s -OnCalendar=*-*-* *:1/30:17 -AccuracySec=10s +OnBootSec=10s +OnUnitActiveSec=15min [Install] WantedBy=timers.target diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 index 9dbc822f..7b15e558 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.service.j2 @@ -3,7 +3,8 @@ Description=Promethues node exporter textfile collector deleted-libraries [Service] Type=oneshot -ExecStart=/usr/local/lib/prometheus-node-exporter/run-collector deleted-libraries +Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector +ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/deleted-libraries | sponge /var/lib/prometheus-node-exporter/textfile-collector/deleted-libraries.prom" # systemd hardening-options AmbientCapabilities=CAP_SYS_PTRACE diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 index 1646ac73..c09acecf 
100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/deleted-libraries.timer.j2 @@ -2,9 +2,8 @@ Description=Promethues node exporter textfile collector deleted-libraries [Timer] -OnBootSec=60s -OnCalendar=*-*-* *:2/30:22 -AccuracySec=10s +OnBootSec=20s +OnUnitActiveSec=15min [Install] WantedBy=timers.target diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 new file mode 100644 index 00000000..fc7c9f3f --- /dev/null +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.service.j2 @@ -0,0 +1,29 @@ +[Unit] +Description=Promethues node exporter textfile collector smartmon + +[Service] +Type=oneshot +Environment=TMPDIR=/var/lib/prometheus-node-exporter/textfile-collector +Environment=LC_NUMERIC=C +ExecStart=bash -c "/usr/local/share/prometheus-node-exporter/smartmon | sponge /var/lib/prometheus-node-exporter/textfile-collector/smartmon.prom" + +# systemd hardening-options +AmbientCapabilities= +CapabilityBoundingSet= +LockPersonality=true +MemoryDenyWriteExecute=true +NoNewPrivileges=true +PrivateTmp=true +ProtectControlGroups=true +ProtectHome=true +ProtectKernelModules=true +ProtectKernelTunables=true +ProtectSystem=strict +ReadWritePaths=/var/lib/prometheus-node-exporter/textfile-collector +RemoveIPC=true +RestrictNamespaces=true +RestrictRealtime=true +SystemCallArchitectures=native + +[Install] +WantedBy=multi-user.target diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.timer.j2 new file mode 100644 index 00000000..438da6b0 --- /dev/null +++ 
b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.timer.j2 @@ -0,0 +1,13 @@ +[Unit] +Description=Promethues node exporter textfile collector smartmon +ConditionPathExists=/usr/sbin/smartctl +ConditionPathExistsGlob=|/dev/sd* +ConditionPathExistsGlob=|/dev/hd* +ConditionPathExistsGlob=|/dev/nvme* + +[Timer] +OnBootSec=30s +OnUnitActiveSec=15min + +[Install] +WantedBy=timers.target -- cgit v1.2.3 From 2316917055ec9399966033cc4944f5e5662c0136 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 24 Sep 2021 22:33:33 +0200 Subject: add prometheus exporter IPMI --- roles/monitoring/prometheus/exporter/TODO | 4 -- .../prometheus/exporter/blackbox/tasks/main.yml | 2 +- .../prometheus/exporter/ipmi/defaults/main.yml | 25 ++++++++++++ .../prometheus/exporter/ipmi/handlers/main.yml | 16 ++++++++ .../prometheus/exporter/ipmi/tasks/main.yml | 47 ++++++++++++++++++++++ .../exporter/ipmi/templates/config.yml.j2 | 4 ++ .../prometheus/exporter/ipmi/templates/service.j2 | 32 +++++++++++++++ 7 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 roles/monitoring/prometheus/exporter/ipmi/defaults/main.yml create mode 100644 roles/monitoring/prometheus/exporter/ipmi/handlers/main.yml create mode 100644 roles/monitoring/prometheus/exporter/ipmi/tasks/main.yml create mode 100644 roles/monitoring/prometheus/exporter/ipmi/templates/config.yml.j2 create mode 100644 roles/monitoring/prometheus/exporter/ipmi/templates/service.j2 (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/exporter/TODO b/roles/monitoring/prometheus/exporter/TODO index 79ff8721..57179464 100644 --- a/roles/monitoring/prometheus/exporter/TODO +++ b/roles/monitoring/prometheus/exporter/TODO @@ -1,7 +1,3 @@ -IPMI Exporter: - - https://github.com/soundcloud/ipmi_exporter - - https://packages.debian.org/bullseye/prometheus-ipmi-exporter - Postfix Exporter: - https://github.com/kumina/postfix_exporter - 
https://packages.debian.org/bullseye/prometheus-postfix-exporter diff --git a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml index cab521cc..782c3561 100644 --- a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml @@ -35,6 +35,6 @@ method: http http: port: 9115 - path: '/probe' + path: /probe dest: /etc/prometheus/exporter/exporter/blackbox.yml notify: reload prometheus-exporter-exporter diff --git a/roles/monitoring/prometheus/exporter/ipmi/defaults/main.yml b/roles/monitoring/prometheus/exporter/ipmi/defaults/main.yml new file mode 100644 index 00000000..9b99f9a5 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/ipmi/defaults/main.yml @@ -0,0 +1,25 @@ +--- +prometheus_exporter_ipmi_modules: + default: {} + # collectors: + # - bmc + # - ipmi + # - chassis + # - dcmi + # - sel + # - sm-lan-mode + # exclude_sensor_ids: + # - 2 + # - 29 + # - 32 + # thatspecialhost: + # user: "some_user" + # pass: "secret_pw" + # privilege: "admin" + # driver: "LAN" + # collectors: + # - ipmi + # - sel + # custom_args: + # ipmi: + # - "--bridge-sensors" diff --git a/roles/monitoring/prometheus/exporter/ipmi/handlers/main.yml b/roles/monitoring/prometheus/exporter/ipmi/handlers/main.yml new file mode 100644 index 00000000..40a945ae --- /dev/null +++ b/roles/monitoring/prometheus/exporter/ipmi/handlers/main.yml @@ -0,0 +1,16 @@ +--- +- name: restart prometheus-ipmi-exporter + service: + name: prometheus-ipmi-exporter + state: restarted + +- name: reload prometheus-ipmi-exporter + service: + name: prometheus-ipmi-exporter + state: reloaded + +- name: reload prometheus-exporter-exporter + service: + name: prometheus-exporter-exporter + ## TODO: implement reload once exporter_exporter supports this... 
+ state: restarted diff --git a/roles/monitoring/prometheus/exporter/ipmi/tasks/main.yml b/roles/monitoring/prometheus/exporter/ipmi/tasks/main.yml new file mode 100644 index 00000000..9e63f692 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/ipmi/tasks/main.yml @@ -0,0 +1,47 @@ +--- + ## TODO: pin version +- name: install apt packages + apt: + name: prom-exporter-ipmi + state: present + +- name: create config directory + file: + path: /etc/prometheus/exporter/ipmi + state: directory + +- name: generate configuration + template: + src: config.yml.j2 + dest: /etc/prometheus/exporter/ipmi/config.yml + notify: reload prometheus-ipmi-exporter + +- name: generate systemd service unit + template: + src: service.j2 + dest: /etc/systemd/system/prometheus-ipmi-exporter.service + notify: restart prometheus-ipmi-exporter + +- name: make sure prometheus-ipmi-exporter is enabled and started + systemd: + name: prometheus-ipmi-exporter.service + daemon_reload: yes + state: started + enabled: yes + +- name: register exporter + loop: + - name: local + path: /metrics + - name: remote + path: /ipmi + loop_control: + label: "{{ item.name }}" + copy: + content: | + method: http + http: + port: 9290 + path: {{ item.path }} + dest: "/etc/prometheus/exporter/exporter/ipmi-{{ item.name }}.yml" + notify: reload prometheus-exporter-exporter diff --git a/roles/monitoring/prometheus/exporter/ipmi/templates/config.yml.j2 b/roles/monitoring/prometheus/exporter/ipmi/templates/config.yml.j2 new file mode 100644 index 00000000..32d0b34a --- /dev/null +++ b/roles/monitoring/prometheus/exporter/ipmi/templates/config.yml.j2 @@ -0,0 +1,4 @@ +# {{ ansible_managed }} + +modules: + {{ prometheus_exporter_ipmi_modules | to_nice_yaml(indent=2) | indent(2) }} diff --git a/roles/monitoring/prometheus/exporter/ipmi/templates/service.j2 b/roles/monitoring/prometheus/exporter/ipmi/templates/service.j2 new file mode 100644 index 00000000..465215e8 --- /dev/null +++ 
b/roles/monitoring/prometheus/exporter/ipmi/templates/service.j2 @@ -0,0 +1,32 @@ +[Unit] +Description=Prometheus ipmi exporter + +[Service] +Restart=always +User=prometheus-exporter +ExecStart=/usr/bin/prometheus-ipmi-exporter --web.listen-address="127.0.0.1:9290" --config.file=/etc/prometheus/exporter/ipmi/config.yml --freeipmi.path="/usr/sbin" +ExecReload=/bin/kill -HUP $MAINPID + +{# TODO: test which hardening options need to be removed for IPMI to work... #} +# systemd hardening-options +AmbientCapabilities= +CapabilityBoundingSet= +DeviceAllow=/dev/null rw +DevicePolicy=strict +LockPersonality=true +MemoryDenyWriteExecute=true +NoNewPrivileges=true +PrivateDevices=true +PrivateTmp=true +ProtectControlGroups=true +ProtectHome=true +ProtectKernelModules=true +ProtectKernelTunables=true +ProtectSystem=strict +RemoveIPC=true +RestrictNamespaces=true +RestrictRealtime=true +SystemCallArchitectures=native + +[Install] +WantedBy=multi-user.target -- cgit v1.2.3 From 4921bb0dc32811aa40cf07ec8ad83f6f197ada0e Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sat, 25 Sep 2021 19:11:28 +0200 Subject: disabling smartmon textfile collector by default since this can lead to idempotence issues with systemd that don't have smartcl installed --- roles/monitoring/prometheus/exporter/node/defaults/main.yml | 1 - .../prometheus/exporter/node/tasks/textfile_collector_script.yml | 3 +++ .../node/templates/textfile-collector-scripts/smartmon.timer.j2 | 4 ---- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'roles/monitoring') diff --git a/roles/monitoring/prometheus/exporter/node/defaults/main.yml b/roles/monitoring/prometheus/exporter/node/defaults/main.yml index 870753c3..9309562f 100644 --- a/roles/monitoring/prometheus/exporter/node/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/node/defaults/main.yml @@ -14,4 +14,3 @@ prometheus_exporter_node_extra_collectors: prometheus_exporter_node_textfile_collector_scripts: - deleted-libraries - - smartmon 
diff --git a/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml b/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml index 5c068fe7..80390a15 100644 --- a/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml +++ b/roles/monitoring/prometheus/exporter/node/tasks/textfile_collector_script.yml @@ -19,3 +19,6 @@ name: "prometheus-node-exporter_{{ textfile_collector_name }}.timer" state: started enabled: yes + + +## TODO: install deps for textfile collectors: i.e. smartmontools for collector smartmon diff --git a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.timer.j2 b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.timer.j2 index 438da6b0..576f5a9f 100644 --- a/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.timer.j2 +++ b/roles/monitoring/prometheus/exporter/node/templates/textfile-collector-scripts/smartmon.timer.j2 @@ -1,9 +1,5 @@ [Unit] Description=Promethues node exporter textfile collector smartmon -ConditionPathExists=/usr/sbin/smartctl -ConditionPathExistsGlob=|/dev/sd* -ConditionPathExistsGlob=|/dev/hd* -ConditionPathExistsGlob=|/dev/nvme* [Timer] OnBootSec=30s -- cgit v1.2.3 From 063bdb70a8e8353908ca9742e05be8fac65a61bf Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sat, 25 Sep 2021 23:36:40 +0200 Subject: move away from exporter-exporter in favor for nginx --- chaos-at-home/ch-testvm-prometheus.yml | 1 + inventory/host_vars/ch-testvm-prometheus.yml | 3 ++ .../prometheus/exporter/base/defaults/main.yml | 2 +- .../prometheus/exporter/base/handlers/main.yml | 6 ++-- .../prometheus/exporter/base/tasks/main.yml | 30 +++++--------------- .../exporter/base/templates/nginx-vhost.j2 | 19 +++++++++++++ .../prometheus/exporter/base/templates/service.j2 | 32 ---------------------- .../prometheus/exporter/blackbox/handlers/main.yml | 7 ++--- 
.../prometheus/exporter/blackbox/tasks/main.yml | 11 ++++---- .../prometheus/exporter/ipmi/handlers/main.yml | 7 ++--- .../prometheus/exporter/ipmi/tasks/main.yml | 21 ++++++-------- roles/monitoring/prometheus/exporter/meta/main.yml | 10 ++++--- .../prometheus/exporter/mikrotik/handlers/main.yml | 7 ++--- .../prometheus/exporter/mikrotik/tasks/main.yml | 10 +++---- .../prometheus/exporter/node/handlers/main.yml | 7 ++--- .../prometheus/exporter/node/tasks/main.yml | 10 +++---- .../prometheus/exporter/nut/handlers/main.yml | 7 ++--- .../prometheus/exporter/nut/tasks/main.yml | 14 ++++++---- .../server/templates/jobs/blackbox/https.j2 | 3 +- .../server/templates/jobs/blackbox/ping.j2 | 3 +- .../server/templates/jobs/blackbox/ssh.j2 | 3 +- .../prometheus/server/templates/jobs/generic.j2 | 5 +--- .../prometheus/server/templates/jobs/node.j2 | 5 +--- .../prometheus/server/templates/jobs/nut/ups.j2 | 5 +--- 24 files changed, 92 insertions(+), 136 deletions(-) create mode 100644 roles/monitoring/prometheus/exporter/base/templates/nginx-vhost.j2 delete mode 100644 roles/monitoring/prometheus/exporter/base/templates/service.j2 (limited to 'roles/monitoring') diff --git a/chaos-at-home/ch-testvm-prometheus.yml b/chaos-at-home/ch-testvm-prometheus.yml index 3fd99d41..c0f33b8f 100644 --- a/chaos-at-home/ch-testvm-prometheus.yml +++ b/chaos-at-home/ch-testvm-prometheus.yml @@ -7,6 +7,7 @@ - role: core/sshd/base - role: core/zsh - role: core/ntp + - role: nginx/base - role: apt-repo/spreadspace - role: monitoring/prometheus/exporter # - role: kubernetes/base diff --git a/inventory/host_vars/ch-testvm-prometheus.yml b/inventory/host_vars/ch-testvm-prometheus.yml index e539735f..939fa398 100644 --- a/inventory/host_vars/ch-testvm-prometheus.yml +++ b/inventory/host_vars/ch-testvm-prometheus.yml @@ -36,6 +36,9 @@ network: spreadspace_apt_repo_components: - prometheus +prometheus_exporters_extra: + - ipmi + containerd_storage: type: lvm diff --git 
a/roles/monitoring/prometheus/exporter/base/defaults/main.yml b/roles/monitoring/prometheus/exporter/base/defaults/main.yml index 963763a5..613943d8 100644 --- a/roles/monitoring/prometheus/exporter/base/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/base/defaults/main.yml @@ -1,2 +1,2 @@ --- -prometheus_exporter_listen: ":9999" +prometheus_exporter_listen: "9999" diff --git a/roles/monitoring/prometheus/exporter/base/handlers/main.yml b/roles/monitoring/prometheus/exporter/base/handlers/main.yml index ebd760cf..d4e42ca0 100644 --- a/roles/monitoring/prometheus/exporter/base/handlers/main.yml +++ b/roles/monitoring/prometheus/exporter/base/handlers/main.yml @@ -1,5 +1,5 @@ --- -- name: restart prometheus-exporter-exporter +- name: reload nginx service: - name: prometheus-exporter-exporter - state: restarted + name: nginx + state: reloaded diff --git a/roles/monitoring/prometheus/exporter/base/tasks/main.yml b/roles/monitoring/prometheus/exporter/base/tasks/main.yml index eeb2a23d..5f42867d 100644 --- a/roles/monitoring/prometheus/exporter/base/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/base/tasks/main.yml @@ -6,17 +6,6 @@ - spreadspace_apt_repo_components is defined - "'prometheus' in spreadspace_apt_repo_components" - ## TODO: pin version -- name: install apt packages - apt: - name: prom-exporter-exporter - state: present - -- name: create configuration directories - file: - path: /etc/prometheus/exporter/exporter - state: directory - - name: add user for prometheus-exporter user: name: prometheus-exporter @@ -27,15 +16,10 @@ - name: create TLS certificate and key import_tasks: tls.yml -- name: generate systemd service unit - template: - src: service.j2 - dest: /etc/systemd/system/prometheus-exporter-exporter.service - notify: restart prometheus-exporter-exporter - -- name: make sure prometheus-exporter-exporter is enabled and started - systemd: - name: prometheus-exporter-exporter.service - daemon_reload: yes - state: started - 
enabled: yes +- name: configure nginx vhost + import_role: + name: nginx/vhost + vars: + nginx_vhost: + name: prometheus-exporter + content: "{{ lookup('template', 'nginx-vhost.j2') }}" diff --git a/roles/monitoring/prometheus/exporter/base/templates/nginx-vhost.j2 b/roles/monitoring/prometheus/exporter/base/templates/nginx-vhost.j2 new file mode 100644 index 00000000..70e65b29 --- /dev/null +++ b/roles/monitoring/prometheus/exporter/base/templates/nginx-vhost.j2 @@ -0,0 +1,19 @@ +server { + listen {{ prometheus_exporter_listen }} ssl; + server_name _; + + ssl_certificate /etc/ssl/prometheus/exporter/crt.pem; + ssl_certificate_key /etc/ssl/prometheus/exporter/key.pem; + ssl_client_certificate /etc/ssl/prometheus/ca-crt.pem; + ssl_verify_client on; + + root /nonexistent; + + location = / { + return 404 'please specify the exporter you want to reach!'; + } + + include snippets/proxy-nobuff.conf; + + include /etc/prometheus/exporter/*.locations; +} diff --git a/roles/monitoring/prometheus/exporter/base/templates/service.j2 b/roles/monitoring/prometheus/exporter/base/templates/service.j2 deleted file mode 100644 index 3d44744a..00000000 --- a/roles/monitoring/prometheus/exporter/base/templates/service.j2 +++ /dev/null @@ -1,32 +0,0 @@ -[Unit] -Description=Prometheus exporter proxy - -[Service] -Restart=always -User=prometheus-exporter -ExecStart=/usr/bin/prometheus-exporter-exporter -config.dirs=/etc/prometheus/exporter/exporter -config.file="" -web.listen-address="" -web.tls.listen-address="{{ prometheus_exporter_listen }}" -web.tls.cert="/etc/ssl/prometheus/exporter/crt.pem" -web.tls.key="/etc/ssl/prometheus/exporter/key.pem" --web.tls.ca="/etc/ssl/prometheus/ca-crt.pem" -web.tls.verify -{# TODO: implement reloading once the exporter_exporter supports this #} - -# systemd hardening-options -AmbientCapabilities= -CapabilityBoundingSet= -DeviceAllow=/dev/null rw -DevicePolicy=strict -LockPersonality=true -MemoryDenyWriteExecute=true -NoNewPrivileges=true 
-PrivateDevices=true -PrivateTmp=true -PrivateUsers=true -ProtectControlGroups=true -ProtectHome=true -ProtectKernelModules=true -ProtectKernelTunables=true -ProtectSystem=strict -RemoveIPC=true -RestrictNamespaces=true -RestrictRealtime=true -SystemCallArchitectures=native - -[Install] -WantedBy=multi-user.target diff --git a/roles/monitoring/prometheus/exporter/blackbox/handlers/main.yml b/roles/monitoring/prometheus/exporter/blackbox/handlers/main.yml index 99a416e2..12250769 100644 --- a/roles/monitoring/prometheus/exporter/blackbox/handlers/main.yml +++ b/roles/monitoring/prometheus/exporter/blackbox/handlers/main.yml @@ -9,8 +9,7 @@ name: prometheus-blackbox-exporter state: reloaded -- name: reload prometheus-exporter-exporter +- name: reload nginx service: - name: prometheus-exporter-exporter - ## TODO: implement reload once exporter_exporter supports this... - state: restarted + name: nginx + state: reloaded diff --git a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml index 782c3561..f9793df6 100644 --- a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml @@ -32,9 +32,8 @@ - name: register exporter copy: content: | - method: http - http: - port: 9115 - path: /probe - dest: /etc/prometheus/exporter/exporter/blackbox.yml - notify: reload prometheus-exporter-exporter + location = /blackbox { + proxy_pass http://127.0.0.1:9115/probe; + } + dest: /etc/prometheus/exporter/blackbox.locations + notify: reload nginx diff --git a/roles/monitoring/prometheus/exporter/ipmi/handlers/main.yml b/roles/monitoring/prometheus/exporter/ipmi/handlers/main.yml index 40a945ae..a8eb55b3 100644 --- a/roles/monitoring/prometheus/exporter/ipmi/handlers/main.yml +++ b/roles/monitoring/prometheus/exporter/ipmi/handlers/main.yml @@ -9,8 +9,7 @@ name: prometheus-ipmi-exporter state: reloaded -- name: reload prometheus-exporter-exporter 
+- name: reload nginx service: - name: prometheus-exporter-exporter - ## TODO: implement reload once exporter_exporter supports this... - state: restarted + name: nginx + state: reloaded diff --git a/roles/monitoring/prometheus/exporter/ipmi/tasks/main.yml b/roles/monitoring/prometheus/exporter/ipmi/tasks/main.yml index 9e63f692..91318f16 100644 --- a/roles/monitoring/prometheus/exporter/ipmi/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/ipmi/tasks/main.yml @@ -30,18 +30,13 @@ enabled: yes - name: register exporter - loop: - - name: local - path: /metrics - - name: remote - path: /ipmi - loop_control: - label: "{{ item.name }}" copy: content: | - method: http - http: - port: 9290 - path: {{ item.path }} - dest: "/etc/prometheus/exporter/exporter/ipmi-{{ item.name }}.yml" - notify: reload prometheus-exporter-exporter + location = /ipmi { + proxy_pass http://127.0.0.1:9290/metrics; + } + location = /ipmi/remote { + proxy_pass http://127.0.0.1:9290/ipmi; + } + dest: /etc/prometheus/exporter/ipmi.locations + notify: reload nginx diff --git a/roles/monitoring/prometheus/exporter/meta/main.yml b/roles/monitoring/prometheus/exporter/meta/main.yml index 22131422..68fce6cb 100644 --- a/roles/monitoring/prometheus/exporter/meta/main.yml +++ b/roles/monitoring/prometheus/exporter/meta/main.yml @@ -1,11 +1,13 @@ --- dependencies: - role: monitoring/prometheus/exporter/base - - role: monitoring/prometheus/exporter/node - when: "'node' in (prometheus_exporters_default | union(prometheus_exporters_extra))" - role: monitoring/prometheus/exporter/blackbox when: "'blackbox' in (prometheus_exporters_default | union(prometheus_exporters_extra))" - - role: monitoring/prometheus/exporter/nut - when: "'nut' in (prometheus_exporters_default | union(prometheus_exporters_extra))" + - role: monitoring/prometheus/exporter/ipmi + when: "'ipmi' in (prometheus_exporters_default | union(prometheus_exporters_extra))" - role: monitoring/prometheus/exporter/mikrotik when: "'mikrotik' in 
(prometheus_exporters_default | union(prometheus_exporters_extra))" + - role: monitoring/prometheus/exporter/node + when: "'node' in (prometheus_exporters_default | union(prometheus_exporters_extra))" + - role: monitoring/prometheus/exporter/nut + when: "'nut' in (prometheus_exporters_default | union(prometheus_exporters_extra))" diff --git a/roles/monitoring/prometheus/exporter/mikrotik/handlers/main.yml b/roles/monitoring/prometheus/exporter/mikrotik/handlers/main.yml index cb85d0d9..c5844220 100644 --- a/roles/monitoring/prometheus/exporter/mikrotik/handlers/main.yml +++ b/roles/monitoring/prometheus/exporter/mikrotik/handlers/main.yml @@ -4,8 +4,7 @@ name: prometheus-mikrotik-exporter state: restarted -- name: reload prometheus-exporter-exporter +- name: reload nginx service: - name: prometheus-exporter-exporter - ## TODO: implement reload once exporter_exporter supports this... - state: restarted + name: nginx + state: reloaded diff --git a/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml b/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml index 07219c68..72c78e4a 100644 --- a/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/mikrotik/tasks/main.yml @@ -35,8 +35,8 @@ - name: register exporter copy: content: | - method: http - http: - port: 9436 - dest: /etc/prometheus/exporter/exporter/mikrotik.yml - notify: reload prometheus-exporter-exporter + location = /mikrotik { + proxy_pass http://127.0.0.1:9436/metrics; + } + dest: /etc/prometheus/exporter/mikrotik.locations + notify: reload nginx diff --git a/roles/monitoring/prometheus/exporter/node/handlers/main.yml b/roles/monitoring/prometheus/exporter/node/handlers/main.yml index 3e1b2000..56056ea6 100644 --- a/roles/monitoring/prometheus/exporter/node/handlers/main.yml +++ b/roles/monitoring/prometheus/exporter/node/handlers/main.yml @@ -4,8 +4,7 @@ name: prometheus-node-exporter state: restarted -- name: reload 
prometheus-exporter-exporter +- name: reload nginx service: - name: prometheus-exporter-exporter - ## TODO: implement reload once exporter_exporter supports this... - state: restarted + name: nginx + state: reloaded diff --git a/roles/monitoring/prometheus/exporter/node/tasks/main.yml b/roles/monitoring/prometheus/exporter/node/tasks/main.yml index 56903a33..2811c759 100644 --- a/roles/monitoring/prometheus/exporter/node/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/node/tasks/main.yml @@ -28,11 +28,11 @@ - name: register exporter copy: content: | - method: http - http: - port: 9100 - dest: /etc/prometheus/exporter/exporter/node.yml - notify: reload prometheus-exporter-exporter + location = /node { + proxy_pass http://127.0.0.1:9100/metrics; + } + dest: /etc/prometheus/exporter/node.locations + notify: reload nginx - name: create directory for textfile collector scripts file: diff --git a/roles/monitoring/prometheus/exporter/nut/handlers/main.yml b/roles/monitoring/prometheus/exporter/nut/handlers/main.yml index 6e10f43b..edd87ed5 100644 --- a/roles/monitoring/prometheus/exporter/nut/handlers/main.yml +++ b/roles/monitoring/prometheus/exporter/nut/handlers/main.yml @@ -4,8 +4,7 @@ name: prometheus-nut-exporter state: restarted -- name: reload prometheus-exporter-exporter +- name: reload nginx service: - name: prometheus-exporter-exporter - ## TODO: implement reload once exporter_exporter supports this...
- state: restarted + name: nginx + state: reloaded diff --git a/roles/monitoring/prometheus/exporter/nut/tasks/main.yml b/roles/monitoring/prometheus/exporter/nut/tasks/main.yml index 8245feae..f602472d 100644 --- a/roles/monitoring/prometheus/exporter/nut/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/nut/tasks/main.yml @@ -21,9 +21,11 @@ - name: register exporter copy: content: | - method: http - http: - port: 9199 - path: /ups_metrics - dest: /etc/prometheus/exporter/exporter/nut.yml - notify: reload prometheus-exporter-exporter + location = /nut { + proxy_pass http://127.0.0.1:9199/metrics; + } + location = /nut/ups { + proxy_pass http://127.0.0.1:9199/ups_metrics; + } + dest: /etc/prometheus/exporter/nut.locations + notify: reload nginx diff --git a/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 b/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 index 98a64121..86ff88dd 100644 --- a/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 +++ b/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 @@ -1,8 +1,7 @@ - job_name: '{{ job }}' - metrics_path: /proxy + metrics_path: /blackbox params: module: - - blackbox - http_tls_2xx scheme: https tls_config: diff --git a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 index 736ffec1..2d3889d2 100644 --- a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 +++ b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 @@ -1,8 +1,7 @@ - job_name: '{{ job }}' - metrics_path: /proxy + metrics_path: /blackbox params: module: - - blackbox - icmp scheme: https tls_config: diff --git a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 index 166f37ad..97565673 100644 --- a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 +++ 
b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 @@ -1,8 +1,7 @@ - job_name: '{{ job }}' - metrics_path: /proxy + metrics_path: /blackbox params: module: - - blackbox - ssh_banner scheme: https tls_config: diff --git a/roles/monitoring/prometheus/server/templates/jobs/generic.j2 b/roles/monitoring/prometheus/server/templates/jobs/generic.j2 index b155c5f7..65a95007 100644 --- a/roles/monitoring/prometheus/server/templates/jobs/generic.j2 +++ b/roles/monitoring/prometheus/server/templates/jobs/generic.j2 @@ -1,8 +1,5 @@ - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - {{ job }} + metrics_path: /{{ job }} scheme: https tls_config: ca_file: /etc/ssl/prometheus/ca-crt.pem diff --git a/roles/monitoring/prometheus/server/templates/jobs/node.j2 b/roles/monitoring/prometheus/server/templates/jobs/node.j2 index ba9eab31..1b14e1f6 100644 --- a/roles/monitoring/prometheus/server/templates/jobs/node.j2 +++ b/roles/monitoring/prometheus/server/templates/jobs/node.j2 @@ -1,8 +1,5 @@ - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - {{ job }} + metrics_path: /{{ job }} scheme: https tls_config: ca_file: /etc/ssl/prometheus/ca-crt.pem diff --git a/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 b/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 index 3a2c5c62..0cf4ae4e 100644 --- a/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 +++ b/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 @@ -1,8 +1,5 @@ - job_name: '{{ job }}' - metrics_path: /proxy - params: - module: - - nut + metrics_path: /nut/ups scheme: https tls_config: ca_file: /etc/ssl/prometheus/ca-crt.pem -- cgit v1.2.3 From cc89d6d4211aa5aec8e5bef8c854d4929c337887 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sun, 26 Sep 2021 03:32:47 +0200 Subject: improved promethues multitarget support --- inventory/group_vars/chaos-at-home-ups/vars.yml | 6 +- inventory/group_vars/ele-ups/vars.yml | 10 +-- 
.../group_vars/promzone-chaos-at-home/vars.yml | 6 +- .../group_vars/promzone-elevate-festival/vars.yml | 1 + inventory/host_vars/ch-mon.yml | 39 ++++++------ .../prometheus/exporter/blackbox/tasks/main.yml | 3 + .../prometheus/server/defaults/main/main.yml | 4 +- .../server/defaults/main/rules_blackbox.yml | 46 +------------- .../server/defaults/main/rules_blackbox__https.yml | 20 ------ .../server/defaults/main/rules_blackbox__ping.yml | 11 ---- .../server/defaults/main/rules_blackbox__probe.yml | 74 ++++++++++++++++++++++ .../server/defaults/main/rules_blackbox__ssh.yml | 3 - .../prometheus/server/filter_plugins/prometheus.py | 10 +-- roles/monitoring/prometheus/server/tasks/main.yml | 2 +- .../server/templates/jobs/blackbox/https.j2 | 13 ---- .../server/templates/jobs/blackbox/ping.j2 | 13 ---- .../server/templates/jobs/blackbox/ssh.j2 | 13 ---- .../prometheus/server/templates/jobs/nut/ups.j2 | 10 --- .../server/templates/targets/blackbox/https.yml.j2 | 4 -- .../server/templates/targets/blackbox/ping.yml.j2 | 4 -- .../server/templates/targets/blackbox/probe.yml.j2 | 5 ++ .../server/templates/targets/blackbox/ssh.yml.j2 | 4 -- .../server/templates/targets/nut/ups.yml.j2 | 2 +- 23 files changed, 123 insertions(+), 180 deletions(-) delete mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml delete mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox__probe.yml delete mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml delete mode 100644 roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 delete mode 100644 
roles/monitoring/prometheus/server/templates/targets/blackbox/https.yml.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/targets/blackbox/ping.yml.j2 create mode 100644 roles/monitoring/prometheus/server/templates/targets/blackbox/probe.yml.j2 delete mode 100644 roles/monitoring/prometheus/server/templates/targets/blackbox/ssh.yml.j2 (limited to 'roles/monitoring') diff --git a/inventory/group_vars/chaos-at-home-ups/vars.yml b/inventory/group_vars/chaos-at-home-ups/vars.yml index f8c1bdf1..7b60e893 100644 --- a/inventory/group_vars/chaos-at-home-ups/vars.yml +++ b/inventory/group_vars/chaos-at-home-ups/vars.yml @@ -11,8 +11,8 @@ prometheus_scrape_endpoint: "{{ network_mgmt_zone.prefix | ipaddr(network_mgmt_z prometheus_exporters_default: - openwrt -prometheus_job_nut__ups: - - exporter_hostname: ch-mon - instance: "ups-{{ ups_name }}" +prometheus_job_multitarget_nut__ups: + ch-mon: + - instance: "ups-{{ ups_name }}" ups: "{{ ups_name }}" server: "{{ network_mgmt_zone.prefix | ipaddr(network_mgmt_zone.offsets[inventory_hostname]) | ipaddr('address') }}" diff --git a/inventory/group_vars/ele-ups/vars.yml b/inventory/group_vars/ele-ups/vars.yml index 1c4613a3..28a5eaff 100644 --- a/inventory/group_vars/ele-ups/vars.yml +++ b/inventory/group_vars/ele-ups/vars.yml @@ -14,8 +14,8 @@ prometheus_scrape_endpoint: "{{ network_mgmt_zone.prefix | ipaddr(network_mgmt_z prometheus_exporters_default: - openwrt -prometheus_job_nut__ups: - exporter_hostname: ele-mon - instance: "ups-{{ ups_name }}" - ups: "{{ ups_name }}" - server: "{{ network_mgmt_zone.prefix | ipaddr(network_mgmt_zone.offsets[inventory_hostname]) | ipaddr('address') }}" +prometheus_job_multitarget_nut__ups: + ele-mon: + - instance: "ups-{{ ups_name }}" + ups: "{{ ups_name }}" + server: "{{ network_mgmt_zone.prefix | ipaddr(network_mgmt_zone.offsets[inventory_hostname]) | ipaddr('address') }}" diff --git a/inventory/group_vars/promzone-chaos-at-home/vars.yml 
b/inventory/group_vars/promzone-chaos-at-home/vars.yml index 84ed1263..529bf3e7 100644 --- a/inventory/group_vars/promzone-chaos-at-home/vars.yml +++ b/inventory/group_vars/promzone-chaos-at-home/vars.yml @@ -9,10 +9,10 @@ prometheus_server: ch-mon prometheus_server_jobs: - node - openwrt + - nut - nut/ups - - blackbox/ping - - blackbox/https - - blackbox/ssh + - blackbox + - blackbox/probe prometheus_zone_name: chaos@home prometheus_zone_targets: "{{ groups['promzone-chaos-at-home'] }}" diff --git a/inventory/group_vars/promzone-elevate-festival/vars.yml b/inventory/group_vars/promzone-elevate-festival/vars.yml index 43115dc4..b3321614 100644 --- a/inventory/group_vars/promzone-elevate-festival/vars.yml +++ b/inventory/group_vars/promzone-elevate-festival/vars.yml @@ -9,6 +9,7 @@ prometheus_server: ele-mon prometheus_server_jobs: - node - openwrt + - nut - nut/ups prometheus_zone_name: Elevate Festival diff --git a/inventory/host_vars/ch-mon.yml b/inventory/host_vars/ch-mon.yml index d1a710b9..b2402d0c 100644 --- a/inventory/host_vars/ch-mon.yml +++ b/inventory/host_vars/ch-mon.yml @@ -76,26 +76,25 @@ prometheus_exporter_blackbox_modules_extra: icmp: prober: icmp -prometheus_job_blackbox__ping: - - exporter_hostname: ch-mon - instance: "ping-magentagw" - address: 62.99.185.129 - - exporter_hostname: ch-mon - instance: "ping-quad9" - address: 9.9.9.9 - -prometheus_job_blackbox__https: - - exporter_hostname: ch-mon - instance: "https-pan.chaos-at-home.org" - address: "https://pan.chaos-at-home.org" - - exporter_hostname: ch-mon - instance: "https-mimas.chaos-at-home.org" - address: "https://mimas.chaos-at-home.org" - -prometheus_job_blackbox__ssh: - - exporter_hostname: ch-mon - instance: "ssh-{{ inventory_hostname }}" - address: "{{ network_zones.svc.prefix | ipaddr(network_zones.svc.offsets[inventory_hostname]) | ipaddr('address') }}:{{ ansible_port | default(22) }}" +prometheus_job_multitarget_blackbox__probe: + ch-mon: + - instance: "ping-magentagw" + target: 
62.99.185.129 + module: icmp + - instance: "ping-quad9" + target: 9.9.9.9 + module: icmp + + - instance: "https-pan.chaos-at-home.org" + target: "https://pan.chaos-at-home.org" + module: http_tls_2xx + - instance: "https-mimas.chaos-at-home.org" + target: "https://mimas.chaos-at-home.org" + module: http_tls_2xx + + - instance: "ssh-{{ inventory_hostname }}" + target: "{{ network_zones.svc.prefix | ipaddr(network_zones.svc.offsets[inventory_hostname]) | ipaddr('address') }}:{{ ansible_port | default(22) }}" + module: ssh_banner promethues_alertmanager_smtp: diff --git a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml index f9793df6..c4cabfce 100644 --- a/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml +++ b/roles/monitoring/prometheus/exporter/blackbox/tasks/main.yml @@ -33,6 +33,9 @@ copy: content: | location = /blackbox { + proxy_pass http://127.0.0.1:9115/metrics; + } + location = /blackbox/probe { proxy_pass http://127.0.0.1:9115/probe; } dest: /etc/prometheus/exporter/blackbox.locations diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml index 09cd150c..7781fd69 100644 --- a/roles/monitoring/prometheus/server/defaults/main/main.yml +++ b/roles/monitoring/prometheus/server/defaults/main/main.yml @@ -17,9 +17,7 @@ prometheus_server_rules: nut: "{{ prometheus_server_rules_nut + prometheus_server_rules_nut_extra }}" nut/ups: "{{ prometheus_server_rules_nut__ups + prometheus_server_rules_nut__ups_extra }}" blackbox: "{{ prometheus_server_rules_blackbox + prometheus_server_rules_blackbox_extra }}" - blackbox/ping: "{{ prometheus_server_rules_blackbox__ping + prometheus_server_rules_blackbox__ping_extra }}" - blackbox/https: "{{ prometheus_server_rules_blackbox__https + prometheus_server_rules_blackbox__https_extra }}" - blackbox/ssh: "{{ prometheus_server_rules_blackbox__ssh + 
prometheus_server_rules_blackbox__ssh_extra }}" + blackbox/probe: "{{ prometheus_server_rules_blackbox__probe + prometheus_server_rules_blackbox__probe_extra }}" # prometheus_server_alertmanager: # url: "127.0.0.1:9093" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml index d5c1fd42..99f2e83c 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox.yml @@ -1,47 +1,3 @@ --- prometheus_server_rules_blackbox_extra: [] -prometheus_server_rules_blackbox: - - alert: BlackboxProbeFailed - expr: probe_success == 0 - for: 0m - labels: - severity: critical - annotations: - summary: Blackbox probe failed (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "Probe failed\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - - - alert: BlackboxSlowProbe - expr: avg_over_time(probe_duration_seconds[1m]) > 1 - for: 1m - labels: - severity: warning - annotations: - summary: Blackbox slow probe (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "Blackbox probe took more than 1s to complete\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - - - alert: BlackboxSslCertificateWillExpireSoon - expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30 - for: 0m - labels: - severity: warning - annotations: - summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "SSL certificate expires in 30 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - - - alert: BlackboxSslCertificateWillExpireSoon - expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 3 - for: 0m - labels: - severity: critical - annotations: - summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }}) - 
description: "SSL certificate expires in 3 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - - - alert: BlackboxSslCertificateExpired - expr: probe_ssl_earliest_cert_expiry - time() <= 0 - for: 0m - labels: - severity: critical - annotations: - summary: Blackbox SSL certificate expired (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "SSL certificate has expired already\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" +prometheus_server_rules_blackbox: [] diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml deleted file mode 100644 index 140e3b4f..00000000 --- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__https.yml +++ /dev/null @@ -1,20 +0,0 @@ ---- -prometheus_server_rules_blackbox__https_extra: [] -prometheus_server_rules_blackbox__https: - - alert: BlackboxProbeHttpFailure - expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400 - for: 0m - labels: - severity: critical - annotations: - summary: Blackbox probe HTTP failure (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "HTTP status code is not 200-399\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - - - alert: BlackboxProbeSlowHttp - expr: avg_over_time(probe_http_duration_seconds[1m]) > 1 - for: 1m - labels: - severity: warning - annotations: - summary: Blackbox probe slow HTTP (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "HTTP request took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml deleted file mode 100644 index cc87b6b1..00000000 --- 
a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ping.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- -prometheus_server_rules_blackbox__ping_extra: [] -prometheus_server_rules_blackbox__ping: - - alert: BlackboxProbeSlowPing - expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1 - for: 1m - labels: - severity: warning - annotations: - summary: Blackbox probe slow ping (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "Blackbox ping took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__probe.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__probe.yml new file mode 100644 index 00000000..9f9d2292 --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__probe.yml @@ -0,0 +1,74 @@ +--- +prometheus_server_rules_blackbox__probe_extra: [] +prometheus_server_rules_blackbox__probe: + - alert: BlackboxProbeFailed + expr: probe_success == 0 + for: 0m + labels: + severity: critical + annotations: + summary: Blackbox probe failed (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Probe failed\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxSlowProbe + expr: avg_over_time(probe_duration_seconds[1m]) > 1 + for: 1m + labels: + severity: warning + annotations: + summary: Blackbox slow probe (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Blackbox probe took more than 1s to complete\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxSslCertificateWillExpireSoon + expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30 + for: 0m + labels: + severity: warning + annotations: + summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "SSL certificate expires in 30 days\n VALUE = {{ '{{' }} 
$value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxSslCertificateWillExpireSoon + expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 3 + for: 0m + labels: + severity: critical + annotations: + summary: Blackbox SSL certificate will expire soon (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "SSL certificate expires in 3 days\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxSslCertificateExpired + expr: probe_ssl_earliest_cert_expiry - time() <= 0 + for: 0m + labels: + severity: critical + annotations: + summary: Blackbox SSL certificate expired (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "SSL certificate has expired already\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxProbeHttpFailure + expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400 + for: 0m + labels: + severity: critical + annotations: + summary: Blackbox probe HTTP failure (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "HTTP status code is not 200-399\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxProbeSlowHttp + expr: avg_over_time(probe_http_duration_seconds[1m]) > 1 + for: 1m + labels: + severity: warning + annotations: + summary: Blackbox probe slow HTTP (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "HTTP request took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + + - alert: BlackboxProbeSlowPing + expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1 + for: 1m + labels: + severity: warning + annotations: + summary: Blackbox probe slow ping (instance {{ '{{' }} $labels.instance {{ '}}' }}) + description: "Blackbox ping took more than 1s\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" diff --git 
a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml b/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml deleted file mode 100644 index 8e717c41..00000000 --- a/roles/monitoring/prometheus/server/defaults/main/rules_blackbox__ssh.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -prometheus_server_rules_blackbox__ssh_extra: [] -prometheus_server_rules_blackbox__ssh: [] diff --git a/roles/monitoring/prometheus/server/filter_plugins/prometheus.py b/roles/monitoring/prometheus/server/filter_plugins/prometheus.py index 1443e837..d91ef619 100644 --- a/roles/monitoring/prometheus/server/filter_plugins/prometheus.py +++ b/roles/monitoring/prometheus/server/filter_plugins/prometheus.py @@ -11,10 +11,12 @@ def prometheus_job_targets(hostvars, jobs, targets): result = [] for job in jobs: for target in targets: - special_config_varname = 'prometheus_job_' + job.replace('-', '_').replace('/', '__') - if special_config_varname in hostvars[target]: - for config in hostvars[target][special_config_varname]: - result.append({'job': job, 'instance': config['instance'], 'config': config, 'enabled': True}) + multitarget_config_varname = 'prometheus_job_multitarget_' + job.replace('-', '_').replace('/', '__') + if multitarget_config_varname in hostvars[target]: + for exporter_hostname, configs in hostvars[target][multitarget_config_varname].items(): + for config in configs: + result.append({'job': job, 'instance': config['instance'], 'enabled': True, + 'exporter_hostname': exporter_hostname, 'config': config}) else: enabled = job in hostvars[target]['prometheus_exporters_default'] or job in hostvars[target]['prometheus_exporters_extra'] diff --git a/roles/monitoring/prometheus/server/tasks/main.yml b/roles/monitoring/prometheus/server/tasks/main.yml index c0928cc3..16167c9c 100644 --- a/roles/monitoring/prometheus/server/tasks/main.yml +++ b/roles/monitoring/prometheus/server/tasks/main.yml @@ -83,7 +83,7 @@ state: directory - name: generate rules 
files for all jobs - loop: "{{ prometheus_server_jobs | union(['prometheus']) | union(prometheus_server_jobs | select('match', '.*/.*') | map('dirname') | unique) }}" + loop: "{{ prometheus_server_jobs | union(['prometheus']) }}" template: src: rules.yml.j2 dest: "/etc/prometheus/rules/{{ item }}.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 b/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 deleted file mode 100644 index 86ff88dd..00000000 --- a/roles/monitoring/prometheus/server/templates/jobs/blackbox/https.j2 +++ /dev/null @@ -1,13 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /blackbox - params: - module: - - http_tls_2xx - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 deleted file mode 100644 index 2d3889d2..00000000 --- a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ping.j2 +++ /dev/null @@ -1,13 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /blackbox - params: - module: - - icmp - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 b/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 deleted file mode 100644 index 97565673..00000000 --- a/roles/monitoring/prometheus/server/templates/jobs/blackbox/ssh.j2 +++ /dev/null @@ -1,13 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /blackbox - params: - module: - - ssh_banner - scheme: https 
- tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 b/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 deleted file mode 100644 index 0cf4ae4e..00000000 --- a/roles/monitoring/prometheus/server/templates/jobs/nut/ups.j2 +++ /dev/null @@ -1,10 +0,0 @@ - - job_name: '{{ job }}' - metrics_path: /nut/ups - scheme: https - tls_config: - ca_file: /etc/ssl/prometheus/ca-crt.pem - cert_file: /etc/ssl/prometheus/server/scrape-crt.pem - key_file: /etc/ssl/prometheus/server/scrape-key.pem - file_sd_configs: - - files: - - "/etc/prometheus/targets/{{ job }}/*.yml" diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox/https.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox/https.yml.j2 deleted file mode 100644 index 29c89590..00000000 --- a/roles/monitoring/prometheus/server/templates/targets/blackbox/https.yml.j2 +++ /dev/null @@ -1,4 +0,0 @@ -- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] - labels: - instance: '{{ target.instance }}' - __param_target: '{{ target.config.address }}' diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox/ping.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox/ping.yml.j2 deleted file mode 100644 index 29c89590..00000000 --- a/roles/monitoring/prometheus/server/templates/targets/blackbox/ping.yml.j2 +++ /dev/null @@ -1,4 +0,0 @@ -- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] - labels: - instance: '{{ target.instance }}' - __param_target: '{{ target.config.address }}' diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox/probe.yml.j2 
b/roles/monitoring/prometheus/server/templates/targets/blackbox/probe.yml.j2 new file mode 100644 index 00000000..4e336873 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/targets/blackbox/probe.yml.j2 @@ -0,0 +1,5 @@ +- targets: [ '{{ hostvars[target.exporter_hostname].prometheus_scrape_endpoint }}' ] + labels: + instance: '{{ target.instance }}' + __param_target: '{{ target.config.target }}' + __param_module: '{{ target.config.module }}' diff --git a/roles/monitoring/prometheus/server/templates/targets/blackbox/ssh.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/blackbox/ssh.yml.j2 deleted file mode 100644 index 29c89590..00000000 --- a/roles/monitoring/prometheus/server/templates/targets/blackbox/ssh.yml.j2 +++ /dev/null @@ -1,4 +0,0 @@ -- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] - labels: - instance: '{{ target.instance }}' - __param_target: '{{ target.config.address }}' diff --git a/roles/monitoring/prometheus/server/templates/targets/nut/ups.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/nut/ups.yml.j2 index 6003cd46..c60077c7 100644 --- a/roles/monitoring/prometheus/server/templates/targets/nut/ups.yml.j2 +++ b/roles/monitoring/prometheus/server/templates/targets/nut/ups.yml.j2 @@ -1,4 +1,4 @@ -- targets: [ '{{ hostvars[target.config.exporter_hostname].prometheus_scrape_endpoint }}' ] +- targets: [ '{{ hostvars[target.exporter_hostname].prometheus_scrape_endpoint }}' ] labels: instance: '{{ target.instance }}' __param_ups: '{{ target.config.ups }}' -- cgit v1.2.3 From 419ede2858769e4414a23a42b57931b83cf70d8c Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sun, 26 Sep 2021 04:08:58 +0200 Subject: add job configs for ipmi and ipmi/remote --- inventory/host_vars/ch-testvm-prometheus.yml | 3 --- roles/monitoring/prometheus/exporter/ipmi/defaults/main.yml | 3 ++- roles/monitoring/prometheus/server/defaults/main/main.yml | 2 ++ 
roles/monitoring/prometheus/server/defaults/main/rules_ipmi.yml | 4 ++++ .../prometheus/server/defaults/main/rules_ipmi__remote.yml | 4 ++++ .../prometheus/server/templates/targets/ipmi/remote.yml.j2 | 5 +++++ 6 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_ipmi.yml create mode 100644 roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml create mode 100644 roles/monitoring/prometheus/server/templates/targets/ipmi/remote.yml.j2 (limited to 'roles/monitoring') diff --git a/inventory/host_vars/ch-testvm-prometheus.yml b/inventory/host_vars/ch-testvm-prometheus.yml index 939fa398..e539735f 100644 --- a/inventory/host_vars/ch-testvm-prometheus.yml +++ b/inventory/host_vars/ch-testvm-prometheus.yml @@ -36,9 +36,6 @@ network: spreadspace_apt_repo_components: - prometheus -prometheus_exporters_extra: - - ipmi - containerd_storage: type: lvm diff --git a/roles/monitoring/prometheus/exporter/ipmi/defaults/main.yml b/roles/monitoring/prometheus/exporter/ipmi/defaults/main.yml index 9b99f9a5..6cf14f76 100644 --- a/roles/monitoring/prometheus/exporter/ipmi/defaults/main.yml +++ b/roles/monitoring/prometheus/exporter/ipmi/defaults/main.yml @@ -1,6 +1,7 @@ --- prometheus_exporter_ipmi_modules: - default: {} + default: + collectors: [] # collectors: # - bmc # - ipmi diff --git a/roles/monitoring/prometheus/server/defaults/main/main.yml b/roles/monitoring/prometheus/server/defaults/main/main.yml index 7781fd69..1e0ccf78 100644 --- a/roles/monitoring/prometheus/server/defaults/main/main.yml +++ b/roles/monitoring/prometheus/server/defaults/main/main.yml @@ -18,6 +18,8 @@ prometheus_server_rules: nut/ups: "{{ prometheus_server_rules_nut__ups + prometheus_server_rules_nut__ups_extra }}" blackbox: "{{ prometheus_server_rules_blackbox + prometheus_server_rules_blackbox_extra }}" blackbox/probe: "{{ prometheus_server_rules_blackbox__probe + prometheus_server_rules_blackbox__probe_extra }}" + 
ipmi: "{{ prometheus_server_rules_ipmi + prometheus_server_rules_ipmi_extra }}" + ipmi/remote: "{{ prometheus_server_rules_ipmi__remote + prometheus_server_rules_ipmi__remote_extra }}" # prometheus_server_alertmanager: # url: "127.0.0.1:9093" diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_ipmi.yml b/roles/monitoring/prometheus/server/defaults/main/rules_ipmi.yml new file mode 100644 index 00000000..41dcd7e9 --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_ipmi.yml @@ -0,0 +1,4 @@ +--- +prometheus_server_rules_ipmi_extra: [] +prometheus_server_rules_ipmi: [] +## TODO: add common IPMI alert rules diff --git a/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml b/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml new file mode 100644 index 00000000..1f9338ea --- /dev/null +++ b/roles/monitoring/prometheus/server/defaults/main/rules_ipmi__remote.yml @@ -0,0 +1,4 @@ +--- +prometheus_server_rules_ipmi__remote_extra: [] +prometheus_server_rules_ipmi__remote: [] +## TODO: add remote-IPMI specific alert rules diff --git a/roles/monitoring/prometheus/server/templates/targets/ipmi/remote.yml.j2 b/roles/monitoring/prometheus/server/templates/targets/ipmi/remote.yml.j2 new file mode 100644 index 00000000..4e336873 --- /dev/null +++ b/roles/monitoring/prometheus/server/templates/targets/ipmi/remote.yml.j2 @@ -0,0 +1,5 @@ +- targets: [ '{{ hostvars[target.exporter_hostname].prometheus_scrape_endpoint }}' ] + labels: + instance: '{{ target.instance }}' + __param_target: '{{ target.config.target }}' + __param_module: '{{ target.config.module }}' -- cgit v1.2.3 From 93afffd62ab0da48230985440aae11afbe4de79b Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sun, 26 Sep 2021 04:21:56 +0200 Subject: fix alert wording --- roles/monitoring/prometheus/server/defaults/main/rules_node.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'roles/monitoring') diff --git 
a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml index 64a7d562..55641534 100644 --- a/roles/monitoring/prometheus/server/defaults/main/rules_node.yml +++ b/roles/monitoring/prometheus/server/defaults/main/rules_node.yml @@ -243,7 +243,7 @@ prometheus_server_rules_node: severity: warning annotations: summary: Host has packages that can be autoremoved (instance {{ '{{' }} $labels.instance {{ '}}' }}) - description: "Host {{ '{{' }} $labels.instance {{ '}}' }} has {{ '{{' }} $value {{ '}}' }} that can be autoremoved.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" + description: "Host {{ '{{' }} $labels.instance {{ '}}' }} has {{ '{{' }} $value {{ '}}' }} packages that can be autoremoved.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}" - alert: HostNeedsRebooting expr: node_reboot_required > 0 -- cgit v1.2.3