---
# Installation, network and Prometheus server settings for this host.

install_jumphost: ch-jump

system_lvm_volume_size_root: 3G

# Virtual machine definition: resources, disks and bridged interfaces.
install:
  vm:
    memory: 8G
    numcpus: 8
    autostart: true
  disks:
    primary: /dev/sda
    scsi:
      sda:
        type: zfs
        name: root
        size: 10g
      sdb:
        type: zfs
        name: data
        size: 50g
  interfaces:
    - bridge: br-svc
      name: svc0
    - bridge: br-iot
      name: iot0
    - bridge: br-mgmt
      name: mgmt0

# Addresses are derived from the per-zone prefix plus this host's offset
# in `network_zones.<zone>.offsets`.
network:
  nameservers: "{{ network_zones.svc.dns }}"
  domain: "{{ host_domain }}"
  systemd_link:
    interfaces: "{{ install.interfaces }}"
  # Anchored so the same mapping is reused as the first entry of
  # `interfaces` below.
  primary: &_network_primary_
    name: svc0
    address: "{{ network_zones.svc.prefix | ansible.utils.ipaddr(network_zones.svc.offsets[inventory_hostname]) }}"
    gateway: "{{ network_zones.svc.gateway }}"
    # NOTE(review): static_routes is assumed to belong to the primary
    # interface rather than to `network` itself - confirm.
    static_routes:
      - destination: "{{ network_zones.lan.prefix }}"
        gateway: "{{ network_zones.svc.prefix | ansible.utils.ipaddr(network_zones.svc.offsets['ch-gw-lan']) | ansible.utils.ipaddr('address') }}"
  interfaces:
    - *_network_primary_
    - name: iot0
      address: "{{ network_zones.iot.prefix | ansible.utils.ipaddr(network_zones.iot.offsets[inventory_hostname]) }}"
    - name: mgmt0
      address: "{{ network_zones.mgmt.prefix | ansible.utils.ipaddr(network_zones.mgmt.offsets[inventory_hostname]) }}"

# Volume group backing `prometheus_server_storage` below.
# NOTE(review): the PV path presumably refers to the second SCSI disk
# (sdb) declared under `install.disks` - verify.
lvm_groups:
  mondata:
    pvs:
      - /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_drive-scsi0-0-0-1

spreadspace_apt_repo_components:
  - prometheus

# Prometheus data lives on a logical volume in the `mondata` group.
prometheus_server_storage:
  type: lvm
  vg: mondata
  lv: prometheus
  size: 30G
  fs: ext4

prometheus_server_external_labels:
  environment: chaos-at-home
  monitor: "{{ inventory_hostname }}"

# Alertmanager is addressed on localhost and authenticated as user
# `server`; the password comes from the vault.
prometheus_server_alertmanager:
  url: "127.0.0.1:9093"
  path_prefix: "/alertmanager/"
  basic_auth:
    username: server
    password: "{{ vault_prometheus_alertmanager_auth_user_passwords['server'] }}"

prometheus_server_web_external_url: "http://{{ network.primary.address | ansible.utils.ipaddr('address') }}/prometheus/"

# Basic-auth accounts for the Prometheus web endpoint (passwords from vault).
prometheus_server_auth_users:
  server: "{{ vault_prometheus_server_auth_user_passwords['server'] }}"
  grafana: "{{ vault_prometheus_server_auth_user_passwords['grafana'] }}"
  admin: "{{ vault_prometheus_server_auth_user_passwords['admin'] }}"
# Credentials Prometheus uses when scraping itself; same account as the
# `server` entry in the vault-backed password map.
prometheus_server_selfscraping_auth:
  username: server
  password: "{{ vault_prometheus_server_auth_user_passwords['server'] }}"

prometheus_exporters_extra:
  - blackbox
  - nut
  - ssl
  - smokeping

# Disabled blackbox ICMP module, kept for reference.
# prometheus_exporter_blackbox_modules_extra:
#   icmp:
#     prober: icmp

# Hosts pinged by the smokeping exporter: upstream gateway and resolvers,
# three public DNS services, and the public address of ch-atlas.
prometheus_exporter_smokeping_targets:
  - hosts:
      - "{{ network_zones.magenta.gateway }}"
      - "{{ network_zones.magenta.dns[0] }}"
      - "{{ network_zones.magenta.dns[1] }}"
      - 9.9.9.9
      - 8.8.8.8
      - 1.1.1.1
      - "{{ hostvars['ch-atlas'].vm_host.network.bridges.public.prefix | ansible.utils.ipaddr(hostvars['ch-atlas'].vm_host.network.bridges.public.offsets['ch-atlas']) | ansible.utils.ipaddr('address') }}"

prometheus_job_multitarget_blackbox__probe:
  ch-mon:
    # Disabled ICMP probes, kept for reference; presumably superseded by
    # the smokeping targets above - confirm before deleting.
    # - instance: "ping-magentagw"
    #   target: 62.99.185.129
    #   module: icmp
    # - instance: "ping-magentadns1"
    #   target: 195.58.160.194
    #   module: icmp
    # - instance: "ping-magentadns2"
    #   target: 195.58.161.122
    #   module: icmp
    # - instance: "ping-quad9"
    #   target: 9.9.9.9
    #   module: icmp
    # - instance: "ping-google-dns"
    #   target: 8.8.8.8
    #   module: icmp
    # - instance: "ping-cloudflare-dns"
    #   target: 1.1.1.1
    #   module: icmp
    # SSH banner check against this host's own svc address.
    - instance: "ssh-{{ inventory_hostname }}"
      target: "{{ network_zones.svc.prefix | ansible.utils.ipaddr(network_zones.svc.offsets[inventory_hostname]) | ansible.utils.ipaddr('address') }}:{{ ansible_port | default(22) }}"
      module: ssh_banner

prometheus_job_multitarget_ssl__probe:
  ch-mon:
    - instance: "sslcert-prometheus-{{ inventory_hostname }}"
      target: "/etc/ssl/prometheus/**/*.pem"
      module: file

# Extra alerting rules. `{{ '{{' }}` / `{{ '}}' }}` are Jinja escapes that
# render as literal double braces, so the `$labels` / `$value` placeholders
# are left in the output instead of being evaluated by Ansible.
prometheus_server_rules_node_extra:
  - alert: GitFsckMetricsOutdated
    expr: time() - git_fsck_run > 100000
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: Metrics from git-fsck are too old (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "The exported values from git-fsck on host {{ '{{' }} $labels.instance {{ '}}' }} are {{ '{{' }} $value {{ '}}' }} seconds old.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"
  - alert: GitFsckFailed
    expr: git_fsck_failed != 0
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: git-filesystem check failed (instance {{ '{{' }} $labels.instance {{ '}}' }})
      description: "The git repository {{ '{{' }} $labels.repository {{ '}}' }}@{{ '{{' }} $labels.gitolite_instance {{ '}}' }} on host {{ '{{' }} $labels.instance {{ '}}' }} is corrupt.\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS = {{ '{{' }} $labels {{ '}}' }}"

# Outgoing mail for Alertmanager goes to port 25 on ch-prometheus-legacy's
# lan address, without requiring TLS.
prometheus_alertmanager_smtp:
  smarthost: "{{ network_zones.lan.prefix | ansible.utils.ipaddr(network_zones.lan.offsets['ch-prometheus-legacy']) | ansible.utils.ipaddr('address') }}:25"
  from: "noreply@chaos-at-home.org"
  require_tls: false

prometheus_alertmanager_web_external_url: "http://{{ network.primary.address | ansible.utils.ipaddr('address') }}/alertmanager/"

prometheus_alertmanager_auth_users:
  server: "{{ vault_prometheus_alertmanager_auth_user_passwords['server'] }}"
  admin: "{{ vault_prometheus_alertmanager_auth_user_passwords['admin'] }}"

# Every alert except PrometheusAlertmanagerE2eDeadManSwitch is mailed via
# `equinox-mail`; anything not matched falls through to the `empty`
# receiver, which has no notification configs.
prometheus_alertmanager_route:
  receiver: empty
  routes:
    - receiver: equinox-mail
      matchers:
        - 'alertname != PrometheusAlertmanagerE2eDeadManSwitch'
      # NOTE(review): group_by is assumed to belong to this sub-route
      # rather than to the root route - confirm.
      group_by:
        - instance

prometheus_alertmanager_receivers:
  - name: empty
  - name: equinox-mail
    email_configs:
      - to: equinox@chaos-at-home.org
        send_resolved: true

grafana_secret_key: "{{ vault_grafana_secret_key }}"

# Grafana reaches the local Prometheus through its proxy mode, logging in
# as the `grafana` basic-auth user.
grafana_datasources:
  - name: "Prometheus"
    type: "prometheus"
    access: "proxy"
    url: "http://127.0.0.1:9090/prometheus"
    basicAuth: true
    basicAuthUser: "grafana"
    isDefault: true
    secureJsonData:
      basicAuthPassword: "{{ vault_prometheus_server_auth_user_passwords['grafana'] }}"
    jsonData:
      manageAlerts: false

# Dashboards to provision; all of them use the "Prometheus" datasource.
grafana_dashboards:
  - file: node-full
    datasource: "Prometheus"
  - file: openwrt
    datasource: "Prometheus"
  - file: chronyd
    datasource: "Prometheus"
  - file: environment-sensors
    datasource: "Prometheus"
  - file: blackbox
    datasource: "Prometheus"
  - file: smokeping
    datasource: "Prometheus"
  - file: bind
    datasource: "Prometheus"
  - file: ipmi
    datasource: "Prometheus"
# Landing page: the hostnames it is served under and its title.
monitoring_landingpage_hostnames:
  - 'mon.chaos-at-home.org'

monitoring_landingpage_title: 'chaos@home Monitoring Host'