Compare commits

...

5 Commits

Author SHA1 Message Date
Gregor Michels f01e35c531 monitoring: fix prometheus instance generation from inventory
no idea why it broke
2022-07-13 01:38:23 +02:00
Gregor Michels fcc5e277dd monitoring: rename ansible install prometheus task 2022-07-13 01:31:09 +02:00
Gregor Michels fb8b843489 monitoring: configure alertmanager to send mails 2022-07-13 01:29:46 +02:00
Gregor Michels 5a21b2cd88 monitoring: prometheus: add simple alerting rule 2022-07-13 01:27:07 +02:00
Gregor Michels a164059a77 pass: add mailbox pw ffl-eae-adp-mon01@brainpeach.de 2022-07-13 01:22:44 +02:00
5 changed files with 106 additions and 3 deletions

12
files/alerting_rules.yml Normal file
View File

@ -0,0 +1,12 @@
---
# Prometheus alerting rules: one group holding a single availability alert.
groups:
  - name: Basic
    rules:
      # from https://awesome-prometheus-alerts.grep.to/rules.html#rule-prometheus-self-monitoring-1-2
      - alert: PrometheusTargetMissing
        # matches every scrape target whose `up` sample is 0
        expr: up == 0
        # 0m = no pending period before the alert fires
        for: 0m
        labels:
          severity: critical
        annotations:
          # {{ ... }} is Prometheus alert templating, not Jinja
          summary: 'Prometheus target missing (instance {{ $labels.instance }})'
          description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -8,12 +8,49 @@
name:
- gpg
- name: install prometheus
# Install the Prometheus server together with Alertmanager.
- name: install prometheus stack
  package:
    name:
      - prometheus
      - prometheus-alertmanager
    # explicit desired state; the package module documents `state` as required
    state: present
# Package list taken from /usr/share/prometheus/alertmanager/generate-ui.sh.
# The script calls apt without "-y", so the packages have to be installed
# beforehand.
- name: install dependencies for alertmanager ui generation
  package:
    name:
      - libjs-bootstrap4
      - fonts-font-awesome
      - curl
      - uglifyjs
      - golang-github-prometheus-alertmanager-dev
    # explicit desired state; the package module documents `state` as required
    state: present
# Render the Alertmanager configuration; amtool validates the rendered file
# before it replaces the destination.
- name: configure alertmanager
  template:
    src: templates/alertmanager.yml.j2
    dest: /etc/prometheus/alertmanager.yml
    # The rendered file contains the SMTP password, so its permissions are
    # set explicitly instead of being left to the default umask.
    # NOTE(review): assumes the alertmanager service user is a member of the
    # "prometheus" group (Debian packaging) - confirm before deploying.
    owner: root
    group: prometheus
    mode: "0640"
    validate: "/usr/bin/amtool check-config %s"
  notify:
    - reload prometheus-alertmanager
# Build the Alertmanager web UI; `creates` skips the task once the generated
# index.html exists.
- name: generate alertmanager ui
  # command instead of shell: the script is invoked as-is and needs no
  # shell features (pipes, redirects, variable expansion)
  command:
    cmd: /usr/share/prometheus/alertmanager/generate-ui.sh
    creates: "/usr/share/prometheus/alertmanager/ui/index.html"
  notify:
    - restart prometheus-alertmanager
# Install the alerting rules file; promtool validates it before it is copied
# into place.
- name: configure prometheus alerting rules
  copy:
    src: files/alerting_rules.yml
    dest: /etc/prometheus/alerting_rules.yml
    owner: root
    group: root
    # quoted so the mode is passed as a string and never reinterpreted as a
    # YAML integer
    mode: "0644"
    validate: "/usr/bin/promtool check rules %s"
  notify:
    - reload prometheus
- name: configure prometheus
template:
src: templates/prometheus.yml
@ -58,6 +95,16 @@
name: prometheus
state: reloaded
# Handler: reload the Alertmanager service (notified by the "configure
# alertmanager" task).
- name: reload prometheus-alertmanager
  service:
    name: prometheus-alertmanager
    state: reloaded
# Handler: restart the Alertmanager service (notified by the "generate
# alertmanager ui" task).
- name: restart prometheus-alertmanager
  service:
    name: prometheus-alertmanager
    state: restarted
- name: restart grafana
service:
name: grafana-server

View File

@ -0,0 +1,44 @@
---
# Alertmanager configuration, rendered by Ansible from a Jinja2 template.
# See https://prometheus.io/docs/alerting/configuration/ for documentation.

global:
  # The smarthost and SMTP sender used for mail notifications.
  smtp_smarthost: 'harald.brainpeach.de:587'
  smtp_from: 'ffl-eae-adp-mon01@brainpeach.de'
  smtp_auth_username: 'ffl-eae-adp-mon01@brainpeach.de'
  # to_json renders a double-quoted, escaped scalar, so a password containing
  # quotes or backslashes cannot break the generated YAML.
  smtp_auth_password: {{ lookup("passwordstore", "mailboxes/ffl-eae-adp-mon01@brainpeach.de") | to_json }}

# The directory from which notification templates are read.
templates:
  - '/etc/prometheus/alertmanager_templates/*.tmpl'

# The root route on which each incoming alert enters.
route:
  # The labels by which incoming alerts are grouped together. For example,
  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
  # be batched into a single group.
  group_by: ['alertname']

  # When a new group of alerts is created by an incoming alert, wait at
  # least 'group_wait' before sending the initial notification.
  # This ensures that multiple alerts for the same group that start firing
  # shortly after one another are batched together in the first
  # notification.
  group_wait: 30s

  # After the first notification was sent, wait 'group_interval' before
  # sending a batch of new alerts that started firing for that group.
  group_interval: 5m

  # If an alert has successfully been sent, wait 'repeat_interval' before
  # resending it.
  repeat_interval: 3h

  # The default receiver.
  receiver: admins

receivers:
  - name: 'admins'
    email_configs:
      - to: 'hirnpfirsich@brainpeach.de'
        send_resolved: true

View File

@ -9,7 +9,7 @@ alerting:
- targets: ['localhost:9093']
rule_files:
# - "first_rules.yml"
- "/etc/prometheus/alerting_rules.yml"
scrape_configs:
- job_name: 'prometheus'
@ -18,7 +18,7 @@ scrape_configs:
static_configs:
- targets: ['localhost:9090']
{% for group in groups.keys() | reject('all') | reject('ungrouped') %}
{% for group in groups.keys() | difference(['all', 'ungrouped']) %}
- job_name: {{ group }}
static_configs:
{% for host in groups[group] %}