monitoring: move prometheus stack onto eae-adp-jump01
to be able to also monitor the new site. The custom Grafana dashboard broke while transferring the stack; will fix next.
This commit is contained in:
parent
258355170b
commit
8389a18488
|
@ -21,7 +21,7 @@ groups:
|
|||
description: "The uptime of a node changed in the last two hours. VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: PublicWifiUpstreamLost
|
||||
expr: sum(probe_success{job="e2e_clients_v4"}) == 0
|
||||
expr: sum(probe_success{job="e2e_adp_clients_v4"}) == 0
|
||||
for: 0m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
|
@ -55,53 +55,28 @@
|
|||
|
||||
- name: provision monitoring
|
||||
hosts:
|
||||
- monitoring01
|
||||
- eae-adp-jump01
|
||||
tasks:
|
||||
- name: install playbook requirements
|
||||
package:
|
||||
name:
|
||||
- gpg
|
||||
|
||||
- name: install prometheus stack
|
||||
package:
|
||||
name:
|
||||
- prometheus
|
||||
- prometheus-alertmanager
|
||||
|
||||
# stolen from usr/share/prometheus/alertmanager/generate-ui.sh
|
||||
# script calls apt without "-y" therefore we need to install them beforehand
|
||||
- name: install dependencies for alertmanager ui generation
|
||||
package:
|
||||
name:
|
||||
- libjs-bootstrap4
|
||||
- fonts-font-awesome
|
||||
- curl
|
||||
- uglifyjs
|
||||
- golang-github-prometheus-alertmanager-dev
|
||||
- alertmanager
|
||||
- grafana
|
||||
|
||||
- name: configure alertmanager
|
||||
template:
|
||||
src: templates/alertmanager.yml.j2
|
||||
dest: /etc/prometheus/alertmanager.yml
|
||||
validate: "/usr/bin/amtool check-config %s"
|
||||
dest: /etc/alertmanager/alertmanager.yml
|
||||
validate: "/usr/local/bin/amtool check-config %s"
|
||||
notify:
|
||||
- reload prometheus-alertmanager
|
||||
|
||||
- name: generate alertmanager ui
|
||||
shell:
|
||||
cmd: /usr/share/prometheus/alertmanager/generate-ui.sh
|
||||
creates: "/usr/share/prometheus/alertmanager/ui/index.html"
|
||||
notify:
|
||||
- restart prometheus-alertmanager
|
||||
- reload alertmanager
|
||||
|
||||
- name: configure prometheus alerting rules
|
||||
copy:
|
||||
src: files/alerting_rules.yml
|
||||
dest: /etc/prometheus/alerting_rules.yml
|
||||
owner: root
|
||||
group: root
|
||||
mode: 0644
|
||||
validate: "/usr/bin/promtool check rules %s"
|
||||
validate: "/usr/local/bin/promtool check rules %s"
|
||||
notify:
|
||||
- reload prometheus
|
||||
|
||||
|
@ -109,32 +84,13 @@
|
|||
template:
|
||||
src: templates/prometheus.yml
|
||||
dest: /etc/prometheus/prometheus.yml
|
||||
validate: "/usr/bin/promtool check config %s"
|
||||
validate: "/usr/local/bin/promtool check config %s"
|
||||
notify:
|
||||
- reload prometheus
|
||||
|
||||
- name: add grafana oss repo gpg key
|
||||
apt_key:
|
||||
url: "https://packages.grafana.com/gpg.key"
|
||||
id: "4E40DDF6D76E284A4A6780E48C8C34C524098CB6"
|
||||
|
||||
- name: add grafana oss repo
|
||||
apt_repository:
|
||||
repo: "deb https://packages.grafana.com/oss/deb stable main"
|
||||
|
||||
- name: install grafana oss
|
||||
package:
|
||||
name: grafana
|
||||
|
||||
- name: enable and start grafana
|
||||
service:
|
||||
name: grafana-server
|
||||
state: started
|
||||
enabled: yes
|
||||
|
||||
- name: enable anonymous login in grafana
|
||||
blockinfile:
|
||||
path: /etc/grafana/grafana.ini
|
||||
path: /etc/grafana/config.ini
|
||||
block: |
|
||||
[auth.anonymous]
|
||||
enabled = true
|
||||
|
@ -148,7 +104,7 @@
|
|||
src: "{{ item }}"
|
||||
dest: /etc/grafana/provisioning/datasources/
|
||||
owner: root
|
||||
group: grafana
|
||||
group: _grafana
|
||||
mode: 0640
|
||||
with_fileglob:
|
||||
- "templates/grafana/provisioning/datasources/*"
|
||||
|
@ -160,7 +116,7 @@
|
|||
path: /etc/grafana/dashboards
|
||||
state: directory
|
||||
owner: root
|
||||
group: grafana
|
||||
group: _grafana
|
||||
mode: 0755
|
||||
|
||||
- name: install dashboards
|
||||
|
@ -168,7 +124,7 @@
|
|||
src: "{{ item }}"
|
||||
dest: /etc/grafana/dashboards/
|
||||
owner: root
|
||||
group: grafana
|
||||
group: _grafana
|
||||
mode: 0640
|
||||
with_fileglob:
|
||||
- "templates/grafana/dashboards/*"
|
||||
|
@ -178,30 +134,33 @@
|
|||
src: "{{ item }}"
|
||||
dest: /etc/grafana/provisioning/dashboards/
|
||||
owner: root
|
||||
group: grafana
|
||||
group: _grafana
|
||||
mode: 0644
|
||||
with_fileglob:
|
||||
- "templates/grafana/provisioning/dashboards/*"
|
||||
notify:
|
||||
- restart grafana
|
||||
|
||||
- name: enable and start monitoring stack
|
||||
service:
|
||||
name: "{{ item }}"
|
||||
enabled: true
|
||||
state: started
|
||||
with_items:
|
||||
- prometheus
|
||||
- alertmanager
|
||||
- grafana
|
||||
|
||||
handlers:
|
||||
- name: reload prometheus
|
||||
service:
|
||||
name: prometheus
|
||||
state: reloaded
|
||||
shell:
|
||||
cmd: "kill -SIGHUP $(pgrep prometheus)"
|
||||
|
||||
- name: reload prometheus-alertmanager
|
||||
service:
|
||||
name: prometheus-alertmanager
|
||||
state: reloaded
|
||||
|
||||
- name: restart prometheus-alertmanager
|
||||
service:
|
||||
name: prometheus-alertmanager
|
||||
state: restarted
|
||||
- name: reload alertmanager
|
||||
shell:
|
||||
cmd: "kill -SIGHUP $(pgrep alertmanager)"
|
||||
|
||||
- name: restart grafana
|
||||
service:
|
||||
name: grafana-server
|
||||
name: grafana
|
||||
state: restarted
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
global:
|
||||
# The smarthost and SMTP sender used for mail notifications.
|
||||
smtp_smarthost: 'harald.brainpeach.de:587'
|
||||
smtp_from: 'ffl-eae-adp-mon01@brainpeach.de'
|
||||
smtp_from: 'ffl-eae-adp-jump01@brainpeach.de'
|
||||
smtp_auth_username: 'ffl-eae-adp-mon01@brainpeach.de'
|
||||
smtp_auth_password: '{{ lookup("passwordstore", "mailboxes/ffl-eae-adp-mon01@brainpeach.de") }}'
|
||||
|
||||
|
|
|
@ -17,14 +17,18 @@ scrape_configs:
|
|||
scrape_timeout: 5s
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
- targets: ["{{ hostvars['monitoring01']['ip'] }}:9090"]
|
||||
|
||||
{% for group in groups.keys() | difference(['all', 'ungrouped']) %}
|
||||
{% for group in ['accesspoints', 'switches', 'gateways', 'server', 'vms'] %}
|
||||
- job_name: {{ group }}
|
||||
static_configs:
|
||||
{% for host in groups[group] %}
|
||||
- targets: ["{{ hostvars[host]['monitoring_ip'] | default(hostvars[host]['ip']) }}:9100"]
|
||||
labels:
|
||||
instance: "{{ host }}:9100"
|
||||
{% if hostvars[host]['site'] is defined %}
|
||||
site: "{{ hostvars[host]['site'] }}"
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
{% endfor %}
|
||||
|
@ -33,10 +37,8 @@ scrape_configs:
|
|||
static_configs:
|
||||
- targets:
|
||||
- {{ hostvars['mon-e2e-clients01']['ip'] }}:9115
|
||||
- {{ hostvars['mon-e2e-wan01']['ip'] }}:9115
|
||||
- {{ hostvars['monitoring01']['ip'] }}:9115
|
||||
|
||||
- job_name: 'e2e_clients_v4'
|
||||
- job_name: 'e2e_adp_clients_v4'
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [icmp_v4]
|
||||
|
@ -51,36 +53,3 @@ scrape_configs:
|
|||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: {{ hostvars['mon-e2e-clients01']['ip'] }}:9115
|
||||
|
||||
- job_name: 'e2e_default_v4'
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [icmp_v4]
|
||||
static_configs:
|
||||
- targets:
|
||||
- 192.168.0.1 # gigacube
|
||||
- freifunk-leipzig.de
|
||||
- harald.brainpeach.de
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: {{ hostvars['monitoring01']['ip'] }}:9115
|
||||
|
||||
- job_name: 'e2e_wan_v4'
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [icmp_v4]
|
||||
static_configs:
|
||||
- targets:
|
||||
- freifunk-leipzig.de
|
||||
- harald.brainpeach.de
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: {{ hostvars['mon-e2e-wan01']['ip'] }}:9115
|
||||
|
|
Loading…
Reference in New Issue