Compare commits
8 Commits
c058853f73
...
9cfee1f384
Author | SHA1 | Date |
---|---|---|
Gregor Michels | 9cfee1f384 | |
Gregor Michels | dca1261f07 | |
Gregor Michels | ffb7617db8 | |
Gregor Michels | 8389a18488 | |
Gregor Michels | 258355170b | |
Gregor Michels | 74075f307f | |
Gregor Michels | d4b0e622ef | |
Gregor Michels | 2a781ae751 |
|
@ -83,6 +83,7 @@ wifi_encryption=none
|
||||||
backoffice_wifi_ssid="GU Deutscher Platz Backoffice"
|
backoffice_wifi_ssid="GU Deutscher Platz Backoffice"
|
||||||
backoffice_wifi_encryption=psk2
|
backoffice_wifi_encryption=psk2
|
||||||
backoffice_wifi_psk="{{ lookup('passwordstore', 'wifi/GU_Deutscher_Platz_Backoffice') }}"
|
backoffice_wifi_psk="{{ lookup('passwordstore', 'wifi/GU_Deutscher_Platz_Backoffice') }}"
|
||||||
|
site=adp
|
||||||
|
|
||||||
[site_ans]
|
[site_ans]
|
||||||
ap-b641
|
ap-b641
|
||||||
|
@ -96,6 +97,7 @@ ap-b634
|
||||||
ap-b5df
|
ap-b5df
|
||||||
ap-b682
|
ap-b682
|
||||||
ap-b6cc
|
ap-b6cc
|
||||||
|
ffl-ans-gw-core01
|
||||||
ffl-ans-sw-distribution01
|
ffl-ans-sw-distribution01
|
||||||
ffl-ans-sw-access01
|
ffl-ans-sw-access01
|
||||||
ffl-ans-sw-access02
|
ffl-ans-sw-access02
|
||||||
|
@ -108,3 +110,4 @@ backoffice_wifi_ssid="GU Arno-Nitzsche-Strasse BO"
|
||||||
backoffice_wifi_encryption=psk2
|
backoffice_wifi_encryption=psk2
|
||||||
backoffice_wifi_psk="{{ lookup('passwordstore', 'wifi/GU_Arno-Nitzsche-Straße_Backoffice') }}"
|
backoffice_wifi_psk="{{ lookup('passwordstore', 'wifi/GU_Arno-Nitzsche-Straße_Backoffice') }}"
|
||||||
mgmt_gateway=10.85.1.1
|
mgmt_gateway=10.85.1.1
|
||||||
|
site=ans
|
||||||
|
|
|
@ -21,10 +21,36 @@ groups:
|
||||||
description: "The uptime of a node changed in the last two hours. VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
description: "The uptime of a node changed in the last two hours. VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
- alert: PublicWifiUpstreamLost
|
- alert: PublicWifiUpstreamLost
|
||||||
expr: sum(probe_success{job="e2e_clients_v4"}) == 0
|
expr: sum(probe_success{job="e2e_adp_clients_v4"}) == 0
|
||||||
for: 0m
|
for: 0m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
summary: The public wifi lost its ability to route into the internet
|
summary: The public wifi lost its ability to route into the internet
|
||||||
description: "check the vpn connection"
|
description: "check the vpn connection"
|
||||||
|
|
||||||
|
- name: ServerSpecific
|
||||||
|
rules:
|
||||||
|
# https://awesome-prometheus-alerts.grep.to/rules#rule-host-and-hardware-1-7
|
||||||
|
#
|
||||||
|
# Please add ignored mountpoints in node_exporter parameters like
|
||||||
|
# "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
|
||||||
|
# Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
|
||||||
|
- alert: HostOutOfDiskSpace
|
||||||
|
expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: Host out of disk space (instance {{ $labels.instance }})
|
||||||
|
description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
# https://awesome-prometheus-alerts.grep.to/rules#rule-host-and-hardware-1-9
|
||||||
|
- alert: HostOutOfInodes
|
||||||
|
expr: node_filesystem_files_free / node_filesystem_files * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: Host out of inodes (instance {{ $labels.instance }})
|
||||||
|
description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
Binary file not shown.
|
@ -55,53 +55,28 @@
|
||||||
|
|
||||||
- name: provision monitoring
|
- name: provision monitoring
|
||||||
hosts:
|
hosts:
|
||||||
- monitoring01
|
- eae-adp-jump01
|
||||||
tasks:
|
tasks:
|
||||||
- name: install playbook requirements
|
|
||||||
package:
|
|
||||||
name:
|
|
||||||
- gpg
|
|
||||||
|
|
||||||
- name: install prometheus stack
|
- name: install prometheus stack
|
||||||
package:
|
package:
|
||||||
name:
|
name:
|
||||||
- prometheus
|
- prometheus
|
||||||
- prometheus-alertmanager
|
- alertmanager
|
||||||
|
- grafana
|
||||||
# stolen from usr/share/prometheus/alertmanager/generate-ui.sh
|
|
||||||
# script calls apt without "-y" therefore we need to install them beforehand
|
|
||||||
- name: install dependencies for alertmanager ui generation
|
|
||||||
package:
|
|
||||||
name:
|
|
||||||
- libjs-bootstrap4
|
|
||||||
- fonts-font-awesome
|
|
||||||
- curl
|
|
||||||
- uglifyjs
|
|
||||||
- golang-github-prometheus-alertmanager-dev
|
|
||||||
|
|
||||||
- name: configure alertmanager
|
- name: configure alertmanager
|
||||||
template:
|
template:
|
||||||
src: templates/alertmanager.yml.j2
|
src: templates/alertmanager.yml.j2
|
||||||
dest: /etc/prometheus/alertmanager.yml
|
dest: /etc/alertmanager/alertmanager.yml
|
||||||
validate: "/usr/bin/amtool check-config %s"
|
validate: "/usr/local/bin/amtool check-config %s"
|
||||||
notify:
|
notify:
|
||||||
- reload prometheus-alertmanager
|
- reload alertmanager
|
||||||
|
|
||||||
- name: generate alertmanager ui
|
|
||||||
shell:
|
|
||||||
cmd: /usr/share/prometheus/alertmanager/generate-ui.sh
|
|
||||||
creates: "/usr/share/prometheus/alertmanager/ui/index.html"
|
|
||||||
notify:
|
|
||||||
- restart prometheus-alertmanager
|
|
||||||
|
|
||||||
- name: configure prometheus alerting rules
|
- name: configure prometheus alerting rules
|
||||||
copy:
|
copy:
|
||||||
src: files/alerting_rules.yml
|
src: files/alerting_rules.yml
|
||||||
dest: /etc/prometheus/alerting_rules.yml
|
dest: /etc/prometheus/alerting_rules.yml
|
||||||
owner: root
|
validate: "/usr/local/bin/promtool check rules %s"
|
||||||
group: root
|
|
||||||
mode: 0644
|
|
||||||
validate: "/usr/bin/promtool check rules %s"
|
|
||||||
notify:
|
notify:
|
||||||
- reload prometheus
|
- reload prometheus
|
||||||
|
|
||||||
|
@ -109,32 +84,13 @@
|
||||||
template:
|
template:
|
||||||
src: templates/prometheus.yml
|
src: templates/prometheus.yml
|
||||||
dest: /etc/prometheus/prometheus.yml
|
dest: /etc/prometheus/prometheus.yml
|
||||||
validate: "/usr/bin/promtool check config %s"
|
validate: "/usr/local/bin/promtool check config %s"
|
||||||
notify:
|
notify:
|
||||||
- reload prometheus
|
- reload prometheus
|
||||||
|
|
||||||
- name: add grafana oss repo gpg key
|
|
||||||
apt_key:
|
|
||||||
url: "https://packages.grafana.com/gpg.key"
|
|
||||||
id: "4E40DDF6D76E284A4A6780E48C8C34C524098CB6"
|
|
||||||
|
|
||||||
- name: add grafana oss repo
|
|
||||||
apt_repository:
|
|
||||||
repo: "deb https://packages.grafana.com/oss/deb stable main"
|
|
||||||
|
|
||||||
- name: install grafana oss
|
|
||||||
package:
|
|
||||||
name: grafana
|
|
||||||
|
|
||||||
- name: enable and start grafana
|
|
||||||
service:
|
|
||||||
name: grafana-server
|
|
||||||
state: started
|
|
||||||
enabled: yes
|
|
||||||
|
|
||||||
- name: enable anonymous login in grafana
|
- name: enable anonymous login in grafana
|
||||||
blockinfile:
|
blockinfile:
|
||||||
path: /etc/grafana/grafana.ini
|
path: /etc/grafana/config.ini
|
||||||
block: |
|
block: |
|
||||||
[auth.anonymous]
|
[auth.anonymous]
|
||||||
enabled = true
|
enabled = true
|
||||||
|
@ -148,7 +104,7 @@
|
||||||
src: "{{ item }}"
|
src: "{{ item }}"
|
||||||
dest: /etc/grafana/provisioning/datasources/
|
dest: /etc/grafana/provisioning/datasources/
|
||||||
owner: root
|
owner: root
|
||||||
group: grafana
|
group: _grafana
|
||||||
mode: 0640
|
mode: 0640
|
||||||
with_fileglob:
|
with_fileglob:
|
||||||
- "templates/grafana/provisioning/datasources/*"
|
- "templates/grafana/provisioning/datasources/*"
|
||||||
|
@ -160,7 +116,7 @@
|
||||||
path: /etc/grafana/dashboards
|
path: /etc/grafana/dashboards
|
||||||
state: directory
|
state: directory
|
||||||
owner: root
|
owner: root
|
||||||
group: grafana
|
group: _grafana
|
||||||
mode: 0755
|
mode: 0755
|
||||||
|
|
||||||
- name: install dashboards
|
- name: install dashboards
|
||||||
|
@ -168,7 +124,7 @@
|
||||||
src: "{{ item }}"
|
src: "{{ item }}"
|
||||||
dest: /etc/grafana/dashboards/
|
dest: /etc/grafana/dashboards/
|
||||||
owner: root
|
owner: root
|
||||||
group: grafana
|
group: _grafana
|
||||||
mode: 0640
|
mode: 0640
|
||||||
with_fileglob:
|
with_fileglob:
|
||||||
- "templates/grafana/dashboards/*"
|
- "templates/grafana/dashboards/*"
|
||||||
|
@ -178,30 +134,33 @@
|
||||||
src: "{{ item }}"
|
src: "{{ item }}"
|
||||||
dest: /etc/grafana/provisioning/dashboards/
|
dest: /etc/grafana/provisioning/dashboards/
|
||||||
owner: root
|
owner: root
|
||||||
group: grafana
|
group: _grafana
|
||||||
mode: 0644
|
mode: 0644
|
||||||
with_fileglob:
|
with_fileglob:
|
||||||
- "templates/grafana/provisioning/dashboards/*"
|
- "templates/grafana/provisioning/dashboards/*"
|
||||||
notify:
|
notify:
|
||||||
- restart grafana
|
- restart grafana
|
||||||
|
|
||||||
|
- name: enable and start monitoring stack
|
||||||
|
service:
|
||||||
|
name: "{{ item }}"
|
||||||
|
enabled: true
|
||||||
|
state: started
|
||||||
|
with_items:
|
||||||
|
- prometheus
|
||||||
|
- alertmanager
|
||||||
|
- grafana
|
||||||
|
|
||||||
handlers:
|
handlers:
|
||||||
- name: reload prometheus
|
- name: reload prometheus
|
||||||
service:
|
shell:
|
||||||
name: prometheus
|
cmd: "kill -SIGHUP $(pgrep prometheus)"
|
||||||
state: reloaded
|
|
||||||
|
|
||||||
- name: reload prometheus-alertmanager
|
- name: reload alertmanager
|
||||||
service:
|
shell:
|
||||||
name: prometheus-alertmanager
|
cmd: "kill -SIGHUP $(pgrep alertmanager)"
|
||||||
state: reloaded
|
|
||||||
|
|
||||||
- name: restart prometheus-alertmanager
|
|
||||||
service:
|
|
||||||
name: prometheus-alertmanager
|
|
||||||
state: restarted
|
|
||||||
|
|
||||||
- name: restart grafana
|
- name: restart grafana
|
||||||
service:
|
service:
|
||||||
name: grafana-server
|
name: grafana
|
||||||
state: restarted
|
state: restarted
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
global:
|
global:
|
||||||
# The smarthost and SMTP sender used for mail notifications.
|
# The smarthost and SMTP sender used for mail notifications.
|
||||||
smtp_smarthost: 'harald.brainpeach.de:587'
|
smtp_smarthost: 'harald.brainpeach.de:587'
|
||||||
smtp_from: 'ffl-eae-adp-mon01@brainpeach.de'
|
smtp_from: 'ffl-eae-adp-jump01@brainpeach.de'
|
||||||
smtp_auth_username: 'ffl-eae-adp-mon01@brainpeach.de'
|
smtp_auth_username: 'ffl-eae-adp-mon01@brainpeach.de'
|
||||||
smtp_auth_password: '{{ lookup("passwordstore", "mailboxes/ffl-eae-adp-mon01@brainpeach.de") }}'
|
smtp_auth_password: '{{ lookup("passwordstore", "mailboxes/ffl-eae-adp-mon01@brainpeach.de") }}'
|
||||||
|
|
||||||
|
|
|
@ -51,6 +51,13 @@ config rule
|
||||||
option proto ospf
|
option proto ospf
|
||||||
option target ACCEPT
|
option target ACCEPT
|
||||||
|
|
||||||
|
config rule
|
||||||
|
option name From-BACKBONE-Allow-Prometheus
|
||||||
|
option src backbone
|
||||||
|
option proto tcp
|
||||||
|
option dest_port 9100
|
||||||
|
option target ACCEPT
|
||||||
|
|
||||||
config rule
|
config rule
|
||||||
option name From-Any-Allow-SSH
|
option name From-Any-Allow-SSH
|
||||||
option src *
|
option src *
|
||||||
|
|
|
@ -63,6 +63,13 @@ config rule
|
||||||
option proto ospf
|
option proto ospf
|
||||||
option target ACCEPT
|
option target ACCEPT
|
||||||
|
|
||||||
|
config rule
|
||||||
|
option name From-BACKBONE-Allow-Prometheus
|
||||||
|
option src backbone
|
||||||
|
option proto tcp
|
||||||
|
option dest_port 9100
|
||||||
|
option target ACCEPT
|
||||||
|
|
||||||
config rule
|
config rule
|
||||||
option name From-Any-Allow-SSH
|
option name From-Any-Allow-SSH
|
||||||
option src *
|
option src *
|
||||||
|
|
|
@ -118,7 +118,7 @@ config wireguard_wg1 'mullvad_fr'
|
||||||
|
|
||||||
config rule
|
config rule
|
||||||
option in 'clients'
|
option in 'clients'
|
||||||
option dest '10.84.1.0/24'
|
option dest '10.0.0.0/8'
|
||||||
option lookup 'main'
|
option lookup 'main'
|
||||||
option priority 49
|
option priority 49
|
||||||
option disabled '0'
|
option disabled '0'
|
||||||
|
|
|
@ -17,14 +17,21 @@ scrape_configs:
|
||||||
scrape_timeout: 5s
|
scrape_timeout: 5s
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['localhost:9090']
|
- targets: ['localhost:9090']
|
||||||
|
- targets: ["{{ hostvars['monitoring01']['ip'] }}:9090"]
|
||||||
|
|
||||||
{% for group in groups.keys() | difference(['all', 'ungrouped']) %}
|
{% for group in ['accesspoints', 'switches', 'gateways', 'server', 'vms'] %}
|
||||||
- job_name: {{ group }}
|
- job_name: {{ group }}
|
||||||
static_configs:
|
static_configs:
|
||||||
{% for host in groups[group] %}
|
{% for host in groups[group] %}
|
||||||
- targets: ["{{ hostvars[host]['monitoring_ip'] | default(hostvars[host]['ip']) }}:9100"]
|
- targets: ["{{ hostvars[host]['monitoring_ip'] | default(hostvars[host]['ip']) }}:9100"]
|
||||||
labels:
|
labels:
|
||||||
instance: "{{ host }}:9100"
|
instance: "{{ host }}:9100"
|
||||||
|
{% if hostvars[host]['site'] is defined %}
|
||||||
|
site: "{{ hostvars[host]['site'] }}"
|
||||||
|
{% endif %}
|
||||||
|
{% if hostvars[host]['location'] is defined %}
|
||||||
|
location: "{{ hostvars[host]['location'] }}"
|
||||||
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
@ -33,10 +40,8 @@ scrape_configs:
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets:
|
- targets:
|
||||||
- {{ hostvars['mon-e2e-clients01']['ip'] }}:9115
|
- {{ hostvars['mon-e2e-clients01']['ip'] }}:9115
|
||||||
- {{ hostvars['mon-e2e-wan01']['ip'] }}:9115
|
|
||||||
- {{ hostvars['monitoring01']['ip'] }}:9115
|
|
||||||
|
|
||||||
- job_name: 'e2e_clients_v4'
|
- job_name: 'e2e_adp_clients_v4'
|
||||||
metrics_path: /probe
|
metrics_path: /probe
|
||||||
params:
|
params:
|
||||||
module: [icmp_v4]
|
module: [icmp_v4]
|
||||||
|
@ -51,36 +56,3 @@ scrape_configs:
|
||||||
target_label: instance
|
target_label: instance
|
||||||
- target_label: __address__
|
- target_label: __address__
|
||||||
replacement: {{ hostvars['mon-e2e-clients01']['ip'] }}:9115
|
replacement: {{ hostvars['mon-e2e-clients01']['ip'] }}:9115
|
||||||
|
|
||||||
- job_name: 'e2e_default_v4'
|
|
||||||
metrics_path: /probe
|
|
||||||
params:
|
|
||||||
module: [icmp_v4]
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- 192.168.0.1 # gigacube
|
|
||||||
- freifunk-leipzig.de
|
|
||||||
- harald.brainpeach.de
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__address__]
|
|
||||||
target_label: __param_target
|
|
||||||
- source_labels: [__param_target]
|
|
||||||
target_label: instance
|
|
||||||
- target_label: __address__
|
|
||||||
replacement: {{ hostvars['monitoring01']['ip'] }}:9115
|
|
||||||
|
|
||||||
- job_name: 'e2e_wan_v4'
|
|
||||||
metrics_path: /probe
|
|
||||||
params:
|
|
||||||
module: [icmp_v4]
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- freifunk-leipzig.de
|
|
||||||
- harald.brainpeach.de
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__address__]
|
|
||||||
target_label: __param_target
|
|
||||||
- source_labels: [__param_target]
|
|
||||||
target_label: instance
|
|
||||||
- target_label: __address__
|
|
||||||
replacement: {{ hostvars['mon-e2e-wan01']['ip'] }}:9115
|
|
||||||
|
|
Reference in New Issue