This repository has been archived on 2024-05-11. You can view files and clone it, but cannot push or open issues or pull requests.
eae-am-deutschen-platz/files/alerting_rules.yml

31 lines
1.2 KiB
YAML

# Prometheus alerting rules: one rule group ("Basic") holding three alerts,
# all labelled `severity: critical`. Every rule sets `for: 0m`, so no pending
# period is configured between the expression matching and the alert firing.
groups:
  - name: Basic
    rules:
      # Matches every series whose `up` metric is 0.
      # Adapted from https://awesome-prometheus-alerts.grep.to/rules.html#rule-prometheus-self-monitoring-1-2
      - alert: PrometheusTargetMissing
        expr: up == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Prometheus target missing (instance {{ $labels.instance }})
          description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Matches when node_boot_time_seconds changed value at least once within
      # the trailing 2h window.
      - alert: NodeRebooted
        expr: changes(node_boot_time_seconds[2h]) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: A node rebooted in the last 2 hours (instance {{ $labels.instance }})
          description: "The uptime of a node changed in the last two hours. VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Matches when the summed probe_success over all series of the
      # e2e_adp_clients_v4 job is 0, i.e. none of those probes succeeded.
      # NOTE(review): sum() over zero matching series returns no result, so
      # this rule stays silent if the probe series vanish entirely; confirm
      # that case is covered elsewhere (e.g. by PrometheusTargetMissing).
      - alert: PublicWifiUpstreamLost
        expr: sum(probe_success{job="e2e_adp_clients_v4"}) == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: The public wifi lost its ability to route into the internet
          description: "check the vpn connection"