# File-viewer metadata captured with this listing: YAML, 31 lines, 1.2 KiB
---
# Prometheus alerting rules: one rule group ("Basic") holding three alerts.
# Every rule uses `for: 0m`, i.e. no pending period is configured before the
# alert fires, and every rule carries the label `severity: critical`.
groups:
  - name: Basic
    rules:
      # from https://awesome-prometheus-alerts.grep.to/rules.html#rule-prometheus-self-monitoring-1-2
      # Fires for each scrape target whose `up` series equals 0.
      - alert: PrometheusTargetMissing
        expr: up == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          # Quoted so the Go-template braces are always read as a plain string.
          summary: "Prometheus target missing (instance {{ $labels.instance }})"
          description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when node_boot_time_seconds changed at least once within the
      # trailing 2h window.
      - alert: NodeRebooted
        expr: changes(node_boot_time_seconds[2h]) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "A node rebooted in the last 2 hours (instance {{ $labels.instance }})"
          description: "The uptime of a node changed in the last two hours. VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the sum of probe_success over job "e2e_adp_clients_v4" is 0,
      # i.e. none of the job's probes reports success.
      # NOTE(review): sum() over an empty vector yields no result, so this rule
      # stays silent if no probe_success series exist for the job at all.
      # Consider an additional absent(...) clause - confirm desired behaviour.
      - alert: PublicWifiUpstreamLost
        expr: sum(probe_success{job="e2e_adp_clients_v4"}) == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: The public wifi lost its ability to route into the internet
          description: "check the vpn connection"