monitoring: alert on node reboots

This commit is contained in:
Gregor Michels 2022-09-14 02:16:15 +02:00
parent 79d46e3100
commit 6623cc0e09
1 changed file with 9 additions and 0 deletions

View File

@@ -10,3 +10,12 @@ groups:
annotations:
summary: Prometheus target missing (instance {{ $labels.instance }})
description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# NodeRebooted: fires when the reported boot time of a node changed at least
# once within the trailing 2h window, i.e. the node most likely rebooted.
- alert: NodeRebooted
  # changes() returns how often the sample value changed inside the range.
  expr: changes(node_boot_time_seconds[2h]) > 0
  # No pending period: fire on the first evaluation where the expression holds.
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: A node rebooted in the last 2 hours (instance {{ $labels.instance }})
    # Double quotes are required so that \n is rendered as a newline.
    description: "The boot time of a node changed in the last two hours.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"