monitoring: alert on node reboots
This commit is contained in:
parent
79d46e3100
commit
6623cc0e09
|
@ -10,3 +10,12 @@ groups:
|
|||
annotations:
|
||||
summary: Prometheus target missing (instance {{ $labels.instance }})
|
||||
description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
# NodeRebooted: fires when node_boot_time_seconds changed at least once within
# the trailing 2-hour window, which indicates the node was restarted.
- alert: NodeRebooted
  # changes() counts value changes of the series over the range; any change
  # (> 0) makes the expression true for as long as it stays inside the window.
  expr: changes(node_boot_time_seconds[2h]) > 0
  # Zero-length pending period: no waiting time is configured before firing.
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: A node rebooted in the last 2 hours (instance {{ $labels.instance }})
    # Double-quoted so the \n escapes become newlines; VALUE and LABELS each
    # go on their own line, matching the other alert descriptions in this file.
    description: "The boot time of a node changed in the last two hours.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
|
Reference in New Issue