| Rule | State | Error | Last Evaluation | Evaluation Time |
| alert: HighCPULoad
expr: 100
- (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
> 85
for: 5m
labels:
severity: warning
annotations:
description: 'CPU usage is {{ printf "%.1f" $value }}% (threshold: 85%)'
summary: High CPU load on {{ $labels.instance }}
| ok | | 15.343s ago | 663.4us |
| alert: CriticalCPULoad
expr: 100
- (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
> 95
for: 2m
labels:
severity: critical
annotations:
description: 'CPU usage is {{ printf "%.1f" $value }}% (threshold: 95%)'
summary: Critical CPU load on {{ $labels.instance }}
| ok | | 15.342s ago | 383.6us |
| alert: HighMemoryUsage
expr: (1
- (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
for: 5m
labels:
severity: warning
annotations:
description: 'Memory usage is {{ printf "%.1f" $value }}% (threshold: 85%)'
summary: High memory usage on {{ $labels.instance }}
| ok | | 15.342s ago | 314.9us |
| alert: CriticalMemoryUsage
expr: (1
- (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 95
for: 2m
labels:
severity: critical
annotations:
description: Memory usage is {{ printf "%.1f" $value }}%
summary: Critical memory usage on {{ $labels.instance }}
| ok | | 15.342s ago | 295.7us |
| alert: DiskSpaceWarning
expr: (1
- (node_filesystem_avail_bytes{fstype!~"tmpfs|fuse.lxcfs"} / node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs"}))
* 100 > 80
for: 5m
labels:
severity: warning
annotations:
description: Disk {{ $labels.mountpoint }} is {{ printf "%.1f" $value }}%
full
summary: Disk space warning on {{ $labels.instance }}
| ok | | 15.342s ago | 522us |
| alert: DiskSpaceCritical
expr: (1
- (node_filesystem_avail_bytes{fstype!~"tmpfs|fuse.lxcfs"} / node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs"}))
* 100 > 90
for: 2m
labels:
severity: critical
annotations:
description: Disk {{ $labels.mountpoint }} is {{ printf "%.1f" $value }}%
full
summary: Critical disk space on {{ $labels.instance }}
| ok | | 15.341s ago | 514.1us |
| alert: DiskWillFillIn24h
expr: predict_linear(node_filesystem_avail_bytes{fstype!~"tmpfs|fuse.lxcfs"}[6h],
24 * 3600) < 0
for: 30m
labels:
severity: warning
annotations:
description: '{{ $labels.mountpoint }} is predicted to be full within 24 hours'
summary: Disk will fill within 24h on {{ $labels.instance }}
| ok | | 15.341s ago | 1.134ms |
| alert: HighLoadAverage
expr: node_load15
/ on (instance) count by (instance) (node_cpu_seconds_total{mode="idle"})
> 0.8
for: 10m
labels:
severity: warning
annotations:
description: 15-minute load average per CPU is {{ printf "%.2f" $value }}
summary: High load average on {{ $labels.instance }}
| ok | | 15.34s ago | 275.3us |
| alert: ServerDown
expr: up == 0
for: 1m
labels:
severity: critical
annotations:
description: '{{ $labels.instance }} ({{ $labels.job }}) has been unreachable for
more than 1 minute'
summary: Instance {{ $labels.instance }} is down
| ok | | 15.34s ago | 409.9us |
| alert: UnexpectedReboot
expr: node_time_seconds
- node_boot_time_seconds < 300
labels:
severity: warning
annotations:
description: Server has been up for less than 5 minutes — possible unexpected reboot
summary: 'Server rebooted: {{ $labels.instance }}'
| ok | | 15.339s ago | 229us |
| alert: SystemdServiceFailed
expr: node_systemd_unit_state{state="failed"}
== 1
for: 2m
labels:
severity: warning
annotations:
description: Service {{ $labels.name }} is in failed state
summary: Systemd service failed on {{ $labels.instance }}
| ok | | 15.339s ago | 2.85ms |
|
22.569s ago |
1.011ms |