Files
consultoria-as 3792e4053c feat(monitoring): add Alertmanager with alert rules
- docker-compose.monitoring.yml: added alertmanager service (port 9093)
- prometheus.yml: alerting config + rule_files entry
- alerts.yml: 5 alert rules (PostgreSQLDown, RedisDown, HighDiskUsage,
  HighMemoryUsage, NodeDown)
- alertmanager.yml: SMTP + webhook receiver, inhibit rules
2026-04-29 07:10:22 +00:00

48 lines
1.4 KiB
YAML

groups:
- name: nexus-alerts
rules:
- alert: PostgreSQLDown
expr: pg_up == 0
for: 1m
labels:
severity: critical
annotations:
summary: "PostgreSQL is down"
description: "PostgreSQL has been down for more than 1 minute."
- alert: RedisDown
expr: redis_up == 0
for: 1m
labels:
severity: critical
annotations:
summary: "Redis is down"
description: "Redis has been down for more than 1 minute."
- alert: HighDiskUsage
expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 10
for: 5m
labels:
severity: warning
annotations:
summary: "Disk usage is high"
description: "Disk usage is above 90% on {{ $labels.device }}."
- alert: HighMemoryUsage
expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 85
for: 5m
labels:
severity: warning
annotations:
summary: "Memory usage is high"
description: "Memory usage is above 85%."
- alert: NodeDown
expr: up{job="node"} == 0
for: 2m
labels:
severity: critical
annotations:
summary: "Node exporter is down"
description: "Node exporter has been down for more than 2 minutes."