state(monitoring): update encrypted state
This commit is contained in:
parent
b1b408ff0e
commit
0901dd5f61
2 changed files with 28 additions and 0 deletions
|
|
@ -30,6 +30,26 @@ module "nfs_prometheus_backup_host" {
|
|||
nfs_path = "/srv/nfs/prometheus-backup"
|
||||
}
|
||||
|
||||
# PersistentVolumeClaim for Alertmanager in the monitoring namespace.
# Requests an initial 2Gi ReadWriteOnce volume on the "proxmox-lvm-encrypted"
# storage class.
resource "kubernetes_persistent_volume_claim" "alertmanager_pvc" {
  # Do not make `terraform apply` wait for the claim to reach the Bound phase.
  # NOTE(review): presumably the storage class only binds once a pod consumes
  # the claim — confirm against the StorageClass volumeBindingMode.
  wait_until_bound = false

  metadata {
    name      = "alertmanager-pvc"
    namespace = kubernetes_namespace.monitoring.metadata[0].name

    # Auto-resize hints. NOTE(review): these look like topolvm pvc-autoresizer
    # annotations — confirm the controller is deployed in this cluster.
    # NOTE(review): pvc-autoresizer documents `threshold` as a *free-space*
    # threshold; if that applies here, "80%" triggers a resize once only 20%
    # of the volume is used. Verify that this is the intended behaviour.
    annotations = {
      "resize.topolvm.io/threshold"     = "80%"
      "resize.topolvm.io/increase"      = "100%"
      "resize.topolvm.io/storage_limit" = "10Gi"
    }
  }

  spec {
    access_modes       = ["ReadWriteOnce"]
    storage_class_name = "proxmox-lvm-encrypted"

    resources {
      # Initial size only; the live request may be grown outside Terraform.
      requests = { storage = "2Gi" }
    }
  }

  lifecycle {
    # The resize annotations above ask an external controller to raise
    # spec.resources.requests.storage. Without this, every plan after a resize
    # would show drift and try to set the request back to 2Gi.
    ignore_changes = [spec[0].resources[0].requests]
  }
}
|
||||
|
||||
resource "helm_release" "prometheus" {
|
||||
namespace = kubernetes_namespace.monitoring.metadata[0].name
|
||||
create_namespace = true
|
||||
|
|
|
|||
|
|
@ -1717,6 +1717,14 @@ serverFiles:
|
|||
severity: critical
|
||||
annotations:
|
||||
summary: ">5 pods stuck in ContainerCreating with sudden increase — possible NFS or storage outage"
|
||||
# Warns when a node's NFS client RPC retransmission rate stays above 5/s
# (5-minute rate, summed per instance) for five consecutive minutes.
- alert: NFSHighRPCRetransmissions
  expr: |
    sum by (instance) (rate(node_nfs_rpc_retransmissions_total[5m])) > 5
  for: 5m
  labels:
    severity: warning
  annotations:
    # Single-quoted so the inner "%.1f" format string needs no escaping;
    # the {{ ... }} placeholders are left for the alert templating to expand.
    summary: 'Node {{ $labels.instance }}: NFS RPC retransmission rate {{ $value | printf "%.1f" }}/s — NFS server (192.168.1.127) may be degraded or unreachable'
|
||||
- name: "Application Health"
|
||||
rules:
|
||||
- alert: MailServerDown
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue