state(monitoring): update encrypted state
This commit is contained in:
parent
b1b408ff0e
commit
0901dd5f61
2 changed files with 28 additions and 0 deletions
|
|
@@ -30,6 +30,26 @@ module "nfs_prometheus_backup_host" {
|
||||||
nfs_path = "/srv/nfs/prometheus-backup"
|
nfs_path = "/srv/nfs/prometheus-backup"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resource "kubernetes_persistent_volume_claim" "alertmanager_pvc" {
|
||||||
|
wait_until_bound = false
|
||||||
|
metadata {
|
||||||
|
name = "alertmanager-pvc"
|
||||||
|
namespace = kubernetes_namespace.monitoring.metadata[0].name
|
||||||
|
annotations = {
|
||||||
|
"resize.topolvm.io/threshold" = "80%"
|
||||||
|
"resize.topolvm.io/increase" = "100%"
|
||||||
|
"resize.topolvm.io/storage_limit" = "10Gi"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spec {
|
||||||
|
access_modes = ["ReadWriteOnce"]
|
||||||
|
storage_class_name = "proxmox-lvm-encrypted"
|
||||||
|
resources {
|
||||||
|
requests = { storage = "2Gi" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
resource "helm_release" "prometheus" {
|
resource "helm_release" "prometheus" {
|
||||||
namespace = kubernetes_namespace.monitoring.metadata[0].name
|
namespace = kubernetes_namespace.monitoring.metadata[0].name
|
||||||
create_namespace = true
|
create_namespace = true
|
||||||
|
|
|
||||||
|
|
@@ -1717,6 +1717,14 @@ serverFiles:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
summary: ">5 pods stuck in ContainerCreating with sudden increase — possible NFS or storage outage"
|
summary: ">5 pods stuck in ContainerCreating with sudden increase — possible NFS or storage outage"
|
||||||
|
- alert: NFSHighRPCRetransmissions
|
||||||
|
expr: |
|
||||||
|
sum by (instance) (rate(node_nfs_rpc_retransmissions_total[5m])) > 5
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Node {{ $labels.instance }}: NFS RPC retransmission rate {{ $value | printf \"%.1f\" }}/s — NFS server (192.168.1.127) may be degraded or unreachable"
|
||||||
- name: "Application Health"
|
- name: "Application Health"
|
||||||
rules:
|
rules:
|
||||||
- alert: MailServerDown
|
- alert: MailServerDown
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue