resilience improvements: MySQL anti-affinity comment, descheduler 5min, prometheus termination 60s

- MySQL InnoDB: keep required anti-affinity but document why (2/3 members OK during node loss)
- Descheduler: increase frequency from hourly to every 5 min for faster rebalancing
- Prometheus: set terminationGracePeriodSeconds=60 to prevent drain timeout [ci skip]
This commit is contained in:
Viktor Barzin 2026-04-06 00:25:49 +03:00
parent 3eb15149e1
commit c8be07c403
3 changed files with 6 additions and 1 deletion

View file

@ -237,6 +237,10 @@ resource "helm_release" "mysql_cluster" {
}
}
podAntiAffinity = {
# Required anti-affinity: MySQL pods MUST be on different nodes.
# During node loss, one pod will be Pending; this is acceptable because
# InnoDB Cluster operates with 2/3 members (OK_NO_TOLERANCE).
# The descheduler (every 5 min) handles violations if any occur.
requiredDuringSchedulingIgnoredDuringExecution = [{
labelSelector = {
matchLabels = {

View file

@ -52,7 +52,7 @@ namespaceOverride: ""
commonLabels: {}
cronJobApiVersion: "batch/v1"
schedule: "0 * * * *"
schedule: "*/5 * * * *"
suspend: false
# startingDeadlineSeconds: 200
successfulJobsHistoryLimit: 10

View file

@ -169,6 +169,7 @@ server:
memory: 4Gi
livenessProbeInitialDelay: 300
readinessProbeInitialDelay: 60
terminationGracePeriodSeconds: 60
strategy:
type: Recreate
baseURL: "https://prometheus.viktorbarzin.me"