# infra/stacks/descheduler/values.yaml

# Source from https://github.com/kubernetes-sigs/descheduler/blob/master/charts/descheduler/values.yaml

# Default values for descheduler.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Workload kind to render: CronJob or Deployment
kind: CronJob

image:
  repository: registry.k8s.io/descheduler/descheduler
  # Image tag override; when left empty the default is the chart version
  tag: ''
  pullPolicy: IfNotPresent

# Registry credentials used to pull the descheduler image.
# Written as an explicit empty list: a bare `imagePullSecrets:` parses as null,
# not as a list, which hides the intended type.
imagePullSecrets: []
#   - name: container-registry-secret

# Memory request and limit are both 256Mi; a CPU request is set but no CPU limit.
resources:
  limits:
    memory: 256Mi
  requests:
    cpu: 500m
    memory: 256Mi

# Container port (same port number the liveness probe targets).
ports:
  - protocol: TCP
    containerPort: 10258

# Container-level hardening: non-root UID 1000, read-only root filesystem,
# no privilege escalation, and all capabilities dropped.
securityContext:
  runAsNonRoot: true
  runAsUser: 1000
  privileged: false
  allowPrivilegeEscalation: false
  readOnlyRootFilesystem: true
  capabilities:
    drop: [ALL]

# podSecurityContext -- pod-level security context, see
# https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
podSecurityContext: {}
#   fsGroup: 1000

# Name overrides for the chart; both are left empty here.
nameOverride: ''
fullnameOverride: ''

# -- Namespace to deploy into; when empty, .Release.Namespace is used
namespaceOverride: ''

# Labels added to every resource
commonLabels: {}

# CronJob settings (this file sets `kind: CronJob`).
cronJobApiVersion: "batch/v1"
# Hourly, at minute 0. Was */5 until the 2026-06-12 etcd-load-reduction change:
# fewer list/evict cycles, and rebalancing isn't time-critical.
schedule: "0 * * * *"
suspend: false
# startingDeadlineSeconds: 200
successfulJobsHistoryLimit: 10
# failedJobsHistoryLimit: 1
# ttlSecondsAfterFinished: 600
# timeZone: Etc/UTC

# Deployment-mode settings. This file sets `kind: CronJob`, so these are
# presumably unused at the moment — verify against the chart templates before
# relying on them.

# Required when running as a Deployment
deschedulingInterval: 5m

# Specifies the replica count for Deployment
# Set leaderElection if you want to use more than 1 replica
# Set affinity.podAntiAffinity rule if you want to schedule onto a node
# only if that node is in the same zone as at least one already-running descheduler
replicas: 1

# Specifies whether Leader Election resources should be created
# Required when running as a Deployment
# NOTE: Leader election can't be activated if DryRun enabled
# Left as an empty map here, i.e. none of the example keys below are set.
leaderElection: {}
#  enabled: true
#  leaseDuration: 15s
#  renewDeadline: 10s
#  retryPeriod: 2s
#  resourceLock: "leases"
#  resourceName: "descheduler"
#  resourceNamespace: "kube-system"

# Container entrypoint
command: ['/bin/descheduler']

# Extra command-line options for the descheduler binary.
# NOTE(review): `v: 3` is presumably rendered as --v=3 (log verbosity) — confirm in the chart templates.
cmdOptions:
  v: 3

# Use the latest Policy API version that the deployed Descheduler app version supports
deschedulerPolicyAPIVersion: 'descheduler/v1alpha2'

# deschedulerPolicy contains the policies the descheduler will execute.
# To use policies stored in an existing configMap use:
# NOTE: The name of the cm should comply to {{ template "descheduler.fullname" . }}
# deschedulerPolicy: {}
#
# Layout used below: two profiles ("default" and "idrac-restart"). In each
# profile, `pluginConfig` holds per-plugin arguments and `plugins.balance` /
# `plugins.deschedule` list which plugins are enabled at each extension point.
deschedulerPolicy:
  # nodeSelector: "key1=value1,key2=value2"
  # maxNoOfPodsToEvictPerNode: 10
  # Global eviction cap.
  # NOTE(review): presumably counted per descheduling run across all plugins — confirm in the descheduler docs.
  maxNoOfPodsToEvictTotal: 10
  # maxNoOfPodsToEvictPerNamespace: 10
  # ignorePvcPods: true
  # evictLocalStoragePods: true
  # evictDaemonSetPods: true
  # tracing:
  #   collectorEndpoint: otel-collector.observability.svc.cluster.local:4317
  #   transportCert: ""
  #   serviceName: ""
  #   serviceNamespace: ""
  #   sampleRate: 1.0
  #   fallbackToNoOpProviderOnError: true

  # NOTE(review): presumably required for `metricsUtilization.metricsServer: true`
  # under LowNodeUtilization below — confirm for the deployed descheduler version.
  metricsCollector:
    enabled: true
  profiles:
    # General-purpose profile: every plugin configured here (other than
    # DefaultEvictor) is also enabled in the `plugins` section further down.
    - name: default
      pluginConfig:
        # Eviction filter settings for this profile.
        - name: DefaultEvictor
          args:
            ignorePvcPods: true
            evictLocalStoragePods: true
        - name: RemoveDuplicates
        - name: RemovePodsHavingTooManyRestarts
          args:
            podRestartThreshold: 2
            includingInitContainers: true
            states:
              - CrashLoopBackOff
        - name: RemovePodsViolatingNodeAffinity
          args:
            nodeAffinityType:
              - requiredDuringSchedulingIgnoredDuringExecution
        - name: RemovePodsViolatingNodeTaints
        - name: RemovePodsViolatingInterPodAntiAffinity
        - name: RemovePodsViolatingTopologySpreadConstraint
        - name: LowNodeUtilization
          args:
            evictableNamespaces:
              exclude:
                - "dbaas" # let's not meddle with the dbs
            # NOTE(review): threshold values are presumably percentages of node
            # capacity (under `thresholds` = underutilized, over
            # `targetThresholds` = overutilized) — confirm in the plugin docs.
            thresholds:
              cpu: 50
              memory: 50
              # pods: 20
            targetThresholds:
              cpu: 80
              memory: 80
              # pods: 30
            metricsUtilization:
              metricsServer: true
        - name: PodLifeTime
          args:
            # 604800 s = 7 days
            maxPodLifeTimeSeconds: 604800
            namespaces:
              exclude:
                - "dbaas" # let's not meddle with the dbs
                - "kube-system"
                - "calico-system"
                - "calico-apiserver"
                - "metallb-system"
                - "monitoring"
                - "authentik"
        - name: "RemoveFailedPods"
          args:
            # NOTE(review): CrashLoopBackOff and ImagePullBackOff are container
            # waiting reasons; confirm this plugin actually matches them for the
            # deployed descheduler version.
            reasons:
              - "CrashLoopBackOff"
              - "Error"
              - "ContainerStatusUnknown"
              - "ImagePullBackOff"
            # exitCodes:
            #   - 1
            includingInitContainers: true
            # minPodLifetimeSeconds: 0
      plugins:
        balance:
          enabled:
            - RemoveDuplicates
            - RemovePodsViolatingTopologySpreadConstraint
            - LowNodeUtilization
        deschedule:
          enabled:
            - RemovePodsHavingTooManyRestarts
            - RemovePodsViolatingNodeTaints
            - RemovePodsViolatingNodeAffinity
            - RemovePodsViolatingInterPodAntiAffinity
            - PodLifeTime
            - RemoveFailedPods
    # Narrow profile: only PodLifeTime is enabled, limited to pods labelled
    # `app: idrac-redfish-exporter` in the "monitoring" namespace (which the
    # default profile's PodLifeTime excludes).
    - name: idrac-restart
      pluginConfig:
        - name: DefaultEvictor
          args:
            ignorePvcPods: true
            evictLocalStoragePods: true
        - name: PodLifeTime
          args:
            # 21600 s = 6 hours
            maxPodLifeTimeSeconds: 21600
            namespaces:
              include:
                - "monitoring"
            labelSelector:
              matchLabels:
                app: idrac-redfish-exporter
      plugins:
        deschedule:
          enabled:
            - PodLifeTime

# Pod scheduling settings for the descheduler itself. Only priorityClassName is
# set; the selectors, affinity, spread constraints and tolerations are empty,
# with commented examples kept for reference.
priorityClassName: system-cluster-critical

nodeSelector: {}
#  foo: bar

affinity: {}
#  nodeAffinity:
#    requiredDuringSchedulingIgnoredDuringExecution:
#      nodeSelectorTerms:
#      - matchExpressions:
#        - key: kubernetes.io/e2e-az-name
#          operator: In
#          values:
#          - e2e-az1
#          - e2e-az2
#  podAntiAffinity:
#    requiredDuringSchedulingIgnoredDuringExecution:
#      - labelSelector:
#          matchExpressions:
#            - key: app.kubernetes.io/name
#              operator: In
#              values:
#                - descheduler
#        topologyKey: "kubernetes.io/hostname"
topologySpreadConstraints: []
# - maxSkew: 1
#   topologyKey: kubernetes.io/hostname
#   whenUnsatisfiable: DoNotSchedule
#   labelSelector:
#     matchLabels:
#       app.kubernetes.io/name: descheduler
tolerations: []
# - key: 'management'
#   operator: 'Equal'
#   value: 'tool'
#   effect: 'NoSchedule'

rbac:
  # Whether the chart should create RBAC resources
  create: true

serviceAccount:
  # Whether the chart should create a ServiceAccount (disabled here)
  create: false
  # ServiceAccount name to use.
  # If not set and create is true, a name is generated using the fullname template.
  # NOTE(review): with create=false, "descheduler-sa" is presumably provisioned outside this chart — confirm.
  name: 'descheduler-sa'
  # Custom annotations for the serviceAccount
  annotations: {}

# Extra pod metadata and DNS settings; all empty.
podAnnotations: {}
podLabels: {}
dnsConfig: {}

# Liveness probe: HTTPS GET /healthz on port 10258; initial delay 3s,
# period 10s, failure threshold 3.
livenessProbe:
  httpGet:
    scheme: HTTPS
    port: 10258
    path: /healthz
  initialDelaySeconds: 3
  periodSeconds: 10
  failureThreshold: 3

# Service for the descheduler; disabled here.
service:
  enabled: false
  # @param service.ipFamilyPolicy [string] One of SingleStack, PreferDualStack, RequireDualStack
  ipFamilyPolicy: ''
  # @param service.ipFamilies [array] IP families (e.g. IPv4, IPv6) assigned to the service.
  # Ref: https://kubernetes.io/docs/concepts/services-networking/dual-stack/
  # Example:
  #   ipFamilies:
  #     - IPv6
  #     - IPv4
  ipFamilies: []

# ServiceMonitor settings; disabled here.
serviceMonitor:
  enabled: false
  # Namespace in which Prometheus looks for service monitors.
  # namespace: ""
  # Extra labels for the ServiceMonitor resource, e.g.
  #   prometheus: kube-prometheus-stack
  additionalLabels: {}
  interval: ''
  # honorLabels: true
  insecureSkipVerify: true
  serverName: null
  # Example:
  #   - action: keep
  #     regex: 'descheduler_(build_info|pods_evicted)'
  #     sourceLabels: [__name__]
  metricRelabelings: []
  # Example:
  #   - sourceLabels: [__meta_kubernetes_pod_node_name]
  #     separator: ;
  #     regex: ^(.*)$
  #     targetLabel: nodename
  #     replacement: $1
  #     action: replace
  relabelings: []
fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip] 6d224861 came from a --no-checkout worktree whose empty index made the commit drop every file except two. This restores 05b50d2b's full tree and correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the live infra was never applied from the broken commit. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> 2026-06-09 08:45:33 +00:00			`# Source from https://github.com/kubernetes-sigs/descheduler/blob/master/charts/descheduler/values.yaml`

			`# Default values for descheduler.`
			`# This is a YAML-formatted file.`
			`# Declare variables to be passed into your templates.`

			`# CronJob or Deployment`
			`kind: CronJob`

			`image:`
			`repository: registry.k8s.io/descheduler/descheduler`
			`# Overrides the image tag whose default is the chart version`
			`tag: ""`
			`pullPolicy: IfNotPresent`

			`imagePullSecrets:`
			`# - name: container-registry-secret`

			`resources:`
			`requests:`
			`cpu: 500m`
			`memory: 256Mi`
			`limits:`
			`memory: 256Mi`

			`ports:`
			`- containerPort: 10258`
			`protocol: TCP`

			`securityContext:`
			`allowPrivilegeEscalation: false`
			`capabilities:`
			`drop:`
			`- ALL`
			`privileged: false`
			`readOnlyRootFilesystem: true`
			`runAsNonRoot: true`
			`runAsUser: 1000`

			`# podSecurityContext -- [Security context for pod](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/)`
			`podSecurityContext:`
			`{}`
			`# fsGroup: 1000`

			`nameOverride: ""`
			`fullnameOverride: ""`

			`# -- Override the deployment namespace; defaults to .Release.Namespace`
			`namespaceOverride: ""`

			`# labels that'll be applied to all resources`
			`commonLabels: {}`

			`cronJobApiVersion: "batch/v1"`
etcd-load-reduction: remove VPA/Goldilocks, disable kyverno reporting, descheduler hourly The control-plane flap (etcd lease-renewal timeouts) recurred. Rather than move etcd to SSD (code-oflt, deferred again), the chosen direction is to REDUCE etcd load enough that the leader-election-timeout band-aid (renew 10s->30s) becomes removable. These are the big, clean cuts: 1. Remove VPA/Goldilocks (stacks/vpa emptied). All 349 VPAs ran updateMode=Off (no auto-right-sizing) yet cost ~800 etcd objects + continuous recommender writes + a pod-creation admission webhook, purely to feed a dashboard. krr (Dockerized, on-demand) replaces it. Reverses the re-add after memory 2431. 2. Disable kyverno reporting (admission/aggregate/background). policyReports were already off, so the pipeline generated ephemeralreports + an hourly all-resource etcd re-scan for NO user-facing output. Admission enforcement (deny-* policies) and Keel mutation are unaffected; violations surface via Loki->Slack. 3. descheduler */5 -> hourly (fewer list/evict cycles; rebalancing isn't urgent). Deferred (poor ROI / unsafe as planned): ESO refreshInterval 15m->1h is a ~20-stack sprawl for ~0.1 writes/s; keel background=false is invalid for a mutate-existing policy and its churn is apply-time not steady-state. Both filed as follow-up beads. Post-apply: delete the chart-orphaned VPA CRDs to cascade-clean leftover CRs. Then measure etcd apply-latency and revert the timeouts. Docs updated (VPA/Goldilocks -> krr). See memory 5402-5407. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> 2026-06-12 19:41:22 +00:00			`schedule: "0 * * * *" # hourly (was */5; 2026-06-12 etcd-load-reduction — fewer list/evict cycles, rebalancing isn't time-critical)`
fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip] 6d224861 came from a --no-checkout worktree whose empty index made the commit drop every file except two. This restores 05b50d2b's full tree and correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the live infra was never applied from the broken commit. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> 2026-06-09 08:45:33 +00:00			`suspend: false`
			`# startingDeadlineSeconds: 200`
			`successfulJobsHistoryLimit: 10`
			`# failedJobsHistoryLimit: 1`
			`# ttlSecondsAfterFinished: 600`
			`# timeZone: Etc/UTC`

			`# Required when running as a Deployment`
			`deschedulingInterval: 5m`

			`# Specifies the replica count for Deployment`
			`# Set leaderElection if you want to use more than 1 replica`
			`# Set affinity.podAntiAffinity rule if you want to schedule onto a node`
			`# only if that node is in the same zone as at least one already-running descheduler`
			`replicas: 1`

			`# Specifies whether Leader Election resources should be created`
			`# Required when running as a Deployment`
			`# NOTE: Leader election can't be activated if DryRun enabled`
			`leaderElection: {}`
			`# enabled: true`
			`# leaseDuration: 15s`
			`# renewDeadline: 10s`
			`# retryPeriod: 2s`
			`# resourceLock: "leases"`
			`# resourceName: "descheduler"`
			`# resourceNamespace: "kube-system"`

			`command:`
			`- "/bin/descheduler"`

			`cmdOptions:`
			`v: 3`

			`# Recommended to use the latest Policy API version supported by the Descheduler app version`
			`deschedulerPolicyAPIVersion: "descheduler/v1alpha2"`

			`# deschedulerPolicy contains the policies the descheduler will execute.`
			`# To use policies stored in an existing configMap use:`
			`# NOTE: The name of the cm should comply to {{ template "descheduler.fullname" . }}`
			`# deschedulerPolicy: {}`
			`deschedulerPolicy:`
			`# nodeSelector: "key1=value1,key2=value2"`
			`# maxNoOfPodsToEvictPerNode: 10`
			`maxNoOfPodsToEvictTotal: 10`
			`# maxNoOfPodsToEvictPerNamespace: 10`
			`# ignorePvcPods: true`
			`# evictLocalStoragePods: true`
			`# evictDaemonSetPods: true`
			`# tracing:`
			`# collectorEndpoint: otel-collector.observability.svc.cluster.local:4317`
			`# transportCert: ""`
			`# serviceName: ""`
			`# serviceNamespace: ""`
			`# sampleRate: 1.0`
			`# fallbackToNoOpProviderOnError: true`

			`metricsCollector:`
			`enabled: true`
			`profiles:`
			`- name: default`
			`pluginConfig:`
			`- name: DefaultEvictor`
			`args:`
			`ignorePvcPods: true`
			`evictLocalStoragePods: true`
			`- name: RemoveDuplicates`
			`- name: RemovePodsHavingTooManyRestarts`
			`args:`
			`podRestartThreshold: 2`
			`includingInitContainers: true`
			`states:`
			`- CrashLoopBackOff`
			`- name: RemovePodsViolatingNodeAffinity`
			`args:`
			`nodeAffinityType:`
			`- requiredDuringSchedulingIgnoredDuringExecution`
			`- name: RemovePodsViolatingNodeTaints`
			`- name: RemovePodsViolatingInterPodAntiAffinity`
			`- name: RemovePodsViolatingTopologySpreadConstraint`
			`- name: LowNodeUtilization`
			`args:`
			`evictableNamespaces:`
			`exclude:`
			`- "dbaas" # let's not meddle with the dbs`
			`thresholds:`
			`cpu: 50`
			`memory: 50`
			`# pods: 20`
			`targetThresholds:`
			`cpu: 80`
			`memory: 80`
			`# pods: 30`
			`metricsUtilization:`
			`metricsServer: true`
			`- name: PodLifeTime`
			`args:`
			`maxPodLifeTimeSeconds: 604800`
			`namespaces:`
			`exclude:`
			`- "dbaas" # let's not meddle with the dbs`
			`- "kube-system"`
			`- "calico-system"`
			`- "calico-apiserver"`
			`- "metallb-system"`
			`- "monitoring"`
			`- "authentik"`
			`- name: "RemoveFailedPods"`
			`args:`
			`reasons:`
			`- "CrashLoopBackOff"`
			`- "Error"`
			`- "ContainerStatusUnknown"`
			`- "ImagePullBackOff"`
			`# exitCodes:`
			`# - 1`
			`includingInitContainers: true`
			`# minPodLifetimeSeconds: 0`
			`plugins:`
			`balance:`
			`enabled:`
			`- RemoveDuplicates`
			`- RemovePodsViolatingTopologySpreadConstraint`
			`- LowNodeUtilization`
			`deschedule:`
			`enabled:`
			`- RemovePodsHavingTooManyRestarts`
			`- RemovePodsViolatingNodeTaints`
			`- RemovePodsViolatingNodeAffinity`
			`- RemovePodsViolatingInterPodAntiAffinity`
			`- PodLifeTime`
			`- RemoveFailedPods`
			`- name: idrac-restart`
			`pluginConfig:`
			`- name: DefaultEvictor`
			`args:`
			`ignorePvcPods: true`
			`evictLocalStoragePods: true`
			`- name: PodLifeTime`
			`args:`
			`maxPodLifeTimeSeconds: 21600`
			`namespaces:`
			`include:`
			`- "monitoring"`
			`labelSelector:`
			`matchLabels:`
			`app: idrac-redfish-exporter`
			`plugins:`
			`deschedule:`
			`enabled:`
			`- PodLifeTime`

			`priorityClassName: system-cluster-critical`

			`nodeSelector: {}`
			`# foo: bar`

			`affinity: {}`
			`# nodeAffinity:`
			`# requiredDuringSchedulingIgnoredDuringExecution:`
			`# nodeSelectorTerms:`
			`# - matchExpressions:`
			`# - key: kubernetes.io/e2e-az-name`
			`# operator: In`
			`# values:`
			`# - e2e-az1`
			`# - e2e-az2`
			`# podAntiAffinity:`
			`# requiredDuringSchedulingIgnoredDuringExecution:`
			`# - labelSelector:`
			`# matchExpressions:`
			`# - key: app.kubernetes.io/name`
			`# operator: In`
			`# values:`
			`# - descheduler`
			`# topologyKey: "kubernetes.io/hostname"`
			`topologySpreadConstraints: []`
			`# - maxSkew: 1`
			`# topologyKey: kubernetes.io/hostname`
			`# whenUnsatisfiable: DoNotSchedule`
			`# labelSelector:`
			`# matchLabels:`
			`# app.kubernetes.io/name: descheduler`
			`tolerations: []`
			`# - key: 'management'`
			`# operator: 'Equal'`
			`# value: 'tool'`
			`# effect: 'NoSchedule'`

			`rbac:`
			`# Specifies whether RBAC resources should be created`
			`create: true`

			`serviceAccount:`
			`# Specifies whether a ServiceAccount should be created`
			`create: false`
			`# The name of the ServiceAccount to use.`
			`# If not set and create is true, a name is generated using the fullname template`
			`name: "descheduler-sa"`
			`# Specifies custom annotations for the serviceAccount`
			`annotations: {}`

			`podAnnotations: {}`

			`podLabels: {}`

			`dnsConfig: {}`

			`livenessProbe:`
			`failureThreshold: 3`
			`httpGet:`
			`path: /healthz`
			`port: 10258`
			`scheme: HTTPS`
			`initialDelaySeconds: 3`
			`periodSeconds: 10`

			`service:`
			`enabled: false`
			`# @param service.ipFamilyPolicy [string], support SingleStack, PreferDualStack and RequireDualStack`
			`#`
			`ipFamilyPolicy: ""`
			`# @param service.ipFamilies [array] List of IP families (e.g. IPv4, IPv6) assigned to the service.`
			`# Ref: https://kubernetes.io/docs/concepts/services-networking/dual-stack/`
			`# E.g.`
			`# ipFamilies:`
			`# - IPv6`
			`# - IPv4`
			`ipFamilies: []`

			`serviceMonitor:`
			`enabled: false`
			`# The namespace where Prometheus expects to find service monitors.`
			`# namespace: ""`
			`# Add custom labels to the ServiceMonitor resource`
			`additionalLabels:`
			`{}`
			`# prometheus: kube-prometheus-stack`
			`interval: ""`
			`# honorLabels: true`
			`insecureSkipVerify: true`
			`serverName: null`
			`metricRelabelings:`
			`[]`
			`# - action: keep`
			`# regex: 'descheduler_(build_info|pods_evicted)'`
			`# sourceLabels: [__name__]`
			`relabelings:`
			`[]`
			`# - sourceLabels: [__meta_kubernetes_pod_node_name]`
			`# separator: ;`
			`# regex: ^(.*)$`
			`# targetLabel: nodename`
			`# replacement: $1`
			`# action: replace`