feat: auto-cleanup failed/evicted pods via Kyverno ClusterCleanupPolicy
Add cleanup-failed-pods policy that runs hourly (at :15) to delete all pods in Failed phase cluster-wide. Prevents stale evicted and failed CronJob pods from accumulating and creating healthcheck noise. Also adds ClusterRole + ClusterRoleBinding to grant Kyverno cleanup controller permission to delete Pods (not included by default). [ci skip] Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f726d1c3fd
commit
cf578516e9
1 changed files with 77 additions and 0 deletions
|
|
@ -948,3 +948,80 @@ resource "kubernetes_manifest" "mutate_gpu_priority" {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Layer 5: Automatic Cleanup of Failed/Evicted Pods
|
||||
# -----------------------------------------------------------------------------
|
||||
# Deletes pods in Failed phase every hour, cluster-wide.
|
||||
# Prevents stale evicted pods and failed CronJob pods from accumulating.
|
||||
|
||||
# RBAC: allow the Kyverno cleanup controller to enumerate and delete Pods.
# The rule targets the core API group ("") and only the "pods" resource.
resource "kubernetes_cluster_role_v1" "kyverno_cleanup_pods" {
  metadata {
    name = "kyverno:cleanup-controller:pods"

    # Standard app labels marking this role as part of the Kyverno install.
    labels = {
      "app.kubernetes.io/instance" = "kyverno"
      "app.kubernetes.io/part-of"  = "kyverno"
    }
  }

  rule {
    # list/watch to discover candidate Pods, delete to remove them.
    verbs      = ["list", "watch", "delete"]
    resources  = ["pods"]
    api_groups = [""]
  }
}
|
||||
|
||||
# Binds the Pod-cleanup ClusterRole to the cleanup controller's ServiceAccount
# ("kyverno-cleanup-controller" in the "kyverno" namespace).
resource "kubernetes_cluster_role_binding_v1" "kyverno_cleanup_pods" {
  metadata {
    name = "kyverno:cleanup-controller:pods"

    labels = {
      "app.kubernetes.io/instance" = "kyverno"
      "app.kubernetes.io/part-of"  = "kyverno"
    }
  }

  # Who receives the permissions.
  subject {
    kind      = "ServiceAccount"
    namespace = "kyverno"
    name      = "kyverno-cleanup-controller"
  }

  # Which permissions are granted. Referencing the resource attribute (rather
  # than repeating the literal name) also makes Terraform create the role first.
  role_ref {
    kind      = "ClusterRole"
    api_group = "rbac.authorization.k8s.io"
    name      = kubernetes_cluster_role_v1.kyverno_cleanup_pods.metadata[0].name
  }
}
|
||||
|
||||
# Kyverno ClusterCleanupPolicy that deletes every Pod whose status.phase is
# "Failed", cluster-wide, on a cron schedule (minute 15 of every hour).
resource "kubernetes_manifest" "cleanup_failed_pods" {
  # Kyverno checks at admission time that the cleanup controller is allowed to
  # delete the matched kinds, so the RBAC binding must exist before this policy
  # is applied; otherwise the create can be rejected.
  depends_on = [kubernetes_cluster_role_binding_v1.kyverno_cleanup_pods]

  manifest = {
    apiVersion = "kyverno.io/v2"
    kind       = "ClusterCleanupPolicy"

    metadata = {
      name = "cleanup-failed-pods"
      annotations = {
        "policies.kyverno.io/title"       = "Cleanup Failed Pods"
        "policies.kyverno.io/description" = "Automatically deletes pods in Failed phase (evicted, error, completed CronJob failures)."
      }
    }

    spec = {
      # Candidate set: all Pods in all namespaces.
      match = {
        any = [
          {
            resources = {
              kinds = ["Pod"]
            }
          }
        ]
      }

      # Only delete candidates that are in the Failed phase.
      # Cleanup policies are evaluated on a schedule against existing
      # resources, not against admission requests, so the resource under
      # evaluation is exposed as `target` (there is no `request.object` here).
      conditions = {
        any = [
          {
            key      = "{{ target.status.phase }}"
            operator = "Equals"
            value    = "Failed"
          }
        ]
      }

      # Standard 5-field cron expression: at minute 15 of every hour.
      schedule = "15 * * * *"
    }
  }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue