feat: auto-cleanup failed/evicted pods via Kyverno ClusterCleanupPolicy
Add cleanup-failed-pods policy that runs hourly (at :15) to delete all pods in Failed phase cluster-wide. Prevents stale evicted and failed CronJob pods from accumulating and creating healthcheck noise. Also adds ClusterRole + ClusterRoleBinding to grant Kyverno cleanup controller permission to delete Pods (not included by default). [ci skip] Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f726d1c3fd
commit
cf578516e9
1 changed files with 77 additions and 0 deletions
|
|
@ -948,3 +948,80 @@ resource "kubernetes_manifest" "mutate_gpu_priority" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Layer 5: Automatic Cleanup of Failed/Evicted Pods
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Deletes pods in Failed phase every hour, cluster-wide.
|
||||||
|
# Prevents stale evicted pods and failed CronJob pods from accumulating.
|
||||||
|
|
||||||
|
# Grant Kyverno cleanup controller permission to delete Pods
|
||||||
|
# ClusterRole that allows listing, watching and deleting core-group Pods.
resource "kubernetes_cluster_role_v1" "kyverno_cleanup_pods" {
  metadata {
    name = "kyverno:cleanup-controller:pods"

    labels = {
      "app.kubernetes.io/instance" = "kyverno"
      "app.kubernetes.io/part-of"  = "kyverno"
    }
  }

  # The empty string selects the core API group, where Pods live.
  rule {
    verbs      = ["list", "watch", "delete"]
    resources  = ["pods"]
    api_groups = [""]
  }
}
|
||||||
|
|
||||||
|
# Binds the pod-cleanup ClusterRole to the "kyverno-cleanup-controller"
# ServiceAccount in the "kyverno" namespace.
resource "kubernetes_cluster_role_binding_v1" "kyverno_cleanup_pods" {
  metadata {
    name = "kyverno:cleanup-controller:pods"

    labels = {
      "app.kubernetes.io/instance" = "kyverno"
      "app.kubernetes.io/part-of"  = "kyverno"
    }
  }

  # Who receives the permissions.
  subject {
    kind      = "ServiceAccount"
    namespace = "kyverno"
    name      = "kyverno-cleanup-controller"
  }

  # Which role is granted; referencing the resource attribute (rather than a
  # literal) also makes Terraform create the ClusterRole first.
  role_ref {
    kind      = "ClusterRole"
    api_group = "rbac.authorization.k8s.io"
    name      = kubernetes_cluster_role_v1.kyverno_cleanup_pods.metadata[0].name
  }
}
|
||||||
|
|
||||||
|
# Kyverno ClusterCleanupPolicy that deletes every Pod whose status.phase is
# "Failed", cluster-wide, on an hourly schedule (minute 15 of each hour).
resource "kubernetes_manifest" "cleanup_failed_pods" {
  manifest = {
    apiVersion = "kyverno.io/v2"
    kind       = "ClusterCleanupPolicy"
    metadata = {
      name = "cleanup-failed-pods"
      annotations = {
        "policies.kyverno.io/title"       = "Cleanup Failed Pods"
        "policies.kyverno.io/description" = "Automatically deletes pods in Failed phase (evicted, error, completed CronJob failures)."
      }
    }
    spec = {
      # No namespace or label selector: every Pod in the cluster is a candidate.
      match = {
        any = [
          {
            resources = {
              kinds = ["Pod"]
            }
          }
        ]
      }
      conditions = {
        any = [
          {
            # Cleanup policies are evaluated on a schedule against existing
            # resources, not against admission requests, so the resource under
            # evaluation is referenced as `target.*` (per the Kyverno cleanup
            # policy docs) rather than `request.object.*`.
            key      = "{{ target.status.phase }}"
            operator = "Equals"
            value    = "Failed"
          }
        ]
      }
      # Standard cron syntax: at minute 15 of every hour.
      schedule = "15 * * * *"
    }
  }

  # Kyverno validates at policy admission that the cleanup controller is
  # allowed to delete the matched kinds, so the RBAC binding has to exist
  # before this manifest is applied. There is no attribute reference from this
  # resource to the binding, hence the explicit dependency.
  depends_on = [
    kubernetes_cluster_role_binding_v1.kyverno_cleanup_pods,
  ]
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue