[ci skip] Reduce healthcheck frequency to 8h, fix apiserver audit duplication bug

Change cluster-healthcheck CronJob from every 30min to every 8h.
Replace fragile sed-based audit config in apiserver manifest with
idempotent Python script that deduplicates by name/mountPath,
preventing the duplicate volume entries that crashed the API server.
This commit is contained in:
Viktor Barzin 2026-02-22 23:18:30 +00:00
parent 27dc486a4d
commit ddb293b2b7
2 changed files with 50 additions and 9 deletions

View file

@ -539,7 +539,7 @@ resource "kubernetes_cron_job_v1" "cluster_healthcheck" {
}
}
spec {
schedule = "*/30 * * * *"
schedule = "0 */8 * * *"
concurrency_policy = "Forbid"
failed_jobs_history_limit = 3
successful_jobs_history_limit = 3

View file

@ -68,17 +68,58 @@ resource "null_resource" "audit_policy" {
# Create audit log directory
"sudo mkdir -p /var/log/kubernetes",
# Check if audit flags already present
"if grep -q 'audit-policy-file' /etc/kubernetes/manifests/kube-apiserver.yaml; then echo 'Audit flags already configured'; exit 0; fi",
# Idempotently add audit flags, volumes, and volumeMounts using Python
# to avoid sed duplication bugs on re-runs
<<-SCRIPT
sudo python3 -c "
import yaml
# Add audit flags to kube-apiserver manifest
"sudo sed -i '/- --oidc-groups-claim/a\\ - --audit-policy-file=/etc/kubernetes/policies/audit-policy.yaml\\n - --audit-log-path=/var/log/kubernetes/audit.log\\n - --audit-log-maxage=7\\n - --audit-log-maxbackup=3\\n - --audit-log-maxsize=100' /etc/kubernetes/manifests/kube-apiserver.yaml",
path = '/etc/kubernetes/manifests/kube-apiserver.yaml'
with open(path) as f:
doc = yaml.safe_load(f)
# Add volume mount for audit policy (hostPath)
# The kube-apiserver pod needs access to the policy file and log directory
"sudo sed -i '/volumes:/a\\ - hostPath:\\n path: /etc/kubernetes/policies\\n type: DirectoryOrCreate\\n name: audit-policy\\n - hostPath:\\n path: /var/log/kubernetes\\n type: DirectoryOrCreate\\n name: audit-log' /etc/kubernetes/manifests/kube-apiserver.yaml",
container = doc['spec']['containers'][0]
cmd = container['command']
"sudo sed -i '/volumeMounts:/a\\ - mountPath: /etc/kubernetes/policies\\n name: audit-policy\\n readOnly: true\\n - mountPath: /var/log/kubernetes\\n name: audit-log' /etc/kubernetes/manifests/kube-apiserver.yaml",
# Add audit flags if missing
audit_flags = {
'--audit-policy-file=/etc/kubernetes/policies/audit-policy.yaml': True,
'--audit-log-path=/var/log/kubernetes/audit.log': True,
'--audit-log-maxage=7': True,
'--audit-log-maxbackup=3': True,
'--audit-log-maxsize=100': True,
}
existing = set(cmd)
for flag in audit_flags:
if flag not in existing:
cmd.append(flag)
# Add volumes if missing (deduplicate by name)
vol_names = {v['name'] for v in doc['spec']['volumes']}
for vol in [
{'name': 'audit-policy', 'hostPath': {'path': '/etc/kubernetes/policies', 'type': 'DirectoryOrCreate'}},
{'name': 'audit-log', 'hostPath': {'path': '/var/log/kubernetes', 'type': 'DirectoryOrCreate'}},
]:
if vol['name'] not in vol_names:
doc['spec']['volumes'].append(vol)
vol_names.add(vol['name'])
# Add volumeMounts if missing (deduplicate by mountPath)
mount_paths = {vm['mountPath'] for vm in container['volumeMounts']}
for vm in [
{'mountPath': '/etc/kubernetes/policies', 'name': 'audit-policy', 'readOnly': True},
{'mountPath': '/var/log/kubernetes', 'name': 'audit-log'},
]:
if vm['mountPath'] not in mount_paths:
container['volumeMounts'].append(vm)
mount_paths.add(vm['mountPath'])
with open(path, 'w') as f:
yaml.dump(doc, f, default_flow_style=False, sort_keys=False)
print('Audit config applied (idempotent)')
"
SCRIPT
# Wait for API server to restart
"echo 'Waiting for API server to restart with audit logging...'",