Phase 0: install Keel + Kyverno auto-update annotation injector

Foundation for opt-out-pure auto-update model per
docs/plans/2026-05-16-auto-upgrade-apps-{design,plan}.md.

- New stack `stacks/keel/` deploys Keel via Helm (charts.keel.sh, v1.0.6).
  Polls registries hourly per design decision #8. Default schedule
  overridable per-workload via keel.sh/pollSchedule annotation.
- New Kyverno ClusterPolicy `inject-keel-annotations` mutates Deployments,
  StatefulSets, and DaemonSets in namespaces labeled `keel.sh/enrolled=true`
  with keel.sh/policy=force + trigger=poll + pollSchedule=@every 1h.
- Phase 0 enrolls no namespaces. Phase 1 (next session) labels the
  self-hosted set.
- Per-workload opt-out: label `keel.sh/policy: never` (used by rollback
  runbook and chrome-service-style deliberate pins).
- Keel namespace excluded from the mutate — a supervisor self-update has
  too severe a failure mode (decision #11).
- AGENTS.md: KYVERNO_LIFECYCLE_V2 marker convention added for the
  ignore_changes block enrolled workloads need.
- .claude/CLAUDE.md: docker-images rule flagged as transitional.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-05-16 12:19:34 +00:00
parent a8302072eb
commit 910167105e
7 changed files with 679 additions and 1 deletions

65
stacks/keel/main.tf Normal file
View file

@ -0,0 +1,65 @@
# Keel: automated Kubernetes Deployment image updates.
# Design: docs/plans/2026-05-16-auto-upgrade-apps-design.md
# Plan: docs/plans/2026-05-16-auto-upgrade-apps-plan.md
#
# Operation: Keel polls each watched workload's registry hourly (default
# schedule below; overridable per-workload via keel.sh/pollSchedule).
# Detection of a new digest under the watched tag triggers a Deployment
# update (pod template hash bump -> rolling restart). Workloads opt in by
# carrying keel.sh/policy + keel.sh/trigger annotations; those are
# injected cluster-wide by the inject-keel-annotations ClusterPolicy
# (stacks/kyverno/modules/kyverno/keel-annotations.tf) on namespaces
# labeled keel.sh/enrolled=true.
# Namespace that hosts the Keel controller release.
resource "kubernetes_namespace" "keel" {
  metadata {
    labels = {
      # Tier value comes from the stack-level `local.tiers` map (defined
      # outside this file).
      tier = local.tiers.cluster
    }
    name = "keel"
  }

  lifecycle {
    # The goldilocks VPA-update-mode label is excluded from drift detection
    # so that out-of-band changes to it do not show up as a plan diff.
    # NOTE(review): the marker below suggests Kyverno manages this label;
    # confirm against AGENTS.md.
    # KYVERNO_LIFECYCLE_V1
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# Keel controller, installed from the upstream Helm repository into the
# namespace declared by kubernetes_namespace.keel.
resource "helm_release" "keel" {
  # Chart coordinates, pinned to an exact chart version.
  repository = "https://charts.keel.sh"
  chart      = "keel"
  version    = "1.0.6"

  name      = "keel"
  namespace = kubernetes_namespace.keel.metadata[0].name

  # atomic guards against a partially-deployed release. Keel is exempt from
  # its own auto-update (the Kyverno mutate excludes the keel namespace), so
  # the release only rolls when this stack is applied, which is what makes
  # atomic safe to use here.
  atomic = true

  values = [yamlencode({
    # Annotation-driven operation only; the chart's Helm provider stays off.
    helmProvider = {
      enabled = false
    }

    notificationLevel = "info"

    persistence = {
      enabled = false
    }

    polling = {
      # Fallback cadence for workloads that do not set their own
      # keel.sh/pollSchedule annotation (design doc decision #8).
      defaultSchedule = "@every 1h"
      enabled         = true
    }

    # Registry access: Keel queries each registry with the watched
    # Deployment's own imagePullSecrets. The Forgejo credentials
    # (`registry-credentials`) are already synced into every namespace by
    # Kyverno, and Keel's own image is pulled from public ghcr.io, so no
    # dedicated pull secret is configured for the Keel pods.
    rbac = {
      enabled = true
    }

    resources = {
      limits = {
        memory = "256Mi"
      }
      requests = {
        cpu    = "50m"
        memory = "64Mi"
      }
    }
  })]
}

View file

@ -0,0 +1,13 @@
# Pull in the shared Terragrunt configuration located by walking up the
# directory tree from this stack.
include "root" {
  path = find_in_parent_folders()
}
# Declares a dependency on the platform stack. Its outputs are not read
# (skip_outputs), so only the dependency relationship itself is used.
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}
# Declares a dependency on the kyverno stack. Its outputs are not read
# (skip_outputs), so only the dependency relationship itself is used.
dependency "kyverno" {
  skip_outputs = true
  config_path  = "../kyverno"
}

View file

@ -0,0 +1,82 @@
# =============================================================================
# Keel Auto-Update Annotation Injector
# =============================================================================
# Design: infra/docs/plans/2026-05-16-auto-upgrade-apps-design.md
# Plan: infra/docs/plans/2026-05-16-auto-upgrade-apps-plan.md
#
# Mutate policy that adds keel.sh/* annotations to Deployments,
# StatefulSets and DaemonSets in *opted-in* namespaces. Opt-in is via a
# label on the namespace:
#
# labels = { "keel.sh/enrolled" = "true" }
#
# Phase rollout = label more namespaces. No edit to this file per phase.
#
# Workloads can individually opt out with the label keel.sh/policy=never
# (used by the rollback runbook). The keel namespace itself is always
# excluded (design decision #11: the supervisor must not auto-update).
# Kyverno ClusterPolicy that stamps Keel's polling annotations onto
# Deployments, StatefulSets and DaemonSets living in namespaces labeled
# keel.sh/enrolled=true. Applied only after the Kyverno Helm release exists.
resource "kubernetes_manifest" "policy_inject_keel_annotations" {
  depends_on = [helm_release.kyverno]

  manifest = {
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"

    metadata = {
      name = "inject-keel-annotations"
      annotations = {
        "policies.kyverno.io/category"    = "Automation"
        "policies.kyverno.io/description" = "Adds keel.sh/policy: force + trigger: poll annotations to workloads in namespaces labeled keel.sh/enrolled=true. Phase rollout per docs/plans/2026-05-16-auto-upgrade-apps-{design,plan}.md."
        "policies.kyverno.io/severity"    = "low"
        "policies.kyverno.io/title"       = "Inject Keel Auto-Update Annotations"
      }
    }

    spec = {
      background = true

      rules = [{
        name = "add-keel-annotations"

        # Scope: the three workload kinds, restricted to enrolled namespaces.
        match = {
          any = [{
            resources = {
              kinds             = ["Deployment", "StatefulSet", "DaemonSet"]
              namespaceSelector = { matchLabels = { "keel.sh/enrolled" = "true" } }
            }
          }]
        }

        # Carve-outs: a resource matching either entry is left untouched.
        exclude = {
          any = [
            # 1. Anything in the keel namespace itself.
            { resources = { namespaces = ["keel"] } },
            # 2. Workloads that opt out via the keel.sh/policy=never label.
            { resources = { selector = { matchLabels = { "keel.sh/policy" = "never" } } } },
          ]
        }

        mutate = {
          patchStrategicMerge = {
            metadata = {
              annotations = {
                # The `+(key)` anchor adds the annotation only when it is
                # absent, so a value already set on the workload is kept.
                "+(keel.sh/policy)"       = "force"
                "+(keel.sh/pollSchedule)" = "@every 1h"
                "+(keel.sh/trigger)"      = "poll"
              }
            }
          }
        }
      }]
    }
  }
}