stem95su: scheduled Drive->site sync CronJob (every 10m)
CronJob stem95su-gdrive-sync (*/10) mounts the content PVC RW and rclone-syncs the read-only Drive folder "claude" (stem claude/files) onto it (rclone/rclone:1.74.3, scope=drive.readonly, empty-source guard + --max-delete 25). ESO ExternalSecret stem95su-rclone <- Vault secret/stem95su. Requires the GCP OAuth app published to Production or the refresh token expires ~weekly. Lands the gdrive-sync stack on master (it had landed on a feature branch by accident on the shared devvm checkout). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
05b50d2b96
commit
6d224861c4
1168 changed files with 120 additions and 358547 deletions
|
|
@ -1,106 +0,0 @@
|
|||
# Keel — automated Kubernetes Deployment image updates.
|
||||
# Design: docs/plans/2026-05-16-auto-upgrade-apps-design.md
|
||||
# Plan: docs/plans/2026-05-16-auto-upgrade-apps-plan.md
|
||||
#
|
||||
# Operation: Keel polls each watched workload's registry hourly (default
|
||||
# schedule below; overridable per-workload via keel.sh/pollSchedule).
|
||||
# Detection of a newer image (under the current default `patch` policy: a semver patch bump such as 1.2.3 → 1.2.4) triggers a Deployment
|
||||
# update (pod template hash bump → rolling restart). Workloads opt in by
|
||||
# carrying keel.sh/policy + keel.sh/trigger annotations — those are
|
||||
# injected cluster-wide by the inject-keel-annotations ClusterPolicy
|
||||
# (stacks/kyverno/modules/kyverno/keel-annotations.tf) on namespaces
|
||||
# labeled keel.sh/enrolled=true.
|
||||
|
||||
# Slack bot token for posting upgrade notifications. Existing token in
|
||||
# Vault — same one used elsewhere — see secret/viktor -> slack_bot_token.
|
||||
# Reads the KV v2 secret "viktor" from the "secret" mount. In this file the
# only key consumed from it is slack_bot_token (helm_release.keel, slack.botToken).
data "vault_kv_secret_v2" "viktor" {
|
||||
mount = "secret"
|
||||
name = "viktor"
|
||||
}
|
||||
|
||||
# Namespace that the Keel Helm release is installed into
# (helm_release.keel references metadata[0].name of this resource).
resource "kubernetes_namespace" "keel" {
|
||||
metadata {
|
||||
name = "keel"
|
||||
labels = {
|
||||
# local.tiers is not defined in this file; it must come from elsewhere in the stack.
tier = local.tiers.cluster
|
||||
}
|
||||
}
|
||||
lifecycle {
|
||||
# KYVERNO_LIFECYCLE_V1
|
||||
# Do not treat the goldilocks vpa-update-mode label as drift. It is presumably
# written by something other than this stack (the marker above suggests
# Kyverno) — NOTE(review): confirm the writer.
ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
|
||||
}
|
||||
}
|
||||
|
||||
# Installs the "keel" chart from https://charts.keel.sh into the keel namespace,
# with registry polling and Slack notifications enabled via the values below.
resource "helm_release" "keel" {
|
||||
name = "keel"
|
||||
namespace = kubernetes_namespace.keel.metadata[0].name
|
||||
repository = "https://charts.keel.sh"
|
||||
chart = "keel"
|
||||
# Latest stable per `helm search repo keel/keel -l` 2026-05-16
|
||||
# (app version 0.21.1). 1.0.6 doesn't exist — verify before bumping.
|
||||
version = "1.2.0"
|
||||
|
||||
# Atomic mitigates partial-deploy state. Keel itself is exempt from
|
||||
# auto-update (Kyverno mutate excludes the keel namespace), so it only
|
||||
# rolls when this stack applies — making atomic safe here.
|
||||
atomic = true
|
||||
|
||||
# Chart values are assembled as an HCL object and serialized with yamlencode().
values = [yamlencode({
|
||||
# 2026-05-26 17:30: re-enabled after switching the Kyverno-injected
|
||||
# default from `force + match-tag=true` (proven unreliable — see
|
||||
# stacks/kyverno/modules/kyverno/keel-annotations.tf) to `patch` which
|
||||
# is semver-parser-bounded. Under `patch`:
|
||||
# - Semver-tagged workloads get patch bumps only (1.2.3 → 1.2.4).
|
||||
# - Float / SHA / non-semver tags are IGNORED — no tag rewriting.
|
||||
# The 2026-05-26 emergency-stop scope (replicaCount=0) is reverted now
|
||||
# that the default is safe. Workloads pinned out-of-band (uptime-kuma
|
||||
# via keel.sh/policy=never LABEL) stay opted-out via the Kyverno
|
||||
# exclude rule, not via Keel's own annotation.
|
||||
replicaCount = 1
|
||||
# Prometheus pod-annotation scrape — picks up Keel-specific metrics
|
||||
# (pending_approvals, poll_trigger_tracked_images, registries_scanned_total{image,registry})
|
||||
# on container port 9300 /metrics. The cluster's `kubernetes-pods`
|
||||
# Prometheus job keys on these annotations. Used by
|
||||
# infra/scripts/upgrade_state.sh (the /upgrade-state skill).
|
||||
podAnnotations = {
|
||||
"prometheus.io/scrape" = "true"
|
||||
"prometheus.io/port" = "9300"
|
||||
"prometheus.io/path" = "/metrics"
|
||||
}
|
||||
polling = {
|
||||
enabled = true
|
||||
# Default poll cadence for workloads that don't override per-Deployment
|
||||
# via keel.sh/pollSchedule. Decision #8 in the design doc.
|
||||
defaultSchedule = "@every 1h"
|
||||
}
|
||||
helmProvider = {
|
||||
enabled = false # We use annotations, not Helm hooks
|
||||
}
|
||||
notificationLevel = "info"
|
||||
persistence = {
|
||||
enabled = false
|
||||
}
|
||||
# Slack notifications: post every rollout to the configured channel.
|
||||
# Bot token from Vault (secret/viktor -> slack_bot_token). The Keel
|
||||
# chart sets SLACK_BOT_TOKEN, SLACK_CHANNELS, etc. on the deployment
|
||||
# from these values.
|
||||
slack = {
|
||||
enabled = true
|
||||
# NOTE(review): the token is rendered into the Helm values document, so it
# presumably ends up in Terraform state and in the release's stored
# values — confirm this exposure is acceptable.
botToken = data.vault_kv_secret_v2.viktor.data["slack_bot_token"]
|
||||
channel = "general"
|
||||
# No approval flow — opt-out-pure means everything auto-rolls.
|
||||
# If we ever introduce gated rollouts, set approvalsChannel here.
|
||||
}
|
||||
# Keel uses each watched Deployment's own imagePullSecrets to query
|
||||
# its registry. Forgejo creds (`registry-credentials`) are auto-synced
|
||||
# to every namespace by Kyverno already, so Keel pods don't need a
|
||||
# separate pull-secret for their own image (ghcr.io is public).
|
||||
# NOTE(review): the paragraph above is about image pull secrets; the rbac
# toggle itself presumably enables the chart's RBAC resources — confirm
# against the chart's values.
rbac = {
|
||||
enabled = true
|
||||
}
|
||||
resources = {
|
||||
requests = { cpu = "50m", memory = "64Mi" }
|
||||
# Only memory is capped; no CPU limit is set.
limits = { memory = "256Mi" }
|
||||
}
|
||||
})]
|
||||
}
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
# Pulls in the parent Terragrunt configuration located by
# find_in_parent_folders() (called here without arguments).
include "root" {
|
||||
path = find_in_parent_folders()
|
||||
}
|
||||
|
||||
# Declares a dependency on the sibling ../platform stack. With
# skip_outputs = true none of its outputs are read here, so this
# presumably exists only for apply ordering — confirm.
dependency "platform" {
|
||||
config_path = "../platform"
|
||||
skip_outputs = true
|
||||
}
|
||||
|
||||
# Declares a dependency on the sibling ../kyverno stack. With
# skip_outputs = true none of its outputs are read here, so this
# presumably exists only for apply ordering — confirm.
dependency "kyverno" {
|
||||
config_path = "../kyverno"
|
||||
skip_outputs = true
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue