Stage 1 of moving private images off the registry:2 container at registry.viktorbarzin.me:5050 (which has hit distribution#3324 corruption 3x in 3 weeks) onto Forgejo's built-in OCI registry. No cutover risk — pods still pull from the existing registry until Phase 3. What changes: * Forgejo deployment: memory 384Mi→1Gi, PVC 5Gi→15Gi (cap 50Gi). Explicit FORGEJO__packages__ENABLED + CHUNKED_UPLOAD_PATH (defensive, v11 default-on). * ingress_factory: max_body_size variable was declared but never wired in after the nginx→Traefik migration. Now creates a per-ingress Buffering middleware when set; default null = no limit (preserves existing behavior). Forgejo ingress sets max_body_size=5g to allow multi-GB layer pushes. * Cluster-wide registry-credentials Secret: 4th auths entry for forgejo.viktorbarzin.me, populated from Vault secret/viktor/forgejo_pull_token (cluster-puller PAT, read:package). Existing Kyverno ClusterPolicy syncs cluster-wide — no policy edits. * Containerd hosts.toml redirect: forgejo.viktorbarzin.me → in-cluster Traefik LB 10.0.20.200 (avoids hairpin NAT for in-cluster pulls). Cloud-init for new VMs + scripts/setup-forgejo-containerd-mirror.sh for existing nodes. * Forgejo retention CronJob (0 4 * * *): keeps newest 10 versions per package + always :latest. First 7 days dry-run (DRY_RUN=true); flip the local in cleanup.tf after log review. * Forgejo integrity probe CronJob (*/15): same algorithm as the existing registry-integrity-probe. Existing Prometheus alerts (RegistryManifestIntegrityFailure et al.) made instance-aware so they cover both registries during the bake. * Docs: design+plan in docs/plans/, setup runbook in docs/runbooks/. Operational note — the apply order is non-trivial because the new Vault keys (forgejo_pull_token, forgejo_cleanup_token, secret/ci/global/forgejo_*) must exist BEFORE terragrunt apply in the kyverno + monitoring + forgejo stacks. The setup runbook documents the bootstrap sequence. 
Phase 1 (per-project dual-push pipelines) follows in subsequent commits. Bake clock starts when the last project goes dual-push. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
120 lines
3.5 KiB
HCL
120 lines
3.5 KiB
HCL
# Forgejo container-package retention CronJob.
|
|
#
|
|
# Forgejo's per-package "Cleanup Rules" UI is not exposed via Terraform —
|
|
# it's per-user runtime state inside the Forgejo DB. Driving retention from
|
|
# a CronJob hitting the public API keeps the policy versioned in this repo.
|
|
#
|
|
# Auth: a write:package PAT belonging to ci-pusher (same user that pushes
|
|
# from CI). DELETE on packages requires write:package scope. PAT lives in
|
|
# Vault at secret/viktor/forgejo_cleanup_token.
|
|
|
|
# Reads the KV v2 secret named "viktor" from the "secret" mount. Its
# forgejo_cleanup_token key feeds kubernetes_secret.forgejo_cleanup_token.
data "vault_kv_secret_v2" "forgejo_viktor" {
  name  = "viktor"
  mount = "secret"
}
|
|
|
|
locals {
  # Dry-run switch for the retention CronJob; it reaches the container as the
  # DRY_RUN env var ("true"/"false"). Keep it true for the first 7 days and
  # set it to false once the dry-run logs look correct.
  forgejo_cleanup_dry_run = true
}
|
|
|
|
# Publishes files/cleanup.sh (read from this module's directory at plan time)
# as the "cleanup.sh" key of a ConfigMap in the Forgejo namespace, so the
# retention CronJob can mount it as a file.
resource "kubernetes_config_map" "forgejo_cleanup_script" {
  metadata {
    namespace = kubernetes_namespace.forgejo.metadata[0].name
    name      = "forgejo-cleanup-script"
  }

  data = {
    "cleanup.sh" = file("${path.module}/files/cleanup.sh")
  }
}
|
|
|
|
# Copies the cleanup PAT out of Vault (key forgejo_cleanup_token of the
# "viktor" secret) into a namespaced Kubernetes Secret, exposed under the key
# FORGEJO_TOKEN for the retention CronJob to consume via secret_key_ref.
resource "kubernetes_secret" "forgejo_cleanup_token" {
  type = "Opaque"

  metadata {
    namespace = kubernetes_namespace.forgejo.metadata[0].name
    name      = "forgejo-cleanup-token"
  }

  data = {
    FORGEJO_TOKEN = data.vault_kv_secret_v2.forgejo_viktor.data["forgejo_cleanup_token"]
  }
}
|
|
|
|
# Retention CronJob: runs the mounted cleanup.sh in an Alpine container on the
# "0 4 * * *" schedule. The script is configured purely through environment
# variables (token, host, owner, keep-count, dry-run flag).
resource "kubernetes_cron_job_v1" "forgejo_cleanup" {
  metadata {
    namespace = kubernetes_namespace.forgejo.metadata[0].name
    name      = "forgejo-cleanup"
  }

  spec {
    schedule = "0 4 * * *"

    # Never start a new run while the previous one is still active.
    concurrency_policy = "Forbid"

    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 3

    job_template {
      metadata {}

      spec {
        # Finished Jobs are garbage-collected after one hour.
        ttl_seconds_after_finished = 3600
        backoff_limit              = 1

        template {
          metadata {}

          spec {
            restart_policy = "OnFailure"

            # The cleanup script, sourced from the ConfigMap.
            volume {
              name = "scripts"

              config_map {
                default_mode = "0755"
                name         = kubernetes_config_map.forgejo_cleanup_script.metadata[0].name
              }
            }

            container {
              name    = "cleanup"
              image   = "docker.io/library/alpine:3.20"
              command = ["/bin/sh", "/scripts/cleanup.sh"]

              # API token comes from the Secret rather than a literal value.
              env {
                name = "FORGEJO_TOKEN"

                value_from {
                  secret_key_ref {
                    key  = "FORGEJO_TOKEN"
                    name = kubernetes_secret.forgejo_cleanup_token.metadata[0].name
                  }
                }
              }

              # Plain-valued settings. A list (not a map) is iterated so the
              # env entries are generated in exactly this order.
              dynamic "env" {
                for_each = [
                  { name = "FORGEJO_HOST", value = "http://forgejo.forgejo.svc.cluster.local" },
                  { name = "FORGEJO_OWNER", value = "viktor" },
                  { name = "KEEP_LAST_N", value = "10" },
                  { name = "DRY_RUN", value = tostring(local.forgejo_cleanup_dry_run) },
                ]

                content {
                  name  = env.value.name
                  value = env.value.value
                }
              }

              volume_mount {
                mount_path = "/scripts"
                name       = "scripts"
              }

              # Only a memory limit is set; CPU is requested but left uncapped.
              resources {
                limits = {
                  memory = "96Mi"
                }

                requests = {
                  memory = "32Mi"
                  cpu    = "10m"
                }
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
|