stem95su: scheduled Drive->site sync CronJob (every 10m)
CronJob stem95su-gdrive-sync (*/10) mounts the content PVC RW and rclone-syncs the read-only Drive folder "claude" (stem claude/files) onto it (rclone/rclone:1.74.3, scope=drive.readonly, empty-source guard + --max-delete 25). ESO ExternalSecret stem95su-rclone <- Vault secret/stem95su. Requires the GCP OAuth app published to Production or the refresh token expires ~weekly. Lands the gdrive-sync stack on master (it had landed on a feature branch by accident on the shared devvm checkout). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
05b50d2b96
commit
6d224861c4
1168 changed files with 120 additions and 358547 deletions
|
|
@ -1,185 +0,0 @@
|
|||
#cloud-config
|
||||
# Hostname intentionally NOT set here — cloud-init reads it from
|
||||
# Proxmox's auto-generated meta-data (which uses `qm set --name <X>`),
|
||||
# so a single shared snippet works for every node.
|
||||
manage_etc_hosts: true
|
||||
users:
|
||||
- name: wizard
|
||||
sudo: ALL=(ALL) NOPASSWD:ALL
|
||||
ssh_authorized_keys:
|
||||
- ${authorized_ssh_key}
|
||||
passwd: ${passwd}
|
||||
lock_passwd: false # enable passwd login
|
||||
shell: /bin/bash
|
||||
package_update: true
|
||||
package_upgrade: true
|
||||
packages:
|
||||
- htop
|
||||
- vim
|
||||
- curl
|
||||
- jq
|
||||
- tcpdump
|
||||
- tree
|
||||
- tmux
|
||||
- wget
|
||||
- net-tools
|
||||
- zsh
|
||||
- apt-transport-https
|
||||
- ca-certificates
|
||||
- gpg
|
||||
- isc-dhcp-client
|
||||
- cloud-guest-utils # to enable resizing of disk via growpart
|
||||
- qemu-guest-agent
|
||||
- nginx
|
||||
# docker
|
||||
- docker-ce
|
||||
- docker-ce-cli
|
||||
- containerd.io
|
||||
- docker-buildx-plugin
|
||||
- docker-compose-plugin
|
||||
%{if is_k8s_template}
|
||||
# kubernetes
|
||||
- kubeadm
|
||||
- kubelet
|
||||
# iSCSI client for CSI-backed database storage
|
||||
- open-iscsi
|
||||
%{endif}
|
||||
|
||||
apt:
|
||||
sources:
|
||||
%{if is_k8s_template}
|
||||
kubernetes:
|
||||
source: "deb https://pkgs.k8s.io/core:/stable:/v1.34/deb/ /"
|
||||
keyid: "DE15B14486CD377B9E876E1A234654DA9A296436"
|
||||
filename: kubernetes.list
|
||||
%{endif}
|
||||
docker:
|
||||
source: "deb https://download.docker.com/linux/ubuntu noble stable"
|
||||
keyid: "9DC858229FC7DD38854AE2D88D81803C0EBFCD88"
|
||||
filename: docker.list
|
||||
|
||||
%{if is_k8s_template}
|
||||
# Setup script is base64-encoded by the module so YAML whitespace
|
||||
# handling never touches the heredoc bodies inside it. Replaces an
|
||||
# earlier `indent(6, …)` approach that put `[plugins.*]` TOML
|
||||
# sections at col 6 inside `cat >> /etc/containerd/config.toml`
|
||||
# heredocs — containerd refused to parse the result and the node5 v1
|
||||
# boot failed there (2026-05-26). Source: modules/create-template-vm/k8s-node-containerd-setup.sh
|
||||
write_files:
|
||||
- path: /usr/local/bin/k8s-node-containerd-setup.sh
|
||||
permissions: '0755'
|
||||
owner: root:root
|
||||
encoding: b64
|
||||
content: ${k8s_node_setup_script_b64}
|
||||
- path: /usr/local/bin/k8s-node-post-join-tune.sh
|
||||
permissions: '0755'
|
||||
owner: root:root
|
||||
encoding: b64
|
||||
content: ${k8s_node_post_join_script_b64}
|
||||
%{endif}
|
||||
|
||||
runcmd:
|
||||
# Enable weekly TRIM/discard to reclaim freed blocks in LVM thin pool
|
||||
- systemctl enable --now fstrim.timer
|
||||
# Enable persistent journald logging for crash forensics, with size limits to reduce disk wear
|
||||
- mkdir -p /var/log/journal
|
||||
- sed -i 's/#Storage=auto/Storage=persistent/' /etc/systemd/journald.conf
|
||||
- sed -i 's/#SystemMaxUse=/SystemMaxUse=500M/' /etc/systemd/journald.conf
|
||||
- sed -i 's/#MaxRetentionSec=/MaxRetentionSec=7day/' /etc/systemd/journald.conf
|
||||
- sed -i 's/#MaxFileSec=/MaxFileSec=1day/' /etc/systemd/journald.conf
|
||||
- sed -i 's/#Compress=yes/Compress=yes/' /etc/systemd/journald.conf
|
||||
- systemctl restart systemd-journald
|
||||
%{if is_k8s_template}
|
||||
# systemd-resolved global DNS fallback. Without this, only the
|
||||
# link-level DNS from Proxmox's `qm set --nameserver` (Technitium,
|
||||
# 10.0.20.201) is consulted — and Technitium returns NXDOMAIN for
|
||||
# forgejo.viktorbarzin.me, so kubelet image pulls from the Forgejo
|
||||
# registry break. Public DNS upstream + Technitium fallback matches
|
||||
# the pre-existing manual setup on k8s-node1..4.
|
||||
- mkdir -p /etc/systemd/resolved.conf.d
|
||||
- |
|
||||
cat > /etc/systemd/resolved.conf.d/global-dns.conf <<'EOF'
|
||||
[Resolve]
|
||||
DNS=8.8.8.8 1.1.1.1
|
||||
FallbackDNS=10.0.20.201
|
||||
EOF
|
||||
- systemctl restart systemd-resolved
|
||||
# Re-enabled 2026-05-10: unattended-upgrades is back on, but with a tight
|
||||
# Allowed-Origins list, a Package-Blacklist for k8s/containerd/runc/calico,
|
||||
# and Automatic-Reboot disabled (kured + sentinel-gate handles reboots in a
|
||||
# 24h-soaked rolling window, gated by Prometheus alerts).
|
||||
# Original outage (March 2026) was kernel update → containerd overlayfs corruption.
|
||||
# Mitigations: 24h cool-down between node reboots, Prometheus halt-on-alert,
|
||||
# apt-mark hold on k8s components, Package-Blacklist for runtime components.
|
||||
- apt-get install -y unattended-upgrades update-notifier-common
|
||||
- |
|
||||
cat > /etc/apt/apt.conf.d/52unattended-upgrades-k8s <<'EOF'
|
||||
Unattended-Upgrade::Allowed-Origins {
|
||||
"$${distro_id}:$${distro_codename}";
|
||||
"$${distro_id}:$${distro_codename}-security";
|
||||
"$${distro_id}:$${distro_codename}-updates";
|
||||
"$${distro_id}ESMApps:$${distro_codename}-apps-security";
|
||||
"$${distro_id}ESM:$${distro_codename}-infra-security";
|
||||
};
|
||||
Unattended-Upgrade::Package-Blacklist {
|
||||
"^containerd(\.io)?$$";
|
||||
"^runc$$";
|
||||
"^cri-tools$$";
|
||||
"^kubernetes-cni$$";
|
||||
"^calico-.*";
|
||||
"^cni-plugins-.*";
|
||||
"^docker-ce$$";
|
||||
};
|
||||
Unattended-Upgrade::DevRelease "false";
|
||||
Unattended-Upgrade::Automatic-Reboot "false";
|
||||
EOF
|
||||
- |
|
||||
cat > /etc/apt/apt.conf.d/20auto-upgrades <<'EOF'
|
||||
APT::Periodic::Update-Package-Lists "1";
|
||||
APT::Periodic::Unattended-Upgrade "1";
|
||||
EOF
|
||||
- systemctl unmask unattended-upgrades 2>/dev/null || true
|
||||
- systemctl enable --now unattended-upgrades
|
||||
- apt-mark hold kubelet kubeadm kubectl
|
||||
- apt-mark hold containerd containerd.io runc 2>/dev/null || true
|
||||
- systemctl stop kubelet
|
||||
- containerd config default | sudo tee /etc/containerd/config.toml
|
||||
# The containerd/kubelet setup is delivered as /usr/local/bin/k8s-node-containerd-setup.sh
|
||||
# via the write_files: block earlier in this file (above runcmd). We run it as a single
|
||||
# bash invocation here so cloud-init only sees a one-line runcmd item.
|
||||
# (Previous inline `- $${containerd_config_update_command}` broke YAML parsing
|
||||
# because the heredoc contains mixed-indent inner shell heredocs.)
|
||||
- bash /usr/local/bin/k8s-node-containerd-setup.sh
|
||||
- systemctl restart containerd
|
||||
- systemctl enable --now iscsid
|
||||
# Harden iSCSI: increase recovery timeout (300s vs 120s default) and enable
|
||||
# CRC32C data/header digests to detect bit flips over the network.
|
||||
# Prevents SQLite corruption from transient iSCSI session drops.
|
||||
- sed -i 's/^node.session.timeo.replacement_timeout = .*/node.session.timeo.replacement_timeout = 300/' /etc/iscsi/iscsid.conf
|
||||
- sed -i 's/^node.conn\[0\].timeo.noop_out_interval = .*/node.conn[0].timeo.noop_out_interval = 10/' /etc/iscsi/iscsid.conf
|
||||
- sed -i 's/^node.conn\[0\].timeo.noop_out_timeout = .*/node.conn[0].timeo.noop_out_timeout = 15/' /etc/iscsi/iscsid.conf
|
||||
- |
|
||||
if ! grep -q '^node.conn\[0\].iscsi.HeaderDigest' /etc/iscsi/iscsid.conf; then
|
||||
echo 'node.conn[0].iscsi.HeaderDigest = CRC32C,None' >> /etc/iscsi/iscsid.conf
|
||||
echo 'node.conn[0].iscsi.DataDigest = CRC32C,None' >> /etc/iscsi/iscsid.conf
|
||||
fi
|
||||
- systemctl restart iscsid
|
||||
# Create /sentinel directory for kured reboot gating (sentinel gate DaemonSet)
|
||||
- mkdir -p /sentinel
|
||||
# Disable swap — kubelet defaults to failSwapOn=true and won't start otherwise.
|
||||
# (Previously this snippet created a 4G swapfile for "memory pressure relief"
|
||||
# but never set failSwapOn=false / memorySwap.swapBehavior together, so the
|
||||
# join consistently bricked kubelet — observed on node6 boot v3 2026-05-26.)
|
||||
- swapoff -a
|
||||
- sed -i '/ swap / s/^/#/' /etc/fstab
|
||||
- ${k8s_join_command}
|
||||
- systemctl enable kubelet
|
||||
- systemctl start kubelet
|
||||
# Kubelet tuning runs AFTER kubeadm join — that's when
|
||||
# /var/lib/kubelet/config.yaml gets written. Restarts kubelet at the
|
||||
# end to pick up the patched config.
|
||||
- bash /usr/local/bin/k8s-node-post-join-tune.sh
|
||||
%{ endif }
|
||||
%{ for provision_cmd in provision_cmds ~}
|
||||
- ${provision_cmd}
|
||||
%{ endfor ~}
|
||||
|
|
@ -1,147 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# K8s node containerd + kubelet bootstrap. Runs once via cloud-init runcmd.
|
||||
# Embedded into the cloud-init snippet base64-encoded by main.tf so YAML
|
||||
# whitespace handling never touches the heredoc bodies — TOML / Python
|
||||
# blocks below land in /etc/containerd/config.toml etc. with their leading
|
||||
# whitespace intact.
|
||||
#
|
||||
# Layout:
|
||||
# 1. /etc/containerd/config.toml — config_path + mirror dirs + GC tuning
|
||||
# 2. /etc/containerd/certs.d/*/hosts.toml — per-registry mirror configs
|
||||
# 3. (moved) /var/lib/kubelet/config.yaml tuning now runs in k8s-node-post-join-tune.sh, after kubeadm join
|
||||
# 4. /etc/systemd/logind.conf.d + kubelet.service.d — graceful shutdown
|
||||
# 5. (master-only) /etc/kubernetes/manifests — apiserver + controller flags
|
||||
set -euo pipefail
|
||||
|
||||
# 1. config_path — match BOTH quote styles. containerd v1 writes `""`,
|
||||
# containerd v2.x writes `''`. Without the v2 match, hosts.toml mirror
|
||||
# config is silently ignored — observed 2026-05-26 on k8s-node4
|
||||
# (containerd v2.2.4) and reproduced on k8s-node5 v1 boot.
|
||||
sed -i "s|config_path = \"\"|config_path = \"/etc/containerd/certs.d\"|g" /etc/containerd/config.toml
|
||||
sed -i "s|config_path = ''|config_path = \"/etc/containerd/certs.d\"|g" /etc/containerd/config.toml
|
||||
|
||||
# 2. Per-registry hosts.toml — pull-through caches on docker-registry VM
|
||||
# (10.0.20.10) for high-traffic registries, Traefik LB (10.0.20.200) for
|
||||
# forgejo. Low-traffic registries (registry.k8s.io, reg.kyverno.io) skip
|
||||
# the cache and pull direct because past pull-through cache attempts
|
||||
# truncated downloads and broke VPA certgen + Kyverno image pulls.
|
||||
|
||||
mkdir -p /etc/containerd/certs.d/docker.io
|
||||
cat > /etc/containerd/certs.d/docker.io/hosts.toml <<'DOCKERIO'
|
||||
server = "https://registry-1.docker.io"
|
||||
|
||||
[host."http://10.0.20.10:5000"]
|
||||
capabilities = ["pull", "resolve"]
|
||||
|
||||
[host."https://registry-1.docker.io"]
|
||||
capabilities = ["pull", "resolve"]
|
||||
DOCKERIO
|
||||
|
||||
mkdir -p /etc/containerd/certs.d/ghcr.io
|
||||
cat > /etc/containerd/certs.d/ghcr.io/hosts.toml <<'GHCR'
|
||||
server = "https://ghcr.io"
|
||||
|
||||
[host."http://10.0.20.10:5010"]
|
||||
capabilities = ["pull", "resolve"]
|
||||
|
||||
[host."https://ghcr.io"]
|
||||
capabilities = ["pull", "resolve"]
|
||||
GHCR
|
||||
|
||||
# Forgejo OCI registry: prefer in-cluster Traefik LB (10.0.20.200) to
|
||||
# avoid hairpin NAT. Traefik serves the *.viktorbarzin.me wildcard so
|
||||
# SNI verification succeeds. If the mirror is unreachable, fall back to
|
||||
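# public DNS resolution (needs the global DNS config that the cloud-init
# runcmd writes to /etc/systemd/resolved.conf.d/global-dns.conf; it is not
# set up by this script).
# NOTE(review): the [host] entry below targets 10.0.20.203 with
# skip_verify = true, which does not match the 10.0.20.200 / "SNI
# verification succeeds" description above — confirm which one is current.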
|
||||
mkdir -p /etc/containerd/certs.d/forgejo.viktorbarzin.me
|
||||
cat > /etc/containerd/certs.d/forgejo.viktorbarzin.me/hosts.toml <<'FORGEJO'
|
||||
server = "https://forgejo.viktorbarzin.me"
|
||||
|
||||
[host."https://10.0.20.203"]
|
||||
capabilities = ["pull", "resolve"]
|
||||
skip_verify = true
|
||||
FORGEJO
|
||||
|
||||
# quay.io + registry.k8s.io: include mirror configs that match node4's
|
||||
# layout (no real pull-through cache today, server line is the direct
|
||||
# upstream). Keeping these present makes the per-node config uniform and
|
||||
# lets us flip a cache on later by editing only the [host."..."] block.
|
||||
mkdir -p /etc/containerd/certs.d/quay.io
|
||||
cat > /etc/containerd/certs.d/quay.io/hosts.toml <<'QUAY'
|
||||
server = "https://quay.io"
|
||||
|
||||
[host."http://10.0.20.10:5020"]
|
||||
capabilities = ["pull", "resolve"]
|
||||
QUAY
|
||||
|
||||
mkdir -p /etc/containerd/certs.d/registry.k8s.io
|
||||
cat > /etc/containerd/certs.d/registry.k8s.io/hosts.toml <<'K8SREG'
|
||||
server = "https://registry.k8s.io"
|
||||
|
||||
[host."http://10.0.20.10:5030"]
|
||||
capabilities = ["pull", "resolve"]
|
||||
K8SREG
|
||||
|
||||
# 3. containerd tuning: parallel pulls + selective GC overrides.
|
||||
# containerd v2's `config default` ALREADY emits `[plugins.'io.containerd.gc.v1.scheduler']`,
|
||||
# `[plugins.'io.containerd.runtime.v2.task']`, and `[plugins.'io.containerd.metadata.v1.bolt']`
|
||||
# sections — declaring them again fails with `toml: table … already exists`
|
||||
# (observed on node6 boot 2026-05-26). Patch values in place instead.
|
||||
sed -i 's/.*max_concurrent_downloads = 3/max_concurrent_downloads = 20/g' /etc/containerd/config.toml
|
||||
# pause_threshold: 0.5 → 0.02 (run GC more aggressively when images dirty %)
|
||||
sed -i "s/^[[:space:]]*pause_threshold = .*/ pause_threshold = 0.02/" /etc/containerd/config.toml
|
||||
# schedule_delay: 0s/1ms → 30 min (longer cool-down between GC runs)
|
||||
sed -i "s/^[[:space:]]*schedule_delay = .*/ schedule_delay = '1800s'/" /etc/containerd/config.toml
|
||||
# exit_timeout: 0s → 5m (more aggressive container cleanup)
|
||||
sed -i "s/^[[:space:]]*exit_timeout = .*/ exit_timeout = '5m'/" /etc/containerd/config.toml
|
||||
|
||||
# 4. (kubelet tuning intentionally NOT here — /var/lib/kubelet/config.yaml
|
||||
# only exists AFTER kubeadm join. That work runs in
|
||||
# k8s-node-post-join-tune.sh, invoked as a separate cloud-init runcmd
|
||||
# step after the join completes.)
|
||||
|
||||
# 5. logind + kubelet systemd unit — total kubelet shutdown 310s, so
|
||||
# logind InhibitDelay > that and kubelet TimeoutStopSec > that.
|
||||
mkdir -p /etc/systemd/logind.conf.d
|
||||
cat > /etc/systemd/logind.conf.d/kubelet-shutdown.conf <<'LOGIND_CONF'
|
||||
[Login]
|
||||
InhibitDelayMaxSec=480
|
||||
LOGIND_CONF
|
||||
systemctl restart systemd-logind
|
||||
|
||||
mkdir -p /etc/systemd/system/kubelet.service.d
|
||||
cat > /etc/systemd/system/kubelet.service.d/20-shutdown.conf <<'KUBELET_SHUTDOWN'
|
||||
[Service]
|
||||
TimeoutStopSec=420s
|
||||
KUBELET_SHUTDOWN
|
||||
systemctl daemon-reload
|
||||
|
||||
# 6. (master-only) faster pod eviction + attach-detach reconcile.
|
||||
if [ -f /etc/kubernetes/manifests/kube-controller-manager.yaml ]; then
|
||||
python3 - <<'CM_PATCH'
|
||||
import yaml
|
||||
with open('/etc/kubernetes/manifests/kube-controller-manager.yaml') as f:
|
||||
m = yaml.safe_load(f)
|
||||
args = m['spec']['containers'][0]['command']
|
||||
for flag in ['--attach-detach-reconcile-sync-period=15s']:
|
||||
key = flag.split('=')[0]
|
||||
args = [a for a in args if not a.startswith(key)]
|
||||
args.append(flag)
|
||||
m['spec']['containers'][0]['command'] = args
|
||||
with open('/etc/kubernetes/manifests/kube-controller-manager.yaml', 'w') as f:
|
||||
yaml.dump(m, f, default_flow_style=False)
|
||||
CM_PATCH
|
||||
python3 - <<'AS_PATCH'
|
||||
import yaml
|
||||
with open('/etc/kubernetes/manifests/kube-apiserver.yaml') as f:
|
||||
m = yaml.safe_load(f)
|
||||
args = m['spec']['containers'][0]['command']
|
||||
for flag in ['--default-unreachable-toleration-seconds=60', '--default-not-ready-toleration-seconds=60']:
|
||||
key = flag.split('=')[0]
|
||||
args = [a for a in args if not a.startswith(key)]
|
||||
args.append(flag)
|
||||
m['spec']['containers'][0]['command'] = args
|
||||
with open('/etc/kubernetes/manifests/kube-apiserver.yaml', 'w') as f:
|
||||
yaml.dump(m, f, default_flow_style=False)
|
||||
AS_PATCH
|
||||
fi
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Runs AFTER `kubeadm join` has written /var/lib/kubelet/config.yaml.
|
||||
# Patches kubelet config in place (parallel image pulls, eviction
|
||||
# thresholds, priority-based shutdown grace, container log rotation)
|
||||
# and restarts kubelet. (Master-only controller-manager / apiserver flag
# patching is done by k8s-node-containerd-setup.sh, not by this script.)
|
||||
#
|
||||
# Embedded into the cloud-init snippet base64-encoded by main.tf so
|
||||
# YAML whitespace doesn't touch the heredoc bodies inside.
|
||||
set -euo pipefail
|
||||
|
||||
if [ ! -f /var/lib/kubelet/config.yaml ]; then
|
||||
echo "post-join-tune: /var/lib/kubelet/config.yaml not found — was kubeadm join run?" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Parallel image pulls.
|
||||
sed -i '/serializeImagePulls:/d' /var/lib/kubelet/config.yaml
|
||||
sed -i '/maxParallelImagePulls:/d' /var/lib/kubelet/config.yaml
|
||||
printf 'serializeImagePulls: false\nmaxParallelImagePulls: 50\n' >> /var/lib/kubelet/config.yaml
|
||||
|
||||
# Memory / disk eviction. Aggressive disk thresholds (15%/20%)
|
||||
# prevent the 2026-03-13 containerd image-store corruption that took
|
||||
# down k8s-node2.
|
||||
# Remove every stanza that the KUBELET_PATCH heredoc below re-appends, so the
# appended block is the only definition of each key. Each key is deleted
# together with its indented children: the range runs from the key line to the
# next line that does not start with a space, and only the key line plus the
# indented lines inside that range are dropped (the terminating top-level line
# is kept). Deleting just the key line would leave orphaned children behind and
# corrupt the YAML when the stanza already has nested values (e.g. on a second
# run of this script). memorySwap is included so an existing entry does not
# end up as a duplicate top-level key next to the appended one.
sed -i '/systemReserved:/,/^[^ ]/{ /systemReserved:/d; /^ /d }; /kubeReserved:/,/^[^ ]/{ /kubeReserved:/d; /^ /d }; /evictionHard:/,/^[^ ]/{ /evictionHard:/d; /^ /d }; /evictionSoft:/,/^[^ ]/{ /evictionSoft:/d; /^ /d }; /evictionSoftGracePeriod:/,/^[^ ]/{ /evictionSoftGracePeriod:/d; /^ /d }; /memorySwap:/,/^[^ ]/{ /memorySwap:/d; /^ /d }' /var/lib/kubelet/config.yaml
|
||||
|
||||
cat >> /var/lib/kubelet/config.yaml <<'KUBELET_PATCH'
|
||||
systemReserved:
|
||||
memory: "512Mi"
|
||||
cpu: "200m"
|
||||
kubeReserved:
|
||||
memory: "512Mi"
|
||||
cpu: "200m"
|
||||
evictionHard:
|
||||
memory.available: "500Mi"
|
||||
nodefs.available: "15%"
|
||||
imagefs.available: "20%"
|
||||
evictionSoft:
|
||||
memory.available: "1Gi"
|
||||
nodefs.available: "20%"
|
||||
imagefs.available: "25%"
|
||||
evictionSoftGracePeriod:
|
||||
memory.available: "30s"
|
||||
nodefs.available: "60s"
|
||||
imagefs.available: "30s"
|
||||
memorySwap:
|
||||
swapBehavior: "LimitedSwap"
|
||||
KUBELET_PATCH
|
||||
|
||||
# Container log rotation + priority-based shutdown grace.
|
||||
sed -i '/^shutdownGracePeriod:/d; /^shutdownGracePeriodCriticalPods:/d' /var/lib/kubelet/config.yaml
|
||||
python3 - <<'KUBELET_FINAL'
|
||||
import yaml
|
||||
with open('/var/lib/kubelet/config.yaml') as f:
|
||||
cfg = yaml.safe_load(f)
|
||||
cfg.pop('shutdownGracePeriod', None)
|
||||
cfg.pop('shutdownGracePeriodCriticalPods', None)
|
||||
cfg.pop('shutdownGracePeriodByPodPriority', None)
|
||||
cfg['containerLogMaxSize'] = '10Mi'
|
||||
cfg['containerLogMaxFiles'] = 3
|
||||
cfg['shutdownGracePeriodByPodPriority'] = [
|
||||
{'priority': 0, 'shutdownGracePeriodSeconds': 20},
|
||||
{'priority': 200000, 'shutdownGracePeriodSeconds': 20},
|
||||
{'priority': 400000, 'shutdownGracePeriodSeconds': 30},
|
||||
{'priority': 600000, 'shutdownGracePeriodSeconds': 30},
|
||||
{'priority': 800000, 'shutdownGracePeriodSeconds': 90},
|
||||
{'priority': 1000000, 'shutdownGracePeriodSeconds': 30},
|
||||
{'priority': 1200000, 'shutdownGracePeriodSeconds': 30},
|
||||
{'priority': 2000000000, 'shutdownGracePeriodSeconds': 30},
|
||||
{'priority': 2000001000, 'shutdownGracePeriodSeconds': 30},
|
||||
]
|
||||
with open('/var/lib/kubelet/config.yaml', 'w') as f:
|
||||
yaml.dump(cfg, f, default_flow_style=False)
|
||||
KUBELET_FINAL
|
||||
|
||||
# Restart kubelet to pick up the new config (it's already started by the
|
||||
# preceding cloud-init runcmd line — restart, not start).
|
||||
systemctl restart kubelet
|
||||
|
|
@ -1,104 +0,0 @@
|
|||
variable "proxmox_host" { type = string }
|
||||
variable "proxmox_user" { type = string }
|
||||
variable "cloud_image_url" { type = string }
|
||||
variable "image_path" { type = string }
|
||||
variable "template_id" {
|
||||
type = number
|
||||
default = 8000
|
||||
}
|
||||
variable "template_name" { type = string }
|
||||
variable "snippet_name" { type = string }
|
||||
variable "user_passwd" { type = string } # hashed pw
|
||||
variable "k8s_join_command" {
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
variable "containerd_config_update_command" {
|
||||
type = string
|
||||
default = ""
|
||||
description = "DEPRECATED: was inlined into write_files via indent(); the heredoc-TOML interaction broke containerd config parsing on node5 v1 boot 2026-05-26. The k8s setup script is now bundled inside the module at k8s-node-containerd-setup.sh — pass nothing here. Kept to avoid breaking stacks that still reference it; ignored when is_k8s_template=true."
|
||||
}
|
||||
variable "is_k8s_template" { type = bool }
|
||||
variable "ssh_private_key" {
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
variable "ssh_public_key" {
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
variable "provision_cmds" {
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
# SSH connection to Proxmox
|
||||
resource "null_resource" "create_template_remote" {
  # Builds the cloud-init VM template by running `qm` commands over SSH on the
  # Proxmox host. The resource has no triggers, so the provisioner runs only
  # when the resource is first created.
  connection {
    type        = "ssh"
    user        = var.proxmox_user
    host        = var.proxmox_host
    private_key = var.ssh_private_key
  }

  # Commands executed *on Proxmox host*
  provisioner "remote-exec" {
    inline = [
      # abort on the first failing command
      "set -e",
      # Download the cloud image if missing. The download goes to a ".part"
      # file and is renamed only after wget succeeds: writing straight to
      # image_path would leave a truncated file behind on an interrupted
      # download, and the `[ ! -f ... ]` guard would then treat that partial
      # file as a valid image on the next run.
      "if [ ! -f ${var.image_path} ]; then wget -O ${var.image_path}.part ${var.cloud_image_url}; mv ${var.image_path}.part ${var.image_path}; fi",
      # create template only if not existing
      "if ! qm status ${var.template_id} >/dev/null 2>&1; then",
      "  echo 'Creating cloud-init template...';",
      "  qm create ${var.template_id} --name ${var.template_name} --memory 8192 --cores 8 --net0 virtio,bridge=vmbr0;",
      "  qm importdisk ${var.template_id} ${var.image_path} local-lvm;",
      "  qm set ${var.template_id} --scsihw virtio-scsi-pci --scsi0 local-lvm:vm-${var.template_id}-disk-0;",
      "  qm set ${var.template_id} --ide2 local-lvm:cloudinit;",
      "  qm set ${var.template_id} --boot c --bootdisk scsi0;",
      "  qm set ${var.template_id} --serial0 socket --vga serial0;",
      "  qm template ${var.template_id};",
      "else",
      "  echo 'Template ${var.template_id} already exists — skipping.';",
      "fi"
    ]
  }
}
|
||||
|
||||
resource "null_resource" "upload_cloud_init" {
|
||||
connection {
|
||||
type = "ssh"
|
||||
host = var.proxmox_host
|
||||
user = var.proxmox_user
|
||||
private_key = var.ssh_private_key
|
||||
}
|
||||
|
||||
provisioner "remote-exec" {
|
||||
inline = ["mkdir -p /var/lib/vz/snippets"]
|
||||
}
|
||||
|
||||
provisioner "file" {
|
||||
destination = "/var/lib/vz/snippets/${var.snippet_name}"
|
||||
content = templatefile("${path.module}/cloud_init.yaml", {
|
||||
is_k8s_template = var.is_k8s_template,
|
||||
authorized_ssh_key = var.ssh_public_key,
|
||||
passwd = var.user_passwd,
|
||||
provision_cmds = var.provision_cmds,
|
||||
k8s_join_command = var.k8s_join_command,
|
||||
k8s_node_setup_script_b64 = var.is_k8s_template ? base64encode(file("${path.module}/k8s-node-containerd-setup.sh")) : ""
|
||||
k8s_node_post_join_script_b64 = var.is_k8s_template ? base64encode(file("${path.module}/k8s-node-post-join-tune.sh")) : ""
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
# Force recreate when the below changes
|
||||
triggers = {
|
||||
file_hash = filesha256("${path.module}/cloud_init.yaml")
|
||||
setup_script_hash = var.is_k8s_template ? filesha256("${path.module}/k8s-node-containerd-setup.sh") : ""
|
||||
post_join_script_hash = var.is_k8s_template ? filesha256("${path.module}/k8s-node-post-join-tune.sh") : ""
|
||||
provision_cmds = join(", ", var.provision_cmds)
|
||||
is_k8s_template = var.is_k8s_template,
|
||||
passwd = var.user_passwd,
|
||||
k8s_join_command = var.k8s_join_command,
|
||||
ssh_public_key = var.ssh_public_key,
|
||||
}
|
||||
}
|
||||
|
|
@ -1,313 +0,0 @@
|
|||
# ---------------------------------------------------------------------------
|
||||
# Variables — Required
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
variable "vm_name" { type = string }
|
||||
variable "vmid" {
|
||||
type = number
|
||||
default = 0
|
||||
}
|
||||
variable "cisnippet_name" {
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
variable "bridge" { type = string }
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Variables — VM sizing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
variable "vm_cpus" {
|
||||
type = number
|
||||
default = 4
|
||||
}
|
||||
variable "cpu_sockets" {
|
||||
type = number
|
||||
default = 1
|
||||
}
|
||||
variable "vm_mem_mb" {
|
||||
type = number
|
||||
default = 8192
|
||||
}
|
||||
variable "vm_disk_size" {
|
||||
type = string
|
||||
default = "64G"
|
||||
}
|
||||
variable "balloon" {
|
||||
type = number
|
||||
default = 0 # 0 = disabled (recommended for k8s nodes)
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Variables — VM identity & networking
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
variable "vm_mac_address" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "vlan_tag" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "ipconfig0" {
|
||||
type = string
|
||||
default = "ip=dhcp,ip6=dhcp"
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Variables — Boot & hardware
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
variable "template_name" {
|
||||
type = string
|
||||
default = "" # empty = no clone (for importing existing VMs)
|
||||
}
|
||||
variable "scsihw" {
|
||||
type = string
|
||||
default = "virtio-scsi-pci"
|
||||
}
|
||||
variable "boot" {
|
||||
type = string
|
||||
default = "order=scsi0"
|
||||
}
|
||||
variable "boot_disk" {
|
||||
type = string
|
||||
default = "" # e.g., "scsi0" — only set if boot = "c" (legacy)
|
||||
}
|
||||
variable "disk_slot" {
|
||||
type = string
|
||||
default = "scsi0" # which SCSI slot the OS disk is on
|
||||
}
|
||||
variable "agent" {
|
||||
type = number
|
||||
default = 1
|
||||
}
|
||||
variable "qemu_os" {
|
||||
type = string
|
||||
default = "l26"
|
||||
}
|
||||
variable "numa" {
|
||||
type = bool
|
||||
default = false
|
||||
}
|
||||
variable "machine" {
|
||||
type = string
|
||||
default = "" # empty = provider default. Use "q35" for GPU passthrough
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Variables — Startup/shutdown ordering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
variable "startup_order" {
|
||||
type = number
|
||||
default = -1
|
||||
}
|
||||
variable "startup_delay" {
|
||||
type = number
|
||||
default = -1
|
||||
}
|
||||
variable "shutdown_timeout" {
|
||||
type = number
|
||||
default = -1
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Variables — Cloud-Init (optional — disable for non-cloud-init VMs)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
variable "use_cloud_init" {
|
||||
type = bool
|
||||
default = true
|
||||
}
|
||||
variable "ssh_keys" {
|
||||
type = string
|
||||
default = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDHLhYDfyx237eJgOGVoJRECpUS95+7rEBS9vacsIxtx devvm"
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Variables — GPU / PCI passthrough
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
variable "hostpci0" {
|
||||
type = string
|
||||
default = "" # e.g., "0000:06:00.0" for Tesla T4 passthrough
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Variables — Disk I/O throttling (bytes/sec; 0 = uncapped)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Caps any single VM's share of the underlying disk so a runaway workload
|
||||
# (e.g. the 2026-05-23/26 alloy IO storm — memory id=2726) cannot wedge the
|
||||
# whole Proxmox host's sdc thin pool. Values inferred from PVE RRD p99/max
|
||||
# observed in /nodes/pve/qemu/<vmid>/rrddata.
|
||||
variable "mbps_rd" {
|
||||
type = number
|
||||
default = 0
|
||||
}
|
||||
variable "mbps_wr" {
|
||||
type = number
|
||||
default = 0
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Resource
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# One Proxmox QEMU VM on node "pve". Optionally cloned from a template and,
# when var.use_cloud_init is true, bootstrapped through a cloud-init drive.
resource "proxmox_vm_qemu" "cloudinit-vm" {
  # --- Identity / placement -------------------------------------------------
  vmid        = var.vmid
  name        = var.vm_name
  target_node = "pve"
  qemu_os     = var.qemu_os
  machine     = var.machine != "" ? var.machine : null

  # --- Power / lifecycle behaviour -----------------------------------------
  vm_state           = "running"
  start_at_node_boot = true
  automatic_reboot   = false # never let Terraform reboot VMs — use /reboot-server skill instead
  agent              = var.agent

  # --- Memory ---------------------------------------------------------------
  memory  = var.vm_mem_mb
  balloon = var.balloon

  # --- Boot / clone source --------------------------------------------------
  boot     = var.boot
  bootdisk = var.boot_disk != "" ? var.boot_disk : null
  scsihw   = var.scsihw
  clone    = var.template_name != "" ? var.template_name : null
  # A template name implies a full (non-linked) clone.
  full_clone = var.template_name != ""

  # --- Cloud-init (every field is null unless use_cloud_init = true) --------
  os_type      = var.use_cloud_init ? "cloud-init" : null
  cicustom     = var.use_cloud_init && var.cisnippet_name != "" ? "vendor=local:snippets/${var.cisnippet_name}" : null
  ciupgrade    = var.use_cloud_init ? true : null
  nameserver   = var.use_cloud_init ? "1.1.1.1 8.8.8.8" : null
  searchdomain = var.use_cloud_init ? "viktorbarzin.lan" : null
  ipconfig0    = var.use_cloud_init ? var.ipconfig0 : null
  skip_ipv6    = var.use_cloud_init ? true : null
  ciuser       = var.use_cloud_init ? "root" : null
  # NOTE(review): the cloud-init password is the literal "root" — consider
  # sourcing it from a sensitive variable instead of committing it here.
  cipassword = var.use_cloud_init ? "root" : null
  sshkeys    = var.use_cloud_init ? var.ssh_keys : null

  cpu {
    cores   = var.vm_cpus
    sockets = var.cpu_sockets
    type    = "host"
  }

  startup_shutdown {
    order            = var.startup_order
    shutdown_timeout = var.shutdown_timeout
    startup_delay    = var.startup_delay
  }

  serial {
    id = 0
  }

  disks {
    scsi {
      # Exactly one of scsi0 / scsi1 is rendered, selected by var.disk_slot.
      # Both carry the same settings, including the MB/s throttles.
      dynamic "scsi0" {
        for_each = var.disk_slot == "scsi0" ? [1] : []
        content {
          disk {
            storage            = "local-lvm"
            size               = var.vm_disk_size
            discard            = true # Enable TRIM passthrough to LVM thin pool — reduces CoW overhead
            mbps_r_concurrent  = var.mbps_rd
            mbps_wr_concurrent = var.mbps_wr
          }
        }
      }

      dynamic "scsi1" {
        for_each = var.disk_slot == "scsi1" ? [1] : []
        content {
          disk {
            storage            = "local-lvm"
            size               = var.vm_disk_size
            discard            = true
            mbps_r_concurrent  = var.mbps_rd
            mbps_wr_concurrent = var.mbps_wr
          }
        }
      }
    }

    # The cloud-init drive is attached on ide1 only when cloud-init is in use.
    dynamic "ide" {
      for_each = var.use_cloud_init ? [1] : []
      content {
        ide1 {
          cloudinit {
            storage = "local-lvm"
          }
        }
      }
    }
  }

  network {
    id      = 0
    bridge  = var.bridge
    model   = "virtio"
    macaddr = var.vm_mac_address
    tag     = var.vlan_tag
  }

  # Safety: never destroy the VM, and ignore attributes that legitimately
  # drift outside Terraform (CSI-attached PVC disks, cloud-init settings after
  # first boot, values edited in the Proxmox UI).
  lifecycle {
    prevent_destroy = true

    ignore_changes = [
      # PVC disks are attached/detached dynamically by the CSI driver. K8s
      # workers have up to ~30 slots in use simultaneously (k8s-node1:
      # scsi1-29 + unused0-29). The k8s-master only uses scsi0 (boot), so most
      # of these are no-ops for that VM but harmless.
      # NOTE(review): earlier comments named both democratic-csi and
      # proxmox-csi as the driver — confirm which is current.
      # NOTE(review): scsi1 is ignored here but is also a possible boot slot
      # (disk_slot = "scsi1"); changes to such a boot disk would be ignored.
      disks[0].scsi[0].scsi1,
      disks[0].scsi[0].scsi2,
      disks[0].scsi[0].scsi3,
      disks[0].scsi[0].scsi4,
      disks[0].scsi[0].scsi5,
      disks[0].scsi[0].scsi6,
      disks[0].scsi[0].scsi7,
      disks[0].scsi[0].scsi8,
      disks[0].scsi[0].scsi9,
      disks[0].scsi[0].scsi10,
      disks[0].scsi[0].scsi11,
      disks[0].scsi[0].scsi12,
      disks[0].scsi[0].scsi13,
      disks[0].scsi[0].scsi14,
      disks[0].scsi[0].scsi15,
      disks[0].scsi[0].scsi16,
      disks[0].scsi[0].scsi17,
      disks[0].scsi[0].scsi18,
      disks[0].scsi[0].scsi19,
      disks[0].scsi[0].scsi20,
      disks[0].scsi[0].scsi21,
      disks[0].scsi[0].scsi22,
      disks[0].scsi[0].scsi23,
      disks[0].scsi[0].scsi24,
      disks[0].scsi[0].scsi25,
      disks[0].scsi[0].scsi26,
      disks[0].scsi[0].scsi27,
      disks[0].scsi[0].scsi28,
      disks[0].scsi[0].scsi29,

      # cloud-init config may drift after first boot
      cicustom,
      ciupgrade,
      ciuser,
      cipassword,
      sshkeys,

      # SMBIOS UUID and vmgenid are auto-generated
      smbios,

      # Tags and description may be edited in Proxmox UI
      tags,
      desc,

      # Provider defaults that differ from imported state
      define_connection_info,
      full_clone,

      # scsihw varies per VM (virtio-scsi-pci / virtio-scsi-single / lsi)
      # and changing it on a running VM is risky — leave whatever's live.
      scsihw,

      # qemu_os is a hint to qemu about the guest OS; some live VMs have
      # "other" (unset originally) and the module's "l26" default would
      # otherwise force an unnecessary write on apply.
      qemu_os,
    ]
  }
}
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
|
||||
# Provider requirements for this module: the Telmate Proxmox provider,
# pinned to an exact release-candidate build.
terraform {
  required_providers {
    proxmox = {
      version = "3.0.2-rc07"
      source  = "telmate/proxmox"
    }
  }
}
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
#!/usr/bin/env python3
"""Keeps only the N most recent tags per image in pull-through cache registries.
Deletes old tag links directly from the filesystem since the API doesn't support
DELETE on proxy registries. Run garbage-collect after to reclaim blob storage.

Usage: <script> [KEEP] [BASE]
    KEEP  number of newest tags to retain per repository (default 10, must be >= 0)
    BASE  directory holding one sub-directory per registry (default /opt/registry/data)
"""

import os
import shutil
import sys


def prune_tags(base, keep):
    """Delete all but the `keep` newest tag directories in every repository.

    Walks `<base>/<registry>/docker/registry/v2/repositories` for each registry
    directory under `base`, finds every `_manifests/tags` directory, ranks its
    tag sub-directories by mtime (newest first) and removes the rest.

    Args:
        base: Directory containing one sub-directory per registry.
        keep: How many of the newest tags to retain per repository (>= 0).

    Returns:
        The number of tag directories removed.

    Raises:
        ValueError: if `keep` is negative. A negative value would otherwise be
            used as a slice offset and delete an arbitrary tail of the list.
        OSError: if `base` cannot be listed or a tag directory cannot be removed.
    """
    if keep < 0:
        raise ValueError(f"keep must be >= 0, got {keep}")

    total_deleted = 0

    for registry_name in sorted(os.listdir(base)):
        storage = os.path.join(base, registry_name, "docker/registry/v2/repositories")
        if not os.path.isdir(storage):
            continue

        for root, _, _ in os.walk(storage):
            if not root.endswith("_manifests/tags"):
                continue

            # Repository name = path between the storage root and /_manifests/tags.
            repo = root.replace(storage + "/", "").replace("/_manifests/tags", "")

            tag_times = []
            for tag in os.listdir(root):
                tag_path = os.path.join(root, tag)
                if os.path.isdir(tag_path):
                    tag_times.append((os.path.getmtime(tag_path), tag, tag_path))

            if len(tag_times) <= keep:
                continue

            # Newest first; everything after the first `keep` entries is removed.
            tag_times.sort(reverse=True)
            to_delete = tag_times[keep:]

            print(f"[{registry_name}/{repo}] {len(tag_times)} tags -> keeping {keep}, deleting {len(to_delete)}")

            for _, _tag, tag_path in to_delete:
                shutil.rmtree(tag_path)
                total_deleted += 1

            print(" done")

    return total_deleted


def main(argv):
    """CLI entry point; returns the process exit status."""
    sys.stdout.reconfigure(line_buffering=True)

    keep = int(argv[1]) if len(argv) > 1 else 10
    base = argv[2] if len(argv) > 2 else "/opt/registry/data"

    try:
        total_deleted = prune_tags(base, keep)
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 2

    print(f"\nDeleted {total_deleted} tags. Run garbage-collect to reclaim space.")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
# Distribution (Docker registry) configuration for the private registry
# instance: filesystem storage, htpasswd authentication, upload purging.
version: 0.1

log:
  fields:
    service: registry-private

storage:
  cache:
    blobdescriptor: inmemory
  filesystem:
    rootdirectory: /var/lib/registry
    # NOTE(review): the original nesting of `maxsize` was not recoverable;
    # it is assumed to sit under `filesystem` — confirm.
    maxsize: 100GiB
  delete:
    enabled: true
  maintenance:
    # Abandoned uploads older than `age` are purged every `interval`.
    uploadpurging:
      enabled: true
      age: 168h
      interval: 4h
      dryrun: false

auth:
  htpasswd:
    realm: 'Registry Realm'
    path: /auth/htpasswd

http:
  addr: ':5000'
  headers:
    X-Content-Type-Options:
      - nosniff

health:
  storagedriver:
    enabled: true
    interval: 10s
    threshold: 3
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
# Template for a pull-through cache registry. `${name}` and `${remote_url}`
# are substituted by the templating step before the file is parsed as YAML.
version: 0.1

log:
  fields:
    service: 'registry-${name}'

storage:
  cache:
    blobdescriptor: inmemory
  filesystem:
    rootdirectory: /var/lib/registry
  delete:
    enabled: true
  maintenance:
    # Abandoned uploads older than `age` are purged every `interval`.
    uploadpurging:
      enabled: true
      age: 24h
      interval: 4h
      dryrun: false

http:
  addr: ':5000'
  draintimeout: 60s
  headers:
    X-Content-Type-Options:
      - nosniff

health:
  storagedriver:
    enabled: true
    interval: 10s
    threshold: 3

proxy:
  # Quoted so an empty or unusual substitution still parses as a string.
  remoteurl: '${remote_url}'
  # NOTE(review): confirm the pinned registry image version honours
  # `proxy.ttl`; if it does not, this key is silently ignored.
  ttl: 0
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
# Distribution configuration for the Docker Hub pull-through cache.
# `${password}` is substituted by the templating step before YAML parsing.
version: 0.1

log:
  fields:
    service: registry

storage:
  cache:
    blobdescriptor: inmemory
  filesystem:
    rootdirectory: /var/lib/registry
  delete:
    enabled: true
  maintenance:
    # Abandoned uploads older than `age` are purged every `interval`.
    uploadpurging:
      enabled: true
      age: 24h
      interval: 4h
      dryrun: false
    readonly:
      enabled: false

http:
  addr: ':5000'
  draintimeout: 60s
  headers:
    X-Content-Type-Options:
      - nosniff
  debug:
    addr: ':5001'
    # Enable proxy on nodes - https://github.com/containerd/containerd/blob/main/docs/cri/registry.md
    # https://ops.tips/gists/retrieving-docker-registry-metrics-using-prometheus/
    prometheus:
      enabled: true
      path: /metrics

health:
  storagedriver:
    enabled: true
    interval: 10s
    threshold: 3

proxy:
  remoteurl: https://registry-1.docker.io
  username: vbarzin@gmail.com
  # Single-quoted so characters such as `#`, `:`, `*`, `!` or `\` in the
  # secret cannot change how the line parses. A literal `'` in the password
  # would still need doubling (`''`) by the templating step.
  password: '${password}'
  # NOTE(review): confirm the pinned registry image version honours
  # `proxy.ttl`; if it does not, this key is silently ignored.
  ttl: 0
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
# Compose stack for the registry VM: five pull-through cache registries,
# an nginx front proxy, and a web UI for the Docker Hub cache.
networks:
  registry:
    driver: bridge

services:
  # registry:2 is pinned after the 2026-04-13 + 2026-04-19 orphan-index incidents.
  # Floating tags were swapping to regressed versions between GC runs. Upgrade
  # path: bump every registry-* service below in lockstep and bounce via
  # `systemctl restart docker-compose-registry.service`.
  registry-dockerhub:
    image: registry:2.8.3
    container_name: registry-dockerhub
    restart: always
    volumes:
      - /opt/registry/data/dockerhub:/var/lib/registry
      - /opt/registry/config-dockerhub.yml:/etc/docker/registry/config.yml:ro
    networks:
      - registry
    ports:
      - '5001:5001'
    healthcheck:
      test:
        - 'CMD'
        - 'sh'
        - '-c'
        - 'wget -qO- http://127.0.0.1:5000/v2/ >/dev/null 2>&1'
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  registry-ghcr:
    image: registry:2.8.3
    container_name: registry-ghcr
    restart: always
    volumes:
      - /opt/registry/data/ghcr:/var/lib/registry
      - /opt/registry/config-ghcr.yml:/etc/docker/registry/config.yml:ro
    networks:
      - registry
    healthcheck:
      test:
        - 'CMD'
        - 'sh'
        - '-c'
        - 'wget -qO- http://127.0.0.1:5000/v2/ >/dev/null 2>&1'
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  registry-quay:
    image: registry:2.8.3
    container_name: registry-quay
    restart: always
    volumes:
      - /opt/registry/data/quay:/var/lib/registry
      - /opt/registry/config-quay.yml:/etc/docker/registry/config.yml:ro
    networks:
      - registry
    healthcheck:
      test:
        - 'CMD'
        - 'sh'
        - '-c'
        - 'wget -qO- http://127.0.0.1:5000/v2/ >/dev/null 2>&1'
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  registry-k8s:
    image: registry:2.8.3
    container_name: registry-k8s
    restart: always
    volumes:
      - /opt/registry/data/k8s:/var/lib/registry
      - /opt/registry/config-k8s.yml:/etc/docker/registry/config.yml:ro
    networks:
      - registry
    healthcheck:
      test:
        - 'CMD'
        - 'sh'
        - '-c'
        - 'wget -qO- http://127.0.0.1:5000/v2/ >/dev/null 2>&1'
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  registry-kyverno:
    image: registry:2.8.3
    container_name: registry-kyverno
    restart: always
    volumes:
      - /opt/registry/data/kyverno:/var/lib/registry
      - /opt/registry/config-kyverno.yml:/etc/docker/registry/config.yml:ro
    networks:
      - registry
    healthcheck:
      test:
        - 'CMD'
        - 'sh'
        - '-c'
        - 'wget -qO- http://127.0.0.1:5000/v2/ >/dev/null 2>&1'
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  # registry-private decommissioned in Phase 4 of
  # forgejo-registry-consolidation 2026-05-07 — image migration completed,
  # cluster flipped to forgejo.viktorbarzin.me/viktor/<image>. The remaining
  # five services on this VM are pull-through caches for upstream registries.
  # After 1 week of no incidents, `rm -rf /opt/registry/data/private/` on the
  # VM frees ~2.6 GB. The tarball break-glass under
  # /opt/registry/data/private/_breakglass/ stays — it's how we recover
  # infra-ci if Forgejo ever goes fully down.

  # Front proxy. Starts only once every cache registry reports healthy.
  nginx:
    image: nginx:alpine
    container_name: registry-nginx
    restart: always
    # 5050 dropped Phase 4 of forgejo-registry-consolidation 2026-05-07.
    ports:
      - '5000:5000'
      - '5010:5010'
      - '5020:5020'
      - '5030:5030'
      - '5040:5040'
    volumes:
      - /opt/registry/nginx.conf:/etc/nginx/nginx.conf:ro
      - /opt/registry/tls:/etc/nginx/tls:ro
      - nginx-cache:/var/cache/nginx
    networks:
      - registry
    depends_on:
      registry-dockerhub:
        condition: service_healthy
      registry-ghcr:
        condition: service_healthy
      registry-quay:
        condition: service_healthy
      registry-k8s:
        condition: service_healthy
      registry-kyverno:
        condition: service_healthy
    healthcheck:
      test:
        - 'CMD'
        - 'sh'
        - '-c'
        - 'wget -qO- http://127.0.0.1:5000/v2/ >/dev/null 2>&1'
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 15s

  # Web UI pointed at the Docker Hub cache only.
  # NOTE(review): this image uses the floating `latest` tag (as does
  # nginx:alpine above), unlike the pinned registry images.
  registry-ui:
    image: joxit/docker-registry-ui:latest
    container_name: registry-ui
    restart: always
    ports:
      - '8080:80'
    environment:
      NGINX_PROXY_PASS_URL: 'http://registry-dockerhub:5000'
      DELETE_IMAGES: 'true'
      SINGLE_REGISTRY: 'true'
      SHOW_CONTENT_DIGEST: 'true'
      SHOW_CATALOG_NB_TAGS: 'true'
      CATALOG_ELEMENTS_LIMIT: '1000'
      TAGLIST_PAGE_SIZE: '100'
      REGISTRY_TITLE: 'viktorbarzin.me'
    networks:
      - registry
    depends_on:
      registry-dockerhub:
        condition: service_healthy

volumes:
  nginx-cache:
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Registry integrity scanner — two classes of brokenness.
|
||||
|
||||
1. Orphaned layer links: the cleanup-tags.sh + garbage-collect cycle can delete
|
||||
blob data while leaving _layers/ link files intact. The registry then returns
|
||||
HTTP 200 with 0 bytes for those layers (it finds the link, trusts the blob
|
||||
exists, but the data is gone). Containerd sees "unexpected EOF".
|
||||
Action: delete the orphan link so the next pull re-fetches cleanly.
|
||||
|
||||
2. Orphaned OCI-index children: an image index (multi-platform manifest list)
|
||||
references child manifests by digest. If a child's blob has been deleted —
|
||||
by a cleanup-tags.sh tag rmtree followed by garbage-collect walking the
|
||||
children wrong (distribution/distribution#3324 class), or by an incomplete
|
||||
`buildx --push` whose partial blob was later purged by `uploadpurging` —
|
||||
the index survives but pulls fail with `manifest unknown`.
|
||||
Action: log loudly. Deleting an index is a conscious decision (the image
|
||||
was published; removing it breaks downstream consumers), so we surface
|
||||
the problem and leave repair to a human or to the rebuild runbook.
|
||||
|
||||
Run after garbage-collect (Sunday 03:30) and daily (Mon-Sat 02:30).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.stdout.reconfigure(line_buffering=True)
|
||||
|
||||
# --- Command line -----------------------------------------------------------
parser = argparse.ArgumentParser(description="Scan registry for orphaned blobs and indexes")
parser.add_argument(
    "base",
    nargs="?",
    default="/opt/registry/data",
    help="Registry data directory",
)
parser.add_argument(
    "--dry-run",
    action="store_true",
    help="Report but don't delete",
)
args = parser.parse_args()

BASE, DRY_RUN = args.base, args.dry_run

# Media types that identify a multi-platform index / manifest list.
INDEX_MEDIA_TYPES = (
    "application/vnd.oci.image.index.v1+json",
    "application/vnd.docker.distribution.manifest.list.v2+json",
)

# Only the private R/W registry is authoritative for every child of every
# index it stores — we pushed those indexes ourselves, so a missing child is
# always a bug (the 2026-04-13 + 2026-04-19 failure mode).
#
# Pull-through caches (dockerhub, ghcr, quay, k8s, kyverno) are ALLOWED to
# have missing children: they only fetch what someone actually pulls.
# Uncached arm64 / arm / attestation variants of a multi-platform index are
# normal partial state, not orphans. Scanning them generates hundreds of
# false-positive warnings — noise that would mask the real signal from the
# private registry. Scan 2 is therefore private-only.
INDEX_SCAN_REGISTRIES = ("private",)

# Running totals reported in the summary at the end of the run.
total_layer_removed = total_layer_checked = 0
total_index_scanned = total_index_orphans = 0
|
||||
|
||||
|
||||
def load_manifest_blob(blobs_root, digest_hex):
    """Load a manifest blob from the registry blob store as a JSON object.

    The blob is expected at `<blobs_root>/<first two hex chars>/<digest_hex>/data`.
    At most 1 MiB is read, so a larger blob is truncated and therefore fails
    to parse.

    Args:
        blobs_root: Path of the `blobs/sha256` directory of one registry.
        digest_hex: Hex digest of the blob (without the `sha256:` prefix).

    Returns:
        The decoded JSON object as a dict, or None when the blob file is
        missing, unreadable, not valid JSON, or valid JSON that is not an
        object (callers use `.get()` on the result, which would fail on a
        list or scalar).
    """
    blob_path = os.path.join(blobs_root, digest_hex[:2], digest_hex, "data")
    if not os.path.isfile(blob_path):
        return None

    try:
        with open(blob_path, "rb") as f:
            raw = f.read(1024 * 1024)
    except OSError:
        return None

    try:
        manifest = json.loads(raw)
    except (json.JSONDecodeError, UnicodeDecodeError):
        return None

    return manifest if isinstance(manifest, dict) else None
|
||||
|
||||
|
||||
import shutil  # used only for orphan-link removal below

for registry_name in sorted(os.listdir(BASE)):
    repos_dir = os.path.join(BASE, registry_name, "docker/registry/v2/repositories")
    blobs_root = os.path.join(BASE, registry_name, "docker/registry/v2/blobs/sha256")

    if not os.path.isdir(repos_dir):
        continue

    for root, _, _ in os.walk(repos_dir):
        # --- Scan 1: orphan layer links ----------------------------------------
        if root.endswith("/_layers/sha256"):
            repo = root.replace(repos_dir + "/", "").replace("/_layers/sha256", "")

            for digest_dir in os.listdir(root):
                link_file = os.path.join(root, digest_dir, "link")
                if not os.path.isfile(link_file):
                    continue

                total_layer_checked += 1
                blob_data = os.path.join(blobs_root, digest_dir[:2], digest_dir, "data")
                if os.path.isfile(blob_data):
                    continue

                # The link exists but its blob data is gone: remove the link
                # directory so the next pull re-fetches the layer.
                prefix = "[DRY RUN] " if DRY_RUN else ""
                print(f"{prefix}[{registry_name}/{repo}] removing orphaned layer link: {digest_dir[:12]}...")
                if not DRY_RUN:
                    shutil.rmtree(os.path.join(root, digest_dir))
                # Counted in dry-run mode too, so the summary shows what a
                # real run would remove.
                total_layer_removed += 1

        # --- Scan 2: orphan OCI-index children (private registry only) --------
        elif root.endswith("/_manifests/revisions/sha256") and registry_name in INDEX_SCAN_REGISTRIES:
            repo = root.replace(repos_dir + "/", "").replace("/_manifests/revisions/sha256", "")

            for digest_dir in os.listdir(root):
                # Manifest revision entry. Load the blob it points to.
                manifest = load_manifest_blob(blobs_root, digest_dir)
                # Anything that is not a JSON object cannot be an index.
                if not isinstance(manifest, dict):
                    continue

                if manifest.get("mediaType", "") not in INDEX_MEDIA_TYPES:
                    continue

                total_index_scanned += 1

                # Per-repo revision links — serving a child manifest via the API
                # requires <repo>/_manifests/revisions/sha256/<child-digest>/link
                # to exist. The blob data alone is not enough: cleanup-tags.sh
                # rmtrees tag dirs (which on 2.8.x also orphans the per-repo
                # revision links for index children), while the upstream blob
                # data survives in /blobs/. That's exactly the 2026-04-19
                # failure mode — the probe sees 404 even though the blob file
                # is still on disk.
                revisions_root = os.path.dirname(root)  # …/_manifests/revisions

                # `or []` / `or {}` tolerate explicit JSON nulls; malformed
                # (non-object) entries are skipped instead of aborting the scan.
                children = manifest.get("manifests") or []
                if not isinstance(children, list):
                    continue

                for child in children:
                    if not isinstance(child, dict):
                        continue
                    child_digest = child.get("digest", "")
                    if not isinstance(child_digest, str) or not child_digest.startswith("sha256:"):
                        continue
                    child_hex = child_digest[len("sha256:"):]
                    child_link = os.path.join(revisions_root, "sha256", child_hex, "link")
                    if os.path.isfile(child_link):
                        continue

                    platform = child.get("platform") or {}
                    if not isinstance(platform, dict):
                        platform = {}
                    arch = platform.get("architecture", "?")
                    os_ = platform.get("os", "?")
                    child_blob = os.path.join(blobs_root, child_hex[:2], child_hex, "data")
                    blob_state = "blob-data-present" if os.path.isfile(child_blob) else "blob-data-gone"
                    print(
                        f"WARNING [{registry_name}/{repo}] ORPHAN INDEX: "
                        f"{digest_dir[:12]} references missing child {child_hex[:12]} "
                        f"({arch}/{os_}, {blob_state}) — registry returns 404, rebuild required"
                    )
                    total_index_orphans += 1


# --- Summary -------------------------------------------------------------------
mode = "DRY RUN — " if DRY_RUN else ""
print(f"\n{mode}Layer scan: checked {total_layer_checked} links, removed {total_layer_removed} orphaned.")
print(f"{mode}Index scan: inspected {total_index_scanned} image indexes, found {total_index_orphans} orphaned children.")
if total_index_orphans > 0:
    print(f"\nACTION REQUIRED: {total_index_orphans} orphan index child(ren) detected. "
          "See docs/runbooks/registry-rebuild-image.md — the affected image must be rebuilt "
          "(a registry DELETE on an index is a conscious decision, not an automated repair).")
|
||||
|
|
@ -1,174 +0,0 @@
|
|||
# Front proxy for the pull-through cache registries. One `server` per
# upstream registry; blob GETs are cached on disk, everything else under
# /v2/ is passed straight through.
#
# NOTE(review): docker-compose publishes ports 5020/5030/5040 on this
# container, but only 5000 and 5010 have a `server` block here — confirm
# whether the quay/k8s/kyverno fronts are defined elsewhere.
worker_processes auto;
error_log /var/log/nginx/error.log warn;
pid /tmp/nginx.pid;

events {
    worker_connections 1024;
}

http {
    proxy_cache_path /var/cache/nginx/registry
                     levels=1:2
                     keys_zone=registry:500m
                     max_size=50g
                     inactive=24h
                     use_temp_path=off;

    log_format registry '$remote_addr [$time_local] "$request" '
                        '$status $body_bytes_sent '
                        'upstream=$upstream_addr time=$upstream_response_time '
                        'cache=$upstream_cache_status';

    access_log /var/log/nginx/access.log registry;

    # --- Upstreams ---

    upstream dockerhub {
        server registry-dockerhub:5000;
        keepalive 32;
    }

    upstream ghcr {
        server registry-ghcr:5000;
        keepalive 32;
    }

    # `upstream private` removed in Phase 4 of forgejo-registry-consolidation
    # 2026-05-07. The /v2/ private registry is now Forgejo at
    # forgejo.viktorbarzin.me/viktor/.

    # --- Docker Hub (port 5000) ---

    server {
        listen 5000;
        server_name _;

        client_max_body_size 0;
        proxy_request_buffering off;
        proxy_buffering on;

        # Blobs are content-addressed (sha256) — immutable, safe to cache aggressively.
        # Anchored so only real /v2/<repo>/blobs/ paths take the cached route.
        location ~ ^/v2/.+/blobs/ {
            proxy_pass http://dockerhub;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header Connection "";

            # Replace upstream 502/503/504 responses with the uniform
            # @upstream_error reply.
            proxy_intercept_errors on;
            error_page 502 503 504 = @upstream_error;

            proxy_cache registry;
            proxy_cache_lock on;
            proxy_cache_lock_timeout 5m;
            proxy_cache_lock_age 5m;
            proxy_cache_use_stale updating;
            # Only 200s are cached, and only after the second request.
            proxy_cache_valid 200 24h;
            proxy_cache_valid any 0;
            proxy_cache_min_uses 2;
            proxy_cache_methods GET;

            proxy_read_timeout 900;
            proxy_send_timeout 900;
        }

        # Manifests are mutable (tags can change) — no cache, pass through to registry
        location /v2/ {
            proxy_pass http://dockerhub;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header Connection "";

            proxy_cache off;

            proxy_read_timeout 900;
            proxy_send_timeout 900;
        }

        location @upstream_error {
            return 502 "upstream error";
        }

        # Health probe: succeeds only if the upstream registry answers /v2/.
        location /healthz {
            proxy_pass http://dockerhub/v2/;
            proxy_read_timeout 5s;
            proxy_connect_timeout 3s;
            access_log off;
        }

        location / {
            # default_type sets the single Content-Type header for `return`;
            # add_header would append a second one.
            default_type text/plain;
            return 200 'ok';
        }
    }

    # --- GHCR (port 5010) ---

    server {
        listen 5010;
        server_name _;

        client_max_body_size 0;
        proxy_request_buffering off;
        proxy_buffering on;

        # Blobs are content-addressed (sha256) — immutable, safe to cache aggressively.
        # Anchored so only real /v2/<repo>/blobs/ paths take the cached route.
        location ~ ^/v2/.+/blobs/ {
            proxy_pass http://ghcr;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header Connection "";

            # Replace upstream 502/503/504 responses with the uniform
            # @upstream_error reply.
            proxy_intercept_errors on;
            error_page 502 503 504 = @upstream_error;

            proxy_cache registry;
            proxy_cache_lock on;
            proxy_cache_lock_timeout 5m;
            proxy_cache_lock_age 5m;
            proxy_cache_use_stale updating;
            # Only 200s are cached, and only after the second request.
            proxy_cache_valid 200 24h;
            proxy_cache_valid any 0;
            proxy_cache_min_uses 2;
            proxy_cache_methods GET;

            proxy_read_timeout 900;
            proxy_send_timeout 900;
        }

        # Manifests are mutable (tags can change) — no cache, pass through to registry
        location /v2/ {
            proxy_pass http://ghcr;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header Connection "";

            proxy_cache off;

            proxy_read_timeout 900;
            proxy_send_timeout 900;
        }

        location @upstream_error {
            return 502 "upstream error";
        }

        # Health probe: succeeds only if the upstream registry answers /v2/.
        location /healthz {
            proxy_pass http://ghcr/v2/;
            proxy_read_timeout 5s;
            proxy_connect_timeout 3s;
            access_log off;
        }

        location / {
            # default_type sets the single Content-Type header for `return`;
            # add_header would append a second one.
            default_type text/plain;
            return 200 'ok';
        }
    }

    # --- Private R/W Registry (port 5050) decommissioned Phase 4 2026-05-07 ---
    # The TLS port 5050 server block previously fronted `registry-private`.
    # Migrated to Forgejo at forgejo.viktorbarzin.me/viktor/. Both
    # docker-compose.yml and this nginx config no longer reference port 5050.
}
|
||||
|
|
@ -1,474 +0,0 @@
|
|||
# Provider requirements for the Anubis module: only the HashiCorp Kubernetes
# provider, with no version constraint declared here.
terraform {
  required_providers {
    kubernetes = { source = "hashicorp/kubernetes" }
  }
}
|
||||
|
||||
# Per-site Anubis reverse proxy.
|
||||
# Sits between Traefik and the real backend. On first visit, serves a
|
||||
# proof-of-work challenge; on success, drops a long-lived JWT cookie and
|
||||
# proxies the request through to `target_url`.
|
||||
#
|
||||
# Sharing a single ed25519 signing key across instances + COOKIE_DOMAIN at
|
||||
# the registrable domain means a token solved on one viktorbarzin.me subdomain
|
||||
# is honoured by every other Anubis-fronted site.
|
||||
|
||||
# Logical instance name; prefixed with "anubis-" to form resource names.
variable "name" {
  description = "Short logical name (e.g. \"blog\"). Used to derive Service / Deployment / Secret names as anubis-<name>."
  type        = string
}
|
||||
|
||||
# Kubernetes namespace that receives every resource created by this module.
variable "namespace" {
  description = "Namespace to deploy into — typically the same as the protected backend service."
  type        = string
}
|
||||
|
||||
# Upstream that receives requests once the challenge has been passed.
variable "target_url" {
  description = "Backend URL Anubis forwards passing requests to (e.g. http://blog.website.svc.cluster.local)."
  type        = string
}
|
||||
|
||||
# Domain scope of the issued cookie; defaults to the registrable domain.
variable "cookie_domain" {
  description = "Cookie domain — set to the registrable domain so a single PoW solve covers every Anubis-fronted subdomain."
  type        = string
  default     = "viktorbarzin.me"
}
|
||||
|
||||
# Proof-of-work difficulty handed to Anubis.
variable "difficulty" {
  description = "PoW difficulty (leading-zero hex chars). 2 = ~250ms desktop / ~700ms mobile. Bump for stronger filtering."
  type        = number
  default     = 2
}
|
||||
|
||||
# How long a solved challenge stays valid.
variable "cookie_expiration_hours" {
  description = "Lifetime of the issued JWT cookie in hours."
  type        = number
  default     = 720 # 30 days
}
|
||||
|
||||
# Container image tag for the Anubis deployment.
variable "image_tag" {
  description = "ghcr.io/techarohq/anubis tag — pin to a release, never :latest."
  type        = string
  default     = "v1.25.0"
}
|
||||
|
||||
# Optional replica override. null defers to the shared-store-aware default
# computed in locals.
variable "replicas" {
  description = "Optional replica count override. When null, defaults to 1 if shared_store_url is null and 2 otherwise. Capped at 2 — Redis can handle more but anti-affinity assumes ≤2 replicas per Anubis instance on a 5-node cluster."
  type        = number
  default     = null

  validation {
    error_message = "replicas must be 1 or 2 (or null to auto-pick from shared_store_url presence)."
    # Ternary keeps a null value away from the numeric comparisons.
    condition = var.replicas == null ? true : (var.replicas >= 1 && var.replicas <= 2)
  }
}
|
||||
|
||||
# Optional Valkey/Redis URL for sharing challenge state between replicas.
# Must carry an explicit numeric DB index (enforced by the validation below).
variable "shared_store_url" {
  description = "If set, Anubis stores in-flight challenge state in this Valkey/Redis-protocol URL instead of in-process memory, enabling HA across replicas. Format: redis://host:port/<db-index>. The DB index MUST be unique per Anubis instance (this module assumes 16 DBs available, common in standalone Redis). Cluster Redis is redis-master.redis.svc.cluster.local:6379 with HA via Sentinel + haproxy. Without this, replicas>1 causes ~50% PoW failures (challenge issued by pod A, solved against pod B → 500)."
  type        = string
  default     = null

  validation {
    error_message = "shared_store_url must look like redis://host:port/<db-index> (explicit DB index required)."
    # can() turns a regex mismatch into false.
    condition = var.shared_store_url == null || can(regex("^redis://[a-zA-Z0-9_.-]+:[0-9]+/[0-9]+$", var.shared_store_url))
  }
}
|
||||
|
||||
# Memory quantity for the Anubis container.
variable "memory" {
  description = "requests==limits memory. Anubis docs suggest 128Mi handles many concurrent clients."
  type        = string
  default     = "128Mi"
}
|
||||
|
||||
# Optional replacement for the module's built-in bot policy. null selects
# the default policy defined in locals.
variable "policy_yaml" {
  description = "Override the strict default bot-policy YAML. Leave null to use the catch-all CHALLENGE policy."
  type        = string
  default     = null
}
|
||||
|
||||
# CPU request for the Anubis container.
variable "cpu_request" {
  description = "CPU request. PoW verification is server-cheap (just hash check)."
  type        = string
  default     = "20m"
}
|
||||
|
||||
# Derived values shared by the resources below: naming, labels, replica
# count, and the rendered bot-policy document.
locals {
  full_name = "anubis-${var.name}"

  labels = {
    "app"                          = local.full_name
    "app.kubernetes.io/name"       = "anubis"
    "app.kubernetes.io/instance"   = local.full_name
    "app.kubernetes.io/component"  = "ai-bot-challenge"
    "app.kubernetes.io/managed-by" = "terraform"
  }

  # Effective replicas: caller-override > shared-store-aware default.
  effective_replicas = coalesce(var.replicas, var.shared_store_url == null ? 1 : 2)

  # Anubis store config. With backend=valkey, multiple Anubis pods can share
  # in-flight PoW state and a challenge issued by pod A is verifiable by pod
  # B. Default backend is in-process memory which only works at replicas=1.
  # The leading blank lines separate this block from the policy text it is
  # appended to.
  store_yaml_block = var.shared_store_url == null ? "" : <<-EOT


    store:
      backend: valkey
      parameters:
        url: "${var.shared_store_url}"
  EOT

  # Strict bot policy. Default Anubis policy only WEIGHs Mozilla|Opera UAs
  # and lets unmatched UAs (curl, wget, Python-requests, scrapy, headless
  # CLI scrapers) fall through to ALLOW. We import the same upstream
  # snippets and append a catch-all CHALLENGE so anyone without JS+PoW
  # capability is filtered.
  default_policy_yaml = <<-EOT
    bots:
      # Hard-deny known-bad bots first — runs before the method bypass so
      # a declared bad bot can't sneak through by sending a POST.
      - import: (data)/bots/_deny-pathological.yaml
      - import: (data)/bots/aggressive-brazilian-scrapers.yaml
      # Hard-deny declared AI/LLM crawlers (ClaudeBot, GPTBot, Bytespider, …).
      - import: (data)/meta/ai-block-aggressive.yaml
      # Whitelist legitimate search-engine crawlers (Googlebot, Bingbot, …).
      - import: (data)/crawlers/_allow-good.yaml
      # Challenge Firefox AI previews specifically.
      - import: (data)/clients/x-firefox-ai.yaml
      # Allow /.well-known, /robots.txt, /favicon.*, /sitemap.xml — keeps
      # the internet working for benign crawlers and discovery clients.
      - import: (data)/common/keep-internet-working.yaml
      # Allow every non-GET request through. Rationale: AI scrapers steal
      # the body of GETs (page content) — they don't POST. State-mutating
      # methods come from app XHRs (PrivateBin paste creation, Komga
      # uploads, SPA actions) and CORS preflight (OPTIONS). Challenging
      # those breaks the app, because the JS expects JSON and gets the
      # Anubis HTML challenge page. CrowdSec + rate-limit + per-app auth
      # already cover abuse on these methods.
      - name: allow-non-get-methods
        action: ALLOW
        expression: method != "GET"
      # Catch-all: every remaining (GET) request must solve the challenge.
      # This closes the "unmatched UA falls through to ALLOW" gap that
      # lets curl/wget/Python-requests scrape non-CDN-fronted hosts.
      - name: catchall-challenge
        path_regex: .*
        action: CHALLENGE
  EOT

  # Final policy YAML: defaults (or caller override) plus an optional store
  # block when shared_store_url is set. Store block is module-managed and
  # appended universally — callers passing a custom policy_yaml shouldn't
  # include their own `store:` block (they would collide).
  rendered_policy_yaml = "${coalesce(var.policy_yaml, local.default_policy_yaml)}${local.store_yaml_block}"
}
|
||||
|
||||
# Bot policy ConfigMap. Mounted into the pod and referenced by POLICY_FNAME.
|
||||
# Holds the rendered bot policy under the key "botPolicies.yaml".
resource "kubernetes_config_map" "policy" {
  metadata {
    namespace = var.namespace
    name      = "${local.full_name}-policy"
    labels    = local.labels
  }

  data = { "botPolicies.yaml" = local.rendered_policy_yaml }
}
|
||||
|
||||
# ED25519 signing key — pulled from Vault `secret/viktor` -> field
|
||||
# `anubis_ed25519_key`. Same key across every instance so JWTs are
|
||||
# cross-validatable, enabling cross-subdomain SSO.
|
||||
resource "kubernetes_manifest" "ed25519_secret" {
|
||||
manifest = {
|
||||
apiVersion = "external-secrets.io/v1beta1"
|
||||
kind = "ExternalSecret"
|
||||
metadata = {
|
||||
name = "${local.full_name}-key"
|
||||
namespace = var.namespace
|
||||
}
|
||||
spec = {
|
||||
refreshInterval = "1h"
|
||||
secretStoreRef = {
|
||||
name = "vault-kv"
|
||||
kind = "ClusterSecretStore"
|
||||
}
|
||||
target = {
|
||||
name = "${local.full_name}-key"
|
||||
creationPolicy = "Owner"
|
||||
}
|
||||
data = [{
|
||||
secretKey = "key"
|
||||
remoteRef = {
|
||||
key = "viktor"
|
||||
property = "anubis_ed25519_key"
|
||||
}
|
||||
}]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_deployment" "anubis" {
|
||||
metadata {
|
||||
name = local.full_name
|
||||
namespace = var.namespace
|
||||
labels = local.labels
|
||||
}
|
||||
|
||||
spec {
|
||||
replicas = local.effective_replicas
|
||||
|
||||
selector {
|
||||
match_labels = { app = local.full_name }
|
||||
}
|
||||
|
||||
strategy {
|
||||
type = "RollingUpdate"
|
||||
rolling_update {
|
||||
max_surge = 1
|
||||
max_unavailable = 0
|
||||
}
|
||||
}
|
||||
|
||||
template {
|
||||
metadata {
|
||||
labels = local.labels
|
||||
annotations = {
|
||||
# Roll the deployment whenever the policy YAML changes — Anubis
|
||||
# reads the policy at startup, so a ConfigMap update alone
|
||||
# doesn't take effect until pods restart.
|
||||
"checksum/policy" = sha256(local.rendered_policy_yaml)
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
# Spread replicas across nodes to survive a single node failure.
|
||||
# DoNotSchedule (not ScheduleAnyway) so 2 replicas are forced onto
|
||||
# different hosts — otherwise the scheduler may pile them on the
|
||||
# same node and a single node reboot takes the whole Anubis instance
|
||||
# down despite replicas=2. On a 5-node cluster the spread is always
|
||||
# satisfiable; the worst case (4 nodes unavailable) leaves one
|
||||
# replica Pending, but the other keeps serving.
|
||||
topology_spread_constraint {
|
||||
max_skew = 1
|
||||
topology_key = "kubernetes.io/hostname"
|
||||
when_unsatisfiable = "DoNotSchedule"
|
||||
label_selector {
|
||||
match_labels = { app = local.full_name }
|
||||
}
|
||||
}
|
||||
|
||||
container {
|
||||
name = "anubis"
|
||||
image = "ghcr.io/techarohq/anubis:${var.image_tag}"
|
||||
|
||||
port {
|
||||
name = "http"
|
||||
container_port = 8923
|
||||
}
|
||||
port {
|
||||
name = "metrics"
|
||||
container_port = 9090
|
||||
}
|
||||
|
||||
env {
|
||||
name = "BIND"
|
||||
value = ":8923"
|
||||
}
|
||||
env {
|
||||
name = "METRICS_BIND"
|
||||
value = ":9090"
|
||||
}
|
||||
env {
|
||||
name = "TARGET"
|
||||
value = var.target_url
|
||||
}
|
||||
env {
|
||||
name = "DIFFICULTY"
|
||||
value = tostring(var.difficulty)
|
||||
}
|
||||
env {
|
||||
name = "COOKIE_EXPIRATION_TIME"
|
||||
value = "${var.cookie_expiration_hours}h"
|
||||
}
|
||||
# Cross-subdomain SSO: cookie scoped to the registrable domain so
|
||||
# a JWT solved on any Anubis-fronted subdomain is honoured on every
|
||||
# other one. (COOKIE_DOMAIN and COOKIE_DYNAMIC_DOMAIN are mutually
|
||||
# exclusive — picking the explicit form.)
|
||||
env {
|
||||
name = "COOKIE_DOMAIN"
|
||||
value = var.cookie_domain
|
||||
}
|
||||
env {
|
||||
name = "COOKIE_SECURE"
|
||||
value = "true"
|
||||
}
|
||||
env {
|
||||
name = "COOKIE_SAME_SITE"
|
||||
value = "Lax"
|
||||
}
|
||||
# Built-in robots.txt that disallows known AI scrapers — well-behaved
|
||||
# bots get blocked here without ever paying the PoW cost.
|
||||
env {
|
||||
name = "SERVE_ROBOTS_TXT"
|
||||
value = "true"
|
||||
}
|
||||
# Drop cluster-internal IPs from XFF so Anubis sees the real client.
|
||||
env {
|
||||
name = "XFF_STRIP_PRIVATE"
|
||||
value = "true"
|
||||
}
|
||||
env {
|
||||
name = "SLOG_LEVEL"
|
||||
value = "INFO"
|
||||
}
|
||||
env {
|
||||
name = "ED25519_PRIVATE_KEY_HEX_FILE"
|
||||
# Mounted from the ESO-managed Secret below.
|
||||
value = "/keys/key"
|
||||
}
|
||||
env {
|
||||
name = "POLICY_FNAME"
|
||||
value = "/config/botPolicies.yaml"
|
||||
}
|
||||
|
||||
volume_mount {
|
||||
name = "ed25519-key"
|
||||
mount_path = "/keys"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "policy"
|
||||
mount_path = "/config"
|
||||
read_only = true
|
||||
}
|
||||
|
||||
resources {
|
||||
requests = {
|
||||
cpu = var.cpu_request
|
||||
memory = var.memory
|
||||
}
|
||||
limits = {
|
||||
memory = var.memory
|
||||
}
|
||||
}
|
||||
|
||||
# Liveness + readiness on the metrics endpoint (zero auth, always 200).
|
||||
liveness_probe {
|
||||
http_get {
|
||||
path = "/metrics"
|
||||
port = "metrics"
|
||||
}
|
||||
initial_delay_seconds = 10
|
||||
period_seconds = 30
|
||||
failure_threshold = 3
|
||||
}
|
||||
readiness_probe {
|
||||
http_get {
|
||||
path = "/metrics"
|
||||
port = "metrics"
|
||||
}
|
||||
initial_delay_seconds = 2
|
||||
period_seconds = 5
|
||||
failure_threshold = 2
|
||||
}
|
||||
|
||||
security_context {
|
||||
run_as_non_root = true
|
||||
run_as_user = 1000
|
||||
run_as_group = 1000
|
||||
allow_privilege_escalation = false
|
||||
read_only_root_filesystem = true
|
||||
capabilities {
|
||||
drop = ["ALL"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
volume {
|
||||
name = "ed25519-key"
|
||||
secret {
|
||||
secret_name = "${local.full_name}-key"
|
||||
items {
|
||||
key = "key"
|
||||
path = "key"
|
||||
}
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "policy"
|
||||
config_map {
|
||||
name = kubernetes_config_map.policy.metadata[0].name
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
|
||||
ignore_changes = [spec[0].template[0].spec[0].dns_config]
|
||||
}
|
||||
|
||||
depends_on = [kubernetes_manifest.ed25519_secret]
|
||||
}
|
||||
|
||||
resource "kubernetes_service" "anubis" {
|
||||
metadata {
|
||||
name = local.full_name
|
||||
namespace = var.namespace
|
||||
labels = local.labels
|
||||
annotations = {
|
||||
"prometheus.io/scrape" = "true"
|
||||
"prometheus.io/path" = "/metrics"
|
||||
"prometheus.io/port" = "9090"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
selector = { app = local.full_name }
|
||||
port {
|
||||
name = "http"
|
||||
port = 8080
|
||||
target_port = 8923
|
||||
protocol = "TCP"
|
||||
}
|
||||
port {
|
||||
name = "metrics"
|
||||
port = 9090
|
||||
target_port = 9090
|
||||
protocol = "TCP"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_pod_disruption_budget_v1" "anubis" {
|
||||
metadata {
|
||||
name = local.full_name
|
||||
namespace = var.namespace
|
||||
}
|
||||
spec {
|
||||
# max_unavailable=1 means: at most one pod can be voluntarily disrupted
|
||||
# at a time. With replicas=2 this allows clean rolling drains (one pod
|
||||
# goes down → other serves traffic → first recreates elsewhere). With
|
||||
# replicas=1 (no shared store) this is functionally equivalent to no
|
||||
# PDB — drain proceeds, brief outage, new pod schedules elsewhere.
|
||||
# Was min_available=1 before 2026-05-16 which deadlocked drains on
|
||||
# single-replica instances (eviction API can never satisfy the
|
||||
# constraint at replicas=1). See PM-2026-05-11.
|
||||
max_unavailable = "1"
|
||||
selector {
|
||||
match_labels = { app = local.full_name }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
output "service_name" {
|
||||
value = kubernetes_service.anubis.metadata[0].name
|
||||
description = "ClusterIP service name. Pass this to ingress_factory's `service_name` so Traefik routes through Anubis."
|
||||
}
|
||||
|
||||
output "service_port" {
|
||||
value = 8080
|
||||
description = "Service port. Anubis listens on 8923 inside; the Service exposes 8080."
|
||||
}
|
||||
|
|
@ -1,431 +0,0 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
cloudflare = {
|
||||
source = "cloudflare/cloudflare"
|
||||
version = "~> 4"
|
||||
}
|
||||
kubernetes = {
|
||||
source = "hashicorp/kubernetes"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
variable "name" { type = string }
|
||||
variable "service_name" {
|
||||
type = string
|
||||
default = null # defaults to name
|
||||
}
|
||||
variable "host" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "namespace" { type = string }
|
||||
variable "external_name" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "port" {
|
||||
default = "80"
|
||||
}
|
||||
variable "tls_secret_name" {}
|
||||
variable "backend_protocol" {
|
||||
default = "HTTP"
|
||||
}
|
||||
variable "auth" {
|
||||
type = string
|
||||
default = "required"
|
||||
description = <<-EOT
|
||||
Auth posture for this ingress. Pick by asking "what gates the app?":
|
||||
|
||||
* "required" (default, fail-closed): Authentik forward-auth gates every
|
||||
request. Pick this when the backend has NO built-in user auth and
|
||||
Authentik is the only thing standing between strangers and the app.
|
||||
Examples: prowlarr, qbittorrent, netbox, phpipam, k8s-dashboard, any
|
||||
admin UI shipped without its own login.
|
||||
|
||||
* "app": the backend handles its own user authentication (NextAuth,
|
||||
Django sessions, OAuth, bearer-token API, etc.) and Authentik would
|
||||
only get in the way. No Authentik middleware is attached; the app's
|
||||
own login is the gate. Examples: immich, linkwarden, tandoor,
|
||||
freshrss, affine, actualbudget, audiobookshelf, novelapp.
|
||||
**Functionally identical to "none"** — the distinct name exists to
|
||||
record intent at the call site so future readers don't have to guess.
|
||||
|
||||
* "public": Authentik anonymous binding via the `public` outpost.
|
||||
Strangers are auto-bound to the `guest` Authentik user; logged-in
|
||||
users keep their identity in X-authentik-username. Only works for
|
||||
top-level browser navigation — CORS preflight rejects XHR/fetch and
|
||||
automation can't replay the cookie dance. Audit trail, not a gate.
|
||||
|
||||
* "none": no Authentik middleware, no own-auth claim — explicitly
|
||||
public or unauthenticated-by-design. Use for: Anubis-fronted content
|
||||
sites (where Anubis is the gate), native-client APIs that auth
|
||||
themselves (Git, /v2/, WebDAV/CalDAV, CardDAV), webhook receivers,
|
||||
OAuth callbacks, and Authentik outposts themselves.
|
||||
|
||||
**Anti-exposure rule** (the reason "app" exists as a distinct mode):
|
||||
only pick "app" or "none" AFTER you have verified the app has its own
|
||||
user auth (for "app") OR the endpoint is intentionally public (for
|
||||
"none"). Picking either of these on a naked admin UI exposes it to the
|
||||
internet. The default is "required" specifically so accidental omission
|
||||
fails closed.
|
||||
|
||||
**Convention**: when using "app" or "none", add a comment line above
|
||||
the `auth = "..."` line stating what gates the app or why it's public.
|
||||
Future-you reads the call site, not the module description.
|
||||
EOT
|
||||
validation {
|
||||
condition = contains(["required", "app", "public", "none"], var.auth)
|
||||
error_message = "auth must be one of: required, app, public, none."
|
||||
}
|
||||
}
|
||||
variable "ingress_path" {
|
||||
type = list(string)
|
||||
default = ["/"]
|
||||
}
|
||||
variable "max_body_size" {
|
||||
type = string
|
||||
default = null
|
||||
description = "Maximum request body size, e.g. '5g'. null = no limit (Traefik default). When set, a per-ingress Buffering middleware is created and attached."
|
||||
}
|
||||
variable "extra_annotations" {
|
||||
default = {}
|
||||
}
|
||||
variable "ssl_redirect" {
|
||||
default = true
|
||||
type = bool
|
||||
}
|
||||
variable "allow_local_access_only" {
|
||||
default = false
|
||||
type = bool
|
||||
}
|
||||
variable "root_domain" {
|
||||
default = "viktorbarzin.me"
|
||||
type = string
|
||||
}
|
||||
variable "custom_content_security_policy" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "exclude_crowdsec" {
|
||||
type = bool
|
||||
default = false
|
||||
}
|
||||
variable "full_host" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "extra_middlewares" {
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
variable "skip_default_rate_limit" {
|
||||
type = bool
|
||||
default = false
|
||||
}
|
||||
variable "anti_ai_scraping" {
|
||||
type = bool
|
||||
default = null # null = auto (enabled when not protected, disabled when protected)
|
||||
}
|
||||
|
||||
variable "dns_type" {
|
||||
type = string
|
||||
default = "none"
|
||||
description = "Cloudflare DNS: 'proxied' (CNAME to tunnel), 'non-proxied' (A/AAAA to public IP), or 'none'"
|
||||
validation {
|
||||
condition = contains(["proxied", "non-proxied", "none"], var.dns_type)
|
||||
error_message = "dns_type must be 'proxied', 'non-proxied', or 'none'."
|
||||
}
|
||||
}
|
||||
|
||||
# Uptime Kuma external monitor: when true, annotate the ingress so the
# external-monitor-sync CronJob creates a `[External] <name>` monitor pointing
# at https://<host>. Null means "follow dns_type": the monitor is enabled
# whenever dns_type is not "none" (i.e. for both "proxied" and "non-proxied"),
# which is what local.effective_external_monitor computes. An explicit
# true/false always overrides the automatic choice.
variable "external_monitor" {
  type        = bool
  default     = null
  description = "Enable Uptime Kuma external monitor. null = auto (enabled when dns_type != 'none', i.e. 'proxied' or 'non-proxied')."
}
|
||||
|
||||
variable "external_monitor_name" {
|
||||
type = string
|
||||
default = null
|
||||
description = "Override the monitor label. Defaults to the ingress hostname label (e.g. 'dawarich' for dawarich.viktorbarzin.me)."
|
||||
}
|
||||
|
||||
# Cloudflare config defaults — override via variables if these change.
|
||||
# Source of truth: config.tfvars (cloudflare_zone_id, cloudflare_tunnel_id, public_ip, public_ipv6)
|
||||
variable "cloudflare_zone_id" {
|
||||
type = string
|
||||
default = "fd2c5dd4efe8fe38958944e74d0ced6d"
|
||||
}
|
||||
|
||||
variable "cloudflare_tunnel_id" {
|
||||
type = string
|
||||
default = "75182cd7-bb91-4310-b961-5d8967da8b41"
|
||||
}
|
||||
|
||||
variable "public_ip" {
|
||||
type = string
|
||||
default = "176.12.22.76"
|
||||
}
|
||||
|
||||
variable "public_ipv6" {
|
||||
type = string
|
||||
default = "2001:470:6e:43d::2"
|
||||
}
|
||||
|
||||
variable "homepage_group" {
|
||||
type = string
|
||||
default = null # auto-detect from namespace
|
||||
}
|
||||
|
||||
variable "homepage_enabled" {
|
||||
type = bool
|
||||
default = true
|
||||
}
|
||||
|
||||
locals {
|
||||
effective_host = var.full_host != null ? var.full_host : "${var.host != null ? var.host : var.name}.${var.root_domain}"
|
||||
# Anti-AI default: ON when no Authentik auth fronts the ingress (auth =
|
||||
# "none" or auth = "app" — either the app gates users itself or the site
|
||||
# is intentionally public). When Authentik gates the request
|
||||
# (required/public), the auth flow already discourages bots.
|
||||
effective_anti_ai = var.anti_ai_scraping != null ? var.anti_ai_scraping : (var.auth == "none" || var.auth == "app")
|
||||
|
||||
# Auth middleware selection. "app" and "none" both attach no Authentik
|
||||
# middleware — "app" signals "the backend has its own user auth", "none"
|
||||
# signals "intentionally public / native-client API / webhook". The
|
||||
# distinction lives at the call site for human readers; the runtime
|
||||
# effect is identical.
|
||||
auth_middleware = (
|
||||
var.auth == "required" ? "traefik-authentik-forward-auth@kubernetescrd" :
|
||||
var.auth == "public" ? "traefik-authentik-forward-auth-public@kubernetescrd" :
|
||||
null
|
||||
)
|
||||
|
||||
# External monitor enabled by default when the ingress has a public DNS
|
||||
# record (either CF-proxied or direct A/AAAA). Explicit bool overrides.
|
||||
effective_external_monitor = var.external_monitor != null ? var.external_monitor : (var.dns_type != "none")
|
||||
|
||||
# Emit the annotation when effective is true (positive signal), or when the
|
||||
# caller explicitly set external_monitor=false (opt-out). When the caller
|
||||
# leaves it null AND dns_type="none", emit nothing — the sync script's
|
||||
# default opt-in (any *.viktorbarzin.me ingress) keeps monitoring services
|
||||
# that are publicly reachable via routes we don't manage here (e.g.
|
||||
# helm-provisioned ingresses, services behind cloudflared tunnel with DNS
|
||||
# set elsewhere).
|
||||
external_monitor_annotations = local.effective_external_monitor ? merge(
|
||||
{ "uptime.viktorbarzin.me/external-monitor" = "true" },
|
||||
var.external_monitor_name != null ? { "uptime.viktorbarzin.me/external-monitor-name" = var.external_monitor_name } : {},
|
||||
) : (var.external_monitor == false ?
|
||||
{ "uptime.viktorbarzin.me/external-monitor" = "false" } : {}
|
||||
)
|
||||
|
||||
ns_to_group = {
|
||||
monitoring = "Infrastructure"
|
||||
prometheus = "Infrastructure"
|
||||
technitium = "Infrastructure"
|
||||
traefik = "Infrastructure"
|
||||
metallb-system = "Infrastructure"
|
||||
kyverno = "Infrastructure"
|
||||
authentik = "Identity & Security"
|
||||
crowdsec = "Identity & Security"
|
||||
woodpecker = "Development & CI"
|
||||
forgejo = "Development & CI"
|
||||
immich = "Media & Entertainment"
|
||||
frigate = "Smart Home"
|
||||
home-assistant = "Smart Home"
|
||||
ollama = "AI & Data"
|
||||
dbaas = "Infrastructure"
|
||||
servarr = "Media & Entertainment"
|
||||
navidrome = "Media & Entertainment"
|
||||
nextcloud = "Productivity"
|
||||
n8n = "Automation"
|
||||
changedetection = "Automation"
|
||||
finance = "Finance & Personal"
|
||||
homepage = "Core Platform"
|
||||
reverse-proxy = "Smart Home"
|
||||
mailserver = "Infrastructure"
|
||||
}
|
||||
|
||||
homepage_group = coalesce(
|
||||
var.homepage_group,
|
||||
lookup(local.ns_to_group, var.namespace, "Other")
|
||||
)
|
||||
|
||||
dns_name = local.effective_host == var.root_domain ? "@" : replace(local.effective_host, ".${var.root_domain}", "")
|
||||
|
||||
homepage_defaults = var.homepage_enabled ? {
|
||||
"gethomepage.dev/enabled" = "true"
|
||||
"gethomepage.dev/name" = replace(replace(var.name, "-", " "), "_", " ")
|
||||
"gethomepage.dev/group" = local.homepage_group
|
||||
"gethomepage.dev/href" = "https://${local.effective_host}"
|
||||
"gethomepage.dev/icon" = "${replace(var.name, "-", "")}.png"
|
||||
} : {}
|
||||
|
||||
# Parse "5g"/"50m"/"1024k"/"42" into bytes. Traefik's Buffering middleware
|
||||
# takes maxRequestBodyBytes as an integer. Empty unit = bytes.
|
||||
body_size_match = var.max_body_size == null ? null : regex("^([0-9]+)([kmgKMG]?)$", var.max_body_size)
|
||||
body_size_unit_multiplier = var.max_body_size == null ? 0 : (
|
||||
lower(local.body_size_match[1]) == "g" ? 1073741824 :
|
||||
lower(local.body_size_match[1]) == "m" ? 1048576 :
|
||||
lower(local.body_size_match[1]) == "k" ? 1024 :
|
||||
1
|
||||
)
|
||||
max_body_size_bytes = var.max_body_size == null ? 0 : tonumber(local.body_size_match[0]) * local.body_size_unit_multiplier
|
||||
}
|
||||
|
||||
|
||||
# ExternalName Service that fronts a host living outside the cluster.
# Created only when var.external_name is set. It is named var.name, which is
# also the Service the ingress in this module targets by default (when
# var.service_name is left null).
resource "kubernetes_service" "proxied-service" {
  count = var.external_name == null ? 0 : 1

  metadata {
    name      = var.name
    namespace = var.namespace
    labels = {
      "app" = var.name
    }
  }

  spec {
    # count above guarantees var.external_name is non-null here, so the
    # Service is always of type ExternalName.
    type = "ExternalName"
    # Point at the caller-supplied external host. Using var.name here would
    # make the Service a CNAME to its own short name and silently ignore
    # var.external_name.
    external_name = var.external_name

    port {
      name        = "${var.name}-web"
      port        = var.port
      protocol    = "TCP"
      target_port = var.port
    }
  }
}
|
||||
|
||||
resource "kubernetes_ingress_v1" "proxied-ingress" {
|
||||
metadata {
|
||||
name = var.name
|
||||
namespace = var.namespace
|
||||
annotations = merge({
|
||||
"traefik.ingress.kubernetes.io/router.middlewares" = join(",", compact(concat([
|
||||
"traefik-retry@kubernetescrd",
|
||||
"traefik-error-pages@kubernetescrd",
|
||||
var.skip_default_rate_limit ? null : "traefik-rate-limit@kubernetescrd",
|
||||
var.custom_content_security_policy == null ? "traefik-csp-headers@kubernetescrd" : null,
|
||||
var.exclude_crowdsec ? null : "traefik-crowdsec@kubernetescrd",
|
||||
local.effective_anti_ai ? "traefik-ai-bot-block@kubernetescrd" : null,
|
||||
local.effective_anti_ai ? "traefik-anti-ai-headers@kubernetescrd" : null,
|
||||
local.auth_middleware,
|
||||
var.allow_local_access_only ? "traefik-local-only@kubernetescrd" : null,
|
||||
var.custom_content_security_policy != null ? "${var.namespace}-custom-csp-${var.name}@kubernetescrd" : null,
|
||||
var.max_body_size != null ? "${var.namespace}-buffering-${var.name}@kubernetescrd" : null,
|
||||
], var.extra_middlewares)))
|
||||
"traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
|
||||
}, local.homepage_defaults, var.extra_annotations,
|
||||
var.dns_type != "none" ? { "cloudflare.viktorbarzin.me/dns-type" = var.dns_type } : {},
|
||||
local.external_monitor_annotations,
|
||||
)
|
||||
}
|
||||
|
||||
spec {
|
||||
ingress_class_name = "traefik"
|
||||
tls {
|
||||
hosts = [local.effective_host]
|
||||
secret_name = var.tls_secret_name
|
||||
}
|
||||
rule {
|
||||
host = local.effective_host
|
||||
http {
|
||||
dynamic "path" {
|
||||
for_each = var.ingress_path
|
||||
|
||||
content {
|
||||
path = path.value
|
||||
backend {
|
||||
service {
|
||||
|
||||
name = var.service_name != null ? var.service_name : var.name
|
||||
port {
|
||||
number = var.port
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Custom CSP headers middleware - created per service when custom_content_security_policy is set
|
||||
resource "kubernetes_manifest" "custom_csp" {
|
||||
count = var.custom_content_security_policy != null ? 1 : 0
|
||||
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
kind = "Middleware"
|
||||
metadata = {
|
||||
name = "custom-csp-${var.name}"
|
||||
namespace = var.namespace
|
||||
}
|
||||
spec = {
|
||||
headers = {
|
||||
contentSecurityPolicy = var.custom_content_security_policy
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Buffering middleware - created per service when max_body_size is set.
|
||||
# Traefik default is unlimited; setting maxRequestBodyBytes enforces a limit
|
||||
# (e.g. Forgejo container pushes can ship multi-GB layer blobs).
|
||||
resource "kubernetes_manifest" "buffering" {
|
||||
count = var.max_body_size != null ? 1 : 0
|
||||
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
kind = "Middleware"
|
||||
metadata = {
|
||||
name = "buffering-${var.name}"
|
||||
namespace = var.namespace
|
||||
}
|
||||
spec = {
|
||||
buffering = {
|
||||
maxRequestBodyBytes = local.max_body_size_bytes
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Cloudflare DNS records — created automatically when dns_type is set.
|
||||
# Proxied: CNAME to Cloudflare tunnel. Non-proxied: A + AAAA to public IP.
|
||||
resource "cloudflare_record" "proxied" {
|
||||
count = var.dns_type == "proxied" ? 1 : 0
|
||||
name = local.dns_name
|
||||
content = "${var.cloudflare_tunnel_id}.cfargotunnel.com"
|
||||
proxied = true
|
||||
ttl = 1
|
||||
type = "CNAME"
|
||||
zone_id = var.cloudflare_zone_id
|
||||
allow_overwrite = true
|
||||
}
|
||||
|
||||
resource "cloudflare_record" "non_proxied_a" {
|
||||
count = var.dns_type == "non-proxied" ? 1 : 0
|
||||
name = local.dns_name
|
||||
content = var.public_ip
|
||||
proxied = false
|
||||
ttl = 1
|
||||
type = "A"
|
||||
zone_id = var.cloudflare_zone_id
|
||||
allow_overwrite = true
|
||||
}
|
||||
|
||||
resource "cloudflare_record" "non_proxied_aaaa" {
|
||||
count = var.dns_type == "non-proxied" ? 1 : 0
|
||||
name = local.dns_name
|
||||
content = var.public_ipv6
|
||||
proxied = false
|
||||
ttl = 1
|
||||
type = "AAAA"
|
||||
zone_id = var.cloudflare_zone_id
|
||||
allow_overwrite = true
|
||||
}
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
variable "name" {
|
||||
description = "Unique name for PV and PVC (convention: <service>-<purpose>)"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "namespace" {
|
||||
description = "Kubernetes namespace for the PVC"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "nfs_server" {
|
||||
description = "NFS server address"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "nfs_path" {
|
||||
description = "NFS export path (e.g. /mnt/main/myservice)"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "storage" {
|
||||
description = "Storage capacity (informational for NFS)"
|
||||
type = string
|
||||
default = "10Gi"
|
||||
}
|
||||
|
||||
variable "access_modes" {
|
||||
description = "PV/PVC access modes"
|
||||
type = list(string)
|
||||
default = ["ReadWriteMany"]
|
||||
}
|
||||
|
||||
resource "kubernetes_persistent_volume" "this" {
|
||||
metadata {
|
||||
name = var.name
|
||||
}
|
||||
spec {
|
||||
capacity = {
|
||||
storage = var.storage
|
||||
}
|
||||
access_modes = var.access_modes
|
||||
persistent_volume_reclaim_policy = "Retain"
|
||||
storage_class_name = "nfs-truenas"
|
||||
volume_mode = "Filesystem"
|
||||
|
||||
mount_options = [
|
||||
"nfsvers=4",
|
||||
"soft",
|
||||
"timeo=30",
|
||||
"retrans=3",
|
||||
"actimeo=5",
|
||||
]
|
||||
|
||||
persistent_volume_source {
|
||||
csi {
|
||||
driver = "nfs.csi.k8s.io"
|
||||
volume_handle = var.name
|
||||
volume_attributes = {
|
||||
server = var.nfs_server
|
||||
share = var.nfs_path
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_persistent_volume_claim" "this" {
|
||||
metadata {
|
||||
name = var.name
|
||||
namespace = var.namespace
|
||||
}
|
||||
spec {
|
||||
access_modes = var.access_modes
|
||||
storage_class_name = "nfs-truenas"
|
||||
volume_name = kubernetes_persistent_volume.this.metadata[0].name
|
||||
|
||||
resources {
|
||||
requests = {
|
||||
storage = var.storage
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
output "claim_name" {
|
||||
description = "PVC name to use in pod spec persistent_volume_claim blocks"
|
||||
value = kubernetes_persistent_volume_claim.this.metadata[0].name
|
||||
}
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
variable "namespace" { type = string }
|
||||
variable "tls_secret_name" {}
|
||||
variable "tls_crt" {
|
||||
default = ""
|
||||
}
|
||||
variable "tls_key" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
resource "kubernetes_secret" "tls_secret" {
|
||||
metadata {
|
||||
name = var.tls_secret_name
|
||||
namespace = var.namespace
|
||||
}
|
||||
data = {
|
||||
# Cannot set default function in variable so use default behaviour here
|
||||
"tls.crt" = var.tls_crt == "" ? file("${path.root}/secrets/fullchain.pem") : var.tls_crt
|
||||
"tls.key" = var.tls_key == "" ? file("${path.root}/secrets/privkey.pem") : var.tls_key
|
||||
}
|
||||
type = "kubernetes.io/tls"
|
||||
lifecycle {
|
||||
# KYVERNO_LIFECYCLE_V1: the sync-tls-secret policy stamps generate.kyverno.io/* + app.kubernetes.io/managed-by labels on this generated Secret
|
||||
ignore_changes = [metadata[0].labels]
|
||||
}
|
||||
}
|
||||
|
|
@ -1,89 +0,0 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
set -e
|
||||
|
||||
|
||||
# Working locations for this renewal run. Exported so they are visible to any
# child process started by this script.
export le_dir="/tmp/le/"
export config_dir="$le_dir/out/config"
export technitium_token="$TECHNITIUM_API_KEY"
export certbot_auth="$le_dir/certbot_auth.sh"
export certbot_cleanup="$le_dir/certbot_cleanup.sh"

# -p: succeed when the directory is left over from a previous run. Plain
# `mkdir` exits non-zero in that case and, with `set -e` active, would abort
# the whole renewal before certbot is ever invoked.
mkdir -p "$le_dir"
|
||||
echo "Creating $certbot_auth"
|
||||
cat << EOF > $certbot_auth
|
||||
#!/usr/bin/env sh
|
||||
# Generate API token from DNS web console
|
||||
|
||||
# Create challenge TXT record
|
||||
|
||||
# TECHNITIUM
|
||||
#API_TOKEN="$technitium_token"
|
||||
# curl "http://technitium-web.technitium.svc.cluster.local:5380/api/zones/records/add?token=\$API_TOKEN&domain=_acme-challenge.\$CERTBOT_DOMAIN&type=TXT&ttl=60&text=\$CERTBOT_VALIDATION"
|
||||
|
||||
# CLOUDFLARE
|
||||
curl https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H "Authorization: Bearer $CLOUDFLARE_TOKEN" \
|
||||
-d "{
|
||||
\"comment\": \"certbot temporary challenge\",
|
||||
\"content\": \"\$CERTBOT_VALIDATION\",
|
||||
\"name\": \"_acme-challenge.\$CERTBOT_DOMAIN\",
|
||||
\"proxied\": false,
|
||||
\"ttl\": 60,
|
||||
\"type\": \"TXT\"
|
||||
}"
|
||||
|
||||
|
||||
# Sleep to make sure the change has time to propagate from primary to secondary name servers
|
||||
sleep 25
|
||||
EOF
|
||||
|
||||
# Restrict the generated auth hook to the current user and make it executable.
chmod 700 "$certbot_auth"
# The hook is intentionally NOT printed. It was written through an unquoted
# heredoc, so $CLOUDFLARE_TOKEN and $technitium_token were expanded into the
# file; dumping it would write live API credentials to stdout / job logs.
echo "Wrote $certbot_auth (contents not shown: contains API credentials)"
|
||||
|
||||
|
||||
echo "Creating $certbot_cleanup"
|
||||
cat << EOF > $certbot_cleanup
|
||||
#!/usr/bin/env sh
|
||||
# Generate API token from DNS web console
|
||||
|
||||
# Delete challenge TXT record
|
||||
|
||||
# TECHNIUM
|
||||
#API_TOKEN="$technitium_token"
|
||||
#curl "http://technitium-web.technitium.svc.cluster.local:5380/api/zones/records/delete?token=\$API_TOKEN&domain=_acme-challenge.\$CERTBOT_DOMAIN&type=TXT&text=\$CERTBOT_VALIDATION"
|
||||
|
||||
# CLOUDFLARE
|
||||
curl https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records -H "Authorization: Bearer $CLOUDFLARE_TOKEN" | jq -r '.result[] | select(.name | contains("acme")) | .id' | while read -r record_id; do
|
||||
curl https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records/\$record_id \
|
||||
-X DELETE \
|
||||
-H "Authorization: Bearer $CLOUDFLARE_TOKEN"
|
||||
done
|
||||
|
||||
EOF
|
||||
|
||||
# Restrict the generated cleanup hook to the current user and make it executable.
chmod 700 "$certbot_cleanup"
# The hook is intentionally NOT printed. It was written through an unquoted
# heredoc, so $CLOUDFLARE_TOKEN and $technitium_token were expanded into the
# file; dumping it would write live API credentials to stdout / job logs.
echo "Wrote $certbot_cleanup (contents not shown: contains API credentials)"
|
||||
|
||||
|
||||
echo "Cleaning up stale _acme-challenge TXT records from Cloudflare"
|
||||
curl -s "https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records?type=TXT&name=_acme-challenge.viktorbarzin.me" \
|
||||
-H "Authorization: Bearer $CLOUDFLARE_TOKEN" | jq -r '.result[].id' | while read -r old_id; do
|
||||
echo "Deleting stale record $old_id"
|
||||
curl -s -X DELETE "https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records/$old_id" \
|
||||
-H "Authorization: Bearer $CLOUDFLARE_TOKEN" > /dev/null
|
||||
done
|
||||
|
||||
echo "Executing certbot renew command"
# Request one certificate covering the apex and the wildcard via the manual
# DNS-01 challenge, using the hook scripts generated earlier in this script.
# The wildcard name is single-quoted so the shell hands it to certbot verbatim
# instead of expanding it against files in the current directory; path
# arguments are quoted so they survive word splitting.
certbot certonly \
  --manual \
  --preferred-challenges=dns \
  --email me@viktorbarzin.me \
  --server https://acme-v02.api.letsencrypt.org/directory \
  --agree-tos \
  --manual-auth-hook "$certbot_auth" \
  --config-dir "$config_dir" \
  --work-dir "$le_dir/workdir" \
  --logs-dir "$le_dir/logsdir" \
  --no-eff-email \
  --manual-cleanup-hook "$certbot_cleanup" \
  -d viktorbarzin.me \
  -d '*.viktorbarzin.me'
|
||||
|
||||
# Show the issued certificate chain (public material) for the run log.
cat "$config_dir/live/viktorbarzin.me/fullchain.pem"
# The private key is deliberately not printed: echoing privkey.pem would put
# the TLS private key into stdout / job logs. It is still copied into
# ./secrets by the cp commands that follow.
|
||||
|
||||
cp --remove-destination $config_dir/live/viktorbarzin.me/fullchain.pem ./secrets
|
||||
cp --remove-destination $config_dir/live/viktorbarzin.me/privkey.pem ./secrets
|
||||
|
||||
|
||||
echo "Done renewing cert. Output certificates stored in ./secrets\n"
|
||||
ls ./secrets
|
||||
Loading…
Add table
Add a link
Reference in a new issue