fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip]
6d224861 came from a --no-checkout worktree whose empty index made the
commit drop every file except two. This restores 05b50d2b's full tree and
correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su
entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the
live infra was never applied from the broken commit.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
6d224861c4
commit
fd0f4a0365
1166 changed files with 358546 additions and 0 deletions
133
stacks/calico/main.tf
Normal file
133
stacks/calico/main.tf
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
# Calico CNI
#
# Calico has underpinned this cluster's pod networking since 2024-07-30, installed
# as raw kubectl manifests (tigera-operator Deployment + CRDs + Installation CR).
# Bringing the full stack under Terraform has a high blast radius — the operator
# and its Deployment must never flap during node pressure or during any apply,
# because new pod scheduling breaks within seconds of a CNI outage.
#
# This stack (created 2026-04-18, Wave 5b) initially adopts only the three
# namespaces: calico-system, calico-apiserver, tigera-operator. The
# `tigera-operator` Deployment, the 20+ CRDs it manages, and the `Installation`
# CR itself are intentionally *not* adopted yet — they require a low-traffic
# window and a careful ignore_changes set to cover operator-generated defaults
# on the Installation CR. Follow-up tracked in beads code-3ad.
#
# The namespaces are safe to adopt (no networking impact — they're just label
# containers) and give TF an audit trail entry for the labels/tier Kyverno
# cares about.
#
# Note: this file also defines the wave-1 egress-observation
# GlobalNetworkPolicy further down; it is documented at its own resource.
|
||||
|
||||
# Brings the existing calico-system namespace under Terraform so that its
# labels are tracked in state.
resource "kubernetes_namespace" "calico_system" {
  metadata {
    labels = {
      name = "calico-system"
      # Keel auto-update stays disabled here: tigera-operator manages this
      # namespace and reverts the DaemonSet image from its Installation CR,
      # so enrolling it would be incompatible.
      # "keel.sh/enrolled" = "true"
    }
    name = "calico-system"
  }

  lifecycle {
    # Labels written by other controllers that Terraform must not fight over:
    #   - KYVERNO_LIFECYCLE_V1: the goldilocks-vpa-auto-mode label, which is
    #     put on every namespace.
    #   - pod-security.kubernetes.io/*: applied by the tigera-operator
    #     reconciler on calico-system + calico-apiserver for PSA 'privileged'.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
      metadata[0].labels["pod-security.kubernetes.io/enforce"],
      metadata[0].labels["pod-security.kubernetes.io/enforce-version"],
    ]
  }
}
|
||||
|
||||
# Brings the existing calico-apiserver namespace under Terraform so that its
# labels are tracked in state.
resource "kubernetes_namespace" "calico_apiserver" {
  metadata {
    labels = {
      name = "calico-apiserver"
    }
    name = "calico-apiserver"
  }

  lifecycle {
    # Same externally-managed labels as on calico_system:
    #   - KYVERNO_LIFECYCLE_V1: the goldilocks VPA update-mode label.
    #   - PSA labels applied by tigera-operator.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
      metadata[0].labels["pod-security.kubernetes.io/enforce"],
      metadata[0].labels["pod-security.kubernetes.io/enforce-version"],
    ]
  }
}
|
||||
|
||||
# Brings the existing tigera-operator namespace under Terraform so that its
# labels are tracked in state.
resource "kubernetes_namespace" "tigera_operator" {
  metadata {
    labels = {
      name = "tigera-operator"
    }
    name = "tigera-operator"
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: the goldilocks-vpa-auto-mode ClusterPolicy stamps
    # this label on every namespace, so Terraform leaves it alone.
    # NOTE(review): unlike calico_system / calico_apiserver, no
    # pod-security.kubernetes.io/* labels are ignored here — confirm the live
    # namespace carries none before applying.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
|
||||
|
||||
# Wave 1 W1.6 (beads code-8ywc): observation phase via Calico GlobalNetworkPolicy
# `action: Log`. This is the supported primitive on Calico OSS v3.26 — the
# Calico Enterprise FelixConfiguration.flowLogsFileEnabled approach is NOT
# accepted by the OSS CRD (verified 2026-05-19: "strict decoding error").
#
# How it works:
# - The GNP selects pods by namespaceSelector.
# - The egress rule with action=Log writes an iptables LOG entry that lands in
#   the kernel log / journald with prefix "calico-packet:" on each node.
# - The Alloy DaemonSet already ships node-journal to Loki (job=node-journal).
# - LogQL query: {job="node-journal"} |= "calico-packet" surfaces egress flows.
# - After ~1 week of observation, build the empirical per-namespace egress
#   allowlist; then flip the same GNP to [Allow specific dests, Deny rest].
#
# Started with `recruiter-responder` as the pilot on 2026-05-19; expanded
# 2026-05-19 to all tier 3+4 namespaces (per locked plan — tier 3-edge has
# 17 ns, tier 4-aux has 65 ns, all use Calico's WorkloadEndpoint policy
# path). Tier 0/1/2 stay out of observation in wave 1 (cluster infra +
# GPU workloads, deferred per the plan).
#
# `apply_only = true` on the kubectl_manifest means renaming the TF resource
# does NOT destroy the old GNP via TF — we kubectl delete the legacy pilot
# GNP after this applies to clean it up. (Tracked manually.)
|
||||
# Observation-only Calico GlobalNetworkPolicy for wave 1: logs and then allows
# all egress from pods in namespaces whose `tier` label is 3-edge or 4-aux.
resource "kubectl_manifest" "wave1_egress_observe_tier34" {
  # Terraform never deletes the applied object, so renaming or removing this
  # resource leaves the GNP in the cluster (clean up with kubectl).
  apply_only = true

  yaml_body = yamlencode({
    apiVersion = "projectcalico.org/v3"
    kind       = "GlobalNetworkPolicy"

    metadata = {
      annotations = {
        "security.viktorbarzin.me/purpose" = "observe-then-enforce egress for tier 3-edge + 4-aux"
        "security.viktorbarzin.me/wave"    = "1"
      }
      name = "wave1-egress-observe-tier34"
    }

    spec = {
      types             = ["Egress"]
      order             = 2000
      namespaceSelector = "tier in {\"3-edge\", \"4-aux\"}"
      selector          = "all()"

      # Rule order is significant: log first, then allow.
      egress = [
        # 1) Log every egress packet. The LOG target writes to the
        #    kernel/journal, and alloy ships it to Loki with
        #    job=node-journal,transport=kernel.
        #    LogQL: {job="node-journal"} |= "calico-packet"
        { action = "Log" },
        # 2) Allow everything — observation must NOT break workloads.
        { action = "Allow" },
      ]
    }
  })
}
|
||||
|
||||
# CI retrigger 2026-05-16T13:42:57+00:00 — bulk enrollment apply (pipeline #689 killed)
|
||||
# CI retrigger v2 2026-05-16T13:46:35+00:00
|
||||
|
||||
# CI retrigger v3 2026-05-16T14:06:39Z
|
||||
|
||||
# CI retrigger v4 2026-05-16T14:13:59Z
|
||||
|
||||
# CI retrigger v5 2026-05-16T23:10:38Z
|
||||
|
||||
# CI retrigger v6 2026-05-16T23:18:58Z
|
||||
1
stacks/calico/secrets
Symbolic link
1
stacks/calico/secrets
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../../secrets
|
||||
6
stacks/calico/terragrunt.hcl
Normal file
6
stacks/calico/terragrunt.hcl
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
# Inherit the shared Terragrunt configuration located by searching the
# parent folders of this stack.
include "root" {
  path = find_in_parent_folders()
}
|
||||
|
||||
# No platform dependency — Calico provides the cluster network the rest
|
||||
# of the platform runs on. This stack must not introduce a dep cycle.
|
||||
Loading…
Add table
Add a link
Reference in a new issue