From 612a83f8ce0b0bf283efb25cdb8b0cf80fde5fbc Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 19 May 2026 22:14:16 +0000 Subject: [PATCH] =?UTF-8?q?security(wave1):=20W1.6=20expand=20observation?= =?UTF-8?q?=20from=20recruiter-responder=20pilot=20=E2=86=92=20tier=203+4?= =?UTF-8?q?=20(82=20namespaces)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Change - Replaced kubectl_manifest.wave1_egress_observe_recruiter_responder with kubectl_manifest.wave1_egress_observe_tier34 - namespaceSelector changed from `kubernetes.io/metadata.name == 'recruiter-responder'` to `tier in {"3-edge", "4-aux"}` — covers 82 namespaces (17 tier-3-edge + 65 tier-4-aux) - Legacy pilot GNP wave1-egress-observe-recruiter-responder kubectl-deleted (apply_only=true means TF rename does NOT destroy the live old resource; cleanup done manually) - Tier 0/1/2 namespaces explicitly out of wave 1 observation per locked plan (cluster infra + GPU workloads, deferred) ## Verification (live cluster, 2026-05-19) - 82 namespaces match `tier in (3-edge,4-aux)` - Felix translated the new policy into an iptables LOG rule in cali-po-* chain - LogQL `{job="node-journal"} |~ "calico-packet"` returns real packet metadata from multiple namespaces with distinct destinations: - east-west pod-to-pod (10.10.108.48, 10.10.122.131) - in-cluster service VIP (10.96.0.10 — kube-dns) - external (149.154.166.110 — Telegram API from recruiter-responder) ## W1.7 next step (calendar-bound, ~1 week) - Let observation run for ~1 week - Aggregate distinct destinations per namespace via LogQL - Build per-namespace egress allowlist module `tier3_egress_baseline` - Flip GNP rules from `[Log, Allow]` to `[Allow, Deny]` - Phased per-namespace as originally planned Co-Authored-By: Claude Opus 4.7 --- stacks/calico/main.tf | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/stacks/calico/main.tf b/stacks/calico/main.tf index 
677cde60..19484b08 100644 --- a/stacks/calico/main.tf +++ b/stacks/calico/main.tf @@ -83,31 +83,37 @@ resource "kubernetes_namespace" "tigera_operator" { # - After ~1 week of observation, build the empirical per-namespace egress # allowlist; then flip the same GNP to [Allow specific dests, Deny rest] # -# Starting with `recruiter-responder` as the W1.7 pilot per the locked plan -# (smallest egress footprint, local llama-cpp). Expand by adding namespaces -# to namespaceSelector.matchExpressions over time. -resource "kubectl_manifest" "wave1_egress_observe_recruiter_responder" { +# Started with `recruiter-responder` as the pilot on 2026-05-19; expanded +# 2026-05-19 to all tier 3+4 namespaces (per locked plan — tier 3-edge has +# 17 ns, tier 4-aux has 65 ns, all use Calico's WorkloadEndpoint policy +# path). Tier 0/1/2 stay out of observation in wave 1 (cluster infra + +# GPU workloads, deferred per the plan). +# +# `apply_only = true` on the kubectl_manifest means renaming the TF resource +# does NOT destroy the old GNP via TF — we kubectl delete the legacy pilot +# GNP after this applies to clean it up. (Tracked manually.) +resource "kubectl_manifest" "wave1_egress_observe_tier34" { yaml_body = yamlencode({ apiVersion = "projectcalico.org/v3" kind = "GlobalNetworkPolicy" metadata = { - name = "wave1-egress-observe-recruiter-responder" + name = "wave1-egress-observe-tier34" annotations = { "security.viktorbarzin.me/wave" = "1" - "security.viktorbarzin.me/purpose" = "observe-then-enforce egress; observation phase only" + "security.viktorbarzin.me/purpose" = "observe-then-enforce egress for tier 3-edge + 4-aux" } } spec = { - # Order high (numerically lower priority — Calico evaluates lowest order - # first, but here we just want to run before any default-deny gets added). 
- order = 2000 - selector = "all()" - namespaceSelector = "kubernetes.io/metadata.name == 'recruiter-responder'" - types = ["Egress"] + order = 2000 + selector = "all()" + namespaceSelector = "tier in {\"3-edge\", \"4-aux\"}" + types = ["Egress"] egress = [ - # Rule 1: log every egress packet (does not terminate; falls through) + # Rule 1: log every egress packet (LOG target writes to kernel/journal, + # alloy ships to Loki with job=node-journal,transport=kernel). + # LogQL: {job="node-journal"} |~ "calico-packet" { action = "Log" }, - # Rule 2: allow everything (so observation does NOT break the namespace) + # Rule 2: allow everything (observation must NOT break workloads). { action = "Allow" }, ] }