From 92361f36dbcd367f0a8866bbd0d62fea8bb00113 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 24 Jun 2026 12:22:48 +0000 Subject: [PATCH] calico: enable Goldmane + Whisker (Calico 3.30 OSS flow observability) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns on Calico 3.30's native east-west flow observability so we can see which Service talks to which (ADR-0014, issue #57). Enabled via the operator CRs directly (kubectl_manifest Goldmane + Whisker, name=default) rather than the Helm goldmane/whisker flags, because the goldmanes/whiskers CRDs already exist and this sidesteps the helm-upgrade CR-before-CRD ordering issue. Whisker notifications=Disabled so the UI doesn't call the external Tigera endpoint. Applied supervised: creating the Goldmane CR re-rendered calico-node with the FELIX_FLOWLOGSGOLDMANESERVER env (operator auto-wires Felix — no manual FelixConfiguration); calico-node rolled cleanly 7/7, tigerastatus healthy, goldmane is receiving flows from all nodes, Whisker UI serves. Durable Loki persistence is NOT included here: the Goldmane emitter is Calico Cloud/Enterprise-gated with no OSS knob to aim it at Loki (the CR can override only name+resources, not env), so a durable trail needs a small custom gRPC consumer of goldmane:7443 — tracked in issue #58. 
Co-Authored-By: Claude Opus 4.8 --- stacks/calico/main.tf | 50 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/stacks/calico/main.tf b/stacks/calico/main.tf index dd2ef2a4..39550024 100644 --- a/stacks/calico/main.tf +++ b/stacks/calico/main.tf @@ -156,10 +156,11 @@ resource "helm_release" "tigera_operator" { values = [yamlencode({ installation = { enabled = false } apiServer = { enabled = false } - # Goldmane (flow aggregator) + Whisker (observability UI) are new in Calico - # 3.30 and default-on; disabled — we use Prometheus/Loki, and on a helm - # UPGRADE their CRs render before their crds/ (which helm skips on upgrade) - # are installed -> "ensure CRDs are installed first". Not needed here. + # Goldmane (flow aggregator) + Whisker (observability UI), new in Calico + # 3.30, are kept disabled IN HELM on purpose: on a helm UPGRADE their CRs + # render before their crds/ (which helm skips on upgrade) -> "ensure CRDs + # are installed first". We instead enable them via the operator CRs applied + # directly below (kubectl_manifest) now that the CRDs exist — see ADR-0014. goldmane = { enabled = false } whisker = { enabled = false } # 512Mi (was 256Mi): the operator idles at ~38Mi but its STARTUP spike @@ -170,3 +171,44 @@ resource "helm_release" "tigera_operator" { resources = { limits = { memory = "512Mi" } } })] } + +# --------------------------------------------------------------------------- +# Goldmane + Whisker (Calico 3.30 OSS flow observability) — ADR-0014. +# +# Enabled via the operator CRs directly (NOT the Helm goldmane/whisker flags, +# which stay false above): the goldmanes/whiskers.operator.tigera.io CRDs are +# already installed (operator adopted them 2026-06-19), so we sidestep the +# helm-upgrade "CRs render before crds/" ordering issue by applying the CRs +# ourselves — the running operator reconciles them. 
Same kubectl_manifest
+# pattern as the wave1 GNP above (no plan-time CRD requirement).
+#
+# Creating the Goldmane CR makes the operator re-render calico-node with the
+# FELIX_FLOWLOGSGOLDMANESERVER env (operator auto-wires Felix — do NOT patch
+# FelixConfiguration) => a supervised calico-node DaemonSet roll. Goldmane:
+# Deployment + Service goldmane:7443 (gRPC/mTLS) in calico-system. Whisker:
+# Deployment + Service whisker:8081 in calico-system; its backend dials
+# goldmane, so Goldmane must exist first (depends_on). notifications=Disabled
+# so the UI does not call the external Tigera notifications endpoint.
+#
+# NOTE: durable Loki persistence is NOT provided by these CRs. The Goldmane emitter is
+# Calico Cloud/Enterprise-gated (no OSS knob to aim it at Loki), so the trail
+# is a separate consumer of goldmane's gRPC Flows API (ADR-0014 / issue #58).
+# Whisker alone is a ~60-min in-memory live view. Reversible: delete these CRs to disable.
+resource "kubectl_manifest" "goldmane" {
+  depends_on = [helm_release.tigera_operator] # the operator from this Helm release reconciles the CR
+  yaml_body = yamlencode({
+    apiVersion = "operator.tigera.io/v1"
+    kind       = "Goldmane"
+    metadata   = { name = "default" } # no spec is set on this CR
+  })
+}
+
+resource "kubectl_manifest" "whisker" {
+  depends_on = [kubectl_manifest.goldmane] # Whisker's backend dials goldmane, so the Goldmane CR goes first
+  yaml_body = yamlencode({
+    apiVersion = "operator.tigera.io/v1"
+    kind       = "Whisker"
+    metadata   = { name = "default" }
+    spec       = { notifications = "Disabled" } # keeps the UI from calling the external Tigera notifications endpoint
+  })
+}