infra/stacks/platform/modules/vpa/main.tf
Viktor Barzin 28ac1382d1 Remove all CPU limits cluster-wide to eliminate CFS throttling
CPU limits cause CFS throttling even when nodes have idle capacity.
Move to a request-only CPU model: keep CPU requests for scheduling
fairness but remove all CPU limits. Memory limits stay (incompressible).

Changes across 108 files:
- Kyverno LimitRange policy: remove cpu from default/max in all 6 tiers
- Kyverno ResourceQuota policy: remove limits.cpu from all 5 tiers
- Custom ResourceQuotas: remove limits.cpu from 8 namespace quotas
- Custom LimitRanges: remove cpu from default/max (nextcloud, onlyoffice)
- RBAC module: remove cpu_limits variable and quota reference
- Freedify factory: remove cpu_limit variable and limits reference
- 86 deployment files: remove cpu from all limits blocks
- 6 Helm values files: remove cpu under limits sections
2026-03-18 08:03:58 +00:00

146 lines
4.2 KiB
HCL

# Consumed by module.tls_secret and module.ingress in this file.
variable "tls_secret_name" {
  description = "Name of the TLS secret passed to the setup_tls_secret module for the vpa namespace and to the Goldilocks ingress."
  type        = string
  sensitive   = true # kept as in the original; redacts the value from plan/apply output
}
# Consumed by kubernetes_namespace.vpa, which copies it into the namespace labels.
variable "tier" {
  description = "Value applied as the `tier` label on the vpa namespace."
  type        = string
}
# Namespace that hosts both Helm releases in this module (VPA and Goldilocks).
# The only label set here is `tier`, taken verbatim from var.tier.
resource "kubernetes_namespace" "vpa" {
  metadata {
    name   = "vpa"
    labels = { tier = var.tier }
  }
}
# Calls the shared setup_tls_secret module for the vpa namespace.
# The namespace is referenced through the resource (not the literal "vpa") so
# Terraform orders this module after the namespace has been created.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.vpa.metadata[0].name
}
# -----------------------------------------------------------------------------
# VPA — Vertical Pod Autoscaler (Fairwinds Helm chart)
# -----------------------------------------------------------------------------
resource "helm_release" "vpa" {
  # Chart coordinates.
  # NOTE(review): no chart `version` is pinned, so the release tracks whatever
  # the repository currently serves — confirm this is intentional.
  name       = "vpa"
  repository = "https://charts.fairwinds.com/stable"
  chart      = "vpa"

  # Install target: the namespace is owned by kubernetes_namespace.vpa, so Helm
  # must not try to create it.
  namespace        = kubernetes_namespace.vpa.metadata[0].name
  create_namespace = false
  atomic           = true

  # All three VPA components are switched on explicitly.
  values = [yamlencode({
    recommender         = { enabled = true }
    updater             = { enabled = true }
    admissionController = { enabled = true }
  })]
}
# -----------------------------------------------------------------------------
# Goldilocks — VPA dashboard (Fairwinds Helm chart)
# -----------------------------------------------------------------------------
resource "helm_release" "goldilocks" {
  # Chart coordinates.
  # NOTE(review): no chart `version` is pinned — confirm this is intentional.
  name       = "goldilocks"
  repository = "https://charts.fairwinds.com/stable"
  chart      = "goldilocks"

  # Shares the vpa namespace, which is managed by kubernetes_namespace.vpa.
  namespace        = kubernetes_namespace.vpa.metadata[0].name
  create_namespace = false
  atomic           = true

  # Both the controller and the dashboard receive the on-by-default flag; the
  # value is the string "true", not a boolean.
  values = [yamlencode({
    controller = {
      flags = { on-by-default = "true" }
    }
    dashboard = {
      replicaCount = 1
      flags        = { on-by-default = "true" }
    }
  })]

  # Installed only after the VPA release is in place.
  depends_on = [helm_release.vpa]
}
# -----------------------------------------------------------------------------
# Ingress — Goldilocks dashboard at goldilocks.viktorbarzin.me
# -----------------------------------------------------------------------------
module "ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  # Routes to the goldilocks-dashboard service on port 80 in the vpa namespace.
  name            = "goldilocks"
  namespace       = kubernetes_namespace.vpa.metadata[0].name
  service_name    = "goldilocks-dashboard"
  port            = 80
  tls_secret_name = var.tls_secret_name

  # NOTE(review): presumably places the dashboard behind authentication —
  # confirm against the ingress_factory module.
  protected = true

  # gethomepage.dev/* annotations describing the tile shown for this service.
  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/name"         = "Goldilocks"
    "gethomepage.dev/description"  = "Resource recommendations"
    "gethomepage.dev/icon"         = "mdi-scale-balance"
    "gethomepage.dev/group"        = "Core Platform"
    "gethomepage.dev/pod-selector" = ""
  }

  # The backing service comes from the Goldilocks Helm release.
  depends_on = [helm_release.goldilocks]
}
# -----------------------------------------------------------------------------
# Kyverno policy — label namespaces for VPA observe-only mode
# -----------------------------------------------------------------------------
# Goldilocks reads the goldilocks.fairwinds.com/vpa-update-mode label on
# namespaces to decide the updateMode for VPA objects it creates.
# All namespaces get "off" — Terraform is the authoritative source of truth
# for container resources. Goldilocks provides recommendations only.
# NOTE(review): the Terraform address and the policy name still say "auto mode"
# although the label value written below is "off". Both identifiers are kept
# unchanged here because renaming either would destroy and recreate the object.
resource "kubernetes_manifest" "vpa_auto_mode_label" {
  manifest = {
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"

    metadata = {
      name = "goldilocks-vpa-auto-mode"
      annotations = {
        "policies.kyverno.io/title"       = "Goldilocks VPA Observe-Only Mode"
        "policies.kyverno.io/description" = "Sets VPA update mode to off for all namespaces. Terraform owns container resources; Goldilocks provides recommendations only."
      }
    }

    spec = {
      rules = [
        {
          name = "label-vpa-off-all"

          # Matches every Namespace object; no name or label selector is applied.
          match = {
            any = [
              { resources = { kinds = ["Namespace"] } },
            ]
          }

          # Strategic-merge patch that sets the Goldilocks update-mode label to "off".
          mutate = {
            patchStrategicMerge = {
              metadata = {
                labels = {
                  "goldilocks.fairwinds.com/vpa-update-mode" = "off"
                }
              }
            }
          }
        },
      ]
    }
  }

  depends_on = [helm_release.goldilocks]
}