From 2102cb2d737794a8a59ed1ee89f61b0cd9dc69b8 Mon Sep 17 00:00:00 2001
From: Viktor Barzin <viktorbarzin@meta.com>
Date: Sat, 14 Mar 2026 09:22:24 +0000
Subject: [PATCH] Right-size CPU requests cluster-wide and remove missed CPU
 limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Increase CPU requests for under-requested pods (dashy 50m→250m,
frigate 500m→1500m, rybbit clickhouse 100m→500m, osm_routing otp 100m→300m,
linkwarden 25m→50m, authentik worker 50m→100m).

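Reduce CPU requests for over-requested pods (crowdsec agent/lapi 500m→25m each,
prometheus 200m→100m, dbaas mysql 1800m→100m, pg-cluster 250m→50m,
shlink-web 250m→10m, gpu-pod-exporter 50m→10m, stirling-pdf 100m→25m,
technitium and technitium_secondary 100m→25m each, celery 50m→15m).
Reduce the crowdsec ResourceQuota requests.cpu from 8 to 1 CPU.

The crowdsec agent and lapi values now carry explicit resources blocks, which
also set memory requests/limits (agent 64Mi/512Mi, lapi 128Mi/1Gi).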

Remove the remaining CPU limits from the prometheus (cpu: "2") and dbaas
(cpu: "3600m") chart values .tpl files.
---
 stacks/dashy/main.tf                                 |  2 +-
 stacks/frigate/main.tf                               |  2 +-
 stacks/linkwarden/main.tf                            |  2 +-
 stacks/osm_routing/main.tf                           |  2 +-
 stacks/platform/modules/authentik/values.yaml        |  2 +-
 stacks/platform/modules/crowdsec/main.tf             |  2 +-
 stacks/platform/modules/crowdsec/values.yaml         | 12 ++++++++++++
 stacks/platform/modules/dbaas/chart_values.tpl       |  3 +--
 stacks/platform/modules/dbaas/main.tf                |  2 +-
 .../modules/monitoring/prometheus_chart_values.tpl   |  3 +--
 stacks/platform/modules/nvidia/main.tf               |  2 +-
 stacks/platform/modules/technitium/ha.tf             |  2 +-
 stacks/platform/modules/technitium/main.tf           |  2 +-
 stacks/real-estate-crawler/main.tf                   |  2 +-
 stacks/rybbit/main.tf                                |  2 +-
 stacks/stirling-pdf/main.tf                          |  2 +-
 stacks/url/main.tf                                   |  2 +-
 17 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/stacks/dashy/main.tf b/stacks/dashy/main.tf
index 2dbc570b..03a4a67b 100644
--- a/stacks/dashy/main.tf
+++ b/stacks/dashy/main.tf
@@ -70,7 +70,7 @@ resource "kubernetes_deployment" "dashy" {
 
           resources {
             requests = {
-              cpu    = "50m"
+              cpu    = "250m"
               memory = "512Mi"
             }
             limits = {
diff --git a/stacks/frigate/main.tf b/stacks/frigate/main.tf
index 26a0a63d..0e44d5bb 100644
--- a/stacks/frigate/main.tf
+++ b/stacks/frigate/main.tf
@@ -85,7 +85,7 @@ resource "kubernetes_deployment" "frigate" {
 
           resources {
             requests = {
-              cpu    = "500m"
+              cpu    = "1500m"
               memory = "2Gi"
             }
             limits = {
diff --git a/stacks/linkwarden/main.tf b/stacks/linkwarden/main.tf
index f53e0e56..e3a65d1f 100644
--- a/stacks/linkwarden/main.tf
+++ b/stacks/linkwarden/main.tf
@@ -106,7 +106,7 @@ resource "kubernetes_deployment" "linkwarden" {
           }
           resources {
             requests = {
-              cpu    = "25m"
+              cpu    = "50m"
               memory = "256Mi"
             }
             limits = {
diff --git a/stacks/osm_routing/main.tf b/stacks/osm_routing/main.tf
index 18da7ffc..a1708c50 100644
--- a/stacks/osm_routing/main.tf
+++ b/stacks/osm_routing/main.tf
@@ -254,7 +254,7 @@ resource "kubernetes_deployment" "otp" {
           }
           resources {
             requests = {
-              cpu    = "100m"
+              cpu    = "300m"
               memory = "2Gi"
             }
             limits = {
diff --git a/stacks/platform/modules/authentik/values.yaml b/stacks/platform/modules/authentik/values.yaml
index e542c8f7..cdcee927 100644
--- a/stacks/platform/modules/authentik/values.yaml
+++ b/stacks/platform/modules/authentik/values.yaml
@@ -47,7 +47,7 @@ worker:
   replicas: 3
   resources:
     requests:
-      cpu: 50m
+      cpu: 100m
       memory: 384Mi
     limits:
       memory: 1Gi
diff --git a/stacks/platform/modules/crowdsec/main.tf b/stacks/platform/modules/crowdsec/main.tf
index cec55b6e..9a060f4b 100644
--- a/stacks/platform/modules/crowdsec/main.tf
+++ b/stacks/platform/modules/crowdsec/main.tf
@@ -365,7 +365,7 @@ resource "kubernetes_resource_quota" "crowdsec" {
   }
   spec {
     hard = {
-      "requests.cpu"    = "8"
+      "requests.cpu"    = "1"
       "requests.memory" = "8Gi"
       "limits.memory"   = "16Gi"
       pods              = "30"
diff --git a/stacks/platform/modules/crowdsec/values.yaml b/stacks/platform/modules/crowdsec/values.yaml
index b7b016d9..95593a33 100644
--- a/stacks/platform/modules/crowdsec/values.yaml
+++ b/stacks/platform/modules/crowdsec/values.yaml
@@ -2,6 +2,12 @@
 container_runtime: containerd
 
 agent:
+  resources:
+    requests:
+      cpu: 25m
+      memory: 64Mi
+    limits:
+      memory: 512Mi
   priorityClassName: "tier-1-cluster"
   # To specify each pod you want to process it logs (pods present in the node)
   acquisition:
@@ -44,6 +50,12 @@ agent:
       configMap:
         name: crowdsec-whitelist
 lapi:
+  resources:
+    requests:
+      cpu: 25m
+      memory: 128Mi
+    limits:
+      memory: 1Gi
   priorityClassName: "tier-1-cluster"
   replicas: 3
   topologySpreadConstraints:
diff --git a/stacks/platform/modules/dbaas/chart_values.tpl b/stacks/platform/modules/dbaas/chart_values.tpl
index 438ddc0e..06062b62 100644
--- a/stacks/platform/modules/dbaas/chart_values.tpl
+++ b/stacks/platform/modules/dbaas/chart_values.tpl
@@ -11,7 +11,6 @@ podSpec:
    resources:
      requests:
        memory: "1024Mi"  # adapt to your needs
-       cpu: "1800m"      # adapt to your needs
+       cpu: "100m"       # adapt to your needs
      limits:
        memory: "2048Mi"  # adapt to your needs
-       cpu: "3600m"      # adapt to your needs
diff --git a/stacks/platform/modules/dbaas/main.tf b/stacks/platform/modules/dbaas/main.tf
index 8c01763b..66bd60ff 100644
--- a/stacks/platform/modules/dbaas/main.tf
+++ b/stacks/platform/modules/dbaas/main.tf
@@ -867,7 +867,7 @@ resource "null_resource" "pg_cluster" {
           storageClass: iscsi-truenas
         resources:
           requests:
-            cpu: "250m"
+            cpu: "50m"
             memory: "512Mi"
           limits:
             memory: "4Gi"
diff --git a/stacks/platform/modules/monitoring/prometheus_chart_values.tpl b/stacks/platform/modules/monitoring/prometheus_chart_values.tpl
index 511a9475..1de71cb1 100755
--- a/stacks/platform/modules/monitoring/prometheus_chart_values.tpl
+++ b/stacks/platform/modules/monitoring/prometheus_chart_values.tpl
@@ -145,10 +145,9 @@ server:
   retention: "52w"
   resources:
     requests:
-      cpu: 200m
+      cpu: 100m
       memory: 1Gi
     limits:
-      cpu: "2"
       memory: 4Gi
   strategy:
     type: Recreate
diff --git a/stacks/platform/modules/nvidia/main.tf b/stacks/platform/modules/nvidia/main.tf
index 7dc9ed42..b01cef08 100644
--- a/stacks/platform/modules/nvidia/main.tf
+++ b/stacks/platform/modules/nvidia/main.tf
@@ -613,7 +613,7 @@ resource "kubernetes_daemonset" "gpu_pod_exporter" {
 
           resources {
             requests = {
-              cpu    = "50m"
+              cpu    = "10m"
               memory = "128Mi"
             }
             limits = {
diff --git a/stacks/platform/modules/technitium/ha.tf b/stacks/platform/modules/technitium/ha.tf
index 0bab6b15..1cab8289 100644
--- a/stacks/platform/modules/technitium/ha.tf
+++ b/stacks/platform/modules/technitium/ha.tf
@@ -105,7 +105,7 @@ resource "kubernetes_deployment" "technitium_secondary" {
           }
           resources {
             requests = {
-              cpu    = "100m"
+              cpu    = "25m"
               memory = "128Mi"
             }
             limits = {
diff --git a/stacks/platform/modules/technitium/main.tf b/stacks/platform/modules/technitium/main.tf
index 6fc0cb06..cd959115 100644
--- a/stacks/platform/modules/technitium/main.tf
+++ b/stacks/platform/modules/technitium/main.tf
@@ -165,7 +165,7 @@ resource "kubernetes_deployment" "technitium" {
           name  = "technitium"
           resources {
             requests = {
-              cpu    = "100m"
+              cpu    = "25m"
               memory = "128Mi"
             }
             limits = {
diff --git a/stacks/real-estate-crawler/main.tf b/stacks/real-estate-crawler/main.tf
index 22ae4d9f..b891e973 100644
--- a/stacks/real-estate-crawler/main.tf
+++ b/stacks/real-estate-crawler/main.tf
@@ -321,7 +321,7 @@ resource "kubernetes_deployment" "realestate-crawler-celery" {
           command           = ["python", "-m", "celery", "-A", "celery_app", "worker", "--loglevel=info", "--pool=threads"]
           resources {
             requests = {
-              cpu    = "50m"
+              cpu    = "15m"
               memory = "512Mi"
             }
             limits = {
diff --git a/stacks/rybbit/main.tf b/stacks/rybbit/main.tf
index 587cf02c..b300c8cd 100644
--- a/stacks/rybbit/main.tf
+++ b/stacks/rybbit/main.tf
@@ -115,7 +115,7 @@ resource "kubernetes_deployment" "clickhouse" {
           }
           resources {
             requests = {
-              cpu    = "100m"
+              cpu    = "500m"
               memory = "512Mi"
             }
             limits = {
diff --git a/stacks/stirling-pdf/main.tf b/stacks/stirling-pdf/main.tf
index 47b9c2ca..d04da374 100644
--- a/stacks/stirling-pdf/main.tf
+++ b/stacks/stirling-pdf/main.tf
@@ -57,7 +57,7 @@ resource "kubernetes_deployment" "stirling-pdf" {
           name  = "stirling-pdf"
           resources {
             requests = {
-              cpu    = "100m"
+              cpu    = "25m"
               memory = "512Mi"
             }
             limits = {
diff --git a/stacks/url/main.tf b/stacks/url/main.tf
index 2416d41a..c2110a20 100644
--- a/stacks/url/main.tf
+++ b/stacks/url/main.tf
@@ -283,7 +283,7 @@ resource "kubernetes_deployment" "shlink-web" {
               memory = "512Mi"
             }
             requests = {
-              cpu    = "250m"
+              cpu    = "10m"
               memory = "50Mi"
             }
           }