Phase 1 - OOM fixes:
- dashy: increase memory limit 512Mi→1Gi (was at 99% utilization)
- caretta DaemonSet: set explicit resources 300Mi/512Mi (was at 85-98%)
- mysql-operator: add Helm resource values 256Mi/512Mi, create namespace with tier label (was at 92% of LimitRange default)
- prowlarr, flaresolverr, annas-archive-stacks: add explicit resources (outgrowing 256Mi LimitRange defaults)
- real-estate-crawler celery: add resources 512Mi/3Gi (608Mi actual, no explicit resources)

Phase 2 - Close quota gaps:
- nvidia, real-estate-crawler, trading-bot: remove custom-quota=true labels so Kyverno generates tier-appropriate quotas
- descheduler: add tier=1-cluster label for proper classification

Phase 3 - Reduce excessive quotas:
- monitoring: limits.memory 240Gi→64Gi, limits.cpu 120→64
- woodpecker: limits.memory 128Gi→32Gi, limits.cpu 64→16
- GPU tier default: limits.memory 96Gi→32Gi, limits.cpu 48→16

Phase 4 - Kubelet protection:
- Add cpu: 200m to systemReserved and kubeReserved in kubelet template

Phase 5 - HA improvements:
- cloudflared: add topology spread (ScheduleAnyway) + PDB (maxUnavailable:1)
- grafana: add topology spread + PDB via Helm values
- crowdsec LAPI: add topology spread + PDB via Helm values
- authentik server: add topology spread via Helm values
- authentik worker: add topology spread + PDB via Helm values
131 lines
2.7 KiB
HCL
131 lines
2.7 KiB
HCL
# Contents for cloudflare tunnel
|
|
|
|
# Forwarded unchanged to the tls_secret module below.
variable "tls_secret_name" {
  description = "Name of the TLS secret passed to the setup_tls_secret module for the cloudflared namespace."
  type        = string
}
|
|
# Credential for the tunnel; marked sensitive so Terraform redacts it in
# plan/apply output. (It is still stored in state.)
variable "cloudflare_tunnel_token" {
  description = "Cloudflare Tunnel token exposed to the cloudflared container as the TUNNEL_TOKEN environment variable."
  type        = string
  sensitive   = true
}
|
|
# Namespace that holds every cloudflared object in this file.
# The "tier" label is taken from var.tier.
resource "kubernetes_namespace" "cloudflared" {
  metadata {
    name   = "cloudflared"
    labels = { tier = var.tier }
  }
}
|
|
# Value applied as the "tier" label on the namespace and on the Deployment.
variable "tier" {
  type = string
}
|
|
|
|
# Instantiates the shared setup_tls_secret module against the cloudflared
# namespace, using the secret name supplied by the caller.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.cloudflared.metadata[0].name
}
|
|
|
|
# Stores the tunnel token as a Kubernetes Secret so it is not embedded in
# plaintext in the Deployment / Pod spec, where anyone with read access to
# Deployments or Pods could see it.
resource "kubernetes_secret" "cloudflared_tunnel_token" {
  metadata {
    name      = "cloudflared-tunnel-token"
    namespace = kubernetes_namespace.cloudflared.metadata[0].name
  }

  data = {
    token = var.cloudflare_tunnel_token
  }
}

# Runs three cloudflared replicas ("cloudflared tunnel run"), preferring to
# place them on different nodes.
resource "kubernetes_deployment" "cloudflared" {
  metadata {
    name      = "cloudflared"
    namespace = kubernetes_namespace.cloudflared.metadata[0].name
    labels = {
      app  = "cloudflared"
      tier = var.tier
    }
    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    replicas = 3

    strategy {
      type = "RollingUpdate"
    }

    selector {
      match_labels = {
        app = "cloudflared"
      }
    }

    template {
      metadata {
        labels = {
          app = "cloudflared"
        }
        annotations = {
          # The token is now read from a Secret, so a token change no longer
          # alters the pod spec by itself. This hash changes with the token and
          # keeps the previous behaviour of rolling the pods on rotation.
          "checksum/tunnel-token" = sha256(var.cloudflare_tunnel_token)
        }
      }

      spec {
        # Soft constraint: spread replicas across nodes by hostname, but still
        # schedule when an even spread is not possible.
        topology_spread_constraint {
          max_skew           = 1
          topology_key       = "kubernetes.io/hostname"
          when_unsatisfiable = "ScheduleAnyway"

          label_selector {
            match_labels = {
              app = "cloudflared"
            }
          }
        }

        container {
          name = "cloudflared"
          # image = "wisdomsky/cloudflared-web:latest"
          # NOTE(review): no tag is given, so this resolves to whatever the
          # registry serves by default; consider pinning a version.
          image   = "cloudflare/cloudflared"
          command = ["cloudflared", "tunnel", "run"]

          # Token is injected from the Secret defined above rather than as a
          # literal value.
          env {
            name = "TUNNEL_TOKEN"
            value_from {
              secret_key_ref {
                name = kubernetes_secret.cloudflared_tunnel_token.metadata[0].name
                key  = "token"
              }
            }
          }

          # NOTE(review): nothing in this file configures cloudflared to listen
          # on 14333; presumably a leftover from the commented-out
          # cloudflared-web image. Confirm the port is actually served.
          port {
            container_port = 14333
          }

          resources {
            requests = {
              cpu    = "15m"
              memory = "32Mi"
            }
            limits = {
              cpu    = "200m"
              memory = "256Mi"
            }
          }
        }

        # Sets the resolver's ndots option to 2 for these pods.
        dns_config {
          option {
            name  = "ndots"
            value = "2"
          }
        }
      }
    }
  }
}
|
|
|
|
# Disruption budget for pods labelled app=cloudflared: at most one may be
# unavailable at a time during voluntary disruptions.
resource "kubernetes_pod_disruption_budget_v1" "cloudflared" {
  metadata {
    namespace = kubernetes_namespace.cloudflared.metadata[0].name
    name      = "cloudflared"
  }

  spec {
    selector {
      match_labels = { app = "cloudflared" }
    }

    max_unavailable = "1"
  }
}
|
|
|
|
# Service in front of the app=cloudflared pods: TCP port 80 ("http") is
# forwarded to container port 14333.
resource "kubernetes_service" "cloudflared" {
  metadata {
    namespace = kubernetes_namespace.cloudflared.metadata[0].name
    name      = "cloudflared"
    labels    = { app = "cloudflared" }
  }

  spec {
    selector = { app = "cloudflared" }

    port {
      name        = "http"
      protocol    = "TCP"
      port        = 80
      target_port = 14333
    }
  }
}
|
|
|