2023-11-24 19:04:44 +00:00
|
|
|
# values from - https://github.com/crowdsecurity/helm-charts/blob/main/charts/crowdsec/values.yaml
|
2023-11-24 11:08:25 +00:00
|
|
|
container_runtime: containerd
|
|
|
|
|
|
|
|
|
|
agent:
|
Right-size CPU requests cluster-wide and remove missed CPU limits
Increase requests for under-requested pods (dashy 50m→250m, frigate 500m→1500m,
clickhouse 100m→500m, otp 100m→300m, linkwarden 25m→50m, authentik worker 50m→100m).
Reduce requests for over-requested pods (crowdsec agent/lapi 500m→25m each,
prometheus 200m→100m, dbaas mysql 1800m→100m, pg-cluster 250m→50m,
shlink-web 250m→10m, gpu-pod-exporter 50m→10m, stirling-pdf 100m→25m,
technitium 100m→25m, celery 50m→15m). Reduce crowdsec quota from 8→1 CPU.
Remove missed CPU limits in prometheus (cpu: "2") and dbaas (cpu: "3600m") tpl files.
2026-03-14 09:22:24 +00:00
|
|
|
resources:
|
|
|
|
|
requests:
|
|
|
|
|
cpu: 25m
|
|
|
|
|
memory: 64Mi
|
|
|
|
|
limits:
|
|
|
|
|
memory: 512Mi
|
2026-02-21 19:18:15 +00:00
|
|
|
priorityClassName: "tier-1-cluster"
|
2023-11-24 11:08:25 +00:00
|
|
|
# Specify each pod whose logs you want to process (pods present on the node)
|
|
|
|
|
acquisition:
|
|
|
|
|
# The namespace where the pod is located
|
2026-02-11 22:25:03 +00:00
|
|
|
- namespace: traefik
|
2023-11-24 11:08:25 +00:00
|
|
|
# The pod name
|
2026-02-11 22:25:03 +00:00
|
|
|
podName: traefik-*
|
2023-11-24 11:08:25 +00:00
|
|
|
# As in the CrowdSec configuration, specify the program name so the parser can match and parse the logs
|
2026-02-11 22:25:03 +00:00
|
|
|
program: traefik
|
2023-11-24 11:08:25 +00:00
|
|
|
# Those are ENV variables
|
|
|
|
|
env:
|
|
|
|
|
# Uncomment the entry below to disable the Online API and stop sharing signals with CrowdSec.
|
2023-11-25 13:44:06 +00:00
|
|
|
# - name: DISABLE_ONLINE_API
|
|
|
|
|
# value: "true"
|
2026-02-11 22:25:03 +00:00
|
|
|
# As we are running Traefik, we want to install the Traefik collection
|
2023-11-24 11:08:25 +00:00
|
|
|
- name: COLLECTIONS
|
2026-02-11 22:25:03 +00:00
|
|
|
value: "crowdsecurity/traefik crowdsecurity/base-http-scenarios crowdsecurity/http-cve"
|
2025-11-29 11:37:20 +00:00
|
|
|
- name: SCENARIOS
|
|
|
|
|
value: ""
|
|
|
|
|
# value: "crowdsecurity/http-crawl-aggressive"
|
2025-10-13 20:12:37 +00:00
|
|
|
# Mount custom scenarios into /etc/crowdsec/scenarios
|
|
|
|
|
extraVolumeMounts:
|
|
|
|
|
# Mount the http-403-abuse scenario file from the custom-scenarios volume (read-only).
- name: custom-scenarios
|
|
|
|
|
mountPath: /etc/crowdsec/scenarios/http-403-abuse.yaml
|
|
|
|
|
subPath: "http-403-abuse.yaml"
|
|
|
|
|
# Kubernetes VolumeMount uses the camelCase field name `readOnly`.
readOnly: true
|
|
|
|
|
# Mount the http-429-abuse scenario file from the custom-scenarios volume (read-only).
- name: custom-scenarios
|
|
|
|
|
mountPath: /etc/crowdsec/scenarios/http-429-abuse.yaml
|
|
|
|
|
subPath: "http-429-abuse.yaml"
|
|
|
|
|
# Kubernetes VolumeMount uses the camelCase field name `readOnly`.
readOnly: true
|
2026-01-24 18:46:12 +00:00
|
|
|
# Mount whitelist.yaml from the whitelist volume into the s02-enrich parsers directory (read-only).
- name: whitelist
|
|
|
|
|
mountPath: /etc/crowdsec/parsers/s02-enrich/whitelist.yaml
|
|
|
|
|
subPath: "whitelist.yaml"
|
|
|
|
|
# Kubernetes VolumeMount uses the camelCase field name `readOnly`.
readOnly: true
|
2025-10-13 20:12:37 +00:00
|
|
|
extraVolumes:
|
|
|
|
|
- name: custom-scenarios
|
|
|
|
|
configMap:
|
|
|
|
|
name: crowdsec-custom-scenarios
|
2026-01-24 18:46:12 +00:00
|
|
|
- name: whitelist
|
|
|
|
|
configMap:
|
|
|
|
|
name: crowdsec-whitelist
|
2026-03-19 20:23:59 +00:00
|
|
|
podAnnotations:
|
|
|
|
|
dependency.kyverno.io/wait-for: "mysql.dbaas:3306"
|
|
|
|
|
|
2023-11-24 11:08:25 +00:00
|
|
|
lapi:
|
Right-size CPU requests cluster-wide and remove missed CPU limits
Increase requests for under-requested pods (dashy 50m→250m, frigate 500m→1500m,
clickhouse 100m→500m, otp 100m→300m, linkwarden 25m→50m, authentik worker 50m→100m).
Reduce requests for over-requested pods (crowdsec agent/lapi 500m→25m each,
prometheus 200m→100m, dbaas mysql 1800m→100m, pg-cluster 250m→50m,
shlink-web 250m→10m, gpu-pod-exporter 50m→10m, stirling-pdf 100m→25m,
technitium 100m→25m, celery 50m→15m). Reduce crowdsec quota from 8→1 CPU.
Remove missed CPU limits in prometheus (cpu: "2") and dbaas (cpu: "3600m") tpl files.
2026-03-14 09:22:24 +00:00
|
|
|
resources:
|
|
|
|
|
requests:
|
|
|
|
|
cpu: 25m
|
|
|
|
|
memory: 128Mi
|
|
|
|
|
limits:
|
|
|
|
|
memory: 1Gi
|
2026-03-14 12:47:56 +00:00
|
|
|
startupProbe:
|
|
|
|
|
httpGet:
|
|
|
|
|
path: /health
|
|
|
|
|
port: 8080
|
|
|
|
|
failureThreshold: 30
|
|
|
|
|
periodSeconds: 10
|
2026-02-21 19:18:15 +00:00
|
|
|
priorityClassName: "tier-1-cluster"
|
2025-08-31 19:21:26 +00:00
|
|
|
replicas: 3
|
resource quota review: fix OOM risks, close quota gaps, add HA protections
Phase 1 - OOM fixes:
- dashy: increase memory limit 512Mi→1Gi (was at 99% utilization)
- caretta DaemonSet: set explicit resources 300Mi/512Mi (was at 85-98%)
- mysql-operator: add Helm resource values 256Mi/512Mi, create namespace
with tier label (was at 92% of LimitRange default)
- prowlarr, flaresolverr, annas-archive-stacks: add explicit resources
(outgrowing 256Mi LimitRange defaults)
- real-estate-crawler celery: add resources 512Mi/3Gi (608Mi actual, no
explicit resources)
Phase 2 - Close quota gaps:
- nvidia, real-estate-crawler, trading-bot: remove custom-quota=true
labels so Kyverno generates tier-appropriate quotas
- descheduler: add tier=1-cluster label for proper classification
Phase 3 - Reduce excessive quotas:
- monitoring: limits.memory 240Gi→64Gi, limits.cpu 120→64
- woodpecker: limits.memory 128Gi→32Gi, limits.cpu 64→16
- GPU tier default: limits.memory 96Gi→32Gi, limits.cpu 48→16
Phase 4 - Kubelet protection:
- Add cpu: 200m to systemReserved and kubeReserved in kubelet template
Phase 5 - HA improvements:
- cloudflared: add topology spread (ScheduleAnyway) + PDB (maxUnavailable:1)
- grafana: add topology spread + PDB via Helm values
- crowdsec LAPI: add topology spread + PDB via Helm values
- authentik server: add topology spread via Helm values
- authentik worker: add topology spread + PDB via Helm values
2026-03-08 18:17:46 +00:00
|
|
|
topologySpreadConstraints:
|
|
|
|
|
- maxSkew: 1
|
|
|
|
|
topologyKey: kubernetes.io/hostname
|
|
|
|
|
whenUnsatisfiable: ScheduleAnyway
|
|
|
|
|
labelSelector:
|
|
|
|
|
matchLabels:
|
|
|
|
|
app.kubernetes.io/name: crowdsec
|
|
|
|
|
type: lapi
|
|
|
|
|
pdb:
|
|
|
|
|
enabled: true
|
|
|
|
|
maxUnavailable: 1
|
2025-08-31 15:20:57 +00:00
|
|
|
extraSecrets:
|
|
|
|
|
dbPassword: "${DB_PASSWORD}"
|
|
|
|
|
storeCAPICredentialsInSecret: true
|
|
|
|
|
persistentVolume:
|
|
|
|
|
config:
|
|
|
|
|
enabled: false
|
|
|
|
|
data:
|
|
|
|
|
enabled: false
|
2023-11-24 11:08:25 +00:00
|
|
|
env:
|
2025-08-31 15:20:57 +00:00
|
|
|
- name: ENROLL_KEY
|
|
|
|
|
value: "${ENROLL_KEY}"
|
|
|
|
|
- name: ENROLL_INSTANCE_NAME
|
|
|
|
|
value: "k8s-cluster"
|
|
|
|
|
- name: ENROLL_TAGS
|
|
|
|
|
value: "k8s linux"
|
|
|
|
|
- name: DB_PASSWORD
|
|
|
|
|
valueFrom:
|
|
|
|
|
secretKeyRef:
|
|
|
|
|
name: crowdsec-lapi-secrets
|
|
|
|
|
key: dbPassword
|
2023-11-24 11:08:25 +00:00
|
|
|
# Uncomment the entry below to disable the Online API and stop sharing signals with CrowdSec.
|
2023-11-25 13:44:06 +00:00
|
|
|
# - name: DISABLE_ONLINE_API
|
|
|
|
|
# value: "true"
|
2023-11-24 11:08:25 +00:00
|
|
|
dashboard:
|
|
|
|
|
enabled: true
|
2025-08-31 15:20:57 +00:00
|
|
|
env:
|
|
|
|
|
- name: MB_DB_TYPE
|
|
|
|
|
value: "mysql"
|
|
|
|
|
- name: MB_DB_DBNAME
|
|
|
|
|
value: crowdsec-metabase
|
|
|
|
|
- name: MB_DB_USER
|
|
|
|
|
value: "crowdsec"
|
|
|
|
|
- name: MB_DB_PASS
|
|
|
|
|
value: "${DB_PASSWORD}"
|
|
|
|
|
- name: MB_DB_HOST
|
[ci skip] Infrastructure hardening: security, monitoring, reliability, maintainability
Phase 1 - Critical Security:
- Netbox: move hardcoded DB/superuser passwords to variables
- MeshCentral: disable public registration, add Authentik auth
- Traefik: disable insecure API dashboard (api.insecure=false)
- Traefik: configure forwarded headers with Cloudflare trusted IPs
Phase 2 - Security Hardening:
- Add security headers middleware (HSTS, X-Frame-Options, nosniff, etc.)
- Add Kyverno pod security policies in audit mode (privileged, host
namespaces, SYS_ADMIN, trusted registries)
- Tighten rate limiting (avg=10, burst=50)
- Add Authentik protection to grampsweb
Phase 3 - Monitoring & Alerting:
- Add critical service alerts (PostgreSQL, MySQL, Redis, Headscale,
Authentik, Loki)
- Increase Loki retention from 7 to 30 days (720h)
- Add predictive PV filling alert (predict_linear)
- Re-enable Hackmd and Privatebin down alerts
Phase 4 - Reliability:
- Add resource requests/limits to Redis, DBaaS, Technitium, Headscale,
Vaultwarden, Uptime Kuma
- Increase Alloy DaemonSet memory to 512Mi/1Gi
Phase 6 - Maintainability:
- Extract duplicated tiers locals to terragrunt.hcl generate block
(removed from 67 stacks)
- Replace hardcoded NFS IP 10.0.10.15 with var.nfs_server (114
instances across 63 files)
- Replace hardcoded Redis/PostgreSQL/MySQL/Ollama/mail host references
with variables across ~35 stacks
- Migrate xray raw ingress resources to ingress_factory modules
2026-02-23 22:05:28 +00:00
|
|
|
value: "${mysql_host}"
|
2025-08-31 15:20:57 +00:00
|
|
|
|
|
|
|
|
- name: MB_EMAIL_SMTP_USERNAME
|
|
|
|
|
value: "info@viktorbarzin.me"
|
|
|
|
|
- name: MB_EMAIL_FROM_ADDRESS
|
|
|
|
|
value: "info@viktorbarzin.me"
|
|
|
|
|
- name: MB_EMAIL_SMTP_HOST
|
|
|
|
|
value: "mailserver.mailserver.svc.cluster.local"
|
|
|
|
|
- name: MB_EMAIL_SMTP_PASSWORD
|
|
|
|
|
value: "" # Ignore for now as it's unclear what notifications we can get
|
|
|
|
|
- name: MB_EMAIL_SMTP_PORT
|
|
|
|
|
value: "587"
|
|
|
|
|
- name: MB_EMAIL_SMTP_SECURITY
|
|
|
|
|
value: "starttls"
|
2023-11-24 19:04:44 +00:00
|
|
|
ingress:
|
|
|
|
|
enabled: true
|
|
|
|
|
annotations:
|
|
|
|
|
nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
|
2024-11-18 22:06:31 +00:00
|
|
|
#nginx.ingress.kubernetes.io/auth-url: "https://oauth2.viktorbarzin.me/oauth2/auth"
|
|
|
|
|
nginx.ingress.kubernetes.io/auth-url: "http://ak-outpost-authentik-embedded-outpost.authentik.svc.cluster.local:9000/outpost.goauthentik.io/auth/nginx"
|
|
|
|
|
# nginx.ingress.kubernetes.io/auth-signin: "https://oauth2.viktorbarzin.me/oauth2/start?rd=/redirect/$http_host$escaped_request_uri"
|
|
|
|
|
nginx.ingress.kubernetes.io/auth-signin: "https://authentik.viktorbarzin.me/outpost.goauthentik.io/start?rd=$scheme%3A%2F%2F$host$escaped_request_uri"
|
|
|
|
|
nginx.ingress.kubernetes.io/auth-response-headers: "Set-Cookie,X-authentik-username,X-authentik-groups,X-authentik-email,X-authentik-name,X-authentik-uid"
|
|
|
|
|
nginx.ingress.kubernetes.io/auth-snippet: "proxy_set_header X-Forwarded-Host $http_host;"
|
2024-10-20 12:19:12 +00:00
|
|
|
gethomepage.dev/enabled: "true"
|
|
|
|
|
gethomepage.dev/description: "Web Application Firewall"
|
|
|
|
|
gethomepage.dev/icon: "crowdsec.png"
|
|
|
|
|
gethomepage.dev/name: "CrowdSec"
|
2026-03-07 16:41:36 +00:00
|
|
|
gethomepage.dev/group: "Identity & Security"
|
2024-10-20 12:19:12 +00:00
|
|
|
gethomepage.dev/widget.type: "crowdsec"
|
|
|
|
|
gethomepage.dev/widget.url: "http://crowdsec-service.crowdsec.svc.cluster.local:8080"
|
|
|
|
|
gethomepage.dev/widget.username: "${homepage_username}"
|
|
|
|
|
gethomepage.dev/widget.password: "${homepage_password}"
|
|
|
|
|
gethomepage.dev/pod-selector: ""
|
2023-11-24 19:04:44 +00:00
|
|
|
ingressClassName: "nginx"
|
|
|
|
|
host: "crowdsec.viktorbarzin.me"
|
|
|
|
|
tls:
|
|
|
|
|
- hosts:
|
|
|
|
|
- crowdsec.viktorbarzin.me
|
|
|
|
|
secretName: "tls-secret"
|
2023-11-25 13:03:25 +00:00
|
|
|
metrics:
|
|
|
|
|
enabled: true
|
|
|
|
|
strategy:
|
|
|
|
|
type: RollingUpdate
|
2025-08-31 15:20:57 +00:00
|
|
|
|
|
|
|
|
config:
|
2026-01-24 18:46:12 +00:00
|
|
|
# Custom profiles: captcha for rate limiting, ban for attacks
|
|
|
|
|
profiles.yaml: |
|
|
|
|
|
# Captcha for rate limiting and 403 abuse (user can unblock themselves)
|
|
|
|
|
name: captcha_remediation
|
|
|
|
|
filters:
|
|
|
|
|
- Alert.Remediation == true && Alert.GetScope() == "Ip" && Alert.GetScenario() in ["crowdsecurity/http-429-abuse", "crowdsecurity/http-403-abuse", "crowdsecurity/http-crawl-non_statics", "crowdsecurity/http-sensitive-files"]
|
|
|
|
|
decisions:
|
|
|
|
|
- type: captcha
|
|
|
|
|
duration: 4h
|
2026-02-11 22:25:03 +00:00
|
|
|
notifications:
|
|
|
|
|
- slack_alerts
|
2026-01-24 18:46:12 +00:00
|
|
|
on_success: break
|
|
|
|
|
---
|
|
|
|
|
# Default: Ban for serious attacks (CVE exploits, scanners, brute force)
|
|
|
|
|
name: default_ip_remediation
|
|
|
|
|
filters:
|
|
|
|
|
- Alert.Remediation == true && Alert.GetScope() == "Ip"
|
|
|
|
|
decisions:
|
|
|
|
|
- type: ban
|
|
|
|
|
duration: 4h
|
2026-02-11 22:25:03 +00:00
|
|
|
notifications:
|
|
|
|
|
- slack_alerts
|
2026-01-24 18:46:12 +00:00
|
|
|
on_success: break
|
|
|
|
|
---
|
|
|
|
|
name: default_range_remediation
|
|
|
|
|
filters:
|
|
|
|
|
- Alert.Remediation == true && Alert.GetScope() == "Range"
|
|
|
|
|
decisions:
|
|
|
|
|
- type: ban
|
|
|
|
|
duration: 4h
|
2026-02-11 22:25:03 +00:00
|
|
|
notifications:
|
|
|
|
|
- slack_alerts
|
2026-01-24 18:46:12 +00:00
|
|
|
on_success: break
|
|
|
|
|
|
2025-08-31 15:20:57 +00:00
|
|
|
config.yaml.local: |
|
|
|
|
|
db_config:
|
|
|
|
|
type: mysql
|
|
|
|
|
user: crowdsec
|
|
|
|
|
# Quoted so a substituted password containing ' #', ': ' or a leading YAML indicator stays a single string.
password: "${DB_PASSWORD}"
|
|
|
|
|
db_name: crowdsec
|
[ci skip] Infrastructure hardening: security, monitoring, reliability, maintainability
Phase 1 - Critical Security:
- Netbox: move hardcoded DB/superuser passwords to variables
- MeshCentral: disable public registration, add Authentik auth
- Traefik: disable insecure API dashboard (api.insecure=false)
- Traefik: configure forwarded headers with Cloudflare trusted IPs
Phase 2 - Security Hardening:
- Add security headers middleware (HSTS, X-Frame-Options, nosniff, etc.)
- Add Kyverno pod security policies in audit mode (privileged, host
namespaces, SYS_ADMIN, trusted registries)
- Tighten rate limiting (avg=10, burst=50)
- Add Authentik protection to grampsweb
Phase 3 - Monitoring & Alerting:
- Add critical service alerts (PostgreSQL, MySQL, Redis, Headscale,
Authentik, Loki)
- Increase Loki retention from 7 to 30 days (720h)
- Add predictive PV filling alert (predict_linear)
- Re-enable Hackmd and Privatebin down alerts
Phase 4 - Reliability:
- Add resource requests/limits to Redis, DBaaS, Technitium, Headscale,
Vaultwarden, Uptime Kuma
- Increase Alloy DaemonSet memory to 512Mi/1Gi
Phase 6 - Maintainability:
- Extract duplicated tiers locals to terragrunt.hcl generate block
(removed from 67 stacks)
- Replace hardcoded NFS IP 10.0.10.15 with var.nfs_server (114
instances across 63 files)
- Replace hardcoded Redis/PostgreSQL/MySQL/Ollama/mail host references
with variables across ~35 stacks
- Migrate xray raw ingress resources to ingress_factory modules
2026-02-23 22:05:28 +00:00
|
|
|
host: ${mysql_host}
|
2025-08-31 15:20:57 +00:00
|
|
|
port: 3306
|
|
|
|
|
api:
|
|
|
|
|
server:
|
|
|
|
|
auto_registration: # Activate if not using TLS for authentication
|
|
|
|
|
enabled: true
|
|
|
|
|
token: "$${REGISTRATION_TOKEN}" # /!\ do not change
|
|
|
|
|
allowed_ranges: # /!\ adapt to the pod IP ranges used by your cluster
|
|
|
|
|
- "127.0.0.1/32"
|
|
|
|
|
- "192.168.0.0/16"
|
|
|
|
|
- "10.0.0.0/8"
|
|
|
|
|
- "172.16.0.0/12"
|
2026-02-11 22:25:03 +00:00
|
|
|
|
|
|
|
|
notifications:
|
|
|
|
|
slack.yaml: |
|
|
|
|
|
type: slack
|
|
|
|
|
name: slack_alerts
|
|
|
|
|
log_level: info
|
|
|
|
|
format: |
|
|
|
|
|
:rotating_light: *CrowdSec Alert*
|
|
|
|
|
{{range .}}
|
|
|
|
|
*Scenario:* {{.Alert.Scenario}}
|
|
|
|
|
*Source IP:* {{.Alert.Source.IP}} ({{.Alert.Source.Cn}})
|
|
|
|
|
*Decisions:*
|
|
|
|
|
{{range .Alert.Decisions}} - {{.Type}} for {{.Duration}} (scope: {{.Scope}}, value: {{.Value}})
|
|
|
|
|
{{end}}
|
|
|
|
|
{{end}}
|
|
|
|
|
webhook: ${SLACK_WEBHOOK_URL}
|