fix: cluster healthcheck fixes + Authentik upgrade to 2026.2.2
- Authentik: upgrade 2025.10.3 → 2025.12.4 → 2026.2.2 with DB restore and stepped migration. Switch to existingSecret, PgBouncer session mode.
- Mailserver: migrate email roundtrip probe from Mailgun to Brevo API
- Redis: fix HAProxy tcp-check regex (rstring), faster health intervals
- Nextcloud: fix Redis fallback to HAProxy service, update dependency
- MeshCentral: fix TLSOffload + certUrl init container for first-run
- Monitoring: remove authentik from latency alert exclusion
- Diun: simplify to webhook notifier, remove git auto-update

[ci skip]

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d31bbc9a18
commit
bd41bb9230
11 changed files with 115 additions and 282 deletions
|
|
@ -40,8 +40,8 @@ graph TB
|
||||||
|
|
||||||
| Component | Version | Location | Purpose |
|
| Component | Version | Location | Purpose |
|
||||||
|-----------|---------|----------|---------|
|
|-----------|---------|----------|---------|
|
||||||
| Authentik Server | Latest | `stacks/authentik/` | Core IdP application servers (3 replicas) |
|
| Authentik Server | 2026.2.2 | `stacks/authentik/` | Core IdP application servers (2 replicas) |
|
||||||
| Authentik Worker | Latest | `stacks/authentik/` | Background task processors (3 replicas) |
|
| Authentik Worker | 2026.2.2 | `stacks/authentik/` | Background task processors (2 replicas) |
|
||||||
| PgBouncer | Latest | `stacks/authentik/` | PostgreSQL connection pooler (3 replicas) |
|
| PgBouncer | Latest | `stacks/authentik/` | PostgreSQL connection pooler (3 replicas) |
|
||||||
| Embedded Outpost | - | Built into Authentik | Forward auth endpoint for Traefik |
|
| Embedded Outpost | - | Built into Authentik | Forward auth endpoint for Traefik |
|
||||||
| Traefik ForwardAuth | - | `ingress_factory` module | Middleware for protected ingresses |
|
| Traefik ForwardAuth | - | `ingress_factory` module | Middleware for protected ingresses |
|
||||||
|
|
|
||||||
|
|
@ -55,21 +55,23 @@ resource "helm_release" "authentik" {
|
||||||
|
|
||||||
repository = "https://charts.goauthentik.io/"
|
repository = "https://charts.goauthentik.io/"
|
||||||
chart = "authentik"
|
chart = "authentik"
|
||||||
# version = "2025.8.1"
|
# version = "2025.10.3"
|
||||||
version = "2025.10.3"
|
# version = "2025.12.4"
|
||||||
|
version = "2026.2.2"
|
||||||
atomic = true
|
atomic = true
|
||||||
timeout = 6000
|
timeout = 6000
|
||||||
|
|
||||||
values = [templatefile("${path.module}/values.yaml", { postgres_password = var.postgres_password, secret_key = var.secret_key, redis_host = var.redis_host })]
|
values = [templatefile("${path.module}/values.yaml", { postgres_password = var.postgres_password, secret_key = var.secret_key })]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
module "ingress" {
|
module "ingress" {
|
||||||
source = "../../../../modules/kubernetes/ingress_factory"
|
source = "../../../../modules/kubernetes/ingress_factory"
|
||||||
namespace = kubernetes_namespace.authentik.metadata[0].name
|
namespace = kubernetes_namespace.authentik.metadata[0].name
|
||||||
name = "authentik"
|
name = "authentik"
|
||||||
service_name = "goauthentik-server"
|
service_name = "goauthentik-server"
|
||||||
tls_secret_name = var.tls_secret_name
|
tls_secret_name = var.tls_secret_name
|
||||||
|
anti_ai_scraping = false
|
||||||
extra_annotations = {
|
extra_annotations = {
|
||||||
"gethomepage.dev/enabled" = "true"
|
"gethomepage.dev/enabled" = "true"
|
||||||
"gethomepage.dev/name" = "Authentik"
|
"gethomepage.dev/name" = "Authentik"
|
||||||
|
|
@ -84,12 +86,14 @@ module "ingress" {
|
||||||
}
|
}
|
||||||
|
|
||||||
module "ingress-outpost" {
|
module "ingress-outpost" {
|
||||||
source = "../../../../modules/kubernetes/ingress_factory"
|
source = "../../../../modules/kubernetes/ingress_factory"
|
||||||
namespace = kubernetes_namespace.authentik.metadata[0].name
|
namespace = kubernetes_namespace.authentik.metadata[0].name
|
||||||
name = "authentik-outpost"
|
name = "authentik-outpost"
|
||||||
host = "authentik"
|
host = "authentik"
|
||||||
service_name = "ak-outpost-authentik-embedded-outpost"
|
service_name = "ak-outpost-authentik-embedded-outpost"
|
||||||
port = 9000
|
port = 9000
|
||||||
ingress_path = ["/outpost.goauthentik.io"]
|
ingress_path = ["/outpost.goauthentik.io"]
|
||||||
tls_secret_name = var.tls_secret_name
|
tls_secret_name = var.tls_secret_name
|
||||||
|
anti_ai_scraping = false
|
||||||
|
exclude_crowdsec = true
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ listen_addr = 0.0.0.0
|
||||||
listen_port = 6432
|
listen_port = 6432
|
||||||
auth_type = md5
|
auth_type = md5
|
||||||
auth_file = /etc/pgbouncer/userlist.txt
|
auth_file = /etc/pgbouncer/userlist.txt
|
||||||
pool_mode = transaction
|
pool_mode = session
|
||||||
max_client_conn = 200
|
max_client_conn = 200
|
||||||
default_pool_size = 20
|
default_pool_size = 20
|
||||||
reserve_pool_size = 5
|
reserve_pool_size = 5
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,19 @@
|
||||||
authentik:
|
authentik:
|
||||||
log_level: warning
|
log_level: warning
|
||||||
# log_level: trace
|
# log_level: trace
|
||||||
secret_key: "${secret_key}"
|
secret_key: ""
|
||||||
|
existingSecret:
|
||||||
|
secretName: "goauthentik"
|
||||||
# This sends anonymous usage-data, stack traces on errors and
|
# This sends anonymous usage-data, stack traces on errors and
|
||||||
# performance data to authentik.error-reporting.a7k.io, and is fully opt-in
|
# performance data to authentik.error-reporting.a7k.io, and is fully opt-in
|
||||||
error_reporting:
|
error_reporting:
|
||||||
enabled: true
|
enabled: false
|
||||||
postgresql:
|
postgresql:
|
||||||
# host: postgresql.dbaas
|
# host: postgresql.dbaas
|
||||||
host: pgbouncer.authentik
|
host: pgbouncer.authentik
|
||||||
port: 6432
|
port: 6432
|
||||||
user: authentik
|
user: authentik
|
||||||
password: ${postgres_password}
|
password: ""
|
||||||
redis:
|
|
||||||
host: ${redis_host}
|
|
||||||
|
|
||||||
server:
|
server:
|
||||||
replicas: 2
|
replicas: 2
|
||||||
|
|
@ -58,9 +58,9 @@ worker:
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
memory: 1Gi
|
memory: 1.5Gi
|
||||||
limits:
|
limits:
|
||||||
memory: 1Gi
|
memory: 1.5Gi
|
||||||
topologySpreadConstraints:
|
topologySpreadConstraints:
|
||||||
- maxSkew: 1
|
- maxSkew: 1
|
||||||
topologyKey: kubernetes.io/hostname
|
topologyKey: kubernetes.io/hostname
|
||||||
|
|
@ -71,3 +71,6 @@ worker:
|
||||||
pdb:
|
pdb:
|
||||||
enabled: true
|
enabled: true
|
||||||
maxUnavailable: 1
|
maxUnavailable: 1
|
||||||
|
|
||||||
|
postgresql:
|
||||||
|
enabled: false
|
||||||
|
|
|
||||||
|
|
@ -41,44 +41,6 @@ resource "kubernetes_manifest" "external_secret" {
|
||||||
depends_on = [kubernetes_namespace.diun]
|
depends_on = [kubernetes_namespace.diun]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "kubernetes_manifest" "external_secret_git" {
|
|
||||||
manifest = {
|
|
||||||
apiVersion = "external-secrets.io/v1beta1"
|
|
||||||
kind = "ExternalSecret"
|
|
||||||
metadata = {
|
|
||||||
name = "diun-git-secrets"
|
|
||||||
namespace = "diun"
|
|
||||||
}
|
|
||||||
spec = {
|
|
||||||
refreshInterval = "15m"
|
|
||||||
secretStoreRef = {
|
|
||||||
name = "vault-kv"
|
|
||||||
kind = "ClusterSecretStore"
|
|
||||||
}
|
|
||||||
target = {
|
|
||||||
name = "diun-git-secrets"
|
|
||||||
}
|
|
||||||
data = [
|
|
||||||
{
|
|
||||||
secretKey = "git_token"
|
|
||||||
remoteRef = {
|
|
||||||
key = "viktor"
|
|
||||||
property = "webhook_handler_git_token"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
secretKey = "git_user"
|
|
||||||
remoteRef = {
|
|
||||||
key = "viktor"
|
|
||||||
property = "webhook_handler_git_user"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
depends_on = [kubernetes_namespace.diun]
|
|
||||||
}
|
|
||||||
|
|
||||||
module "tls_secret" {
|
module "tls_secret" {
|
||||||
source = "../../modules/kubernetes/setup_tls_secret"
|
source = "../../modules/kubernetes/setup_tls_secret"
|
||||||
namespace = kubernetes_namespace.diun.metadata[0].name
|
namespace = kubernetes_namespace.diun.metadata[0].name
|
||||||
|
|
@ -119,28 +81,6 @@ resource "kubernetes_cluster_role_binding" "diun" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "kubernetes_persistent_volume_claim" "repo" {
|
|
||||||
wait_until_bound = false
|
|
||||||
metadata {
|
|
||||||
name = "diun-repo"
|
|
||||||
namespace = kubernetes_namespace.diun.metadata[0].name
|
|
||||||
annotations = {
|
|
||||||
"resize.topolvm.io/threshold" = "80%"
|
|
||||||
"resize.topolvm.io/increase" = "100%"
|
|
||||||
"resize.topolvm.io/storage_limit" = "5Gi"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
spec {
|
|
||||||
access_modes = ["ReadWriteOnce"]
|
|
||||||
storage_class_name = "proxmox-lvm"
|
|
||||||
resources {
|
|
||||||
requests = {
|
|
||||||
storage = "1Gi"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "kubernetes_persistent_volume_claim" "data_proxmox" {
|
resource "kubernetes_persistent_volume_claim" "data_proxmox" {
|
||||||
wait_until_bound = false
|
wait_until_bound = false
|
||||||
metadata {
|
metadata {
|
||||||
|
|
@ -163,81 +103,6 @@ resource "kubernetes_persistent_volume_claim" "data_proxmox" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "kubernetes_config_map_v1" "auto_update_script" {
|
|
||||||
metadata {
|
|
||||||
name = "diun-auto-update-script"
|
|
||||||
namespace = kubernetes_namespace.diun.metadata[0].name
|
|
||||||
}
|
|
||||||
data = {
|
|
||||||
"auto-update.sh" = <<-SCRIPT
|
|
||||||
#!/bin/sh
|
|
||||||
set -e
|
|
||||||
|
|
||||||
# Only act on updates (not new or unchanged)
|
|
||||||
[ "$$DIUN_ENTRY_STATUS" = "update" ] || exit 0
|
|
||||||
|
|
||||||
IMAGE="$$DIUN_ENTRY_IMAGE"
|
|
||||||
NEW_TAG="$$DIUN_ENTRY_IMAGETAG"
|
|
||||||
|
|
||||||
echo "[auto-update] Detected update: $$IMAGE -> $$NEW_TAG"
|
|
||||||
|
|
||||||
# Skip databases
|
|
||||||
case "$$IMAGE" in
|
|
||||||
*postgres*|*mysql*|*redis*|*clickhouse*|*etcd*) echo "[auto-update] Skipping database image"; exit 0 ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# Skip custom images (handled by CI/CD)
|
|
||||||
case "$$IMAGE" in
|
|
||||||
viktorbarzin/*|registry.viktorbarzin.me/*|ancamilea/*|mghee/*) echo "[auto-update] Skipping CI/CD-managed image"; exit 0 ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# Skip kube-system / infrastructure images
|
|
||||||
case "$$IMAGE" in
|
|
||||||
registry.k8s.io/*|quay.io/tigera/*|quay.io/metallb/*|nvcr.io/*|reg.kyverno.io/*) echo "[auto-update] Skipping infrastructure image"; exit 0 ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# Acquire lock (serialize concurrent DIUN notifications)
|
|
||||||
exec 200>/tmp/auto-update.lock
|
|
||||||
flock -n 200 || { echo "[auto-update] Another update in progress, skipping"; exit 0; }
|
|
||||||
|
|
||||||
cd /repo
|
|
||||||
|
|
||||||
# Configure git
|
|
||||||
git config user.email "diun@viktorbarzin.me"
|
|
||||||
git config user.name "DIUN Auto-Update"
|
|
||||||
|
|
||||||
# Pull latest using HTTPS with token
|
|
||||||
git remote set-url origin "https://$${GIT_USER}:$${GIT_TOKEN}@github.com/ViktorBarzin/infra.git"
|
|
||||||
git pull --rebase origin master || { echo "[auto-update] git pull failed"; exit 1; }
|
|
||||||
|
|
||||||
# Find .tf files containing this image
|
|
||||||
MATCHES=$$(grep -rl "\"$${IMAGE}:" stacks/ --include="*.tf" 2>/dev/null || true)
|
|
||||||
[ -z "$$MATCHES" ] && { echo "[auto-update] No .tf file found for $$IMAGE"; exit 0; }
|
|
||||||
|
|
||||||
# Update the image tag in each matching file
|
|
||||||
UPDATED=0
|
|
||||||
for FILE in $$MATCHES; do
|
|
||||||
if sed -i "s|\"$${IMAGE}:[^\"]*\"|\"$${IMAGE}:$${NEW_TAG}\"|g" "$$FILE"; then
|
|
||||||
echo "[auto-update] Updated $$FILE"
|
|
||||||
UPDATED=1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Check if anything actually changed
|
|
||||||
if git diff --quiet; then
|
|
||||||
echo "[auto-update] No changes after update for $$IMAGE:$$NEW_TAG (already up to date)"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Commit and push
|
|
||||||
git add -A stacks/
|
|
||||||
git commit -m "auto-update: $${IMAGE} -> $${NEW_TAG}"
|
|
||||||
git push origin master
|
|
||||||
echo "[auto-update] Pushed update: $${IMAGE}:$${NEW_TAG}"
|
|
||||||
SCRIPT
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "kubernetes_deployment" "diun" {
|
resource "kubernetes_deployment" "diun" {
|
||||||
metadata {
|
metadata {
|
||||||
name = "diun"
|
name = "diun"
|
||||||
|
|
@ -269,50 +134,6 @@ resource "kubernetes_deployment" "diun" {
|
||||||
}
|
}
|
||||||
spec {
|
spec {
|
||||||
service_account_name = "diun"
|
service_account_name = "diun"
|
||||||
init_container {
|
|
||||||
name = "clone-repo"
|
|
||||||
image = "alpine/git:latest"
|
|
||||||
command = ["/bin/sh", "-c"]
|
|
||||||
args = [<<-EOF
|
|
||||||
if [ -d /repo/.git ]; then
|
|
||||||
cd /repo && git pull --rebase origin master || true
|
|
||||||
else
|
|
||||||
git clone https://$${GIT_USER}:$${GIT_TOKEN}@github.com/ViktorBarzin/infra.git /repo
|
|
||||||
fi
|
|
||||||
EOF
|
|
||||||
]
|
|
||||||
env {
|
|
||||||
name = "GIT_USER"
|
|
||||||
value_from {
|
|
||||||
secret_key_ref {
|
|
||||||
name = "diun-git-secrets"
|
|
||||||
key = "git_user"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
env {
|
|
||||||
name = "GIT_TOKEN"
|
|
||||||
value_from {
|
|
||||||
secret_key_ref {
|
|
||||||
name = "diun-git-secrets"
|
|
||||||
key = "git_token"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
volume_mount {
|
|
||||||
name = "repo"
|
|
||||||
mount_path = "/repo"
|
|
||||||
}
|
|
||||||
resources {
|
|
||||||
requests = {
|
|
||||||
cpu = "10m"
|
|
||||||
memory = "64Mi"
|
|
||||||
}
|
|
||||||
limits = {
|
|
||||||
memory = "128Mi"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
container {
|
container {
|
||||||
image = "viktorbarzin/diun:latest"
|
image = "viktorbarzin/diun:latest"
|
||||||
name = "diun"
|
name = "diun"
|
||||||
|
|
@ -349,12 +170,25 @@ resource "kubernetes_deployment" "diun" {
|
||||||
name = "DIUN_DEFAULTS_SORTTAGS"
|
name = "DIUN_DEFAULTS_SORTTAGS"
|
||||||
value = "reverse"
|
value = "reverse"
|
||||||
}
|
}
|
||||||
# Script notifier for auto-updates
|
# Webhook notifier for upgrade agent (via n8n)
|
||||||
env {
|
env {
|
||||||
name = "DIUN_NOTIF_SCRIPT_CMD"
|
name = "DIUN_NOTIF_WEBHOOK_ENDPOINT"
|
||||||
value = "/scripts/auto-update.sh"
|
value_from {
|
||||||
|
secret_key_ref {
|
||||||
|
name = "diun-secrets"
|
||||||
|
key = "n8n_webhook_url"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
# Slack notifier (kept alongside script notifier)
|
env {
|
||||||
|
name = "DIUN_NOTIF_WEBHOOK_METHOD"
|
||||||
|
value = "POST"
|
||||||
|
}
|
||||||
|
env {
|
||||||
|
name = "DIUN_NOTIF_WEBHOOK_HEADERS_CONTENT-TYPE"
|
||||||
|
value = "application/json"
|
||||||
|
}
|
||||||
|
# Slack notifier (independent notification channel)
|
||||||
env {
|
env {
|
||||||
name = "DIUN_NOTIF_SLACK_WEBHOOKURL"
|
name = "DIUN_NOTIF_SLACK_WEBHOOKURL"
|
||||||
value_from {
|
value_from {
|
||||||
|
|
@ -364,25 +198,6 @@ resource "kubernetes_deployment" "diun" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
# Git credentials for auto-update script
|
|
||||||
env {
|
|
||||||
name = "GIT_USER"
|
|
||||||
value_from {
|
|
||||||
secret_key_ref {
|
|
||||||
name = "diun-git-secrets"
|
|
||||||
key = "git_user"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
env {
|
|
||||||
name = "GIT_TOKEN"
|
|
||||||
value_from {
|
|
||||||
secret_key_ref {
|
|
||||||
name = "diun-git-secrets"
|
|
||||||
key = "git_token"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
env {
|
env {
|
||||||
name = "LOG_LEVEL"
|
name = "LOG_LEVEL"
|
||||||
value = "debug"
|
value = "debug"
|
||||||
|
|
@ -391,14 +206,6 @@ resource "kubernetes_deployment" "diun" {
|
||||||
name = "data"
|
name = "data"
|
||||||
mount_path = "/data"
|
mount_path = "/data"
|
||||||
}
|
}
|
||||||
volume_mount {
|
|
||||||
name = "scripts"
|
|
||||||
mount_path = "/scripts"
|
|
||||||
}
|
|
||||||
volume_mount {
|
|
||||||
name = "repo"
|
|
||||||
mount_path = "/repo"
|
|
||||||
}
|
|
||||||
resources {
|
resources {
|
||||||
requests = {
|
requests = {
|
||||||
cpu = "10m"
|
cpu = "10m"
|
||||||
|
|
@ -415,19 +222,6 @@ resource "kubernetes_deployment" "diun" {
|
||||||
claim_name = kubernetes_persistent_volume_claim.data_proxmox.metadata[0].name
|
claim_name = kubernetes_persistent_volume_claim.data_proxmox.metadata[0].name
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
volume {
|
|
||||||
name = "scripts"
|
|
||||||
config_map {
|
|
||||||
name = kubernetes_config_map_v1.auto_update_script.metadata[0].name
|
|
||||||
default_mode = "0755"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
volume {
|
|
||||||
name = "repo"
|
|
||||||
persistent_volume_claim {
|
|
||||||
claim_name = kubernetes_persistent_volume_claim.repo.metadata[0].name
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,6 @@ module "mailserver" {
|
||||||
sasl_passwd = local.mailserver_sasl_passwd
|
sasl_passwd = local.mailserver_sasl_passwd
|
||||||
roundcube_db_password = data.vault_kv_secret_v2.secrets.data["mailserver_roundcubemail_db_password"]
|
roundcube_db_password = data.vault_kv_secret_v2.secrets.data["mailserver_roundcubemail_db_password"]
|
||||||
tier = local.tiers.edge
|
tier = local.tiers.edge
|
||||||
mailgun_api_key = data.vault_kv_secret_v2.viktor.data["mailgun_api_key"]
|
brevo_api_key = jsondecode(base64decode(data.vault_kv_secret_v2.viktor.data["brevo_api_key"]))["api_key"]
|
||||||
email_monitor_imap_password = local.mailserver_accounts["spam@viktorbarzin.me"]
|
email_monitor_imap_password = local.mailserver_accounts["spam@viktorbarzin.me"]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ variable "postfix_account_aliases" {}
|
||||||
variable "opendkim_key" {}
|
variable "opendkim_key" {}
|
||||||
variable "sasl_passwd" {} # For sendgrid i.e relayhost
|
variable "sasl_passwd" {} # For sendgrid i.e relayhost
|
||||||
variable "nfs_server" { type = string }
|
variable "nfs_server" { type = string }
|
||||||
variable "mailgun_api_key" {
|
variable "brevo_api_key" {
|
||||||
type = string
|
type = string
|
||||||
sensitive = true
|
sensitive = true
|
||||||
}
|
}
|
||||||
|
|
@ -537,7 +537,7 @@ resource "kubernetes_service" "mailserver" {
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# E2E Email Roundtrip Monitor
|
# E2E Email Roundtrip Monitor
|
||||||
# Sends test email via Mailgun API, verifies delivery via IMAP, pushes metrics
|
# Sends test email via Brevo API, verifies delivery via IMAP, pushes metrics
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
resource "kubernetes_cron_job_v1" "email_roundtrip_monitor" {
|
resource "kubernetes_cron_job_v1" "email_roundtrip_monitor" {
|
||||||
metadata {
|
metadata {
|
||||||
|
|
@ -562,9 +562,9 @@ resource "kubernetes_cron_job_v1" "email_roundtrip_monitor" {
|
||||||
image = "docker.io/library/python:3.12-alpine"
|
image = "docker.io/library/python:3.12-alpine"
|
||||||
command = ["/bin/sh", "-c", <<-EOT
|
command = ["/bin/sh", "-c", <<-EOT
|
||||||
pip install --quiet --disable-pip-version-check requests && python3 -c '
|
pip install --quiet --disable-pip-version-check requests && python3 -c '
|
||||||
import requests, imaplib, email, time, os, uuid, sys, ssl
|
import requests, imaplib, email, time, os, uuid, sys, ssl, json
|
||||||
|
|
||||||
MAILGUN_API_KEY = os.environ["MAILGUN_API_KEY"]
|
BREVO_API_KEY = os.environ["BREVO_API_KEY"]
|
||||||
IMAP_USER = "spam@viktorbarzin.me"
|
IMAP_USER = "spam@viktorbarzin.me"
|
||||||
IMAP_PASS = os.environ["EMAIL_MONITOR_IMAP_PASSWORD"]
|
IMAP_PASS = os.environ["EMAIL_MONITOR_IMAP_PASSWORD"]
|
||||||
IMAP_HOST = "mailserver.mailserver.svc.cluster.local"
|
IMAP_HOST = "mailserver.mailserver.svc.cluster.local"
|
||||||
|
|
@ -578,20 +578,24 @@ success = 0
|
||||||
duration = 0
|
duration = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Step 1: Send via Mailgun HTTP API to smoke-test@ (hits catch-all -> spam@)
|
# Step 1: Send via Brevo Transactional Email API to smoke-test@ (hits catch-all -> spam@)
|
||||||
resp = requests.post(
|
resp = requests.post(
|
||||||
f"https://api.eu.mailgun.net/v3/{DOMAIN}/messages",
|
"https://api.brevo.com/v3/smtp/email",
|
||||||
auth=("api", MAILGUN_API_KEY),
|
headers={
|
||||||
data={
|
"api-key": BREVO_API_KEY,
|
||||||
"from": f"monitoring@{DOMAIN}",
|
"Content-Type": "application/json",
|
||||||
"to": f"smoke-test@{DOMAIN}",
|
"Accept": "application/json",
|
||||||
|
},
|
||||||
|
json={
|
||||||
|
"sender": {"name": "Monitoring", "email": f"monitoring@{DOMAIN}"},
|
||||||
|
"to": [{"email": f"smoke-test@{DOMAIN}"}],
|
||||||
"subject": subject,
|
"subject": subject,
|
||||||
"text": f"E2E email monitoring probe {marker}. Auto-generated, will be deleted.",
|
"textContent": f"E2E email monitoring probe {marker}. Auto-generated, will be deleted.",
|
||||||
},
|
},
|
||||||
timeout=30,
|
timeout=30,
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
print(f"Sent test email via Mailgun: {resp.status_code} marker={marker}")
|
print(f"Sent test email via Brevo: {resp.status_code} marker={marker}")
|
||||||
|
|
||||||
# Step 2: Wait for delivery, retry IMAP up to 3 min
|
# Step 2: Wait for delivery, retry IMAP up to 3 min
|
||||||
ctx = ssl.create_default_context()
|
ctx = ssl.create_default_context()
|
||||||
|
|
@ -667,8 +671,8 @@ sys.exit(0 if success else 1)
|
||||||
EOT
|
EOT
|
||||||
]
|
]
|
||||||
env {
|
env {
|
||||||
name = "MAILGUN_API_KEY"
|
name = "BREVO_API_KEY"
|
||||||
value = var.mailgun_api_key
|
value = var.brevo_api_key
|
||||||
}
|
}
|
||||||
env {
|
env {
|
||||||
name = "EMAIL_MONITOR_IMAP_PASSWORD"
|
name = "EMAIL_MONITOR_IMAP_PASSWORD"
|
||||||
|
|
|
||||||
|
|
@ -114,19 +114,43 @@ resource "kubernetes_deployment" "meshcentral" {
|
||||||
image_pull_policy = "IfNotPresent"
|
image_pull_policy = "IfNotPresent"
|
||||||
command = ["/bin/sh"]
|
command = ["/bin/sh"]
|
||||||
args = ["-c", <<-EOT
|
args = ["-c", <<-EOT
|
||||||
if [ -f /opt/meshcentral/meshcentral-data/config.json ]; then
|
CONFIG=/opt/meshcentral/meshcentral-data/config.json
|
||||||
|
if [ -f "$CONFIG" ]; then
|
||||||
# Disable certUrl when using Traefik reverse proxy with TLS offload
|
# Disable certUrl when using Traefik reverse proxy with TLS offload
|
||||||
sed -i 's/"certUrl":/"_certUrl":/g' /opt/meshcentral/meshcentral-data/config.json
|
sed -i 's/"certUrl":/"_certUrl":/g' "$CONFIG"
|
||||||
|
|
||||||
# Fix WebRTC value from string to boolean
|
# Fix WebRTC value from string to boolean
|
||||||
sed -i 's/"WebRTC": "[^"]*"/"WebRTC": false/g' /opt/meshcentral/meshcentral-data/config.json
|
sed -i 's/"WebRTC": "[^"]*"/"WebRTC": false/g' "$CONFIG"
|
||||||
|
|
||||||
# Ensure TLSOffload is enabled (Traefik terminates TLS, MeshCentral serves HTTP on 443)
|
# Ensure TLSOffload is enabled (Traefik terminates TLS, MeshCentral serves HTTP on 443)
|
||||||
# Re-enable if previously disabled by restoring _TLSOffload back to TLSOffload
|
sed -i 's/"_TLSOffload":/"TLSOffload":/g' "$CONFIG"
|
||||||
sed -i 's/"_TLSOffload":/"TLSOffload":/g' /opt/meshcentral/meshcentral-data/config.json
|
sed -i 's/"TLSOffload": "[^"]*"/"TLSOffload": true/g' "$CONFIG"
|
||||||
# Set TLSOffload to true (accepts any reverse proxy)
|
sed -i 's/"TLSOffload": false/"TLSOffload": true/g' "$CONFIG"
|
||||||
sed -i 's/"TLSOffload": "[^"]*"/"TLSOffload": true/g' /opt/meshcentral/meshcentral-data/config.json
|
else
|
||||||
sed -i 's/"TLSOffload": false/"TLSOffload": true/g' /opt/meshcentral/meshcentral-data/config.json
|
# First run: create config from template before startup.sh runs, so REVERSE_PROXY
|
||||||
|
# env var doesn't generate a bad certUrl. Pre-seed with correct values.
|
||||||
|
cat > "$CONFIG" <<'CONF'
|
||||||
|
{
|
||||||
|
"$schema": "http://info.meshcentral.com/downloads/meshcentral-config-schema.json",
|
||||||
|
"settings": {
|
||||||
|
"cert": "meshcentral.viktorbarzin.me",
|
||||||
|
"_WANonly": true,
|
||||||
|
"_LANonly": true,
|
||||||
|
"port": 443,
|
||||||
|
"redirPort": 80,
|
||||||
|
"AgentPong": 300,
|
||||||
|
"TLSOffload": true,
|
||||||
|
"SelfUpdate": false,
|
||||||
|
"AllowFraming": false,
|
||||||
|
"WebRTC": false
|
||||||
|
},
|
||||||
|
"domains": {
|
||||||
|
"": {
|
||||||
|
"NewAccounts": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CONF
|
||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
]
|
]
|
||||||
|
|
@ -153,7 +177,7 @@ EOT
|
||||||
}
|
}
|
||||||
env {
|
env {
|
||||||
name = "REVERSE_PROXY"
|
name = "REVERSE_PROXY"
|
||||||
value = "true"
|
value = "false"
|
||||||
}
|
}
|
||||||
env {
|
env {
|
||||||
name = "ALLOW_NEW_ACCOUNTS"
|
name = "ALLOW_NEW_ACCOUNTS"
|
||||||
|
|
|
||||||
|
|
@ -1594,10 +1594,10 @@ serverFiles:
|
||||||
- alert: HighServiceLatency
|
- alert: HighServiceLatency
|
||||||
expr: |
|
expr: |
|
||||||
(
|
(
|
||||||
sum(rate(traefik_service_request_duration_seconds_sum{service!~".*idrac.*|.*headscale.*|.*authentik.*"}[5m])) by (service)
|
sum(rate(traefik_service_request_duration_seconds_sum{service!~".*idrac.*|.*headscale.*"}[5m])) by (service)
|
||||||
/ sum(rate(traefik_service_request_duration_seconds_count{service!~".*idrac.*|.*headscale.*|.*authentik.*"}[5m])) by (service)
|
/ sum(rate(traefik_service_request_duration_seconds_count{service!~".*idrac.*|.*headscale.*"}[5m])) by (service)
|
||||||
) > 10
|
) > 10
|
||||||
and sum(rate(traefik_service_request_duration_seconds_count{service!~".*idrac.*|.*headscale.*|.*authentik.*"}[5m])) by (service) > 0.01
|
and sum(rate(traefik_service_request_duration_seconds_count{service!~".*idrac.*|.*headscale.*"}[5m])) by (service) > 0.01
|
||||||
and on() (time() - process_start_time_seconds{job="prometheus"}) > 900
|
and on() (time() - process_start_time_seconds{job="prometheus"}) > 900
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
|
|
|
||||||
|
|
@ -30,12 +30,14 @@ nextcloud:
|
||||||
zzz-redis.config.php: |
|
zzz-redis.config.php: |
|
||||||
<?php
|
<?php
|
||||||
// Redis with Sentinel-based master discovery
|
// Redis with Sentinel-based master discovery
|
||||||
// Queries Sentinel at startup to find the current master, falls back to direct host
|
// Queries Sentinel to find the current master, falls back to HAProxy service
|
||||||
|
// which health-checks Redis nodes and routes only to the master.
|
||||||
$sentinels = [
|
$sentinels = [
|
||||||
['redis-node-0.redis-headless.redis.svc.cluster.local', 26379],
|
['redis-node-0.redis-headless.redis.svc.cluster.local', 26379],
|
||||||
['redis-node-1.redis-headless.redis.svc.cluster.local', 26379],
|
['redis-node-1.redis-headless.redis.svc.cluster.local', 26379],
|
||||||
];
|
];
|
||||||
$redisHost = 'redis-node-0.redis-headless.redis.svc.cluster.local';
|
// Fallback: HAProxy master-only service (safe even if Sentinel is unavailable)
|
||||||
|
$redisHost = 'redis-master.redis.svc.cluster.local';
|
||||||
$redisPort = 6379;
|
$redisPort = 6379;
|
||||||
foreach ($sentinels as [$sHost, $sPort]) {
|
foreach ($sentinels as [$sHost, $sPort]) {
|
||||||
try {
|
try {
|
||||||
|
|
@ -145,7 +147,7 @@ readinessProbe:
|
||||||
podAnnotations:
|
podAnnotations:
|
||||||
diun.enable: "true"
|
diun.enable: "true"
|
||||||
diun.include_tags: "^[0-9]+(?:.[0-9]+)?(?:.[0-9]+)?.*"
|
diun.include_tags: "^[0-9]+(?:.[0-9]+)?(?:.[0-9]+)?.*"
|
||||||
dependency.kyverno.io/wait-for: "mysql.dbaas:3306,redis.redis:6379"
|
dependency.kyverno.io/wait-for: "mysql.dbaas:3306,redis-master.redis:6379"
|
||||||
secret.reloader.stakater.com/reload: "nextcloud-db-creds"
|
secret.reloader.stakater.com/reload: "nextcloud-db-creds"
|
||||||
|
|
||||||
collabora:
|
collabora:
|
||||||
|
|
|
||||||
|
|
@ -154,11 +154,13 @@ resource "kubernetes_config_map" "haproxy" {
|
||||||
tcp-check send "PING\r\n"
|
tcp-check send "PING\r\n"
|
||||||
tcp-check expect string +PONG
|
tcp-check expect string +PONG
|
||||||
tcp-check send "INFO replication\r\n"
|
tcp-check send "INFO replication\r\n"
|
||||||
tcp-check expect string role:master
|
# Match "role:master" only — cannot appear in slave responses
|
||||||
|
# (slave has "role:slave" then "master_host:..." which doesn't match)
|
||||||
|
tcp-check expect rstring role:master
|
||||||
tcp-check send "QUIT\r\n"
|
tcp-check send "QUIT\r\n"
|
||||||
tcp-check expect string +OK
|
tcp-check expect string +OK
|
||||||
server redis-node-0 redis-node-0.redis-headless.redis.svc.cluster.local:6379 check inter 3s fall 3 rise 2
|
server redis-node-0 redis-node-0.redis-headless.redis.svc.cluster.local:6379 check inter 1s fall 2 rise 2
|
||||||
server redis-node-1 redis-node-1.redis-headless.redis.svc.cluster.local:6379 check inter 3s fall 3 rise 2
|
server redis-node-1 redis-node-1.redis-headless.redis.svc.cluster.local:6379 check inter 1s fall 2 rise 2
|
||||||
|
|
||||||
backend redis_sentinel
|
backend redis_sentinel
|
||||||
balance roundrobin
|
balance roundrobin
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue