uptime-kuma: codify Traefik LB internal monitor at .203 (was stale .200)
A hand-created (non-Terraform) uptime-kuma monitor, "Traefik LoadBalancer" (id=95), was port-checking 10.0.20.200:443 — the shared LB IP that Traefik moved off on 2026-05-30 when it took its dedicated .203 address (externalTrafficPolicy=Local). The monitor had been DOWN for ~5 days, surfacing as the cluster-health "uptime_kuma internal down(1)" WARN. Add it to local.internal_monitors as "Traefik LoadBalancer (10.0.20.203)" (port check on 10.0.20.203:443) so it is managed like the TP-Link/Proxmox direct-IP probes: a direct check of the MetalLB L2 + Traefik bind, complementing the "[External] traefik" (full Cloudflare path) and "Traefik Dashboard" (in-cluster) monitors. The sync CronJob created the new monitor (id=902, reporting UP at 1 ms); the orphaned id=95 was deleted via the uptime-kuma API.
This commit is contained in:
parent
011c63c92d
commit
4ed0c5a834
1 changed file with 27 additions and 4 deletions
|
|
@@ -26,7 +26,7 @@ resource "kubernetes_namespace" "uptime-kuma" {
|
||||||
metadata {
|
metadata {
|
||||||
name = "uptime-kuma"
|
name = "uptime-kuma"
|
||||||
labels = {
|
labels = {
|
||||||
tier = var.tier
|
tier = var.tier
|
||||||
"keel.sh/enrolled" = "true"
|
"keel.sh/enrolled" = "true"
|
||||||
}
|
}
|
||||||
# labels = {
|
# labels = {
|
||||||
|
|
@@ -192,12 +192,12 @@ resource "kubernetes_deployment" "uptime-kuma" {
|
||||||
# as `never` so a Kyverno reconcile (or manual kubectl) can't flip it
|
# as `never` so a Kyverno reconcile (or manual kubectl) can't flip it
|
||||||
# back to `force` and re-enable auto-updates.
|
# back to `force` and re-enable auto-updates.
|
||||||
metadata[0].annotations["keel.sh/trigger"],
|
metadata[0].annotations["keel.sh/trigger"],
|
||||||
metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
|
metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
|
||||||
spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE — Keel manages tag updates
|
spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE — Keel manages tag updates
|
||||||
metadata[0].annotations["kubernetes.io/change-cause"],
|
metadata[0].annotations["kubernetes.io/change-cause"],
|
||||||
metadata[0].annotations["deployment.kubernetes.io/revision"],
|
metadata[0].annotations["deployment.kubernetes.io/revision"],
|
||||||
spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
|
spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
|
||||||
metadata[0].annotations["keel.sh/match-tag"], # injected by Kyverno
|
metadata[0].annotations["keel.sh/match-tag"], # injected by Kyverno
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@@ -699,6 +699,29 @@ locals {
|
||||||
retry_interval = 60
|
retry_interval = 60
|
||||||
max_retries = 2
|
max_retries = 2
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Direct port probe of the Traefik MetalLB LB IP. Complements the
|
||||||
|
# `[External] traefik` HTTPS monitor (full DNS→CF→tunnel path) and the
|
||||||
|
# in-cluster `Traefik Dashboard` monitor: this one checks the dedicated
|
||||||
|
# LB IP + :443 bind directly, so a MetalLB L2 / Traefik-bind failure is
|
||||||
|
# distinguishable from a Cloudflare/tunnel outage. The IP is .203 (the
|
||||||
|
# DEDICATED Traefik LB, ETP=Local) — NOT the shared .200, which Traefik
|
||||||
|
# moved off on 2026-05-30. Replaces a hand-created monitor that still
|
||||||
|
# pointed at the dead .200:443. Keep this IP in sync with the Traefik LB
|
||||||
|
# in `docs/architecture/networking.md`.
|
||||||
|
name = "Traefik LoadBalancer (10.0.20.203)"
|
||||||
|
type = "port"
|
||||||
|
database_connection_string = null
|
||||||
|
database_password_vault_key = null
|
||||||
|
hostname = "10.0.20.203"
|
||||||
|
port = 443
|
||||||
|
url = null
|
||||||
|
accepted_statuscodes = null
|
||||||
|
ignore_tls = null
|
||||||
|
interval = 60
|
||||||
|
retry_interval = 30
|
||||||
|
max_retries = 3
|
||||||
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue