Add Uptime Kuma monitor check to cluster health script [ci skip]
Adds check #14 that queries Uptime Kuma API for application-level monitor status, complementing the kubectl-level checks with HTTP/ping health data. Reports down monitors by name with PASS/WARN/FAIL thresholds.
This commit is contained in:
parent
719e3c6244
commit
2bae6ccce3
1 changed file with 90 additions and 3 deletions
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Cluster health check script.
|
||||
# Runs 13 diagnostic checks against the Kubernetes cluster and prints
|
||||
# Runs 14 diagnostic checks against the Kubernetes cluster and prints
|
||||
# a colour-coded report with PASS / WARN / FAIL for each section.
|
||||
#
|
||||
# Usage: ./scripts/cluster_healthcheck.sh [--fix] [--quiet|-q] [--json] [--kubeconfig <path>]
|
||||
|
|
@ -38,14 +38,14 @@ section() {
|
|||
[[ "$JSON" == true ]] && return 0
|
||||
[[ "$QUIET" == true ]] && return 0
|
||||
echo ""
|
||||
echo -e "${BOLD}[$num/13] $title${NC}"
|
||||
echo -e "${BOLD}[$num/14] $title${NC}"
|
||||
}
|
||||
|
||||
# Print a numbered section header that is shown even in --quiet mode.
#
# Arguments:
#   $1 - section number (position of the check in the run)
#   $2 - section title
#
# Emits nothing when JSON output is requested ($JSON == true). Unlike
# section(), this helper does not look at $QUIET, so callers use it to
# surface the header of a check that produced a WARN/FAIL in quiet mode.
section_always() {
    local num="$1" title="$2"
    [[ "$JSON" == true ]] && return 0
    echo ""
    # The script runs 14 checks in total; print the header exactly once.
    echo -e "${BOLD}[$num/14] $title${NC}"
}
|
||||
|
||||
json_add() {
|
||||
|
|
@ -579,6 +579,92 @@ except:
|
|||
fi
|
||||
}
|
||||
|
||||
# --- 14. Uptime Kuma ---
|
||||
# Check #14: query Uptime Kuma for application-level monitor status.
#
# Configuration (environment variables):
#   UPTIME_KUMA_URL       base URL of the Uptime Kuma instance
#                         (default: https://uptime.viktorbarzin.me)
#   UPTIME_KUMA_USER      login user (default: admin)
#   UPTIME_KUMA_PASSWORD  login password (required; never stored in this script)
#   UPTIME_KUMA_PYTHON    interpreter that has uptime-kuma-api installed
#                         (default: ~/.venvs/claude/bin/python3)
#
# The embedded Python helper prints exactly one line on stdout:
#   ERROR:<msg>        the client library is missing
#   CONN_ERROR:<msg>   connecting, logging in, or querying failed
#   <down>:<up>:<paused>:<total_active>:<comma separated down monitor names>
#
# Thresholds: 0 monitors down -> PASS, 1-3 down -> WARN, more than 3 -> FAIL.
# Any configuration, connection, or parsing problem is reported as WARN.
check_uptime_kuma() {
    section 14 "Uptime Kuma Monitors"

    local result
    local kuma_url="${UPTIME_KUMA_URL:-https://uptime.viktorbarzin.me}"
    local kuma_user="${UPTIME_KUMA_USER:-admin}"
    local kuma_password="${UPTIME_KUMA_PASSWORD:-}"
    local kuma_python="${UPTIME_KUMA_PYTHON:-$HOME/.venvs/claude/bin/python3}"

    if [[ -z "$kuma_password" ]]; then
        # Credentials must come from the environment, not from the repository.
        result="ERROR:UPTIME_KUMA_PASSWORD is not set"
    else
        # Credentials are handed to Python through the environment so they do
        # not show up in the process argument list. The Python source is inside
        # a single-quoted shell string, so it must not contain single quotes.
        result=$(UPTIME_KUMA_URL="$kuma_url" \
                 UPTIME_KUMA_USER="$kuma_user" \
                 UPTIME_KUMA_PASSWORD="$kuma_password" \
                 "$kuma_python" -c '
import os
import sys

try:
    from uptime_kuma_api import UptimeKumaApi
except ImportError:
    print("ERROR:uptime-kuma-api not installed")
    sys.exit(0)

api = None
try:
    api = UptimeKumaApi(os.environ["UPTIME_KUMA_URL"])
    api.login(os.environ["UPTIME_KUMA_USER"], os.environ["UPTIME_KUMA_PASSWORD"])

    down = []
    up_count = 0
    paused_count = 0

    for m in api.get_monitors():
        name = m.get("name", "unknown")
        if not m.get("active", True):
            paused_count += 1
            continue
        # Prefer the most recent heartbeat; fall back to the monitor record.
        try:
            hb = api.get_monitor_beats(m["id"], 1)
            status = hb[-1].get("status", 0) if hb else m.get("status", 0)
        except Exception:
            status = m.get("status", 0)
        # status: 0=DOWN, 1=UP, 2=PENDING, 3=MAINTENANCE
        if status == 1:
            up_count += 1
        elif status == 3:
            # Maintenance is reported together with paused monitors.
            paused_count += 1
        else:
            # DOWN and PENDING are both reported as down.
            down.append(name)

    down_count = len(down)
    total_active = up_count + down_count
    down_names = ", ".join(down)
    print(f"{down_count}:{up_count}:{paused_count}:{total_active}:{down_names}")
except Exception as e:
    print(f"CONN_ERROR:{e}")
finally:
    # Always close the connection, including after a failed login or query.
    if api is not None:
        try:
            api.disconnect()
        except Exception:
            pass
' 2>/dev/null) || result="CONN_ERROR:python execution failed"
    fi

    if [[ "$result" == "ERROR:"* ]]; then
        [[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors"
        warn "Uptime Kuma: ${result#ERROR:}"
        json_add "uptime_kuma" "WARN" "${result#ERROR:}"
        return 0
    fi

    if [[ "$result" == "CONN_ERROR:"* ]]; then
        [[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors"
        warn "Cannot connect to Uptime Kuma: ${result#CONN_ERROR:}"
        json_add "uptime_kuma" "WARN" "Connection failed"
        return 0
    fi

    # Split the colon-separated summary line. The last variable receives the
    # remainder of the line, so monitor names that contain ":" stay intact.
    local down_count up_count paused_count total_active down_names
    IFS=: read -r down_count up_count paused_count total_active down_names <<< "$result"

    # Guard against empty or garbled output: [[ "" -eq 0 ]] evaluates to true,
    # which would otherwise turn a silent helper failure into a PASS.
    if ! [[ "$down_count" =~ ^[0-9]+$ && "$total_active" =~ ^[0-9]+$ ]]; then
        [[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors"
        warn "Uptime Kuma: unexpected output from monitor query"
        json_add "uptime_kuma" "WARN" "Unexpected output"
        return 0
    fi

    if [[ "$down_count" -eq 0 ]]; then
        pass "All $total_active active monitors up ($paused_count paused)"
        json_add "uptime_kuma" "PASS" "$total_active up, $paused_count paused"
    elif [[ "$down_count" -le 3 ]]; then
        [[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors"
        warn "$down_count/$total_active monitor(s) down: $down_names"
        json_add "uptime_kuma" "WARN" "$down_count down: $down_names"
    else
        [[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors"
        fail "$down_count/$total_active monitors down: $down_names"
        json_add "uptime_kuma" "FAIL" "$down_count down: $down_names"
    fi
}
|
||||
|
||||
# --- Summary ---
|
||||
print_summary() {
|
||||
if [[ "$JSON" == true ]]; then
|
||||
|
|
@ -644,6 +730,7 @@ main() {
|
|||
check_crowdsec
|
||||
check_ingresses
|
||||
check_alerts
|
||||
check_uptime_kuma
|
||||
print_summary
|
||||
|
||||
# Exit code: 2 for failures, 1 for warnings, 0 for clean
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue