fix: update healthcheck to report internal and external monitors separately

- Increase Uptime Kuma API timeout from 30s to 120s and set wait_events=0.2
- Remove hardcoded password; read it from the UPTIME_KUMA_PASSWORD env var, falling back to Vault
- Report internal and external monitor status separately
- Install uptime-kuma-api in local venv

[ci skip]

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-04-14 19:44:20 +00:00
parent 26cc6fdf2f
commit 24a23709a5
2 changed files with 126 additions and 81 deletions

View file

@ -637,56 +637,60 @@ except ImportError:
sys.exit(0) sys.exit(0)
try: try:
api = UptimeKumaApi("https://uptime.viktorbarzin.me", timeout=30)
password = os.environ.get("UPTIME_KUMA_PASSWORD", "") password = os.environ.get("UPTIME_KUMA_PASSWORD", "")
if not password: if not password:
print("ERROR:UPTIME_KUMA_PASSWORD not set") print("ERROR:UPTIME_KUMA_PASSWORD not set")
sys.exit(0) sys.exit(0)
api = UptimeKumaApi("https://uptime.viktorbarzin.me", timeout=120, wait_events=0.2)
api.login("admin", password) api.login("admin", password)
monitors = api.get_monitors() monitors = api.get_monitors()
# Build id->name map and track active/paused heartbeats = api.get_heartbeats()
id_to_name = {}
internal_up = 0
internal_down = []
external_up = 0
external_down = []
paused_count = 0 paused_count = 0
for m in monitors: for m in monitors:
mid = m.get("id") mid = m.get("id")
name = m.get("name", "unknown") name = m.get("name", "unknown")
active = m.get("active", True) active = m.get("active", True)
is_external = name.startswith("[External] ")
if not active: if not active:
paused_count += 1 paused_count += 1
else: continue
id_to_name[mid] = name
# Use bulk heartbeat fetch (single API call) instead of per-monitor calls
heartbeats = api.get_heartbeats()
down = []
up_count = 0
for mid, name in id_to_name.items():
beats = heartbeats.get(mid, []) beats = heartbeats.get(mid, [])
if beats: if beats:
last_beat = beats[-1] last_beat = beats[-1]
# Handle nested lists (some monitors return list of lists)
if isinstance(last_beat, list): if isinstance(last_beat, list):
last_beat = last_beat[-1] if last_beat else {} last_beat = last_beat[-1] if last_beat else {}
status = last_beat.get("status", 0) if isinstance(last_beat, dict) else 0 status = last_beat.get("status", 0) if isinstance(last_beat, dict) else 0
# Handle both enum and int (MonitorStatus.UP == 1) if hasattr(status, "value"):
if status == 1: status = status.value
up_count += 1 is_up = (status == 1)
elif status == 3:
paused_count += 1
else:
down.append(name)
else: else:
# No heartbeats = unknown, treat as down is_up = False
down.append(name)
if is_external:
if is_up:
external_up += 1
else:
external_down.append(name.replace("[External] ", ""))
else:
if is_up:
internal_up += 1
else:
internal_down.append(name)
api.disconnect() api.disconnect()
down_count = len(down) int_down_names = ", ".join(internal_down) if internal_down else ""
total_active = up_count + down_count ext_down_names = ", ".join(external_down) if external_down else ""
down_names = ", ".join(down) if down else "" print(f"{len(internal_down)}:{internal_up}:{len(external_down)}:{external_up}:{paused_count}:{int_down_names}|{ext_down_names}")
print(f"{down_count}:{up_count}:{paused_count}:{total_active}:{down_names}")
except Exception as e: except Exception as e:
print(f"CONN_ERROR:{e}") print(f"CONN_ERROR:{e}")
' 2>/dev/null) || result="CONN_ERROR:python execution failed" ' 2>/dev/null) || result="CONN_ERROR:python execution failed"
@ -700,24 +704,35 @@ except Exception as e:
warn "Cannot connect to Uptime Kuma: ${result#CONN_ERROR:}" warn "Cannot connect to Uptime Kuma: ${result#CONN_ERROR:}"
json_add "uptime_kuma" "WARN" "Connection failed" json_add "uptime_kuma" "WARN" "Connection failed"
else else
local down_count up_count paused_count total_active down_names local int_down int_up ext_down ext_up paused_count down_details
down_count=$(echo "$result" | cut -d: -f1) int_down=$(echo "$result" | cut -d: -f1)
up_count=$(echo "$result" | cut -d: -f2) int_up=$(echo "$result" | cut -d: -f2)
paused_count=$(echo "$result" | cut -d: -f3) ext_down=$(echo "$result" | cut -d: -f3)
total_active=$(echo "$result" | cut -d: -f4) ext_up=$(echo "$result" | cut -d: -f4)
down_names=$(echo "$result" | cut -d: -f5-) paused_count=$(echo "$result" | cut -d: -f5)
down_details=$(echo "$result" | cut -d: -f6-)
local int_down_names="${down_details%%|*}"
local ext_down_names="${down_details#*|}"
if [[ "$down_count" -eq 0 ]]; then local total_down=$((int_down + ext_down))
pass "All $total_active active monitors up ($paused_count paused)" local total_up=$((int_up + ext_up))
json_add "uptime_kuma" "PASS" "$total_active up, $paused_count paused" local total_active=$((total_up + total_down))
elif [[ "$down_count" -le 3 ]]; then
[[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors" if [[ "$total_down" -eq 0 ]]; then
warn "$down_count/$total_active monitor(s) down: $down_names" pass "All monitors up — internal: ${int_up}, external: ${ext_up} ($paused_count paused)"
json_add "uptime_kuma" "WARN" "$down_count down: $down_names" json_add "uptime_kuma" "PASS" "internal: $int_up up, external: $ext_up up, $paused_count paused"
else else
[[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors" [[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors"
fail "$down_count/$total_active monitors down: $down_names" local details=""
json_add "uptime_kuma" "FAIL" "$down_count down: $down_names" [[ "$int_down" -gt 0 ]] && details="internal down($int_down): $int_down_names"
[[ "$ext_down" -gt 0 ]] && { [[ -n "$details" ]] && details="$details; "; details="${details}external down($ext_down): $ext_down_names"; }
if [[ "$total_down" -le 3 ]]; then
warn "$total_down/$total_active down: $details"
json_add "uptime_kuma" "WARN" "$details"
else
fail "$total_down/$total_active down: $details"
json_add "uptime_kuma" "FAIL" "$details"
fi
fi fi
fi fi
} }

View file

@ -585,8 +585,19 @@ check_uptime_kuma() {
section 14 "Uptime Kuma Monitors" section 14 "Uptime Kuma Monitors"
local result local result
result=$(~/.venvs/claude/bin/python3 -c ' # Get password from Vault (or env var fallback)
import sys local uk_pass="${UPTIME_KUMA_PASSWORD:-}"
if [[ -z "$uk_pass" ]]; then
uk_pass=$(vault kv get -field=uptime_kuma_admin_password secret/viktor 2>/dev/null) || true
fi
if [[ -z "$uk_pass" ]]; then
warn "Uptime Kuma: password not available (set UPTIME_KUMA_PASSWORD or vault login)"
json_add "uptime_kuma" "WARN" "password not available"
return 0
fi
result=$(UPTIME_KUMA_PASSWORD="$uk_pass" ~/.venvs/claude/bin/python3 -c '
import sys, os
try: try:
from uptime_kuma_api import UptimeKumaApi from uptime_kuma_api import UptimeKumaApi
except ImportError: except ImportError:
@ -594,50 +605,58 @@ except ImportError:
sys.exit(0) sys.exit(0)
try: try:
api = UptimeKumaApi("https://uptime.viktorbarzin.me", timeout=30) api = UptimeKumaApi("https://uptime.viktorbarzin.me", timeout=120, wait_events=0.2)
api.login("admin", "EUxhLr4w4NFsGehy") api.login("admin", os.environ["UPTIME_KUMA_PASSWORD"])
monitors = api.get_monitors() monitors = api.get_monitors()
# Build id->name map and track active/paused heartbeats = api.get_heartbeats()
id_to_name = {}
# Separate internal and external monitors
internal_up = 0
internal_down = []
external_up = 0
external_down = []
paused_count = 0 paused_count = 0
for m in monitors: for m in monitors:
mid = m.get("id") mid = m.get("id")
name = m.get("name", "unknown") name = m.get("name", "unknown")
active = m.get("active", True) active = m.get("active", True)
is_external = name.startswith("[External] ")
if not active: if not active:
paused_count += 1 paused_count += 1
else: continue
id_to_name[mid] = name
# Use bulk heartbeat fetch (single API call) instead of per-monitor calls
heartbeats = api.get_heartbeats()
down = []
up_count = 0
for mid, name in id_to_name.items():
beats = heartbeats.get(mid, []) beats = heartbeats.get(mid, [])
if beats: if beats:
last_beat = beats[-1] last_beat = beats[-1]
# Handle nested lists (some monitors return list of lists)
if isinstance(last_beat, list): if isinstance(last_beat, list):
last_beat = last_beat[-1] if last_beat else {} last_beat = last_beat[-1] if last_beat else {}
status = last_beat.get("status", 0) if isinstance(last_beat, dict) else 0 status = last_beat.get("status", 0) if isinstance(last_beat, dict) else 0
if status == 1: if hasattr(status, "value"):
up_count += 1 status = status.value
elif status == 3: is_up = (status == 1)
paused_count += 1
else:
down.append(name)
else: else:
down.append(name) is_up = False
if is_external:
if is_up:
external_up += 1
else:
external_down.append(name.replace("[External] ", ""))
else:
if is_up:
internal_up += 1
else:
internal_down.append(name)
api.disconnect() api.disconnect()
down_count = len(down) int_down_names = ", ".join(internal_down) if internal_down else ""
total_active = up_count + down_count ext_down_names = ", ".join(external_down) if external_down else ""
down_names = ", ".join(down) if down else "" # Format: int_down:int_up:ext_down:ext_up:paused:int_down_names|ext_down_names
print(f"{down_count}:{up_count}:{paused_count}:{total_active}:{down_names}") print(f"{len(internal_down)}:{internal_up}:{len(external_down)}:{external_up}:{paused_count}:{int_down_names}|{ext_down_names}")
except Exception as e: except Exception as e:
print(f"CONN_ERROR:{e}") print(f"CONN_ERROR:{e}")
' 2>/dev/null) || result="CONN_ERROR:python execution failed" ' 2>/dev/null) || result="CONN_ERROR:python execution failed"
@ -651,24 +670,35 @@ except Exception as e:
warn "Cannot connect to Uptime Kuma: ${result#CONN_ERROR:}" warn "Cannot connect to Uptime Kuma: ${result#CONN_ERROR:}"
json_add "uptime_kuma" "WARN" "Connection failed" json_add "uptime_kuma" "WARN" "Connection failed"
else else
local down_count up_count paused_count total_active down_names local int_down int_up ext_down ext_up paused_count down_details
down_count=$(echo "$result" | cut -d: -f1) int_down=$(echo "$result" | cut -d: -f1)
up_count=$(echo "$result" | cut -d: -f2) int_up=$(echo "$result" | cut -d: -f2)
paused_count=$(echo "$result" | cut -d: -f3) ext_down=$(echo "$result" | cut -d: -f3)
total_active=$(echo "$result" | cut -d: -f4) ext_up=$(echo "$result" | cut -d: -f4)
down_names=$(echo "$result" | cut -d: -f5-) paused_count=$(echo "$result" | cut -d: -f5)
down_details=$(echo "$result" | cut -d: -f6-)
local int_down_names="${down_details%%|*}"
local ext_down_names="${down_details#*|}"
if [[ "$down_count" -eq 0 ]]; then local total_down=$((int_down + ext_down))
pass "All $total_active active monitors up ($paused_count paused)" local total_up=$((int_up + ext_up))
json_add "uptime_kuma" "PASS" "$total_active up, $paused_count paused" local total_active=$((total_up + total_down))
elif [[ "$down_count" -le 3 ]]; then
[[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors" if [[ "$total_down" -eq 0 ]]; then
warn "$down_count/$total_active monitor(s) down: $down_names" pass "All monitors up — internal: ${int_up}, external: ${ext_up} ($paused_count paused)"
json_add "uptime_kuma" "WARN" "$down_count down: $down_names" json_add "uptime_kuma" "PASS" "internal: $int_up up, external: $ext_up up, $paused_count paused"
else else
[[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors" [[ "$QUIET" == true ]] && section_always 14 "Uptime Kuma Monitors"
fail "$down_count/$total_active monitors down: $down_names" local details=""
json_add "uptime_kuma" "FAIL" "$down_count down: $down_names" [[ "$int_down" -gt 0 ]] && details="internal down($int_down): $int_down_names"
[[ "$ext_down" -gt 0 ]] && { [[ -n "$details" ]] && details="$details; "; details="${details}external down($ext_down): $ext_down_names"; }
if [[ "$total_down" -le 3 ]]; then
warn "$total_down/$total_active down: $details"
json_add "uptime_kuma" "WARN" "$details"
else
fail "$total_down/$total_active down: $details"
json_add "uptime_kuma" "FAIL" "$details"
fi
fi fi
fi fi
} }