infra/.claude/scripts/crowdsec-status.sh
Viktor Barzin ff83ec3325 add infrastructure agent team: 8 specialized agents + 14 diagnostic scripts
Agents: devops-engineer, dba, security-engineer, sre, network-engineer,
platform-engineer, observability-engineer, home-automation-engineer.
Scripts: deploy-status, db-health, backup-verify, tls-check, crowdsec-status,
authentik-audit, oom-investigator, resource-report, dns-check, network-health,
nfs-health, truenas-status, platform-status, monitoring-health.
Also: known-issues.md suppression list, cluster-health-checker port-forward fix.
2026-03-15 02:01:07 +00:00

166 lines
5.1 KiB
Bash
Executable file

#!/usr/bin/env bash
set -euo pipefail
KUBECTL="kubectl --kubeconfig /Users/viktorbarzin/code/infra/config"
AGENT="crowdsec-status"
DRY_RUN=false
for arg in "$@"; do
case "$arg" in
--dry-run) DRY_RUN=true ;;
esac
done
checks=()
add_check() {
local name="$1" status="$2" message="$3"
checks+=("{\"name\": \"$name\", \"status\": \"$status\", \"message\": \"$message\"}")
}
find_crowdsec_namespace() {
$KUBECTL get pods -A -l app.kubernetes.io/name=crowdsec --no-headers 2>/dev/null | head -1 | awk '{print $1}' || \
$KUBECTL get pods -A --no-headers 2>/dev/null | grep -i crowdsec | head -1 | awk '{print $1}' || \
echo "crowdsec"
}
check_lapi_health() {
if $DRY_RUN; then
add_check "crowdsec-lapi" "ok" "dry-run: would check CrowdSec LAPI pod health"
return
fi
local ns
ns=$(find_crowdsec_namespace)
local lapi_pod
lapi_pod=$($KUBECTL get pods -n "$ns" -l app.kubernetes.io/name=crowdsec,app.kubernetes.io/component=lapi --no-headers 2>/dev/null | head -1) || true
if [ -z "$lapi_pod" ]; then
lapi_pod=$($KUBECTL get pods -n "$ns" --no-headers 2>/dev/null | grep -i "crowdsec.*lapi" | head -1) || true
fi
if [ -z "$lapi_pod" ]; then
add_check "crowdsec-lapi" "fail" "No CrowdSec LAPI pod found in namespace ${ns}"
return
fi
local pod_name status
pod_name=$(echo "$lapi_pod" | awk '{print $1}')
status=$(echo "$lapi_pod" | awk '{print $3}')
if [ "$status" != "Running" ]; then
add_check "crowdsec-lapi" "fail" "LAPI pod ${pod_name} is ${status}"
return
fi
add_check "crowdsec-lapi" "ok" "LAPI pod ${pod_name} is Running"
}
check_cscli_metrics() {
if $DRY_RUN; then
add_check "crowdsec-metrics" "ok" "dry-run: would run cscli metrics via kubectl exec"
return
fi
local ns
ns=$(find_crowdsec_namespace)
local lapi_pod
lapi_pod=$($KUBECTL get pods -n "$ns" -l app.kubernetes.io/name=crowdsec,app.kubernetes.io/component=lapi -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || \
lapi_pod=$($KUBECTL get pods -n "$ns" --no-headers 2>/dev/null | grep -i "crowdsec.*lapi" | head -1 | awk '{print $1}') || true
if [ -z "$lapi_pod" ]; then
add_check "crowdsec-metrics" "warn" "No LAPI pod found to run cscli metrics"
return
fi
local metrics_output
metrics_output=$($KUBECTL exec -n "$ns" "$lapi_pod" -- cscli metrics 2>/dev/null) || {
add_check "crowdsec-metrics" "warn" "Failed to run cscli metrics on ${lapi_pod}"
return
}
add_check "crowdsec-metrics" "ok" "cscli metrics returned successfully"
}
check_decisions() {
if $DRY_RUN; then
add_check "crowdsec-decisions" "ok" "dry-run: would check cscli decisions list"
return
fi
local ns
ns=$(find_crowdsec_namespace)
local lapi_pod
lapi_pod=$($KUBECTL get pods -n "$ns" -l app.kubernetes.io/name=crowdsec,app.kubernetes.io/component=lapi -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || \
lapi_pod=$($KUBECTL get pods -n "$ns" --no-headers 2>/dev/null | grep -i "crowdsec.*lapi" | head -1 | awk '{print $1}') || true
if [ -z "$lapi_pod" ]; then
add_check "crowdsec-decisions" "warn" "No LAPI pod found to check decisions"
return
fi
local decisions
decisions=$($KUBECTL exec -n "$ns" "$lapi_pod" -- cscli decisions list -o json 2>/dev/null) || {
add_check "crowdsec-decisions" "ok" "No active decisions (or failed to query)"
return
}
local count
count=$(echo "$decisions" | jq 'if type == "array" then length else 0 end' 2>/dev/null || echo "0")
if [ "$count" -gt 0 ]; then
add_check "crowdsec-decisions" "ok" "${count} active decision(s)"
else
add_check "crowdsec-decisions" "ok" "No active decisions"
fi
}
check_agent_daemonset() {
if $DRY_RUN; then
add_check "crowdsec-agents" "ok" "dry-run: would check CrowdSec agent DaemonSet"
return
fi
local ns
ns=$(find_crowdsec_namespace)
local ds_json
ds_json=$($KUBECTL get daemonset -n "$ns" -l app.kubernetes.io/name=crowdsec -o json 2>/dev/null) || {
# Fallback: search by name
ds_json=$($KUBECTL get daemonset -n "$ns" -o json 2>/dev/null | jq '{items: [.items[] | select(.metadata.name | test("crowdsec"))]}') || {
add_check "crowdsec-agents" "warn" "No CrowdSec DaemonSet found"
return
}
}
local desired ready
desired=$(echo "$ds_json" | jq '[.items[].status.desiredNumberScheduled] | add // 0' 2>/dev/null || echo "0")
ready=$(echo "$ds_json" | jq '[.items[].status.numberReady] | add // 0' 2>/dev/null || echo "0")
if [ "$ready" -lt "$desired" ]; then
add_check "crowdsec-agents" "warn" "CrowdSec agents: ${ready}/${desired} ready"
elif [ "$desired" -eq 0 ]; then
add_check "crowdsec-agents" "warn" "No CrowdSec agent DaemonSet pods scheduled"
else
add_check "crowdsec-agents" "ok" "CrowdSec agents: ${ready}/${desired} ready"
fi
}
check_lapi_health
check_cscli_metrics
check_decisions
check_agent_daemonset
# Output JSON
overall="ok"
for c in "${checks[@]}"; do
s=$(echo "$c" | jq -r '.status')
if [ "$s" = "fail" ]; then overall="fail"; break; fi
if [ "$s" = "warn" ]; then overall="warn"; fi
done
printf '{"status": "%s", "agent": "%s", "checks": [%s]}\n' \
"$overall" "$AGENT" "$(IFS=,; echo "${checks[*]}")"