# On-demand lifecycle: the emulator scales to ZERO when idle and wakes on
# visit. The gate (tiny stdlib-python HTTP server) owns `/` on both emulator
# hostnames — it scales the deployment up and hands the browser to noVNC once
# ready; agents use GET /wake + /status. The idle-sleeper CronJob scales back
# to zero after 6h with no user activity (open adb/VNC connections are
# deliberately ignored — see the CronJob at the bottom of this file).
# Decision: Viktor 2026-06-12 — dev-only usage, and an always-on GPU emulator
# would permanently hold T4 VRAM that the LLM jobs need.

# Identity shared by the wake gate Deployment and the idle-sleeper CronJob.
resource "kubernetes_service_account" "gate" {
  metadata {
    name      = "android-emulator-gate"
    namespace = kubernetes_namespace.android-emulator.metadata[0].name
  }
}

# Namespace-scoped permissions for that identity.
resource "kubernetes_role" "gate" {
  metadata {
    name      = "android-emulator-gate"
    namespace = kubernetes_namespace.android-emulator.metadata[0].name
  }

  # Read and patch ONLY the emulator deployment. Scaling is done by patching
  # spec.replicas directly, so no `deployments/scale` grant is needed.
  rule {
    api_groups     = ["apps"]
    resources      = ["deployments"]
    resource_names = ["android-emulator"]
    verbs          = ["get", "patch"]
  }

  # Find the emulator pod.
  rule {
    api_groups = [""]
    resources  = ["pods"]
    verbs      = ["get", "list"]
  }

  # `kubectl exec` into the emulator pod (the sleeper runs adb through it).
  rule {
    api_groups = [""]
    resources  = ["pods/exec"]
    verbs      = ["create"]
  }
}

resource "kubernetes_role_binding" "gate" {
  metadata {
    name      = "android-emulator-gate"
    namespace = kubernetes_namespace.android-emulator.metadata[0].name
  }

  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "Role"
    name      = kubernetes_role.gate.metadata[0].name
  }

  subject {
    kind      = "ServiceAccount"
    name      = kubernetes_service_account.gate.metadata[0].name
    namespace = kubernetes_namespace.android-emulator.metadata[0].name
  }
}

# Ships gate.py (next to this module) into the gate pod as /app/gate.py.
resource "kubernetes_config_map" "gate" {
  metadata {
    name      = "android-emulator-gate"
    namespace = kubernetes_namespace.android-emulator.metadata[0].name
  }

  data = {
    "gate.py" = file("${path.module}/gate.py")
  }
}

# Wake side: single-replica gate running gate.py on port 8080.
resource "kubernetes_deployment" "gate" {
  metadata {
    name      = "android-emulator-gate"
    namespace = kubernetes_namespace.android-emulator.metadata[0].name
    labels = {
      app = "android-emulator-gate"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "android-emulator-gate"
      }
    }

    template {
      metadata {
        labels = {
          app = "android-emulator-gate"
        }
        # Changing gate.py changes this annotation, which rolls the pod so the
        # new script is actually picked up.
        annotations = {
          "checksum/gate" = sha1(file("${path.module}/gate.py"))
        }
      }

      spec {
        service_account_name = kubernetes_service_account.gate.metadata[0].name

        container {
          name    = "gate"
          image   = "python:3.12-alpine"
          command = ["python", "/app/gate.py"]

          env {
            name  = "NAMESPACE"
            value = kubernetes_namespace.android-emulator.metadata[0].name
          }

          env {
            name  = "DEPLOYMENT"
            value = "android-emulator"
          }

          port {
            container_port = 8080
          }

          volume_mount {
            name       = "app"
            mount_path = "/app"
          }

          resources {
            requests = {
              cpu    = "10m"
              memory = "64Mi"
            }
            limits = {
              memory = "64Mi"
            }
          }

          readiness_probe {
            http_get {
              path = "/healthz"
              port = 8080
            }
            period_seconds = 10
          }
        }

        volume {
          name = "app"
          config_map {
            name = kubernetes_config_map.gate.metadata[0].name
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
  }
}

# Exposes the gate inside the cluster on port 80 -> container port 8080.
resource "kubernetes_service" "gate" {
  metadata {
    name      = "android-emulator-gate"
    namespace = kubernetes_namespace.android-emulator.metadata[0].name
  }

  spec {
    selector = {
      app = "android-emulator-gate"
    }

    port {
      name        = "http"
      port        = 80
      target_port = 8080
    }
  }
}

# Sleep side: every 15 min, ask the emulator how long since it was actually
# USED — dumpsys power's last user-activity time (taps/keys/app-launches,
# including noVNC clicks) vs guest uptime. No activity for 6h → scale the
# deployment to zero. This deliberately IGNORES open adb/noVNC connections:
# a forgotten adb transport (connect with no disconnect) stays ESTABLISHED
# forever, so the old connection-count check kept resetting and the emulator
# never slept (up 6+ days while idle ~5). Reads activity via `kubectl exec`
# (the SA has pods/exec) and scales down with a direct replicas patch on the
# named deployment — the SAME path the wake gate scales UP — so it needs only
# the existing `deployments` patch grant, NOT `deployments/scale` (which the
# SA lacks; the old `kubectl scale` here failed Forbidden). Stateless: no
# idle-counter annotation. Fail-safe: any read error → do NOT sleep.
#
# NOTE: the script lives in a Terraform heredoc, so a literal dollar-brace or
# percent-brace sequence would be parsed as a template and must be escaped by
# doubling the first character. The script avoids both on purpose.
resource "kubernetes_cron_job_v1" "idle_sleeper" {
  metadata {
    name      = "android-emulator-idle-sleeper"
    namespace = kubernetes_namespace.android-emulator.metadata[0].name
  }

  spec {
    schedule                      = "*/15 * * * *"
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 1
    failed_jobs_history_limit     = 2

    job_template {
      metadata {}

      spec {
        # One attempt per tick; the next scheduled run is the retry.
        backoff_limit              = 0
        ttl_seconds_after_finished = 3600

        template {
          metadata {}

          spec {
            service_account_name = kubernetes_service_account.gate.metadata[0].name
            restart_policy       = "Never"

            container {
              name = "sleeper"
              # NOTE(review): `latest` is unpinned, so the kubectl/bash/awk
              # versions can change between runs — consider pinning a tag.
              image   = "bitnami/kubectl:latest"
              command = ["/bin/bash", "-c"]
              args = [<<-EOT
                set -eu
                NS=android-emulator
                DEPLOY=android-emulator
                IDLE_LIMIT_SECONDS=21600   # 6h with no user activity -> sleep

                spec=$(kubectl -n $NS get deploy $DEPLOY -o jsonpath='{.spec.replicas}')
                [ "$spec" = "0" ] && { echo "already asleep"; exit 0; }

                # List ALL running pods and take the first name. Do not index
                # `.items[0]` in the jsonpath: on an empty list kubectl exits
                # non-zero ("array index out of bounds"), which under `set -e`
                # killed the job before the "no running pod" guard below could
                # run. `.items[*]` yields an empty string with exit 0 instead;
                # a genuine API failure still aborts the run (fail-safe).
                pods=$(kubectl -n $NS get pods -l app=$DEPLOY --field-selector=status.phase=Running -o jsonpath='{.items[*].metadata.name}')
                pod=$(printf '%s' "$pods" | awk '{print $1}')
                [ -z "$pod" ] && { echo "no running pod (booting?) — not sleeping"; exit 0; }

                # How long since the emulator was actually used? Compare the last
                # user-activity time from dumpsys power (taps/keys/app-launches,
                # incl. noVNC clicks) with current guest uptime, both in ms on
                # the guest uptime clock. Capture first, then parse: NO pipefail
                # and no early `exit` in awk, so a streaming `dumpsys` can't
                # SIGPIPE the exec and trip set -e (that bug made every run die
                # 141 with no output). Fail-safe: a still-booting emulator (adb
                # not ready) yields empty values -> do NOT sleep.
                uptime_raw=$(kubectl -n $NS exec $pod -- adb shell cat /proc/uptime 2>/dev/null || true)
                dump=$(kubectl -n $NS exec $pod -- adb shell dumpsys power 2>/dev/null || true)
                uptime_ms=$(printf '%s' "$uptime_raw" | awk '{printf "%d", $1*1000}')
                # Keep only the FIRST run of digits of the value: if dumpsys
                # appends a relative suffix such as "(1234 ms ago)" after the
                # timestamp, stripping every non-digit would glue both numbers
                # together and make the emulator look permanently active.
                last_ms=$(printf '%s' "$dump" | awk -F= '/mLastUserActivityTime\(excludingAttention\)/{v=$2} END{if (match(v, /[0-9]+/)) print substr(v, RSTART, RLENGTH)}')

                if [ -z "$uptime_ms" ] || [ -z "$last_ms" ]; then
                  echo "could not read activity (emulator booting / adb not ready) — not sleeping"
                  exit 0
                fi

                idle_s=$(( (uptime_ms - last_ms) / 1000 ))
                echo "idle for $idle_s s (limit $IDLE_LIMIT_SECONDS s / 6h)"
                if [ "$idle_s" -ge "$IDLE_LIMIT_SECONDS" ]; then
                  echo "idle >= 6h with no user activity — scaling to zero"
                  kubectl -n $NS patch deploy $DEPLOY --type=merge -p '{"spec":{"replicas":0}}'
                else
                  echo "used within 6h — staying up"
                fi
              EOT
              ]

              resources {
                requests = {
                  cpu    = "10m"
                  memory = "64Mi"
                }
                limits = {
                  memory = "128Mi"
                }
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
  }
}