android-emulator: GPU rendering on node1 + scale-to-zero wake gate
Viktor's direction (2026-06-12): the emulator is dev-only, so it should be on-demand, and it should use the T4 where applicable. (1) api36-v5 runs '-gpu host' on the GPU node (nodeSelector + time-slice + EGL libs; automatic swiftshader fallback if GPU init dies) — screen-on rendering moves off the CPU (~5 cores → expected 1-2). (2) The wake gate (stdlib python, owns / on both hostnames) scales the deployment 0→1 on visit and hands the browser to noVNC when ready; agents GET /wake + /status. The idle-sleeper CronJob counts established adb/noVNC connections via /proc/net/tcp (excluding the in-container loopback adb client) and scales to zero after 4 idle checks (~1h). TF ignores replicas drift. VRAM cost (~0.5-1GiB) is held only while awake, protecting llama-swap headroom.
This commit is contained in:
parent
39a22b352e
commit
f4dd515fd7
7 changed files with 467 additions and 32 deletions
|
|
@ -80,6 +80,15 @@ resource "kubernetes_deployment" "android-emulator" {
|
|||
labels = { app = "android-emulator" }
|
||||
}
|
||||
spec {
|
||||
node_selector = {
|
||||
"nvidia.com/gpu.present" : "true"
|
||||
}
|
||||
toleration {
|
||||
key = "nvidia.com/gpu"
|
||||
operator = "Equal"
|
||||
value = "true"
|
||||
effect = "NoSchedule"
|
||||
}
|
||||
image_pull_secrets {
|
||||
name = "registry-credentials"
|
||||
}
|
||||
|
|
@ -121,7 +130,8 @@ resource "kubernetes_deployment" "android-emulator" {
|
|||
memory = "3Gi"
|
||||
}
|
||||
limits = {
|
||||
memory = "8Gi"
|
||||
memory = "8Gi"
|
||||
"nvidia.com/gpu" = "1" # T4 time-slice; ~0.5-1GiB VRAM while awake
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -167,7 +177,12 @@ resource "kubernetes_deployment" "android-emulator" {
|
|||
}
|
||||
}
|
||||
lifecycle {
|
||||
ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
|
||||
ignore_changes = [
|
||||
spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
|
||||
# the wake gate + idle sleeper own replicas (scale-to-zero on demand);
|
||||
# an apply must not resurrect or kill the emulator.
|
||||
spec[0].replicas,
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -215,45 +230,82 @@ resource "kubernetes_service" "novnc" {
|
|||
}
|
||||
}
|
||||
|
||||
# Browser screen view (noVNC) — LAN only.
|
||||
module "ingress-internal" {
|
||||
# Ingress layout, same on both hostnames: the wake gate owns `/` (visiting
|
||||
# wakes a sleeping emulator), while the noVNC asset/socket paths go straight
|
||||
# to the emulator service. LAN (.lan) is unauthenticated local-only for
|
||||
# agents; public (.me) is Authentik-gated for humans.
|
||||
locals {
|
||||
novnc_paths = [
|
||||
"/vnc.html", "/app", "/core", "/vendor",
|
||||
"/websockify", "/package.json", "/defaults.json", "/mandatory.json",
|
||||
]
|
||||
}
|
||||
|
||||
module "ingress-internal-gate" {
|
||||
source = "../../modules/kubernetes/ingress_factory"
|
||||
# auth = "none": LAN-only (allow_local_access_only) noVNC screen view of the
|
||||
# shared test emulator — no user data behind it; Authentik would break the
|
||||
# websocket flow agents and users rely on.
|
||||
# auth = "none": LAN-only (allow_local_access_only) wake gate + screen for
|
||||
# the shared test emulator — no user data behind it; agents need cookie-free
|
||||
# curl access and Authentik would break the noVNC websocket flow.
|
||||
auth = "none"
|
||||
namespace = kubernetes_namespace.android-emulator.metadata[0].name
|
||||
name = "android-emulator"
|
||||
root_domain = "viktorbarzin.lan"
|
||||
service_name = kubernetes_service.gate.metadata[0].name
|
||||
tls_secret_name = var.tls_secret_name
|
||||
allow_local_access_only = true
|
||||
ssl_redirect = false
|
||||
extra_annotations = {
|
||||
"gethomepage.dev/enabled" = "false"
|
||||
}
|
||||
# noVNC loads ~60 unbundled ES modules in parallel; the default 10/50
|
||||
# limiter 429s the tail and the loader hangs forever. Dedicated limiter,
|
||||
# same pattern as actualbudget/immich.
|
||||
skip_default_rate_limit = true
|
||||
extra_middlewares = ["traefik-android-emulator-rate-limit@kubernetescrd"]
|
||||
}
|
||||
|
||||
# Remote (off-LAN) screen access — Authentik-gated at the edge; WebSockets
|
||||
# work through forward-auth same-origin (proven by stacks/terminal's ttyd).
|
||||
# adb (5555) deliberately stays LAN-only: it is unauthenticated and must
|
||||
# never be exposed publicly.
|
||||
module "ingress-public" {
|
||||
module "ingress-internal-novnc" {
|
||||
source = "../../modules/kubernetes/ingress_factory"
|
||||
# auth = "none": LAN-only noVNC paths (see ingress-internal-gate above).
|
||||
auth = "none"
|
||||
namespace = kubernetes_namespace.android-emulator.metadata[0].name
|
||||
name = "android-emulator-novnc"
|
||||
host = "android-emulator"
|
||||
root_domain = "viktorbarzin.lan"
|
||||
service_name = kubernetes_service.novnc.metadata[0].name
|
||||
ingress_path = local.novnc_paths
|
||||
tls_secret_name = var.tls_secret_name
|
||||
allow_local_access_only = true
|
||||
ssl_redirect = false
|
||||
# noVNC loads ~60 unbundled ES modules in parallel; the default 10/50
|
||||
# limiter 429s the tail and the loader hangs forever.
|
||||
skip_default_rate_limit = true
|
||||
extra_middlewares = ["traefik-android-emulator-rate-limit@kubernetescrd"]
|
||||
extra_annotations = {
|
||||
"gethomepage.dev/enabled" = "false"
|
||||
}
|
||||
}
|
||||
|
||||
# Remote (off-LAN) access — Authentik-gated at the edge; WebSockets work
|
||||
# through forward-auth same-origin (proven by stacks/terminal's ttyd).
|
||||
# adb (5555) deliberately stays LAN-only: it is unauthenticated.
|
||||
module "ingress-public-gate" {
|
||||
source = "../../modules/kubernetes/ingress_factory"
|
||||
auth = "required"
|
||||
dns_type = "proxied"
|
||||
namespace = kubernetes_namespace.android-emulator.metadata[0].name
|
||||
name = "android-emulator-public"
|
||||
host = "android-emulator"
|
||||
service_name = kubernetes_service.novnc.metadata[0].name
|
||||
service_name = kubernetes_service.gate.metadata[0].name
|
||||
tls_secret_name = var.tls_secret_name
|
||||
# noVNC loads ~60 unbundled ES modules in parallel; the default 10/50
|
||||
# limiter 429s the tail and the loader hangs forever. Dedicated limiter,
|
||||
# same pattern as actualbudget/immich.
|
||||
}
|
||||
|
||||
module "ingress-public-novnc" {
|
||||
source = "../../modules/kubernetes/ingress_factory"
|
||||
auth = "required"
|
||||
namespace = kubernetes_namespace.android-emulator.metadata[0].name
|
||||
name = "android-emulator-public-novnc"
|
||||
host = "android-emulator"
|
||||
service_name = kubernetes_service.novnc.metadata[0].name
|
||||
ingress_path = local.novnc_paths
|
||||
tls_secret_name = var.tls_secret_name
|
||||
# see ingress-internal-novnc — noVNC's parallel module storm needs the
|
||||
# dedicated limiter.
|
||||
skip_default_rate_limit = true
|
||||
extra_middlewares = ["traefik-android-emulator-rate-limit@kubernetescrd"]
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue