From e63a8120621f296a24bd0e893c17b4fa300d0c19 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 1 Jun 2026 08:24:08 +0000 Subject: [PATCH] kms: dedicated vlmcs.viktorbarzin.me endpoint + Anubis /scripts carve-out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Internal split-horizon resolves kms.viktorbarzin.me to Traefik (10.0.20.203), which has no :1688 listener — so LAN clients pointed at kms.viktorbarzin.me:1688 failed with 0xC004F074 "no KMS could be contacted". Add a dedicated A-only vlmcs.viktorbarzin.me (cloudflare_record.vlmcs -> 176.12.22.76 for the public WAN NAT; Technitium -> 10.0.20.202 internal, set via API) so it resolves to vlmcsd both ways. Also carve /scripts/* out of Anubis (module.ingress_scripts -> bare kms-web-page service) so `iwr | iex` downloads the real script instead of the PoW challenge HTML. Verified end-to-end on Win VM 300: reproduced 0xC004F074 on the old host, then slmgr + ospp + both PowerShell one-liners all -> Licensed via vlmcs (10.0.20.202). Docs: kms-public-exposure runbook + service-catalog entry. 
Co-Authored-By: Claude Opus 4.7 --- .claude/reference/service-catalog.md | 2 +- docs/runbooks/kms-public-exposure.md | 37 +++++++++++++++++++++---- stacks/kms/main.tf | 41 +++++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 8 deletions(-) diff --git a/.claude/reference/service-catalog.md b/.claude/reference/service-catalog.md index 08a13a0b..7ef62e1c 100644 --- a/.claude/reference/service-catalog.md +++ b/.claude/reference/service-catalog.md @@ -62,7 +62,7 @@ | blog | Personal blog | blog | | descheduler | Pod descheduler | descheduler | | hackmd | Collaborative markdown | hackmd | -| kms | Key management | kms | +| kms | Windows/Office volume-license activation (vlmcsd); site kms.viktorbarzin.me, endpoint vlmcs.viktorbarzin.me:1688 | kms | | privatebin | Encrypted pastebin | privatebin | | vault | HashiCorp Vault | vault | | reloader | ConfigMap/Secret reloader | reloader | diff --git a/docs/runbooks/kms-public-exposure.md b/docs/runbooks/kms-public-exposure.md index 2e727003..049f8c5d 100644 --- a/docs/runbooks/kms-public-exposure.md +++ b/docs/runbooks/kms-public-exposure.md @@ -1,9 +1,24 @@ -# Runbook: KMS public exposure (kms.viktorbarzin.me:1688) +# Runbook: KMS public exposure (vlmcs.viktorbarzin.me:1688) -`kms.viktorbarzin.me:1688/TCP` is intentionally open to the internet so any +`vlmcs.viktorbarzin.me:1688/TCP` is intentionally open to the internet so any visitor can activate Volume License Microsoft products. The webpage at `https://kms.viktorbarzin.me/` documents how to use it. +**Two hostnames, on purpose** (do not merge them): + +- `kms.viktorbarzin.me` — the **website** (Traefik). Serves the docs and the + `/scripts/*.ps1` activators. Internally resolves to the Traefik LB + (`10.0.20.203`), which has **no** `:1688` listener. +- `vlmcs.viktorbarzin.me` — the **KMS endpoint** (vlmcsd). A-only (no AAAA — + the IPv6 tunnel doesn't forward 1688). 
Resolves to `10.0.20.202` on the LAN + (Technitium split-horizon record, set via API) and to `176.12.22.76` on the + internet (DNS-only Cloudflare A owned by `cloudflare_record.vlmcs` in + `stacks/kms` → pfSense WAN NAT :1688). Every `slmgr` / `ospp` command on the + page points here. + +Pointing a client at `kms.viktorbarzin.me:1688` fails from the LAN with +`0xC004F074` ("KMS server cannot be reached") — that name is the website, not the KMS server. + This runbook covers operations on the public exposure: where to find logs, how to tune the rate limit, how to revoke if abused. @@ -25,9 +40,10 @@ how to tune the rate limit, how to revoke if abused. - `kms.viktorbarzin.lan` A `10.0.20.200` (Traefik — for the user-facing website at `https://kms.viktorbarzin.lan/`; **not** the KMS server) Manual override (e.g., for clients without the suffix or for clients - on the public internet): `slmgr /skms kms.viktorbarzin.me:1688` (WAN - path via pfSense forward) or `slmgr /skms 10.0.20.202:1688` (direct). - To revert a manually-overridden client back to auto-discovery: + on the public internet): `slmgr /skms vlmcs.viktorbarzin.me:1688` (works + LAN + WAN) or `slmgr /skms 10.0.20.202:1688` (LAN, direct). Do **not** use + `kms.viktorbarzin.me:1688` — that name is the website (Traefik), not the + KMS server. To revert a manually-overridden client back to auto-discovery: `slmgr /ckms`. - **Pod fluidity**: deployment has `replicas=1` (notifier dedup state is per-pod) with no node affinity. TCP readiness/liveness probes on 1688 @@ -54,6 +70,14 @@ how to tune the rate limit, how to revoke if abused. `kms_connection_probes_total{source}` (`source` ∈ `internal_pod`, `cluster_node`, `external`) and log to stdout, but never post to Slack. Real activations still post. +- **Website `/scripts` carve-out**: the website is Anubis-fronted (PoW + challenge). 
`/scripts/*` is carved out to the bare nginx backend + (`module.ingress_scripts` in `stacks/kms`) because PowerShell `iwr | iex` + is a non-JS client and can't solve the PoW — without the carve-out the + one-liner downloads the Anubis challenge HTML and `iex` chokes on it. + Everything except `/scripts/*` stays behind Anubis. Verify: + `curl -A curl https://kms.viktorbarzin.me/scripts/setup-kms.ps1` returns + the script (not "Making sure you're not a bot!"). ## Where the logs are @@ -153,6 +177,7 @@ itself is independent of any forward and persists across delete/restore. - Stack: `stacks/kms/` (Terraform; deployment, MetalLB Service, ingress, ExternalSecret for the Slack webhook) -- Webpage source: `kms-website/` repo (Hugo + nginx, deployed via Drone CI) +- Webpage source: `kms-website/` repo (Hugo + nginx; Woodpecker builds + + pushes to forgejo, then `kubectl set image deployment/kms-web-page`) - Networking architecture footnote: `docs/architecture/networking.md` § "MetalLB & Load Balancing" diff --git a/stacks/kms/main.tf b/stacks/kms/main.tf index 83c9dd7f..63140ced 100644 --- a/stacks/kms/main.tf +++ b/stacks/kms/main.tf @@ -9,7 +9,7 @@ resource "kubernetes_namespace" "kms" { name = "kms" labels = { "istio-injection" : "disabled" - tier = local.tiers.aux + tier = local.tiers.aux "keel.sh/enrolled" = "true" } } @@ -133,6 +133,45 @@ module "ingress" { } } +# Carve-out for /scripts/* — the PowerShell activators (kms-bootstrap.ps1, +# setup-kms.ps1) that visitors fetch with `iwr ... | iex`. Anubis cannot gate +# this path: PowerShell/curl are non-JS clients and can't solve the PoW +# challenge, so they'd receive the challenge HTML and `iex` would choke on it. +# Points at the bare kms-web-page nginx service, bypassing the Anubis proxy. +# Traefik prioritises the longer /scripts prefix over the main "/" router. +module "ingress_scripts" { + source = "../../modules/kubernetes/ingress_factory" + # auth = "none": public read-only static scripts (iwr|iex). 
No login, no PoW. + auth = "none" + namespace = kubernetes_namespace.kms.metadata[0].name + name = "kms-scripts" + service_name = kubernetes_service.kms-web-page.metadata[0].name + port = "80" + ingress_path = ["/scripts"] + full_host = "kms.viktorbarzin.me" # MUST match the main ingress host; without this the factory derives kms-scripts.viktorbarzin.me and the carve-out never matches. + dns_type = "none" # DNS already owned by the main kms ingress. + tls_secret_name = var.tls_secret_name + anti_ai_scraping = false # Two static scripts; nothing for scrapers to mine. +} + +# Dedicated KMS endpoint hostname. kms.viktorbarzin.me is the *website* (Traefik +# 10.0.20.203 internally / :443 externally) and cannot also serve raw KMS on +# :1688, so clients pointed at kms.viktorbarzin.me:1688 from the LAN hit Traefik +# (no 1688 listener) and fail with "KMS server cannot be reached". vlmcs.* is +# A-only (NO AAAA — the IPv6 tunnel doesn't forward 1688) and resolves to the +# vlmcsd MetalLB IP both ways: +# external: vlmcs.viktorbarzin.me -> 176.12.22.76 -> pfSense WAN NAT :1688 -> 10.0.20.202 +# internal: vlmcs.viktorbarzin.me -> 10.0.20.202 (Technitium split-horizon, set via API) +resource "cloudflare_record" "vlmcs" { + name = "vlmcs" + content = "176.12.22.76" # public_ip (mirrors config.tfvars / ingress_factory default) + proxied = false # raw TCP 1688 — Cloudflare proxy is HTTP-only + ttl = 1 + type = "A" + zone_id = "fd2c5dd4efe8fe38958944e74d0ced6d" # cloudflare_zone_id + allow_overwrite = true +} + resource "kubernetes_config_map" "kms_slack_notifier" { metadata { name = "kms-slack-notifier"