add agent route to k8s-portal

This commit is contained in:
Viktor Barzin 2026-03-23 02:25:08 +02:00
parent 6bfade3013
commit e9311915cb
2 changed files with 222 additions and 0 deletions


@@ -0,0 +1,61 @@
<main class="content">
<h1>Agent Bootstrap</h1>
<p>Point any AI coding agent at this cluster and it can bootstrap itself automatically.</p>
<section>
<h2>For AI Agents</h2>
<p>Fetch the machine-readable bootstrap document:</p>
<pre>curl -fsSL https://k8s-portal.viktorbarzin.me/agent</pre>
<p>This returns a plain-text markdown document with everything an agent needs: setup commands, critical rules, secrets workflow, Terraform conventions, key file paths, and common operations.</p>
</section>
<section>
<h2>Usage with Claude Code</h2>
<pre>claude "$(curl -fsSL https://k8s-portal.viktorbarzin.me/agent)" "Deploy a new echo service"</pre>
<p>Or within a session:</p>
<ol>
<li>Clone the repo: <code>git clone https://github.com/ViktorBarzin/infra.git && cd infra</code></li>
<li>Start Claude Code: <code>claude</code></li>
<li>Claude auto-reads <code>AGENTS.md</code> and <code>.claude/CLAUDE.md</code> from the repo</li>
</ol>
</section>
<section>
<h2>Usage with Codex / Other Agents</h2>
<ol>
<li>Clone the repo and <code>cd</code> into it</li>
<li>Run the setup script: <code>bash &lt;(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)</code></li>
<li>Start the agent — it will read <code>AGENTS.md</code> for instructions</li>
</ol>
<p>If the agent doesn't auto-read <code>AGENTS.md</code>, feed it the bootstrap doc:</p>
<pre>curl -fsSL https://k8s-portal.viktorbarzin.me/agent</pre>
</section>
<section>
<h2>What the Agent Gets</h2>
<ul>
<li>Quick-start commands (setup script, repo clone)</li>
<li>Critical rules (no kubectl apply, no plaintext secrets, no NFS restart)</li>
<li>Sealed Secrets workflow (kubeseal self-service)</li>
<li>Terraform conventions (fileset pattern, tiers, storage, shared vars)</li>
<li>Key file paths in the repo</li>
<li>Common operations (deploy, fix pods, add secrets)</li>
<li>Contributing workflow (branch, PR, review, CI)</li>
</ul>
</section>
<section>
<h2>The <code>/agent</code> Endpoint</h2>
<p>The endpoint is <strong>unauthenticated</strong> — no login required. Agents can <code>curl</code> or <code>WebFetch</code> it directly without a browser session, just like the setup script.</p>
<p>Content-Type: <code>text/plain</code> — no HTML parsing needed.</p>
</section>
</main>
<style>
.content { max-width: 768px; margin: 2rem auto; padding: 0 1rem; font-family: system-ui, -apple-system, sans-serif; line-height: 1.6; }
.content h1 { border-bottom: 1px solid #e0e0e0; padding-bottom: 0.5rem; }
.content h2 { margin-top: 2rem; color: #333; }
.content pre { background: #1e1e1e; color: #d4d4d4; padding: 1rem; border-radius: 6px; overflow-x: auto; }
.content code { background: #f0f0f0; padding: 2px 6px; border-radius: 3px; }
section { margin: 2rem 0; }
</style>


@@ -0,0 +1,161 @@
import type { RequestHandler } from './$types';
const BOOTSTRAP_DOC = `# Infrastructure Cluster — AI Agent Bootstrap
> Fetch this document: \`curl -fsSL https://k8s-portal.viktorbarzin.me/agent\`
## Quick Start
\`\`\`bash
# 1. Install tools (kubectl, kubelogin, kubeseal)
bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)
# 2. Clone the infrastructure repo
git clone https://github.com/ViktorBarzin/infra.git && cd infra
# 3. Verify cluster access (opens browser for OIDC login on first run)
kubectl get namespaces
\`\`\`
## Critical Rules (MUST FOLLOW)
- **ALL changes through Terraform/Terragrunt.** NEVER use \`kubectl apply/edit/patch/delete\` for persistent changes; read-only kubectl is fine.
- **NEVER put secrets in plaintext.** Use Sealed Secrets (\`kubeseal\`) or \`secrets.sops.json\` (SOPS-encrypted).
- **NEVER restart NFS on TrueNAS.** It causes cluster-wide mount failures across all pods.
- **NEVER commit secrets.** Triple-check before every commit.
- **Add \`[ci skip]\` to commit messages** when changes were already applied locally.
- **Ask before \`git push\`** — always confirm with the user first.
## Sealed Secrets (Self-Service)
You can manage your own secrets without SOPS access using \`kubeseal\`:
\`\`\`bash
# 1. Create a sealed secret
kubectl create secret generic <name> \\
  --from-literal=key=value -n <namespace> \\
  --dry-run=client -o yaml | \\
  kubeseal --controller-name sealed-secrets \\
    --controller-namespace sealed-secrets -o yaml > sealed-<name>.yaml
# 2. Place the file in the stack directory: stacks/<service>/sealed-<name>.yaml
# 3. Ensure the stack's main.tf has the fileset block (add if missing):
\`\`\`
\`\`\`hcl
resource "kubernetes_manifest" "sealed_secrets" {
  for_each = fileset(path.module, "sealed-*.yaml")
  manifest = yamldecode(file("\${path.module}/\${each.value}"))
}
\`\`\`
\`\`\`bash
# 4. Push to a PR; CI runs terragrunt apply; the controller decrypts into real K8s Secrets
\`\`\`
- Files MUST match the \`sealed-*.yaml\` glob pattern.
- Only the in-cluster controller has the private key. \`kubeseal\` uses the public key — safe to distribute.
- The \`kubernetes_manifest\` block is safe to add even with zero sealed-*.yaml files (empty for_each).
## SOPS Secrets (Admin-Only Fallback)
For secrets requiring admin access (shared infra passwords, API keys):
- **\`secrets.sops.json\`** — SOPS-encrypted secrets (JSON format)
- **Edit**: \`sops secrets.sops.json\` (opens $EDITOR, re-encrypts on save)
- **Add**: \`sops set secrets.sops.json '["new_key"]' '"value"'\`
- **Operators without SOPS keys**: comment on your PR asking Viktor to add the secret.
## Terraform Conventions
### Execution
- **Apply a service**: \`scripts/tg apply --non-interactive\` (auto-decrypts SOPS secrets)
- **Plan**: \`scripts/tg plan --non-interactive\`
- **kubectl**: \`kubectl --kubeconfig $(pwd)/config\`
- **Health check**: \`bash scripts/cluster_healthcheck.sh --quiet\`
### Key Paths
| Path | Purpose |
|------|---------|
| \`stacks/<service>/main.tf\` | Service definition |
| \`stacks/platform/modules/<module>/\` | Core infra modules (~22) |
| \`modules/kubernetes/ingress_factory/\` | Standardized ingress (auth, rate limiting, anti-AI) |
| \`modules/kubernetes/nfs_volume/\` | NFS volume module (CSI-backed, soft mount) |
| \`config.tfvars\` | Non-secret configuration (plaintext) |
| \`secrets.sops.json\` | All secrets (SOPS-encrypted JSON) |
| \`scripts/cluster_healthcheck.sh\` | 25-check cluster health script |
| \`AGENTS.md\` | Full AI agent instructions (auto-loaded by most agents) |
### Tier System
\`0-core\` | \`1-cluster\` | \`2-gpu\` | \`3-edge\` | \`4-aux\`
Kyverno auto-generates LimitRange + ResourceQuota per namespace based on tier label.
- Containers without explicit \`resources {}\` get default limits (256Mi for edge/aux — causes OOMKill for heavy apps)
- Always set explicit resources on containers that need more than defaults
- Opt-out labels: \`resource-governance/custom-quota=true\` / \`resource-governance/custom-limitrange=true\`
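As a sketch of the rule above (container names and values are illustrative, not from any actual stack), an explicit \`resources\` block inside a stack's deployment spec:
\`\`\`hcl
# Illustrative only: override the Kyverno tier defaults for a memory-heavy app
resources {
  requests = {
    cpu    = "100m"
    memory = "512Mi"
  }
  limits = {
    cpu    = "1"
    memory = "2Gi" # above the 256Mi edge/aux default, avoids OOMKill
  }
}
\`\`\`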
### Storage
- **NFS** (\`nfs-truenas\` StorageClass): For app data. Use the \`nfs_volume\` module.
- **iSCSI** (\`iscsi-truenas\` StorageClass): For databases (PostgreSQL, MySQL).
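A hypothetical invocation of the \`nfs_volume\` module; the argument names below are illustrative, so consult the module's \`variables.tf\` for the real interface:
\`\`\`hcl
# Sketch only: argument names are assumptions, check modules/kubernetes/nfs_volume/
module "my_service_data" {
  source    = "../../modules/kubernetes/nfs_volume"
  name      = "my-service-data"
  namespace = "my-service"
}
\`\`\`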
### Shared Variables (never hardcode)
\`var.nfs_server\`, \`var.redis_host\`, \`var.postgresql_host\`, \`var.mysql_host\`, \`var.ollama_host\`, \`var.mail_host\`
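For example, a container env entry should reference the shared variable rather than a hardcoded host:
\`\`\`hcl
env {
  name  = "DATABASE_HOST"
  value = var.postgresql_host # shared variable; never hardcode an IP or hostname
}
\`\`\`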
## Architecture
- Terragrunt-based homelab managing a Kubernetes cluster (5 nodes, v1.34.2) on Proxmox VMs
- 70+ services, each in \`stacks/<service>/\` with its own Terraform state
- Core platform: \`stacks/platform/modules/\` (Traefik, Kyverno, monitoring, dbaas, sealed-secrets, etc.)
- Public domain: \`viktorbarzin.me\` (Cloudflare) | Internal: \`viktorbarzin.lan\` (Technitium DNS)
- CI/CD: Woodpecker CI; PRs run plan, merges to master auto-apply the platform stack
## Common Operations
### Deploy a New Service
1. Copy an existing stack as template: \`cp -r stacks/echo stacks/my-service\`
2. Edit \`main.tf\` — update image, ports, ingress, resources
3. Add DNS in \`config.tfvars\`
4. Apply platform first if needed, then the service
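The steps above can be sketched as (service name hypothetical):
\`\`\`bash
cp -r stacks/echo stacks/my-service
$EDITOR stacks/my-service/main.tf      # update image, ports, ingress, resources
$EDITOR config.tfvars                  # add the DNS entry
scripts/tg plan --non-interactive      # review before applying
scripts/tg apply --non-interactive
\`\`\`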
### Fix Crashed Pods
1. Run \`bash scripts/cluster_healthcheck.sh --quiet\`
2. Safe to delete evicted/failed pods and CrashLoopBackOff pods with >10 restarts
3. OOMKilled? Check \`kubectl describe limitrange tier-defaults -n <ns>\` and increase \`resources.limits.memory\`
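The cleanup described above can be sketched with standard kubectl (namespace is a placeholder; remember the kubeconfig flag from the Execution section):
\`\`\`bash
# Delete failed/evicted pods in a namespace
kubectl --kubeconfig $(pwd)/config delete pods --field-selector=status.phase=Failed -n <namespace>
# Find CrashLoopBackOff pods and their restart counts
kubectl --kubeconfig $(pwd)/config get pods -A | grep CrashLoopBackOff
# Inspect the tier defaults before raising memory limits
kubectl --kubeconfig $(pwd)/config describe limitrange tier-defaults -n <namespace>
\`\`\`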
### Add a Secret
- **Self-service**: Use \`kubeseal\` (see Sealed Secrets section above)
- **Admin**: \`sops set secrets.sops.json '["key"]' '"value"'\` then commit
## Contributing Workflow
1. Create a branch: \`git checkout -b fix/my-change\`
2. Make changes in \`stacks/<service>/main.tf\`
3. Push and open a PR: \`git push -u origin fix/my-change\`
4. Viktor reviews and merges
5. CI applies automatically; Slack notification when done
## Infrastructure Details
- **Proxmox**: 192.168.1.127 (Dell R730, 22c/44t, 142GB RAM)
- **Nodes**: k8s-master (10.0.20.100), node1 (GPU, Tesla T4), node2-4
- **GPU workloads**: \`node_selector = { "gpu": "true" }\` + toleration \`nvidia.com/gpu\`
- **Pull-through cache**: 10.0.20.10; use versioned image tags (cache serves stale :latest manifests)
- **MySQL InnoDB Cluster**: 3 instances on iSCSI
- **SMTP**: \`var.mail_host\` port 587 STARTTLS
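A sketch of the GPU scheduling settings named above inside a deployment spec (the \`operator\`/\`effect\` values are assumptions; only the key comes from this doc):
\`\`\`hcl
node_selector = {
  "gpu" = "true"
}
toleration {
  key      = "nvidia.com/gpu" # from the GPU workloads note above
  operator = "Exists"         # assumed; adjust to match the node taint
  effect   = "NoSchedule"     # assumed
}
\`\`\`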
## Further Reading
- Full agent instructions: \`AGENTS.md\` in the repo root
- Patterns and examples: \`.claude/reference/patterns.md\`
- Service catalog: \`.claude/reference/service-catalog.md\`
- Onboarding guide: https://k8s-portal.viktorbarzin.me/onboarding
`;
export const GET: RequestHandler = async () => {
	return new Response(BOOTSTRAP_DOC, {
		headers: {
			'Content-Type': 'text/plain; charset=utf-8',
			'Cache-Control': 'public, max-age=3600'
		}
	});
};