[ci skip] Remove legacy files and orphaned modules

Delete 20 orphaned module directories and 3 stray files from
modules/kubernetes/ that are no longer referenced by any stack.
Remove 7 root-level legacy files including the empty tfstate,
27MB terraform zip, commented-out main.tf, and migration notes.
Clean up commented-out dockerhub_secret and oauth-proxy references
in blog, travel_blog, and city-guesser stacks. Remove stale
frigate config.yaml entry from .gitignore. Remove ephemeral
docs/plans/ directory.
Viktor Barzin 2026-02-22 15:23:27 +00:00
parent c7c7047f1c
commit 116c4d9c30
56 changed files with 2 additions and 9402 deletions


@@ -39,9 +39,8 @@ Terragrunt-based infrastructure repository managing a home Kubernetes cluster on
- **Per-stack state**: `state/stacks/<service>/terraform.tfstate` - Per-stack state files (gitignored)
- **Service resources**: `stacks/<service>/main.tf` - Service resources defined directly in stack root
- **Platform modules**: `stacks/platform/modules/<service>/` - Platform service modules
- **Shared modules**: `modules/kubernetes/ingress_factory/`, `modules/kubernetes/setup_tls_secret/`, `modules/kubernetes/dockerhub_secret/`, `modules/kubernetes/oauth-proxy/`
- **Shared modules**: `modules/kubernetes/ingress_factory/`, `modules/kubernetes/setup_tls_secret/`
- **Secrets**: `secrets/` - git-crypt encrypted TLS certs and keys
- **Legacy (unused)**: `main.tf`, `modules/kubernetes/main.tf` - Old monolithic entry points (kept for reference)
## Network Topology (Static IPs)
```
@@ -81,7 +80,7 @@ Terragrunt-based infrastructure repository managing a home Kubernetes cluster on
- `stacks/platform/` - Core infrastructure (22 services in `stacks/platform/modules/`)
- `stacks/<service>/` - Individual service stacks (resources directly in `main.tf`)
- `stacks/platform/modules/<service>/` - Platform service module source code
- `modules/kubernetes/` - **Only shared utility modules**: `ingress_factory/`, `setup_tls_secret/`, `dockerhub_secret/`, `oauth-proxy/`
- `modules/kubernetes/` - **Only shared utility modules**: `ingress_factory/`, `setup_tls_secret/`
- `modules/create-vm/` - Proxmox VM creation module
- `state/` - Per-stack Terraform state files (gitignored)
- `secrets/` - Encrypted secrets (TLS certs, keys) via git-crypt

.gitignore (vendored)

@@ -35,8 +35,6 @@ override.tf.json
git_crypt.key
modules/kubernetes/frigate/config.yaml
# Claude Code - temporary/sensitive files
.claude/cmd_input.txt
.claude/cmd_output.txt

.terraform.lock.hcl (generated)

@@ -1,130 +0,0 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.5"
constraints = "~> 4.0"
hashes = [
"h1:+rfzF+16ZcWZWnTyW/p1HHTzYbPKX8Zt2nIFtR/+f+E=",
"h1:18bXaaOSq8MWKuMxo/4y7EB7/i7G90y5QsKHZRmkoDo=",
"zh:1a3400cb38863b2585968d1876706bcfc67a148e1318a1d325c6c7704adc999b",
"zh:4c5062cb9e9da1676f06ae92b8370186d98976cc4c7030d3cd76df12af54282a",
"zh:52110f493b5f0587ef77a1cfd1a67001fd4c617b14c6502d732ab47352bdc2f7",
"zh:5aa536f9eaeb43823aaf2aa80e7d39b25ef2b383405ed034aa16a28b446a9238",
"zh:5cc39459a1c6be8a918f17054e4fbba573825ed5597dcada588fe99614d98a5b",
"zh:629ae6a7ba298815131da826474d199312d21cec53a4d5ded4fa56a692e6f072",
"zh:719cc7c75dc1d3eb30c22ff5102a017996d9788b948078c7e1c5b3446aeca661",
"zh:8698635a3ca04383c1e93b21d6963346bdae54d27177a48e4b1435b7f731731c",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:8a9993f1dcadf1dd6ca43b23348abe374605d29945a2fafc07fb3457644e6a54",
"zh:b1b9a1e6bcc24d5863a664a411d2dc906373ae7a2399d2d65548ce7377057852",
"zh:b270184cdeec277218e84b94cb136fead753da717f9b9dc378e51907f3f00bb0",
"zh:dff2bc10071210181726ce270f954995fe42c696e61e2e8f874021fed02521e5",
"zh:e8e87b40b6a87dc097b0fdc20d3f725cec0d82abc9cc3755c1f89f8f6e8b0036",
"zh:ee964a6573d399a5dd22ce328fb38ca1207797a02248f14b2e4913ee390e7803",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.0.1"
constraints = "3.0.1"
hashes = [
"h1:P0c8knzZnouTNFIRij8IS7+pqd0OKaFDYX0j4GRsiqo=",
"h1:vyHdH0p6bf9xp1NPePObAJkXTJb/I09FQQmmevTzZe0=",
"zh:02d55b0b2238fd17ffa12d5464593864e80f402b90b31f6e1bd02249b9727281",
"zh:20b93a51bfeed82682b3c12f09bac3031f5bdb4977c47c97a042e4df4fb2f9ba",
"zh:6e14486ecfaee38c09ccf33d4fdaf791409f90795c1b66e026c226fad8bc03c7",
"zh:8d0656ff422df94575668e32c310980193fccb1c28117e5c78dd2d4050a760a6",
"zh:9795119b30ec0c1baa99a79abace56ac850b6e6fbce60e7f6067792f6eb4b5f4",
"zh:b388c87acc40f6bd9620f4e23f01f3c7b41d9b88a68d5255dec0a72f0bdec249",
"zh:b59abd0a980649c2f97f172392f080eaeb18e486b603f83bf95f5d93aeccc090",
"zh:ba6e3060fddf4a022087d8f09e38aa0001c705f21170c2ded3d1c26c12f70d97",
"zh:c12626d044b1d5501cf95ca78cbe507c13ad1dd9f12d4736df66eb8e5f336eb8",
"zh:c55203240d50f4cdeb3df1e1760630d677679f5b1a6ffd9eba23662a4ad05119",
"zh:ea206a5a32d6e0d6e32f1849ad703da9a28355d9c516282a8458b5cf1502b2a1",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/null" {
version = "3.2.4"
hashes = [
"h1:L5V05xwp/Gto1leRryuesxjMfgZwjb7oool4WS1UEFQ=",
"h1:hkf5w5B6q8e2A42ND2CjAvgvSN3puAosDmOJb3zCVQM=",
"zh:59f6b52ab4ff35739647f9509ee6d93d7c032985d9f8c6237d1f8a59471bbbe2",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:795c897119ff082133150121d39ff26cb5f89a730a2c8c26f3a9c1abf81a9c43",
"zh:7b9c7b16f118fbc2b05a983817b8ce2f86df125857966ad356353baf4bff5c0a",
"zh:85e33ab43e0e1726e5f97a874b8e24820b6565ff8076523cc2922ba671492991",
"zh:9d32ac3619cfc93eb3c4f423492a8e0f79db05fec58e449dee9b2d5873d5f69f",
"zh:9e15c3c9dd8e0d1e3731841d44c34571b6c97f5b95e8296a45318b94e5287a6e",
"zh:b4c2ab35d1b7696c30b64bf2c0f3a62329107bd1a9121ce70683dec58af19615",
"zh:c43723e8cc65bcdf5e0c92581dcbbdcbdcf18b8d2037406a5f2033b1e22de442",
"zh:ceb5495d9c31bfb299d246ab333f08c7fb0d67a4f82681fbf47f2a21c3e11ab5",
"zh:e171026b3659305c558d9804062762d168f50ba02b88b231d20ec99578a6233f",
"zh:ed0fe2acdb61330b01841fa790be00ec6beaac91d41f311fb8254f74eb6a711f",
]
}
provider "registry.terraform.io/hashicorp/random" {
version = "3.8.1"
hashes = [
"h1:Eexl06+6J+s75uD46+WnZtpJZYRVUMB0AiuPBifK6Jc=",
"h1:u8AKlWVDTH5r9YLSeswoVEjiY72Rt4/ch7U+61ZDkiQ=",
"zh:08dd03b918c7b55713026037c5400c48af5b9f468f483463321bd18e17b907b4",
"zh:0eee654a5542dc1d41920bbf2419032d6f0d5625b03bd81339e5b33394a3e0ae",
"zh:229665ddf060aa0ed315597908483eee5b818a17d09b6417a0f52fd9405c4f57",
"zh:2469d2e48f28076254a2a3fc327f184914566d9e40c5780b8d96ebf7205f8bc0",
"zh:37d7eb334d9561f335e748280f5535a384a88675af9a9eac439d4cfd663bcb66",
"zh:741101426a2f2c52dee37122f0f4a2f2d6af6d852cb1db634480a86398fa3511",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:a902473f08ef8df62cfe6116bd6c157070a93f66622384300de235a533e9d4a9",
"zh:b85c511a23e57a2147355932b3b6dce2a11e856b941165793a0c3d7578d94d05",
"zh:c5172226d18eaac95b1daac80172287b69d4ce32750c82ad77fa0768be4ea4b8",
"zh:dab4434dba34aad569b0bc243c2d3f3ff86dd7740def373f2a49816bd2ff819b",
"zh:f49fd62aa8c5525a5c17abd51e27ca5e213881d58882fd42fec4a545b53c9699",
]
}
provider "registry.terraform.io/telmate/proxmox" {
version = "3.0.2-rc07"
constraints = "3.0.2-rc07"
hashes = [
"h1:0UpRJ8PFsu9lhD3p2KUdUNVsDPbjZLPR46wYRpt1dxc=",
"h1:zp5hpQJQ4t4zROSLqdltVpBO+Riy9VugtfFbpyTw1aM=",
"zh:2ee860cd0a368b3eaa53f4a9ea46f16dab8a97929e813ea6ef55183f8112c2ca",
"zh:415965fd915bae2040d7f79e45f64d6e3ae61149c10114efeac1b34687d7296c",
"zh:6584b2055df0e32062561c615e3b6b2c291ca8c959440adda09ef3ec1e1436bd",
"zh:65dcfad71928e0a8dd9befc22524ed686be5020b0024dc5cca5184c7420eeb6b",
"zh:7253dc29bd265d33f2791ac4f779c5413f16720bb717de8e6c5fcb2c858648ea",
"zh:7ec8993da10a47606670f9f67cfd10719a7580641d11c7aa761121c4a2bd66fb",
"zh:999a3f7a9dcf517967fc537e6ec930a8172203642fb01b8e1f78f908373db210",
"zh:a50e6df7280eb6584a5fd2456e3f5b6df13b2ec8a7fa4605511e438e1863be42",
"zh:b25b329a1e42681c509d027fee0365414f0cc5062b65690cfc3386aab16132ae",
"zh:c028877fdb438ece48f7bc02b65bbae9ca7b7befbd260e519ccab6c0cbb39f26",
"zh:cf0eaa3ea9fcc6d62793637947f1b8d7c885b6ad74695ab47e134e4ff132190f",
"zh:d5ade3fae031cc629b7c512a7b60e46570f4c41665e88a595d7efd943dde5ab2",
"zh:f388c15ad1ecfc09e7361e3b98bae9b627a3a85f7b908c9f40650969c949901c",
"zh:f415cc6f735a3971faae6ac24034afdb9ee83373ef8de19a9631c187d5adc7db",
]
}


@@ -1,42 +0,0 @@
apiVersion: v1
data:
Corefile: |
.:53 {
#log
errors
health {
lameduck 5s
}
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
prometheus :9153
#forward . 1.1.1.1
forward . 10.0.20.1
#forward . /etc/resolv.conf
cache {
success 10000 300 6
denial 10000 300 60
}
loop
reload
loadbalance
}
viktorbarzin.lan:53 {
#log
errors
#forward . 10.102.184.76
forward . 10.0.20.101:30053 # this must be the same as the technitium nodeport svc
#forward . technitium-dns.technitium.svc.cluster.local
cache {
success 10000 300 6
denial 10000 300 60
}
}
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system


@@ -1,140 +0,0 @@
# Centralized Log Collection Design
## Date: 2026-02-13
## Goal
Centrally collect logs from all Kubernetes pods for monitoring and alerting. Minimize disk I/O by holding logs in memory for extended periods, flushing to NFS once daily. Alert on log patterns via existing Alertmanager pipeline.
## Requirements
- **Primary use case**: Monitoring and alerting (log-based alert rules evaluated in real-time)
- **Retention**: 7 days on disk after flush
- **Memory budget**: 4-8GB total (~6.6GB used)
- **Disk strategy**: 24h in-memory chunks, WAL on tmpfs, single daily flush to NFS
- **Crash policy**: Accept up to 24h log loss on pod/node crash (alerts still fire in real-time before flush)
- **Alert delivery**: Loki Ruler -> existing Alertmanager -> Slack/email
## Architecture
```
┌──────────────────┐ ┌──────────────────────┐ ┌──────────────┐
│ Alloy DaemonSet │ │ Loki SingleBinary │ │ Grafana │
│ 5 pods, 128Mi ea │────>│ 1 pod, 6Gi RAM │<────│ (existing) │
│ tails /var/log/ │ │ │ │ + Loki │
│ pods on each node│ │ Ingester: 24h chunks │ │ datasource │
└──────────────────┘ │ WAL: tmpfs (in-memory) │ └──────────────┘
│ Storage: NFS 15Gi │
┌──────────────────┐ │ Ruler ──> Alertmanager │
│ Sysctl DaemonSet │ └──────────────────────┘
│ 5 pods (pause) │
│ sets inotify │
│ limits on nodes │
└──────────────────┘
```
## Components
### 1. Sysctl DaemonSet
Solves the `too many open files` / fsnotify watcher exhaustion problem that previously blocked Alloy.
- Privileged init container runs `sysctl -w` on each node
- Settings: `fs.inotify.max_user_watches=1048576`, `fs.inotify.max_user_instances=512`, `fs.inotify.max_queued_events=1048576`
- Main container: `pause` image (near-zero resources)
- Survives node reboots (DaemonSet recreates pod)
- Namespace: `monitoring`
### 2. Loki (Helm Release)
Single-binary deployment. Existing Helm chart config in `loki.yaml`, updated with:
**Ingester tuning (disk-friendly):**
- `chunk_idle_period: 12h` — don't flush idle streams quickly
- `max_chunk_age: 24h` — hold chunks in memory for full day
- `chunk_retain_period: 1m` — brief retain after flush
- `chunk_target_size: 1572864` (1.5MB) — larger chunks = fewer writes
- WAL: tmpfs emptyDir (`medium: Memory`, 2Gi limit)
**Retention:**
- `retention_period: 168h` (7 days)
- Compactor enabled for retention enforcement
**Ruler:**
- Evaluates LogQL alert rules in real-time (before chunk flush)
- Fires to `http://prometheus-alertmanager.monitoring.svc.cluster.local:9093`
**Storage:**
- NFS PV/PVC at `/mnt/main/loki/loki` (15Gi, existing)
- TSDB index with 24h period
**Resources:**
- Memory: 6Gi limit
- CPU: 1 limit
### 3. Alloy (Helm Release)
DaemonSet log collector. Existing config in `alloy.yaml` is complete:
- Discovers pods via `discovery.kubernetes`
- Labels: namespace, pod, container, app, job, container_runtime, cluster
- Tails `/var/log/pods/` on each node
- Forwards to `http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push`
**Resources per pod:**
- Memory: 128Mi limit
- CPU: 200m limit
### 4. Grafana Datasource
ConfigMap with label `grafana_datasource: "1"` for sidecar auto-discovery:
- Name: Loki
- Type: loki
- URL: `http://loki.monitoring.svc.cluster.local:3100`
- Existing `loki.json` dashboard already in dashboards directory
### 5. Starter Alert Rules
Configured in Loki Ruler (evaluated in real-time, before disk flush):
| Alert | LogQL Expression | Severity |
|-------|-----------------|----------|
| HighErrorRate | `sum(rate({namespace=~".+"} \|= "error" [5m])) by (namespace) > 10` | warning |
| PodCrashLoopBackOff | `count_over_time({namespace=~".+"} \|= "CrashLoopBackOff" [5m]) > 0` | critical |
| OOMKilled | `count_over_time({namespace=~".+"} \|= "OOMKilled" [5m]) > 0` | critical |
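In the ruler's on-disk format these become an ordinary Prometheus-style rule group. A minimal sketch (the group name and `for:` durations are illustrative assumptions, not part of the table above):
```yaml
groups:
  - name: log-alerts
    rules:
      - alert: HighErrorRate
        expr: sum(rate({namespace=~".+"} |= "error" [5m])) by (namespace) > 10
        for: 5m
        labels:
          severity: warning
      - alert: PodCrashLoopBackOff
        expr: count_over_time({namespace=~".+"} |= "CrashLoopBackOff" [5m]) > 0
        labels:
          severity: critical
      - alert: OOMKilled
        expr: count_over_time({namespace=~".+"} |= "OOMKilled" [5m]) > 0
        labels:
          severity: critical
```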
## Memory Budget
| Component | Per-pod | Pods | Total |
|-----------|---------|------|-------|
| Alloy | 128Mi | 5 | 640Mi |
| Loki | 6Gi | 1 | 6Gi |
| Sysctl DS | ~0 (pause) | 5 | ~0 |
| **Total** | | | **~6.6 GB** |
## Files to Change
| File | Action |
|------|--------|
| `modules/kubernetes/monitoring/loki.tf` | Uncomment Loki + Alloy helm releases, add sysctl DaemonSet, add Grafana Loki datasource ConfigMap |
| `modules/kubernetes/monitoring/loki.yaml` | Update with ingester tuning, ruler config, retention, resource limits |
| `modules/kubernetes/monitoring/alloy.yaml` | Add resource limits in Helm values wrapper |
| `secrets/nfs_directories.txt` | Ensure `/mnt/main/loki` entries exist |
## Implementation Steps
1. Add sysctl DaemonSet to `loki.tf`
2. Update `loki.yaml` with disk-friendly tuning, ruler, retention, resources
3. Update `alloy.yaml` with resource limits
4. Uncomment Loki Helm release in `loki.tf`, wire up NFS PV/PVC
5. Uncomment Alloy Helm release in `loki.tf`
6. Add Grafana Loki datasource ConfigMap to `loki.tf`
7. Add alert rules to Loki config
8. Ensure NFS exports exist in `secrets/nfs_directories.txt`
9. `terraform apply -target=module.kubernetes_cluster.module.monitoring`
10. Verify: Grafana Explore -> Loki datasource -> query `{namespace="monitoring"}`
## Risks
- **24h data loss on crash**: Accepted trade-off. Alerts fire in real-time before flush, so alert coverage is not affected — only historical log browsing is at risk.
- **Memory pressure**: 6Gi for Loki on a 16GB node is significant. Monitor with existing Prometheus memory alerts.
- **Log volume spikes**: A chatty pod could cause Loki to OOM. Alloy can be configured with rate limiting if needed (future enhancement).


@@ -1,532 +0,0 @@
# Centralized Log Collection Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** Deploy Loki + Alloy for centralized Kubernetes log collection with 24h in-memory chunks, 7-day disk retention, and log-based alerting via existing Alertmanager.
**Architecture:** Alloy DaemonSet tails pod logs on all 5 nodes, forwards to single-binary Loki which holds chunks in 6Gi RAM for 24h before flushing to NFS. Loki Ruler evaluates LogQL alert rules in real-time and fires to Alertmanager. Grafana gets a Loki datasource via sidecar auto-provisioning.
**Tech Stack:** Terraform, Helm (Loki chart, Alloy chart), Kubernetes DaemonSet, NFS, Grafana
**Design doc:** `docs/plans/2026-02-13-centralized-log-collection-design.md`
---
### Task 1: Add sysctl DaemonSet for inotify limits
Alloy uses fsnotify to tail log files. Default kernel limits cause "too many open files" errors. This DaemonSet sets the limits on every node persistently.
**Files:**
- Modify: `modules/kubernetes/monitoring/loki.tf` (replace the comment block at lines 67-71)
**Step 1: Write the sysctl DaemonSet resource**
Replace lines 67-71 (the comment block about sysctl) with this Terraform resource in `loki.tf`:
```hcl
resource "kubernetes_daemon_set_v1" "sysctl-inotify" {
metadata {
name = "sysctl-inotify"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
app = "sysctl-inotify"
}
}
spec {
selector {
match_labels = {
app = "sysctl-inotify"
}
}
template {
metadata {
labels = {
app = "sysctl-inotify"
}
}
spec {
init_container {
name = "sysctl"
image = "busybox:1.37"
command = [
"sh", "-c",
"sysctl -w fs.inotify.max_user_watches=1048576 && sysctl -w fs.inotify.max_user_instances=512 && sysctl -w fs.inotify.max_queued_events=1048576"
]
security_context {
privileged = true
}
}
container {
name = "pause"
image = "registry.k8s.io/pause:3.10"
resources {
requests = {
cpu = "1m"
memory = "4Mi"
}
limits = {
cpu = "1m"
memory = "4Mi"
}
}
}
host_pid = true
toleration {
operator = "Exists"
}
}
}
}
}
```
**Step 2: Run terraform fmt**
Run: `terraform fmt -recursive modules/kubernetes/monitoring/`
**Step 3: Run terraform plan to verify**
Run: `terraform plan -target=module.kubernetes_cluster.module.monitoring -var="kube_config_path=$(pwd)/config" 2>&1 | tail -30`
Expected: Plan shows 1 resource to add (kubernetes_daemon_set_v1.sysctl-inotify)
**Step 4: Commit**
```bash
git add modules/kubernetes/monitoring/loki.tf
git commit -m "[ci skip] Add sysctl DaemonSet for inotify limits"
```
---
### Task 2: Update Loki Helm values with disk-friendly tuning
Configure ingester for 24h in-memory chunks, WAL on tmpfs, 7-day retention, ruler for alerting, and resource limits.
**Files:**
- Modify: `modules/kubernetes/monitoring/loki.yaml` (full rewrite)
**Step 1: Write updated loki.yaml**
Replace entire contents of `loki.yaml` with:
```yaml
loki:
commonConfig:
replication_factor: 1
schemaConfig:
configs:
- from: "2025-04-01"
store: tsdb
object_store: filesystem
schema: v13
index:
prefix: loki_index_
period: 24h
ingester:
chunk_idle_period: 12h
max_chunk_age: 24h
chunk_retain_period: 1m
chunk_target_size: 1572864
wal:
dir: /loki-wal
pattern_ingester:
enabled: true
limits_config:
allow_structured_metadata: true
volume_enabled: true
retention_period: 168h
compactor:
retention_enabled: true
working_directory: /loki/compactor
compaction_interval: 1h
delete_request_store: filesystem
ruler:
enable_api: true
storage:
type: local
local:
directory: /loki/rules
alertmanager_url: http://alertmanager.monitoring.svc.cluster.local:9093
ring:
kvstore:
store: inmemory
rule_path: /loki/scratch
storage:
type: "filesystem"
auth_enabled: false
minio:
enabled: false
deploymentMode: SingleBinary
singleBinary:
replicas: 1
persistence:
enabled: true
size: 15Gi
storageClass: ""
extraVolumes:
- name: wal
emptyDir:
medium: Memory
sizeLimit: 2Gi
- name: rules
configMap:
name: loki-alert-rules
extraVolumeMounts:
- name: wal
mountPath: /loki-wal
- name: rules
mountPath: /loki/rules/fake
resources:
requests:
cpu: 250m
memory: 4Gi
limits:
cpu: "1"
memory: 6Gi
# Zero out replica counts of other deployment modes
backend:
replicas: 0
read:
replicas: 0
write:
replicas: 0
ingester:
replicas: 0
querier:
replicas: 0
queryFrontend:
replicas: 0
queryScheduler:
replicas: 0
distributor:
replicas: 0
compactor:
replicas: 0
indexGateway:
replicas: 0
bloomCompactor:
replicas: 0
bloomGateway:
replicas: 0
```
**Step 2: Commit**
```bash
git add modules/kubernetes/monitoring/loki.yaml
git commit -m "[ci skip] Update Loki config with disk-friendly tuning and ruler"
```
---
### Task 3: Update Alloy Helm values with resource limits
The Alloy config content is already complete. Wrap it in proper Helm values with resource limits.
**Files:**
- Modify: `modules/kubernetes/monitoring/alloy.yaml` (add resource limits)
**Step 1: Add resource limits to alloy.yaml**
Append after the existing `alloy.configMap.content` block (after the last line):
```yaml
# Resource limits for DaemonSet pods
resources:
requests:
cpu: 50m
memory: 64Mi
limits:
cpu: 200m
memory: 128Mi
```
The final file should have the `alloy.configMap.content` block unchanged, with `alloy.resources` added as a sibling under `alloy:`.
**Step 2: Commit**
```bash
git add modules/kubernetes/monitoring/alloy.yaml
git commit -m "[ci skip] Add resource limits to Alloy config"
```
---
### Task 4: Uncomment Loki Helm release and PV in loki.tf
Enable the Loki Helm release and its NFS persistent volume. Remove minio PV (not needed with filesystem storage).
**Files:**
- Modify: `modules/kubernetes/monitoring/loki.tf` (uncomment Loki resources, remove minio PV)
**Step 1: Uncomment the Loki Helm release (lines 1-12)**
Uncomment and update the helm_release to:
```hcl
resource "helm_release" "loki" {
namespace = kubernetes_namespace.monitoring.metadata[0].name
create_namespace = true
name = "loki"
repository = "https://grafana.github.io/helm-charts"
chart = "loki"
values = [templatefile("${path.module}/loki.yaml", {})]
timeout = 300
depends_on = [kubernetes_config_map.loki_alert_rules]
}
```
**Step 2: Uncomment the Loki NFS PV (lines 14-32)**
Uncomment the `kubernetes_persistent_volume.loki` resource as-is.
**Step 3: Remove the minio PV block (lines 34-52)**
Delete the entire `kubernetes_persistent_volume.loki-minio` commented block — minio is disabled.
**Step 4: Run terraform fmt**
Run: `terraform fmt -recursive modules/kubernetes/monitoring/`
**Step 5: Commit**
```bash
git add modules/kubernetes/monitoring/loki.tf
git commit -m "[ci skip] Enable Loki Helm release and NFS PV"
```
---
### Task 5: Uncomment Alloy Helm release in loki.tf
Enable the Alloy Helm release.
**Files:**
- Modify: `modules/kubernetes/monitoring/loki.tf` (uncomment Alloy helm release)
**Step 1: Uncomment and update the Alloy Helm release**
Replace the commented Alloy block with:
```hcl
resource "helm_release" "alloy" {
namespace = kubernetes_namespace.monitoring.metadata[0].name
create_namespace = true
name = "alloy"
repository = "https://grafana.github.io/helm-charts"
chart = "alloy"
values = [file("${path.module}/alloy.yaml")]
atomic = true
depends_on = [helm_release.loki]
}
```
**Step 2: Run terraform fmt**
Run: `terraform fmt -recursive modules/kubernetes/monitoring/`
**Step 3: Commit**
```bash
git add modules/kubernetes/monitoring/loki.tf
git commit -m "[ci skip] Enable Alloy Helm release"
```
---
### Task 6: Add Grafana Loki datasource ConfigMap
Grafana's sidecar auto-discovers ConfigMaps with label `grafana_datasource: "1"`. Create one for Loki.
**Files:**
- Modify: `modules/kubernetes/monitoring/loki.tf` (add ConfigMap resource)
**Step 1: Add the datasource ConfigMap**
Add to `loki.tf`:
```hcl
resource "kubernetes_config_map" "grafana_loki_datasource" {
metadata {
name = "grafana-loki-datasource"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
grafana_datasource = "1"
}
}
data = {
"loki-datasource.yaml" = yamlencode({
apiVersion = 1
datasources = [{
name = "Loki"
type = "loki"
access = "proxy"
url = "http://loki.monitoring.svc.cluster.local:3100"
isDefault = false
}]
})
}
}
```
**Step 2: Run terraform fmt**
Run: `terraform fmt -recursive modules/kubernetes/monitoring/`
**Step 3: Commit**
```bash
git add modules/kubernetes/monitoring/loki.tf
git commit -m "[ci skip] Add Grafana Loki datasource ConfigMap"
```
---
### Task 7: Add Loki alert rules ConfigMap
Create the ConfigMap that Loki's ruler reads for alert rules. Mounted into the Loki pod at `/loki/rules/fake/`.
**Files:**
- Modify: `modules/kubernetes/monitoring/loki.tf` (add alert rules ConfigMap)
**Step 1: Add the alert rules ConfigMap**
Add to `loki.tf`:
```hcl
resource "kubernetes_config_map" "loki_alert_rules" {
metadata {
name = "loki-alert-rules"
namespace = kubernetes_namespace.monitoring.metadata[0].name
}
data = {
"rules.yaml" = yamlencode({
groups = [{
name = "log-alerts"
rules = [
{
alert = "HighErrorRate"
expr = "sum(rate({namespace=~\".+\"} |= \"error\" [5m])) by (namespace) > 10"
for = "5m"
labels = {
severity = "warning"
}
annotations = {
summary = "High error rate in {{ $labels.namespace }}"
}
},
{
alert = "PodCrashLoopBackOff"
expr = "count_over_time({namespace=~\".+\"} |= \"CrashLoopBackOff\" [5m]) > 0"
for = "1m"
labels = {
severity = "critical"
}
annotations = {
summary = "CrashLoopBackOff detected in {{ $labels.namespace }}"
}
},
{
alert = "OOMKilled"
expr = "count_over_time({namespace=~\".+\"} |= \"OOMKilled\" [5m]) > 0"
for = "1m"
labels = {
severity = "critical"
}
annotations = {
summary = "OOMKilled detected in {{ $labels.namespace }}"
}
}
]
}]
})
}
}
```
**Step 2: Run terraform fmt**
Run: `terraform fmt -recursive modules/kubernetes/monitoring/`
**Step 3: Commit**
```bash
git add modules/kubernetes/monitoring/loki.tf
git commit -m "[ci skip] Add Loki alert rules ConfigMap"
```
---
### Task 8: Deploy and verify
Apply all changes via Terraform and verify the stack is working.
**Files:** None (deployment only)
**Step 1: Run terraform apply for monitoring module**
Run: `terraform apply -target=module.kubernetes_cluster.module.monitoring -var="kube_config_path=$(pwd)/config" -auto-approve`
Expected: Multiple resources created (sysctl DaemonSet, Loki Helm release, Alloy Helm release, PV, ConfigMaps)
**Step 2: Verify sysctl DaemonSet is running on all nodes**
Run: `kubectl --kubeconfig $(pwd)/config get ds -n monitoring sysctl-inotify`
Expected: DESIRED=5, CURRENT=5, READY=5
**Step 3: Verify Loki pod is running**
Run: `kubectl --kubeconfig $(pwd)/config get pods -n monitoring -l app.kubernetes.io/name=loki`
Expected: 1/1 Running
**Step 4: Verify Alloy DaemonSet is running**
Run: `kubectl --kubeconfig $(pwd)/config get ds -n monitoring -l app.kubernetes.io/name=alloy`
Expected: DESIRED=5, CURRENT=5, READY=5
**Step 5: Verify Loki is receiving logs**
Run: `kubectl --kubeconfig $(pwd)/config exec -n monitoring deploy/loki -- wget -qO- 'http://localhost:3100/loki/api/v1/labels'`
Expected: JSON response with labels like `namespace`, `pod`, `container`
**Step 6: Verify Grafana has Loki datasource**
Open `https://grafana.viktorbarzin.me/explore`, select "Loki" datasource, run query: `{namespace="monitoring"}`
Expected: Log lines from monitoring namespace pods
**Step 7: Commit final state**
```bash
git add -A
git commit -m "[ci skip] Deploy centralized log collection (Loki + Alloy)"
```
---
### Troubleshooting
**If Alloy pods crash with inotify errors:**
- Check sysctl DaemonSet init logs: `kubectl --kubeconfig $(pwd)/config logs -n monitoring ds/sysctl-inotify -c sysctl`
- Verify sysctl values on node: `kubectl --kubeconfig $(pwd)/config debug node/k8s-node2 -it --image=busybox -- sysctl fs.inotify.max_user_watches`
**If Loki OOMs:**
- Check memory usage: `kubectl --kubeconfig $(pwd)/config top pod -n monitoring -l app.kubernetes.io/name=loki`
- Reduce `max_chunk_age` from 24h to 12h in `loki.yaml` to flush more frequently
**If Grafana doesn't show Loki datasource:**
- Verify ConfigMap has correct label: `kubectl --kubeconfig $(pwd)/config get cm -n monitoring grafana-loki-datasource -o yaml`
- Restart Grafana sidecar: `kubectl --kubeconfig $(pwd)/config rollout restart deploy -n monitoring grafana`
**If Loki PV won't bind:**
- Check NFS export exists: `ssh root@10.0.10.15 'showmount -e localhost | grep loki'`
- Run NFS export script: `cd secrets && bash nfs_exports.sh`


@@ -1,154 +0,0 @@
# Multi-User Kubernetes Access Design
**Date**: 2026-02-17
**Status**: Approved
## Problem
The cluster uses a single `kubernetes-admin` client certificate for all access. There is no way to:
- Give different users different levels of access
- Track who performed which actions
- Enforce resource limits per user
- Onboard new users without sharing admin credentials
## Decision
Native OIDC authentication on the kube-apiserver using Authentik as the identity provider, with Terraform-managed RBAC and a self-service Svelte portal for user onboarding.
### Alternatives Considered
1. **Pinniped (Concierge + Supervisor)**: Avoids API server changes but adds two components to maintain. Requires Pinniped CLI on user machines. Overkill for a single-cluster setup.
2. **kube-oidc-proxy**: Avoids API server changes but adds a proxy in the request path (single point of failure, extra latency). Sporadic maintenance from JetStack.
## Architecture
```
User → Self-Service Portal → Authentik Login → Download Kubeconfig
User → kubectl (with kubelogin) → kube-apiserver → OIDC validation → Authentik
RBAC evaluation
Audit logging → Alloy → Loki → Grafana
```
### User Roles
| Role | Scope | Access |
|------|-------|--------|
| `admin` | Cluster-wide | Full `cluster-admin` access |
| `power-user` | Cluster-wide | Deploy/manage workloads, view all resources, no RBAC/node modification |
| `namespace-owner` | Specific namespaces | Full `admin` within assigned namespaces only |
## Components
### 1. Authentik OIDC Provider
New OAuth2/OIDC application in Authentik configured via Terraform (`modules/kubernetes/authentik/`).
- **Application name**: `kubernetes`
- **Provider type**: OAuth2/OpenID Connect
- **Client type**: Public (no client secret, kubelogin uses PKCE)
- **Redirect URIs**: `http://localhost:8000/callback` (kubelogin default)
- **Scopes**: `openid`, `email`, `profile`, `groups`
- **Property mappings**: Include `groups` claim for RBAC group matching
### 2. kube-apiserver OIDC Flags
One-time change on k8s-master (`10.0.20.100`), automated via Terraform `null_resource` with `remote-exec`.
Added to `/etc/kubernetes/manifests/kube-apiserver.yaml`:
```yaml
- --oidc-issuer-url=https://authentik.viktorbarzin.me/application/o/kubernetes/
- --oidc-client-id=kubernetes
- --oidc-username-claim=email
- --oidc-groups-claim=groups
```
Kubelet auto-restarts the API server pod when the manifest changes. These flags persist through `kubeadm upgrade apply`.
### 3. RBAC (Terraform-managed)
New module: `modules/kubernetes/rbac/main.tf`
**User definition** in `terraform.tfvars`:
```hcl
k8s_users = {
"viktor" = {
role = "admin"
email = "viktor@viktorbarzin.me"
}
"alice" = {
role = "power-user"
email = "alice@example.com"
}
"bob" = {
role = "namespace-owner"
namespaces = ["bob-apps", "bob-dev"]
email = "bob@example.com"
}
}
```
**Resources created per role:**
| Role | Terraform Resources |
|------|-------------------|
| `admin` | `ClusterRoleBinding` → `cluster-admin` for user email |
| `power-user` | Custom `ClusterRole` (workload management, no RBAC/node access) + `ClusterRoleBinding` |
| `namespace-owner` | `Namespace`(s) + `RoleBinding` → built-in `admin` ClusterRole + `ResourceQuota` per namespace |
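As a sketch of how the `admin` case could be expressed with the Terraform `kubernetes` provider (the `for_each` filter and resource naming are assumptions, not the final module):
```hcl
locals {
  admin_users = { for name, u in var.k8s_users : name => u if u.role == "admin" }
}

resource "kubernetes_cluster_role_binding" "admin" {
  for_each = local.admin_users

  metadata {
    name = "oidc-admin-${each.key}"
  }

  subject {
    kind      = "User"
    name      = each.value.email # matches --oidc-username-claim=email
    api_group = "rbac.authorization.k8s.io"
  }

  role_ref {
    kind      = "ClusterRole"
    name      = "cluster-admin"
    api_group = "rbac.authorization.k8s.io"
  }
}
```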
### 4. Self-Service Portal
Svelte (SvelteKit) app at `https://k8s-portal.viktorbarzin.me`.
**Flow:**
1. User visits portal → Authentik login via Traefik forward auth
2. Portal displays user's role and assigned namespaces
3. User downloads pre-configured kubeconfig (generated server-side)
4. Portal shows setup instructions (install kubectl + kubelogin)
**Kubeconfig template** includes:
- Cluster: `https://10.0.20.100:6443` with CA cert
- Auth: `exec` credential plugin pointing to kubelogin
- OIDC issuer URL and client ID pre-configured
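A kubeconfig generated from this template might look roughly like the sketch below; the kubelogin invocation (the `kubectl oidc-login` plugin) and its flags are assumptions:
```yaml
apiVersion: v1
kind: Config
clusters:
  - name: home
    cluster:
      server: https://10.0.20.100:6443
      certificate-authority-data: <base64-encoded CA cert>
contexts:
  - name: home
    context:
      cluster: home
      user: oidc
current-context: home
users:
  - name: oidc
    user:
      exec:
        apiVersion: client.authentication.k8s.io/v1beta1
        command: kubectl
        args:
          - oidc-login
          - get-token
          - --oidc-issuer-url=https://authentik.viktorbarzin.me/application/o/kubernetes/
          - --oidc-client-id=kubernetes
          - --oidc-extra-scope=email
          - --oidc-extra-scope=groups
```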
**Deployment**: Standard Kubernetes deployment + service + ingress, Terraform-managed like other services. No database needed — user role info read from Kubernetes RBAC bindings or a Terraform-generated ConfigMap.
### 5. Audit Logging
Kubernetes audit policy deployed to master via the same `null_resource`.
**Policy** (`/etc/kubernetes/audit-policy.yaml`):
- `RequestResponse` level for OIDC-authenticated users (captures what they changed)
- `Metadata` level for system/service accounts (keeps volume down)
- Secrets logged at `Metadata` level only (no request/response bodies)
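A sketch of that policy in the Kubernetes `audit.k8s.io/v1` format (rule order matters: the first matching rule wins; the exact user/group selectors are assumptions):
```yaml
apiVersion: audit.k8s.io/v1
kind: Policy
rules:
  # Secrets: metadata only, never request/response bodies
  - level: Metadata
    resources:
      - group: ""
        resources: ["secrets"]
  # System components and service accounts: metadata only
  - level: Metadata
    userGroups: ["system:serviceaccounts", "system:nodes"]
  - level: Metadata
    users: ["system:apiserver", "system:kube-scheduler", "system:kube-controller-manager"]
  # Everything else (OIDC-authenticated users): full request/response
  - level: RequestResponse
```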
**Log pipeline**: Audit log file → Alloy (DaemonSet on master) → Loki → Grafana dashboard
**Grafana dashboard** shows: who accessed what resource, when, from where, and the outcome (allow/deny).
### 6. Resource Quotas
Each namespace-owner namespace gets a `ResourceQuota`:
```hcl
requests.cpu = "2"
requests.memory = "4Gi"
limits.cpu = "4"
limits.memory = "8Gi"
pods = "20"
```
Defaults can be overridden per-user via an optional `quota` field in the `k8s_users` variable.
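A sketch of how this could be expressed with the `kubernetes` provider (the `local.owned_namespaces` map and the `try()` override merge are assumptions):
```hcl
resource "kubernetes_resource_quota" "namespace_owner" {
  # e.g. { "bob-apps" = "bob", "bob-dev" = "bob" }, derived from var.k8s_users
  for_each = local.owned_namespaces

  metadata {
    name      = "default-quota"
    namespace = each.key
  }

  spec {
    hard = merge({
      "requests.cpu"    = "2"
      "requests.memory" = "4Gi"
      "limits.cpu"      = "4"
      "limits.memory"   = "8Gi"
      "pods"            = "20"
    }, try(var.k8s_users[each.value].quota, {}))
  }
}
```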
## Implementation Order
1. Authentik OIDC application setup
2. kube-apiserver OIDC flag configuration
3. RBAC Terraform module
4. Audit logging
5. Self-service portal
6. Grafana dashboard for audit logs

File diff suppressed because it is too large


@@ -1,111 +0,0 @@
# OpenClaw Cluster Management Agent — Design
**Date**: 2026-02-21
**Status**: Approved
## Goal
Build a proactive cluster management agent that runs scheduled health checks every 30 minutes, auto-fixes safe issues, and alerts via Slack. The agent is "taught" via an OpenClaw skill and a reusable health check script.
## Architecture
```
CronJob (every 30min)
└─ kubectl exec into OpenClaw pod
└─ /workspace/infra/.claude/cluster-health.sh
├─ kubectl get nodes (check health)
├─ kubectl get pods -A (find problems)
├─ kubectl delete pod (evicted/stuck)
└─ curl Slack webhook (report)
```
Interactive path: User asks OpenClaw via UI -> `cluster-health` skill triggers -> runs same script -> LLM analyzes output and can do deeper investigation.
## Components
### 1. `cluster-health` skill (`.claude/skills/cluster-health/SKILL.md`)
Teaches OpenClaw:
- What health checks to run
- What's safe to auto-fix vs alert-only
- How to format Slack alerts
- How to do deeper investigation when asked interactively
Trigger conditions: "check cluster", "cluster health", "what's wrong", "health check", etc.
### 2. `cluster-health.sh` helper script (`.claude/cluster-health.sh`)
Reusable script that performs all checks:
**Checks:**
- Node health (NotReady, MemoryPressure, DiskPressure, PIDPressure)
- Pod health (CrashLoopBackOff, ImagePullBackOff, Error, OOMKilled, Pending)
- Evicted pods
- Failed deployments (unavailable replicas)
- Pending PVCs
- Resource pressure (high CPU/memory allocation)
- Failed CronJobs
- DaemonSet health (missing pods)
**Safe auto-fix actions:**
- Delete evicted pods
- Delete completed/succeeded pods older than 24h
- Restart (delete) pods in CrashLoopBackOff for more than 1 hour
**Alert-only (never auto-fix):**
- Node NotReady
- Persistent OOMKilled
- ImagePullBackOff
- Pending PVCs
- Failed deployments with 0 available replicas
**Output:**
- Structured text summary
- Posts to Slack via webhook
- Exit code 0 = healthy, 1 = issues found
### 3. Kubernetes CronJob (in `modules/kubernetes/openclaw/main.tf`)
- Schedule: `*/30 * * * *`
- Container: `bitnami/kubectl` (minimal image with kubectl)
- Command: `kubectl exec deploy/openclaw -n openclaw -- /bin/bash /workspace/infra/.claude/cluster-health.sh`
- ServiceAccount with RBAC to exec into pods in `openclaw` namespace
- `concurrencyPolicy: Forbid`
- `failedJobsHistoryLimit: 3`
- `successfulJobsHistoryLimit: 3`
### 4. Slack Integration
- Webhook URL from `openclaw_skill_secrets["slack"]` (already configured)
- Passed as `SLACK_WEBHOOK_URL` env var to the OpenClaw pod
## Slack Message Format
```
:white_check_mark: Cluster Health Check — All Clear
Nodes: 5/5 Ready | Pods: 142 Running | 0 Issues
```
```
:warning: Cluster Health Check — 3 Issues Found
Auto-fixed:
- Deleted 4 evicted pods in monitoring namespace
- Restarted stuck pod calibre-web-xyz (CrashLoopBackOff >1h)
Needs attention:
- Node k8s-node3: MemoryPressure condition detected
- PVC data-tandoor pending for 45 minutes
```
## Decisions
| Decision | Choice | Rationale |
|----------|--------|-----------|
| Mode | Proactive (scheduled) | Want automated monitoring |
| Alert channel | Slack | Existing webhook in openclaw_skill_secrets |
| Auto-fix | Safe fixes only | Delete evicted, restart stuck; alert for the rest |
| Frequency | 30 minutes | Balance between detection speed and overhead |
| Checks scope | Standard K8s health | Pod/node/deployment/PVC/CronJob/DaemonSet |
| Trigger mechanism | CronJob execs into OpenClaw pod | Reuses OpenClaw's tools; LLM available interactively |
| Fallback | None | Uptime Kuma monitors OpenClaw availability |


@@ -1,800 +0,0 @@
# OpenClaw Cluster Management Agent — Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** Build a proactive cluster health agent — a skill that teaches OpenClaw to check the cluster, a helper script that runs the checks and posts to Slack, and a CronJob that triggers it every 30 minutes via `kubectl exec`.
**Architecture:** CronJob (bitnami/kubectl) -> `kubectl exec` into OpenClaw pod -> runs `cluster-health.sh` which performs 8 health checks, auto-fixes safe issues, and posts a summary to Slack. The same script is available as an OpenClaw skill for interactive use.
**Tech Stack:** Bash (health check script), Terraform/HCL (CronJob + RBAC), Slack webhook API, kubectl
---
### Task 1: Add Slack webhook to openclaw_skill_secrets
**Files:**
- Modify: `terraform.tfvars:1291-1295` (add slack_webhook key)
- Modify: `modules/kubernetes/openclaw/main.tf:350-376` (add SLACK_WEBHOOK_URL env var)
**Step 1: Add slack_webhook to openclaw_skill_secrets in tfvars**
Add a new key `slack_webhook` to the existing `openclaw_skill_secrets` map. The user must provide the webhook URL. For now, use the existing `alertmanager_slack_api_url` value or a dedicated one.
In `terraform.tfvars`, change:
```hcl
openclaw_skill_secrets = {
home_assistant_token = "..."
home_assistant_sofia_token = "..."
uptime_kuma_password = "..."
}
```
to:
```hcl
openclaw_skill_secrets = {
home_assistant_token = "..."
home_assistant_sofia_token = "..."
uptime_kuma_password = "..."
slack_webhook = "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
}
```
**NOTE:** Ask the user which Slack webhook URL to use. Candidates:
- `alertmanager_slack_api_url` (line 4 in tfvars)
- `tiny_tuya_slack_url` (line 1213, comment says "K8s bot slack")
- A new webhook the user creates
**Step 2: Add SLACK_WEBHOOK_URL env var to OpenClaw container**
In `modules/kubernetes/openclaw/main.tf`, add after the `UPTIME_KUMA_PASSWORD` env block (around line 370):
```hcl
# Skill secrets - Slack
env {
name = "SLACK_WEBHOOK_URL"
value = var.skill_secrets["slack_webhook"]
}
```
**Step 3: Commit**
```bash
git add modules/kubernetes/openclaw/main.tf
git commit -m "[ci skip] Add Slack webhook env var to OpenClaw deployment"
```
Do NOT commit `terraform.tfvars` separately — it will be committed with the full set of changes at the end.
---
### Task 2: Create the cluster-health.sh helper script
**Files:**
- Create: `.claude/cluster-health.sh`
**Step 1: Write the health check script**
Create `.claude/cluster-health.sh` with the following structure. The script:
- Uses `$KUBECONFIG` (already set in OpenClaw pod) or falls back to in-cluster config
- Runs 8 checks: nodes, pods, evicted, deployments, PVCs, resources, CronJobs, DaemonSets
- Auto-fixes: deletes evicted pods, restarts CrashLoopBackOff pods stuck >1 hour
- Posts structured Slack message via `$SLACK_WEBHOOK_URL`
- Exit code 0 = healthy, 1 = issues found, 2 = critical
```bash
#!/usr/bin/env bash
# Cluster health check script for OpenClaw.
# Runs health checks, auto-fixes safe issues, posts to Slack.
# Designed to run inside the OpenClaw pod (has kubectl via $KUBECONFIG).
#
# Usage: ./cluster-health.sh [--no-slack] [--no-fix]
# --no-slack Skip Slack notification (useful for interactive/debug runs)
# --no-fix Skip auto-fix actions (report only)
set -euo pipefail
SEND_SLACK=true
AUTO_FIX=true
ISSUES=()
FIXES=()
WARNINGS=()
# --- Argument parsing ---
for arg in "$@"; do
case "$arg" in
--no-slack) SEND_SLACK=false ;;
--no-fix) AUTO_FIX=false ;;
esac
done
KUBECTL="kubectl"
# --- 1. Node Health ---
check_nodes() {
local nodes not_ready
nodes=$($KUBECTL get nodes --no-headers 2>&1) || { ISSUES+=("Cannot reach cluster API"); return; }
not_ready=$(echo "$nodes" | awk '$2 != "Ready" {print $1}' || true)
if [[ -n "$not_ready" ]]; then
while IFS= read -r node; do
ISSUES+=("Node NotReady: $node")
done <<< "$not_ready"
fi
# Check conditions
local conditions
conditions=$($KUBECTL get nodes -o json | python3 -c '
import json, sys
data = json.load(sys.stdin)
for node in data["items"]:
name = node["metadata"]["name"]
for c in node["status"]["conditions"]:
if c["type"] in ("MemoryPressure","DiskPressure","PIDPressure") and c["status"] == "True":
print(name + ": " + c["type"])
' 2>/dev/null) || true
if [[ -n "$conditions" ]]; then
while IFS= read -r line; do
ISSUES+=("$line")
done <<< "$conditions"
fi
}
# --- 2. Pod Health ---
check_pods() {
local bad
bad=$( {
$KUBECTL get pods -A --no-headers 2>/dev/null \
| grep -E 'CrashLoopBackOff|ImagePullBackOff|ErrImagePull|Error' || true
} | awk '!seen[$1,$2]++' | sed '/^$/d') || true
if [[ -z "$bad" ]]; then return; fi
while IFS= read -r line; do
local ns pod status
ns=$(echo "$line" | awk '{print $1}')
pod=$(echo "$line" | awk '{print $2}')
status=$(echo "$line" | awk '{print $4}')
if [[ "$status" == "CrashLoopBackOff" ]]; then
# Check if stuck for >1 hour
local restart_count
restart_count=$(echo "$line" | awk '{print $5}')
if [[ "$AUTO_FIX" == true && "$restart_count" -gt 10 ]]; then
$KUBECTL delete pod -n "$ns" "$pod" --grace-period=30 2>/dev/null && \
FIXES+=("Restarted $ns/$pod (CrashLoopBackOff, $restart_count restarts)") || \
WARNINGS+=("Failed to restart $ns/$pod")
else
ISSUES+=("CrashLoopBackOff: $ns/$pod ($restart_count restarts)")
fi
elif [[ "$status" == "ImagePullBackOff" || "$status" == "ErrImagePull" ]]; then
ISSUES+=("ImagePullBackOff: $ns/$pod")
else
ISSUES+=("Error: $ns/$pod ($status)")
fi
done <<< "$bad"
}
# --- 3. Evicted/Failed Pods ---
check_evicted() {
local evicted count
evicted=$($KUBECTL get pods -A --no-headers --field-selector=status.phase=Failed 2>/dev/null || true)
if [[ -z "$evicted" ]]; then return; fi
count=$(echo "$evicted" | wc -l | tr -d ' ')
if [[ "$AUTO_FIX" == true && "$count" -gt 0 ]]; then
$KUBECTL delete pods -A --field-selector=status.phase=Failed 2>/dev/null && \
FIXES+=("Deleted $count evicted/failed pod(s)") || \
WARNINGS+=("Failed to delete evicted pods")
else
ISSUES+=("$count evicted/failed pod(s)")
fi
}
# --- 4. Failed Deployments ---
check_deployments() {
local deps
deps=$($KUBECTL get deployments -A --no-headers 2>/dev/null) || return
while IFS= read -r line; do
local ns name ready current desired
ns=$(echo "$line" | awk '{print $1}')
name=$(echo "$line" | awk '{print $2}')
ready=$(echo "$line" | awk '{print $3}')
current=$(echo "$ready" | cut -d/ -f1)
desired=$(echo "$ready" | cut -d/ -f2)
if [[ "$current" != "$desired" ]]; then
ISSUES+=("Deployment $ns/$name: $current/$desired ready")
fi
done <<< "$deps"
}
# --- 5. Pending PVCs ---
check_pvcs() {
local pvcs
pvcs=$($KUBECTL get pvc -A --no-headers 2>/dev/null) || return
if [[ -z "$pvcs" || "$pvcs" == *"No resources found"* ]]; then return; fi
while IFS= read -r line; do
local ns name status
ns=$(echo "$line" | awk '{print $1}')
name=$(echo "$line" | awk '{print $2}')
status=$(echo "$line" | awk '{print $3}')
if [[ "$status" != "Bound" ]]; then
ISSUES+=("PVC $ns/$name: $status")
fi
done <<< "$pvcs"
}
# --- 6. Resource Pressure ---
check_resources() {
local top
top=$($KUBECTL top nodes --no-headers 2>/dev/null) || return
while IFS= read -r line; do
local node cpu_pct mem_pct
node=$(echo "$line" | awk '{print $1}')
cpu_pct=$(echo "$line" | awk '{print $3}' | tr -d '%')
mem_pct=$(echo "$line" | awk '{print $5}' | tr -d '%')
[[ "$cpu_pct" == *"unknown"* || "$mem_pct" == *"unknown"* ]] && continue
if [[ "$cpu_pct" -gt 90 || "$mem_pct" -gt 90 ]]; then
ISSUES+=("High resource usage on $node: CPU ${cpu_pct}%, Mem ${mem_pct}%")
elif [[ "$cpu_pct" -gt 80 || "$mem_pct" -gt 80 ]]; then
WARNINGS+=("Elevated resource usage on $node: CPU ${cpu_pct}%, Mem ${mem_pct}%")
fi
done <<< "$top"
}
# --- 7. CronJob Failures ---
check_cronjobs() {
local failures
failures=$($KUBECTL get jobs -A -o json 2>/dev/null | python3 -c '
import json, sys
from datetime import datetime, timezone, timedelta
data = json.load(sys.stdin)
cutoff = datetime.now(timezone.utc) - timedelta(hours=24)
for job in data.get("items", []):
meta = job.get("metadata", {})
ns = meta.get("namespace", "")
name = meta.get("name", "")
owners = meta.get("ownerReferences", [])
if not any(o.get("kind") == "CronJob" for o in owners):
continue
for c in job.get("status", {}).get("conditions", []):
if c.get("type") == "Failed" and c.get("status") == "True":
ts = c.get("lastTransitionTime", "")
if ts:
try:
t = datetime.fromisoformat(ts.replace("Z", "+00:00"))
if t > cutoff:
print(f"{ns}/{name}")
except:
print(f"{ns}/{name}")
' 2>/dev/null) || true
if [[ -n "$failures" ]]; then
local count
count=$(echo "$failures" | wc -l | tr -d ' ')
ISSUES+=("$count CronJob failure(s) in last 24h")
fi
}
# --- 8. DaemonSet Health ---
check_daemonsets() {
local ds
ds=$($KUBECTL get daemonsets -A --no-headers 2>/dev/null) || return
while IFS= read -r line; do
local ns name desired ready
ns=$(echo "$line" | awk '{print $1}')
name=$(echo "$line" | awk '{print $2}')
desired=$(echo "$line" | awk '{print $3}')
ready=$(echo "$line" | awk '{print $5}')
if [[ "$desired" != "$ready" ]]; then
ISSUES+=("DaemonSet $ns/$name: desired=$desired ready=$ready")
fi
done <<< "$ds"
}
# --- Cluster summary stats ---
get_summary_stats() {
local node_count ready_count pod_count
node_count=$($KUBECTL get nodes --no-headers 2>/dev/null | wc -l | tr -d ' ')
ready_count=$($KUBECTL get nodes --no-headers 2>/dev/null | awk '$2 == "Ready"' | wc -l | tr -d ' ')
pod_count=$($KUBECTL get pods -A --no-headers --field-selector=status.phase=Running 2>/dev/null | wc -l | tr -d ' ')
echo "${ready_count}/${node_count} nodes | ${pod_count} pods running"
}
# --- Send Slack message ---
send_slack() {
local webhook_url="${SLACK_WEBHOOK_URL:-}"
if [[ -z "${webhook_url:-}" ]]; then
echo "WARNING: SLACK_WEBHOOK_URL not set, skipping Slack notification"
return
fi
local summary issue_count fix_count warning_count
summary=$(get_summary_stats)
issue_count=${#ISSUES[@]}
fix_count=${#FIXES[@]}
warning_count=${#WARNINGS[@]}
local text=""
local total_problems=$((issue_count + warning_count))
if [[ "$total_problems" -eq 0 && "$fix_count" -eq 0 ]]; then
text=":white_check_mark: *Cluster Health Check — All Clear*\n${summary} | 0 issues"
else
if [[ "$issue_count" -gt 0 ]]; then
text=":rotating_light: *Cluster Health Check — ${issue_count} Issue(s) Found*\n${summary}"
elif [[ "$warning_count" -gt 0 ]]; then
text=":warning: *Cluster Health Check — ${warning_count} Warning(s)*\n${summary}"
else
text=":white_check_mark: *Cluster Health Check — All Clear (auto-fixed ${fix_count})*\n${summary}"
fi
if [[ "$fix_count" -gt 0 ]]; then
text+="\n\n*Auto-fixed:*"
for fix in "${FIXES[@]}"; do
text+="\n• ${fix}"
done
fi
if [[ "$issue_count" -gt 0 ]]; then
text+="\n\n*Needs attention:*"
for issue in "${ISSUES[@]}"; do
text+="\n• ${issue}"
done
fi
if [[ "$warning_count" -gt 0 ]]; then
text+="\n\n*Warnings:*"
for warning in "${WARNINGS[@]}"; do
text+="\n• ${warning}"
done
fi
fi
curl -s -X POST "$webhook_url" \
-H 'Content-Type: application/json' \
-d "{\"text\": \"${text}\"}" > /dev/null 2>&1
}
# --- Main ---
main() {
echo "=== Cluster Health Check — $(date '+%Y-%m-%d %H:%M:%S') ==="
check_nodes
check_pods
check_evicted
check_deployments
check_pvcs
check_resources
check_cronjobs
check_daemonsets
local issue_count=${#ISSUES[@]}
local fix_count=${#FIXES[@]}
local warning_count=${#WARNINGS[@]}
echo ""
echo "Results: ${issue_count} issue(s), ${fix_count} fix(es), ${warning_count} warning(s)"
if [[ "$fix_count" -gt 0 ]]; then
echo ""
echo "Auto-fixed:"
for fix in "${FIXES[@]}"; do echo " - $fix"; done
fi
if [[ "$issue_count" -gt 0 ]]; then
echo ""
echo "Issues:"
for issue in "${ISSUES[@]}"; do echo " - $issue"; done
fi
if [[ "$warning_count" -gt 0 ]]; then
echo ""
echo "Warnings:"
for warning in "${WARNINGS[@]}"; do echo " - $warning"; done
fi
if [[ "$SEND_SLACK" == true ]]; then
send_slack
echo ""
echo "Slack notification sent."
fi
# Exit code
if [[ "$issue_count" -gt 0 ]]; then
exit 1
fi
exit 0
}
main "$@"
```
**Step 2: Make it executable**
```bash
chmod +x .claude/cluster-health.sh
```
**Step 3: Test locally (dry run)**
```bash
KUBECONFIG=$(pwd)/config SLACK_WEBHOOK_URL="" bash .claude/cluster-health.sh --no-slack
```
Expected: Script runs, prints check results, no Slack post.
**Step 4: Commit**
```bash
git add .claude/cluster-health.sh
git commit -m "[ci skip] Add cluster health check script for OpenClaw agent"
```
---
### Task 3: Create the cluster-health skill
**Files:**
- Create: `.claude/skills/cluster-health/SKILL.md`
**Step 1: Write the skill document**
```markdown
---
name: cluster-health
description: |
Check Kubernetes cluster health and fix common issues. Use when:
(1) User asks to check the cluster, check health, or "what's wrong",
(2) User asks about pod status, node health, or deployment issues,
(3) User asks to fix stuck pods, evicted pods, or CrashLoopBackOff,
(4) User mentions "health check", "cluster status", "cluster health",
(5) User asks "is everything running" or "any problems".
Runs 8 standard K8s health checks with safe auto-fix for evicted pods
and stuck CrashLoopBackOff pods.
author: Claude Code
version: 1.0.0
date: 2026-02-21
---
# Cluster Health Check
## Overview
- **Script**: `/workspace/infra/.claude/cluster-health.sh`
- **Schedule**: CronJob runs every 30 minutes, execs into this pod
- **Slack**: Posts results to `$SLACK_WEBHOOK_URL`
- **Auto-fix**: Deletes evicted pods, restarts CrashLoopBackOff pods (>10 restarts)
## Quick Check
Run the health check script:
```bash
bash /workspace/infra/.claude/cluster-health.sh --no-slack
```
Or with Slack notification:
```bash
bash /workspace/infra/.claude/cluster-health.sh
```
Report-only (no auto-fix):
```bash
bash /workspace/infra/.claude/cluster-health.sh --no-fix
```
## What It Checks
| # | Check | Auto-Fix | Alert |
|---|-------|----------|-------|
| 1 | Node health (NotReady, conditions) | No | Yes |
| 2 | Pod health (CrashLoopBackOff, ImagePullBackOff, Error) | Restart if >10 restarts | Yes |
| 3 | Evicted/failed pods | Delete all | Yes |
| 4 | Deployment availability (current != desired) | No | Yes |
| 5 | PVC status (not Bound) | No | Yes |
| 6 | Resource pressure (CPU/Mem >80%) | No | Yes |
| 7 | CronJob failures (last 24h) | No | Yes |
| 8 | DaemonSet health (desired != ready) | No | Yes |
## Safe Auto-Fix Rules
These are the ONLY things the script auto-fixes:
1. **Evicted/failed pods**: `kubectl delete pods -A --field-selector=status.phase=Failed`
2. **CrashLoopBackOff pods with >10 restarts**: `kubectl delete pod -n <ns> <pod> --grace-period=30`
Everything else is alert-only. NEVER auto-fix:
- Node NotReady (could be maintenance)
- ImagePullBackOff (needs image tag or registry fix)
- Pending PVCs (needs storage investigation)
- Failed deployments (needs config investigation)
## Deep Investigation
When the script reports issues and the user asks for more detail, use these commands:
### Node issues
```bash
kubectl describe node <node-name>
kubectl top node <node-name>
kubectl get events --field-selector involvedObject.name=<node-name>
```
### Pod issues
```bash
kubectl describe pod -n <namespace> <pod-name>
kubectl logs -n <namespace> <pod-name> --tail=100
kubectl logs -n <namespace> <pod-name> --previous --tail=100
kubectl get events -n <namespace> --field-selector involvedObject.name=<pod-name>
```
### Deployment issues
```bash
kubectl describe deployment -n <namespace> <deployment-name>
kubectl rollout status deployment -n <namespace> <deployment-name>
kubectl rollout history deployment -n <namespace> <deployment-name>
```
### PVC issues
```bash
kubectl describe pvc -n <namespace> <pvc-name>
kubectl get pv
kubectl get events -n <namespace> --field-selector involvedObject.name=<pvc-name>
```
### Resource pressure
```bash
kubectl top nodes
kubectl top pods -A --sort-by=memory | head -20
kubectl top pods -A --sort-by=cpu | head -20
```
## Common Remediation
### CrashLoopBackOff (persistent)
1. Check logs: `kubectl logs -n <ns> <pod> --previous --tail=100`
2. Check events: `kubectl describe pod -n <ns> <pod>`
3. Common causes: OOMKilled (increase memory limit), bad config, missing env var
4. If image issue: check if newer image exists, update in Terraform
### OOMKilled
1. Check current limits: `kubectl describe pod -n <ns> <pod> | grep -A2 Limits`
2. Fix: Update resource limits in Terraform module for the service
3. Apply: `terraform apply -target=module.kubernetes_cluster.module.<service> -var="kube_config_path=$(pwd)/config"`
### ImagePullBackOff
1. Check image: `kubectl describe pod -n <ns> <pod> | grep Image`
2. Check registry: Is the image tag valid? Is the registry reachable?
3. Check pull-through cache: Docker registry at 10.0.20.10
### Node NotReady
1. Check kubelet: SSH to node, `systemctl status kubelet`
2. Check resources: `kubectl top node <node>`
3. Check conditions: `kubectl describe node <node> | grep -A10 Conditions`
## Slack Webhook
Messages are posted to the webhook at `$SLACK_WEBHOOK_URL`. Format:
- All clear: green check + summary stats
- Issues found: red siren + list of issues + auto-fix actions taken
- Warnings only: yellow warning + elevated metrics
## Infrastructure
- **Terraform module**: `modules/kubernetes/openclaw/main.tf`
- **CronJob**: Runs in `openclaw` namespace every 30 min
- **Existing healthcheck**: `scripts/cluster_healthcheck.sh` (local-only, not for OpenClaw)
- **Repo path inside pod**: `/workspace/infra/`
```
**Step 2: Commit**
```bash
git add .claude/skills/cluster-health/SKILL.md
git commit -m "[ci skip] Add cluster-health skill for OpenClaw agent"
```
---
### Task 4: Add CronJob and RBAC to Terraform
**Files:**
- Modify: `modules/kubernetes/openclaw/main.tf` (append CronJob + ServiceAccount + Role + RoleBinding)
**Step 1: Add CronJob resources**
Append the following to `modules/kubernetes/openclaw/main.tf` after the `module "ingress"` block:
```hcl
# --- CronJob: Scheduled cluster health check ---
resource "kubernetes_service_account" "healthcheck" {
metadata {
name = "cluster-healthcheck"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
}
resource "kubernetes_role" "healthcheck_exec" {
metadata {
name = "healthcheck-pod-exec"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
rule {
api_groups = [""]
resources = ["pods"]
verbs = ["get", "list"]
}
rule {
api_groups = [""]
resources = ["pods/exec"]
verbs = ["create"]
}
}
resource "kubernetes_role_binding" "healthcheck_exec" {
metadata {
name = "healthcheck-pod-exec"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.healthcheck.metadata[0].name
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = kubernetes_role.healthcheck_exec.metadata[0].name
}
}
resource "kubernetes_cron_job_v1" "cluster_healthcheck" {
metadata {
name = "cluster-healthcheck"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "cluster-healthcheck"
tier = var.tier
}
}
spec {
schedule = "*/30 * * * *"
concurrency_policy = "Forbid"
failed_jobs_history_limit = 3
successful_jobs_history_limit = 3
job_template {
metadata {
labels = {
app = "cluster-healthcheck"
}
}
spec {
active_deadline_seconds = 300
template {
metadata {
labels = {
app = "cluster-healthcheck"
}
}
spec {
service_account_name = kubernetes_service_account.healthcheck.metadata[0].name
restart_policy = "Never"
container {
name = "healthcheck"
image = "bitnami/kubectl:1.34"
command = ["bash", "-c", <<-EOF
# Find the openclaw pod
POD=$(kubectl get pods -n openclaw -l app=openclaw -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
if [ -z "$POD" ]; then
echo "ERROR: OpenClaw pod not found"
exit 1
fi
echo "Executing health check in pod $POD..."
kubectl exec -n openclaw "$POD" -c openclaw -- bash /workspace/infra/.claude/cluster-health.sh
EOF
]
resources {
requests = {
cpu = "50m"
memory = "64Mi"
}
limits = {
memory = "128Mi"
}
}
}
}
}
}
}
}
}
```
**Step 2: Verify Terraform formatting**
```bash
terraform fmt modules/kubernetes/openclaw/main.tf
```
**Step 3: Verify Terraform plan**
```bash
terraform plan -target=module.kubernetes_cluster.module.openclaw -var="kube_config_path=$(pwd)/config"
```
Expected: Plan shows 4 new resources (ServiceAccount, Role, RoleBinding, CronJobV1). No destructive changes to existing resources.
**Step 4: Commit**
```bash
git add modules/kubernetes/openclaw/main.tf
git commit -m "[ci skip] Add cluster health check CronJob to OpenClaw module"
```
---
### Task 5: Deploy and verify
**Step 1: Apply Terraform**
```bash
terraform apply -target=module.kubernetes_cluster.module.openclaw -var="kube_config_path=$(pwd)/config" -auto-approve
```
**Step 2: Verify CronJob exists**
```bash
kubectl --kubeconfig $(pwd)/config get cronjob -n openclaw
```
Expected: `cluster-healthcheck` with schedule `*/30 * * * *`
**Step 3: Verify RBAC**
```bash
kubectl --kubeconfig $(pwd)/config get serviceaccount,role,rolebinding -n openclaw
```
Expected: `cluster-healthcheck` SA, `healthcheck-pod-exec` role and rolebinding
**Step 4: Trigger a manual run**
```bash
kubectl --kubeconfig $(pwd)/config create job --from=cronjob/cluster-healthcheck healthcheck-manual-test -n openclaw
```
**Step 5: Check job output**
```bash
kubectl --kubeconfig $(pwd)/config wait --for=condition=complete job/healthcheck-manual-test -n openclaw --timeout=120s
kubectl --kubeconfig $(pwd)/config logs job/healthcheck-manual-test -n openclaw
```
Expected: Health check output with results. If `SLACK_WEBHOOK_URL` is set, check Slack for the message.
**Step 6: Clean up test job**
```bash
kubectl --kubeconfig $(pwd)/config delete job healthcheck-manual-test -n openclaw
```
**Step 7: Final commit**
```bash
git add -A modules/kubernetes/openclaw/ .claude/skills/cluster-health/ .claude/cluster-health.sh
git commit -m "[ci skip] OpenClaw cluster health agent: script + skill + CronJob"
```


@@ -1,387 +0,0 @@
# Terragrunt Migration Design
**Date**: 2026-02-22
**Status**: Approved
## Problem
The infrastructure repo has a monolithic Terraform setup:
- 15MB state file, 857 resources, 85+ service modules in a single root
- `terraform plan/apply` evaluates all modules even when targeting one service
- `null_resource.core_services` bottleneck blocks 73 services behind 12 core modules
- 150+ variables passed through root -> kubernetes_cluster -> individual services
- 3 providers (kubernetes, helm, proxmox) initialize on every run
## Goals
- **Speed**: Faster plan/apply by splitting state into independent stacks
- **Blast radius isolation**: Bad apply can't break unrelated services
- **DRY config**: Shared provider/backend configuration via Terragrunt
- **Proper DAG**: Full references between stacks (not hardcoded DNS strings)
- **Bootstrappable**: `terragrunt run-all apply` works from scratch
- **CI/CD**: Changed-stack detection in Drone CI
## Architecture: Flat Stacks
### Directory Structure
```
infra/
├── terragrunt.hcl                 # Root config (providers, backend, common vars)
├── stacks/
│   ├── infra/                     # Proxmox VMs, templates, docker-registry
│   │   ├── terragrunt.hcl
│   │   └── main.tf
│   ├── platform/                  # Core: traefik, metallb, redis, dbaas, authentik, etc.
│   │   ├── terragrunt.hcl
│   │   └── main.tf
│   ├── blog/                      # One dir per user service
│   │   ├── terragrunt.hcl
│   │   └── main.tf
│   ├── immich/
│   │   ├── terragrunt.hcl
│   │   └── main.tf
│   └── ... (~65 service dirs)
├── modules/                       # UNCHANGED — existing modules stay where they are
│   ├── kubernetes/
│   │   ├── ingress_factory/
│   │   ├── setup_tls_secret/
│   │   ├── blog/
│   │   ├── immich/
│   │   └── ...
│   ├── create-vm/
│   └── create-template-vm/
├── state/                         # Per-stack state files
│   ├── stacks/infra/terraform.tfstate
│   ├── stacks/platform/terraform.tfstate
│   ├── stacks/blog/terraform.tfstate
│   └── ...
├── terraform.tfvars               # UNCHANGED — encrypted secrets
├── secrets/                       # UNCHANGED — TLS certs
├── main.tf                        # LEGACY — gradually emptied during migration
└── terraform.tfstate              # LEGACY — gradually emptied during migration
```
Each stack has a thin `main.tf` wrapper that calls the existing module via
`source = "../../modules/kubernetes/<service>"`. We do NOT use Terragrunt's
`terraform { source }` directive because our modules use relative paths
(`../ingress_factory`, `../setup_tls_secret`) that would break when Terragrunt
copies them to `.terragrunt-cache/`.
### Stack Composition
**Infra stack** (~10 resources):
- Proxmox VM templates (k8s, non-k8s, docker-registry)
- Docker registry VM
- Uses proxmox provider (not kubernetes/helm)
**Platform stack** (~200 resources, ~20 services):
- traefik, metallb, redis, dbaas, technitium, authentik, crowdsec, cloudflared
- monitoring (prometheus, alertmanager, grafana, loki, alloy)
- kyverno, metrics-server, nvidia, mailserver, authelia
- wireguard, headscale, xray, uptime-kuma, vaultwarden, reverse-proxy
- Exports outputs consumed by service stacks
**Per-service stacks** (~65, each 5-25 resources):
- One stack per user-facing service
- Each depends on platform via Terragrunt `dependency` block
- Some depend on other services (f1-stream -> coturn, etc.)
### Dependency Graph
```
                        ┌─────────┐
                        │  infra  │
                        └────┬────┘
                             │
                        ┌────▼────┐
                        │platform │ exports: redis_host, postgresql_host,
                        │         │          mysql_host, smtp_host, tls_secret_name, ...
                        └────┬────┘
                             │
     ┌───────────┬───────────┼───────────┬───────────┐
     │           │           │           │           │
┌────▼───┐  ┌────▼───┐  ┌────▼───┐  ┌────▼───┐  ┌────▼───┐
│  blog  │  │ immich │  │ affine │  │ ollama │  │ coturn │ ...
└────────┘  └────────┘  └────────┘  └────┬───┘  └────┬───┘
                                         │           │
                                    ┌────▼───┐  ┌────▼────┐
                                    │openclaw│  │f1-stream│
                                    │gramps  │  └─────────┘
                                    │ytdlp   │
                                    └────────┘
```
### Platform Stack Outputs
| Output | Value | Consumers |
|--------|-------|-----------|
| `redis_host` | `redis.redis.svc.cluster.local` | 10 services |
| `postgresql_host` | `postgresql.dbaas.svc.cluster.local` | 10 services |
| `postgresql_port` | `5432` | 10 services |
| `mysql_host` | `mysql.dbaas.svc.cluster.local` | 8 services |
| `mysql_port` | `3306` | 8 services |
| `smtp_host` | `mail.viktorbarzin.me` | 6 services |
| `smtp_port` | `587` | 6 services |
| `tls_secret_name` | from variable | all services |
| `authentik_outpost_url` | `http://ak-outpost-...` | traefik |
| `crowdsec_lapi_host` | `crowdsec-service...` | traefik |
| `alertmanager_url` | `http://prometheus-alertmanager...` | loki |
| `loki_push_url` | `http://loki...` | alloy |
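Once the platform stack is applied, these outputs can be read back directly; Terragrunt forwards `output` to Terraform:
```bash
cd stacks/platform
terragrunt output                    # list every exported value
terragrunt output -raw redis_host    # -> redis.redis.svc.cluster.local
```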
Service-to-service dependencies:
| Service | Depends on | Outputs consumed |
|---------|-----------|-----------------|
| f1-stream | coturn | `coturn_host`, `coturn_port` |
| real-estate-crawler | osm-routing | `osrm_foot_host`, `osrm_bicycle_host` |
| openclaw, grampsweb, ytdlp | ollama | `ollama_host` |
### Module Modifications
Service modules that hardcode DNS names need modification to accept hosts as variables.
~20 modules affected. Example for affine:
**Before:**
```hcl
# modules/kubernetes/affine/main.tf
DATABASE_URL = "postgresql://...@postgresql.dbaas.svc.cluster.local:5432/affine"
REDIS_SERVER_HOST = "redis.redis.svc.cluster.local"
```
**After:**
```hcl
variable "redis_host" { type = string }
variable "postgresql_host" { type = string }
variable "postgresql_port" { type = number }
DATABASE_URL = "postgresql://...@${var.postgresql_host}:${var.postgresql_port}/affine"
REDIS_SERVER_HOST = var.redis_host
```
## Root Terragrunt Configuration
```hcl
# infra/terragrunt.hcl
remote_state {
backend = "local"
generate = {
path = "backend.tf"
if_exists = "overwrite_terragrunt"
}
config = {
path = "${get_repo_root()}/state/${path_relative_to_include()}/terraform.tfstate"
}
}
terraform {
extra_arguments "common_vars" {
commands = get_terraform_commands_that_need_vars()
required_var_files = [
"${get_repo_root()}/terraform.tfvars"
]
}
}
generate "k8s_providers" {
path = "providers.tf"
if_exists = "overwrite_terragrunt"
contents = <<EOF
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes {
config_path = var.kube_config_path
}
}
EOF
}
```
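With this in place, any stack picks up its backend and providers on `init`; a quick way to see the effect (the generated file names come from the `generate` blocks above):
```bash
cd stacks/blog
terragrunt init             # writes backend.tf and providers.tf, then runs terraform init
ls backend.tf providers.tf
# State lands in state/stacks/blog/terraform.tfstate, because
# path_relative_to_include() resolves to "stacks/blog" here.
```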
## Stack Wrapper Examples
### Simple service (blog)
```hcl
# stacks/blog/terragrunt.hcl
include "root" {
path = find_in_parent_folders()
}
dependency "platform" {
config_path = "../platform"
}
inputs = {
tls_secret_name = dependency.platform.outputs.tls_secret_name
}
```
```hcl
# stacks/blog/main.tf
variable "tls_secret_name" {}
variable "kube_config_path" { default = "~/.kube/config" }
module "blog" {
source = "../../modules/kubernetes/blog"
tls_secret_name = var.tls_secret_name
tier = "4-aux"
}
```
### Database-backed service (affine)
```hcl
# stacks/affine/terragrunt.hcl
include "root" {
path = find_in_parent_folders()
}
dependency "platform" {
config_path = "../platform"
}
inputs = {
tls_secret_name = dependency.platform.outputs.tls_secret_name
redis_host = dependency.platform.outputs.redis_host
postgresql_host = dependency.platform.outputs.postgresql_host
postgresql_port = dependency.platform.outputs.postgresql_port
smtp_host = dependency.platform.outputs.smtp_host
smtp_port = dependency.platform.outputs.smtp_port
}
```
```hcl
# stacks/affine/main.tf
variable "tls_secret_name" {}
variable "kube_config_path" { default = "~/.kube/config" }
variable "affine_postgresql_password" {}
variable "redis_host" { type = string }
variable "postgresql_host" { type = string }
variable "postgresql_port" { type = number }
variable "smtp_host" { type = string }
variable "smtp_port" { type = number }
module "affine" {
source = "../../modules/kubernetes/affine"
tls_secret_name = var.tls_secret_name
postgresql_password = var.affine_postgresql_password
redis_host = var.redis_host
postgresql_host = var.postgresql_host
postgresql_port = var.postgresql_port
smtp_host = var.smtp_host
smtp_port = var.smtp_port
tier = "4-aux"
}
```
### Service-to-service dependency (f1-stream -> coturn)
```hcl
# stacks/f1-stream/terragrunt.hcl
include "root" {
path = find_in_parent_folders()
}
dependency "platform" {
config_path = "../platform"
}
dependency "coturn" {
config_path = "../coturn"
}
inputs = {
tls_secret_name = dependency.platform.outputs.tls_secret_name
coturn_host = dependency.coturn.outputs.coturn_host
coturn_port = dependency.coturn.outputs.coturn_port
}
```
## Migration Strategy
### Phase 0: Setup
- Install Terragrunt
- Create root `terragrunt.hcl`, `stacks/`, `state/` directories
- No state changes, no risk
### Phase 1: Infra Stack (VMs)
- Create `stacks/infra/` with Proxmox provider + VM module calls
- `terraform state mv` the 4 root-level module resources to `state/stacks/infra/` (see the sketch below)
- Remove from root `main.tf`
- Verify: `cd stacks/infra && terragrunt plan` shows no changes
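Each phase's state surgery follows the same pattern. A sketch for one infra module (the `module.docker_registry` address is illustrative; use whatever addresses `terraform state list` actually reports):
```bash
# From the repo root: move every resource of one module into the new stack state.
terraform state list | grep '^module.docker_registry' | while read -r addr; do
  terraform state mv -state-out=state/stacks/infra/terraform.tfstate "$addr" "$addr"
done
cd stacks/infra && terragrunt plan   # must report no changes
```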
### Phase 2: Platform Stack (Core Services)
- Create `stacks/platform/main.tf` with ~20 core services + outputs
- `terraform state mv` ~200 resources from `module.kubernetes_cluster.module.<core>`
- Remove `null_resource.core_services` (Terragrunt handles ordering)
- Verify: `cd stacks/platform && terragrunt plan` shows no changes
### Phase 3: Simple Services (No DB Dependencies)
- blog, echo, privatebin, excalidraw, city-guesser, dashy, etc.
- Create stack, move state, verify — one at a time
### Phase 4: Database-Backed Services
- Modify modules to accept hosts as variables
- affine, immich, linkwarden, nextcloud, grampsweb, etc.
- Create stack, move state, verify
### Phase 5: Service-to-Service Dependencies
- ollama -> openclaw, grampsweb, ytdlp
- coturn -> f1-stream
- osm-routing -> real-estate-crawler
### Phase 6: Cleanup
- Delete DEFCON system from `modules/kubernetes/main.tf`
- Delete legacy `terraform.tfstate`
- Delete root `main.tf` kubernetes_cluster module call
- Update CI/CD to Terragrunt
### Rollback
At any phase, `terraform state mv` resources back to monolith state and
restore module calls.
## CI/CD: Changed-Stack Detection
Drone CI pipeline detects changed files per commit and maps to affected stacks:
| Changed file | Affected stack |
|-------------|---------------|
| `stacks/blog/*` | blog |
| `modules/kubernetes/blog/*` | blog |
| `terraform.tfvars` | all stacks |
| `terragrunt.hcl` | all stacks |
| `modules/kubernetes/ingress_factory/*` | all stacks |
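A minimal detection script along these lines (a sketch: it hard-codes the table above, and any shared module other than `ingress_factory`, e.g. `setup_tls_secret`, would need the same global treatment):
```bash
#!/usr/bin/env bash
# Sketch: derive affected stacks from the files touched by the last commit.
changed=$(git diff --name-only HEAD~1 HEAD)

if echo "$changed" | grep -qE '^(terraform\.tfvars|terragrunt\.hcl|modules/kubernetes/ingress_factory/)'; then
  stacks=$(ls -d stacks/*/)      # global change: apply every stack
else
  stacks=$(echo "$changed" \
    | sed -nE 's#^(stacks|modules/kubernetes)/([^/]+)/.*#stacks/\2/#p' \
    | sort -u)
fi

for s in $stacks; do
  (cd "$s" && terragrunt apply -auto-approve)
done
```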
### Manual Workflow
```bash
# Apply single service
cd stacks/blog && terragrunt apply
# Apply everything (respects DAG ordering)
cd stacks && terragrunt run-all apply
# Plan everything
cd stacks && terragrunt run-all plan
```
## Decisions Made
| Decision | Choice | Rationale |
|----------|--------|-----------|
| Tool | Terragrunt | DRY config, dependency management, run-all orchestration |
| Stack granularity | 1 platform + 1 per service | Max isolation for apps, grouped core |
| Migration | Incremental | Lower risk, verify each step |
| Shared modules | Relative paths | Simple, no registry overhead |
| State backend | Local files | No external dependencies |
| Cross-stack refs | Full references via outputs | Proper DAG, bootstrappable from scratch |
| CI/CD | Changed-stack detection | Only apply what changed |

File diff suppressed because it is too large

main.tf

@@ -1,326 +0,0 @@
variable "prod" {
type = bool
default = false
}
variable "proxmox_pm_api_url" { type = string }
variable "proxmox_pm_api_token_id" { type = string }
variable "proxmox_pm_api_token_secret" { type = string }
variable "k8s_join_command" { type = string }
variable "vm_wizard_password" { type = string }
variable "proxmox_host" { type = string }
variable "ssh_private_key" {
type = string
default = ""
}
variable "ssh_public_key" {
type = string
default = ""
}
variable "tls_secret_name" {}
variable "tls_crt" {
default = ""
}
variable "tls_key" {
default = ""
}
variable "client_certificate_secret_name" {}
variable "mailserver_accounts" {}
variable "mailserver_aliases" {}
variable "mailserver_opendkim_key" {}
variable "mailserver_roundcubemail_db_password" { type = string }
variable "mailserver_sasl_passwd" {}
variable "pihole_web_password" {}
variable "webhook_handler_secret" {}
variable "wireguard_wg_0_conf" {}
variable "wireguard_firewall_sh" {}
variable "hackmd_db_password" {}
variable "bind_db_viktorbarzin_me" {}
variable "bind_db_viktorbarzin_lan" {}
variable "bind_named_conf_options" {}
variable "alertmanager_account_password" {}
variable "wireguard_wg_0_key" {}
variable "dbaas_root_password" {}
variable "dbaas_postgresql_root_password" {}
variable "dbaas_pgadmin_password" {}
variable "drone_github_client_id" {}
variable "drone_github_client_secret" {}
variable "drone_rpc_secret" {}
variable "drone_webhook_secret" {}
variable "dockerhub_registry_password" {}
variable "oauth2_proxy_client_id" {}
variable "oauth2_proxy_client_secret" {}
variable "oauth2_proxy_authenticated_emails" {}
variable "url_shortener_mysql_password" {}
variable "url_shortener_geolite_license_key" {}
variable "url_shortener_api_key" {}
variable "webhook_handler_fb_verify_token" {}
variable "webhook_handler_fb_page_token" {}
variable "webhook_handler_fb_app_secret" {}
variable "webhook_handler_git_user" {}
variable "technitium_username" {}
variable "technitium_password" {}
variable "technitium_db_password" {}
variable "webhook_handler_git_token" {}
variable "webhook_handler_ssh_key" {}
variable "monitoring_idrac_username" {}
variable "monitoring_idrac_password" {}
variable "alertmanager_slack_api_url" {}
variable "home_assistant_configuration" {}
variable "shadowsocks_password" {}
variable "finance_app_db_connection_string" {}
variable "finance_app_currency_converter_api_key" {}
variable "finance_app_graphql_api_secret" {}
variable "finance_app_gocardless_secret_key" {}
variable "finance_app_gocardless_secret_id" {}
variable "headscale_config" {}
variable "headscale_acl" {}
variable "immich_postgresql_password" {}
variable "immich_frame_api_key" {}
variable "ingress_crowdsec_api_key" {}
variable "crowdsec_enroll_key" { type = string }
variable "crowdsec_db_password" { type = string }
variable "crowdsec_dash_api_key" { type = string }
variable "crowdsec_dash_machine_id" { type = string }
variable "crowdsec_dash_machine_password" { type = string }
variable "vaultwarden_smtp_password" {}
variable "resume_database_url" {}
variable "resume_database_password" {}
variable "resume_redis_url" {}
variable "resume_auth_secret" { type = string }
variable "frigate_valchedrym_camera_credentials" { default = "" }
variable "paperless_db_password" {}
variable "diun_nfty_token" {}
variable "diun_slack_url" {}
variable "docker_config" {}
variable "nextcloud_db_password" {}
variable "homepage_credentials" {
type = map(any)
}
variable "authentik_secret_key" {}
variable "authentik_postgres_password" {}
variable "ansible_prefix" {
default = "ANSIBLE_VAULT_PASSWORD_FILE=~/.ansible/vault_pass.txt ansible-playbook -i playbook/hosts.yaml playbook/linux.yml -t linux/initial_setup"
description = "Provisioner command"
}
variable "linkwarden_postgresql_password" {}
variable "linkwarden_authentik_client_id" {}
variable "linkwarden_authentik_client_secret" {}
variable "cloudflare_api_key" {}
variable "cloudflare_email" {}
variable "cloudflare_account_id" {}
variable "cloudflare_zone_id" {}
variable "cloudflare_tunnel_id" {}
variable "public_ip" {}
variable "cloudflare_proxied_names" {}
variable "cloudflare_non_proxied_names" {}
variable "cloudflare_tunnel_token" {}
variable "owntracks_credentials" {}
variable "ollama_api_credentials" {}
variable "dawarich_database_password" {}
variable "geoapify_api_key" {}
variable "tandoor_database_password" {}
variable "n8n_postgresql_password" {}
variable "realestate_crawler_db_password" {}
variable "realestate_crawler_notification_settings" {
type = map(string)
}
variable "kured_notify_url" {}
variable "onlyoffice_db_password" { type = string }
variable "onlyoffice_jwt_token" { type = string }
variable "xray_reality_clients" { type = list(map(string)) }
variable "xray_reality_private_key" { type = string }
variable "xray_reality_short_ids" { type = list(string) }
variable "tiny_tuya_api_key" { type = string }
variable "tiny_tuya_api_secret" { type = string }
variable "tiny_tuya_service_secret" { type = string }
variable "tiny_tuya_slack_url" { type = string }
variable "haos_api_token" { type = string }
variable "pve_password" { type = string }
variable "grafana_db_password" { type = string }
variable "grafana_admin_password" { type = string }
variable "clickhouse_password" { type = string }
variable "clickhouse_postgres_password" { type = string }
variable "wealthfolio_password_hash" { type = string }
variable "aiostreams_database_connection_string" { type = string }
variable "actualbudget_credentials" { type = map(any) }
variable "speedtest_db_password" { type = string }
variable "freedify_credentials" { type = map(any) }
variable "mcaptcha_postgresql_password" { type = string }
variable "mcaptcha_cookie_secret" { type = string }
variable "mcaptcha_captcha_salt" { type = string }
variable "openrouter_api_key" { type = string }
variable "slack_bot_token" { type = string }
variable "slack_channel" { type = string }
variable "affine_postgresql_password" { type = string }
variable "health_postgresql_password" { type = string }
variable "health_secret_key" { type = string }
variable "openclaw_ssh_key" { type = string }
variable "openclaw_skill_secrets" { type = map(string) }
variable "gemini_api_key" { type = string }
variable "llama_api_key" { type = string }
variable "brave_api_key" { type = string }
variable "modal_api_key" { type = string }
variable "coturn_turn_secret" { type = string }
variable "k8s_users" {
type = map(any)
default = {}
}
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.prod ? "" : var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.prod ? "" : var.kube_config_path
}
}
provider "proxmox" {
pm_api_url = var.proxmox_pm_api_url
pm_api_token_id = var.proxmox_pm_api_token_id
pm_api_token_secret = var.proxmox_pm_api_token_secret
pm_tls_insecure = true
}
# TODO: add DEFCON levels
# ---------------------------------------------------------------------------
# Infra modules (VM templates, docker-registry) migrated to stacks/infra/
# Manage with: cd stacks/infra && terragrunt apply
# ---------------------------------------------------------------------------
# module that provisions the proxmox host?
# make dns stateless?
# pfsense/truenas configs in code
# etcd db backup in code
# module "k8s_node5" {
# template_name = local.vm_template_name
# source = "./modules/create-vm"
# vm_name = "k8s-node5"
# vmid = 205
# cisnippet_name = local.vm_cloud_init_snippet_name
# vm_mac_address = "00:50:56:87:4a:2d"
# bridge = "vmbr1"
# vlan_tag = "20"
# }
# module "k8s_master" {
# source = "./modules/create-vm"
# vm_name = "k8s-master"
# vm_mac_address = "00:50:56:b0:a1:39"
# network = "dKubernetes"
# provisioner_command = "${var.ansible_prefix} -t linux/k8s/master -e hostname=k8s-master"
# vsphere_password = var.vsphere_password
# vsphere_user = var.vsphere_user
# vsphere_server = var.vsphere_server
# vsphere_datastore = "r730-datastore"
# vsphere_resource_pool = "R730"
# }
# module "k8s_node1" {
# source = "./modules/create-vm"
# vm_name = "k8s-node1"
# vm_mac_address = "00:50:56:b0:e0:c9"
# network = "dKubernetes"
# provisioner_command = "${var.ansible_prefix} -t linux/k8s/node -e hostname=k8s-node1 -e k8s_master='wizard@${module.k8s_master.guest_ip}'"
# vsphere_password = var.vsphere_password
# vsphere_user = var.vsphere_user
# vsphere_server = var.vsphere_server
# vsphere_datastore = "r730-datastore"
# vsphere_resource_pool = "R730"
# }
# module "k8s_node2" {
# source = "./modules/create-vm"
# vm_name = "k8s-node2"
# vm_mac_address = "00:50:56:b0:a1:36"
# network = "dKubernetes"
# provisioner_command = "${var.ansible_prefix} -t linux/k8s/node -e hostname=k8s-node2 -e k8s_master='wizard@${module.k8s_master.guest_ip}'"
# vsphere_password = var.vsphere_password
# vsphere_user = var.vsphere_user
# vsphere_server = var.vsphere_server
# vsphere_datastore = "r730-datastore"
# vsphere_resource_pool = "R730"
# }
# module "k8s_node3" {
# source = "./modules/create-vm"
# vm_name = "k8s-node3"
# vm_mac_address = "00:50:56:b0:a1:37"
# network = "dKubernetes"
# provisioner_command = "${var.ansible_prefix} -t linux/k8s/node -e hostname=k8s-node3 -e k8s_master='wizard@${module.k8s_master.guest_ip}'"
# vsphere_password = var.vsphere_password
# vsphere_user = var.vsphere_user
# vsphere_server = var.vsphere_server
# vsphere_datastore = "r730-datastore"
# vsphere_resource_pool = "R730"
# }
# module "k8s_node4" {
# source = "./modules/create-vm"
# vm_name = "k8s-node4"
# vmid = 204
# template_name = local.vm_template_name
# cisnippet_name = local.vm_cloud_init_snippet_name
# vm_mac_address = "00:50:56:b0:a1:38"
# bridge = "vmbr1"
# vlan_tag = "20"
# }
# module "k8s_node5" {
# source = "./modules/create-vm"
# vm_name = "k8s-node5"
# vm_mac_address = "00:50:56:b0:a1:40"
# network = "dKubernetes"
# provisioner_command = "${var.ansible_prefix} -t linux/k8s/node -e hostname=k8s-node5 -e k8s_master='wizard@${module.k8s_master.guest_ip}'"
# vsphere_password = var.vsphere_password
# vsphere_user = var.vsphere_user
# vsphere_server = var.vsphere_server
# vsphere_datastore = "r730-datastore"
# vsphere_resource_pool = "R730"
# }
# module "devvm" {
# source = "./modules/create-vm"
# vm_name = "devvm"
# vm_mac_address = "00:50:56:b0:a1:41"
# network = "dKubernetes"
# # provisioner_command = "${var.ansible_prefix} -t linux/k8s/node -e hostname=k8s-node5 -e k8s_master='wizard@${module.k8s_master.guest_ip}'"
# vsphere_password = var.vsphere_password
# vsphere_user = var.vsphere_user
# vsphere_server = var.vsphere_server
# vsphere_datastore = "r730-datastore"
# vsphere_resource_pool = "R730"
# }
# resource "null_resource" "test" {
# provisioner "local-exec" {
# working_dir = "/home/viktor/"
# command = "ANSIBLE_VAULT_PASSWORD_FILE=~/.ansible/vault_pass.txt ansible-playbook -i playbook/hosts.yaml playbook/linux.yml -t linux/k8s/node -e host='10.0.40.126'"
# }
# }
# ---------------------------------------------------------------------------
# The kubernetes_cluster module (modules/kubernetes/) has been migrated to
# individual Terragrunt stacks under stacks/.
# See stacks/<service>/main.tf for each service's configuration.
# ---------------------------------------------------------------------------


@@ -1,4 +0,0 @@
# Steps to migrate one .tfstate into another.
# Run inside the directory being migrated out of; `tf` is a shell alias for
# `terraform`, and UPPER_WORKSPACE_MODULE_NAME is the enclosing module's name:
for s in $(tf state list); do tf state mv -state-out=../../terraform.tfstate $s "module.UPPER_WORKSPACE_MODULE_NAME.$s"; done


@@ -1,178 +0,0 @@
variable "tls_secret_name" {}
resource "kubernetes_namespace" "authelia" {
metadata {
name = "authelia"
labels = {
"istio-injection" : "disabled"
}
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.authelia.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "helm_release" "authelia" {
namespace = kubernetes_namespace.authelia.metadata[0].name
name = "authelia"
atomic = true
repository = "https://charts.authelia.com"
chart = "authelia"
version = "0.10.49"
depends_on = [kubernetes_namespace.authelia]
values = [templatefile("${path.module}/values.yaml", {})]
}
# resource "kubernetes_config_map" "configuration" {
# metadata {
# name = "configuration"
# namespace = kubernetes_namespace.authelia.metadata[0].name
# labels = {
# app = "configuration"
# }
# annotations = {
# "reloader.stakater.com/match" = "true"
# }
# }
# data = {
# # "configuration.yml" = yamldecode(file("${path.module}/configuration.yml"))
# "configuration.yml" = file("${path.module}/configuration.yml")
# "users_database.yml" = file("${path.module}/users_database.yml")
# }
# }
# resource "kubernetes_deployment" "authelia" {
# metadata {
# name = "authelia"
# namespace = kubernetes_namespace.authelia.metadata[0].name
# labels = {
# app = "authelia"
# }
# annotations = {
# "reloader.stakater.com/search" = "true"
# }
# }
# spec {
# replicas = 1
# selector {
# match_labels = {
# app = "authelia"
# }
# }
# template {
# metadata {
# labels = {
# app = "authelia"
# }
# }
# spec {
# container {
# image = "authelia/authelia:4.38"
# name = "authelia"
# # command = ["tail", "-f", "/etc/passwd"]
# port {
# container_port = 9091
# }
# port {
# container_port = 8080
# }
# volume_mount {
# name = "config"
# # mount_path = "/etc/authelia/configuration.yml"
# mount_path = "/config/configuration.yml"
# sub_path = "configuration.yml"
# }
# volume_mount {
# name = "users-database"
# # mount_path = "/etc/authelia/users_database.yml"
# mount_path = "/config/users_database.yml"
# sub_path = "users_database.yml"
# }
# }
# volume {
# name = "config"
# config_map {
# name = "configuration"
# }
# }
# volume {
# name = "users-database"
# config_map {
# name = "configuration"
# }
# }
# }
# }
# }
# }
# resource "kubernetes_service" "authelia" {
# metadata {
# name = "authelia"
# namespace = kubernetes_namespace.authelia.metadata[0].name
# labels = {
# "app" = "authelia"
# }
# }
# spec {
# selector = {
# app = "authelia"
# }
# port {
# name = "http"
# port = 80
# protocol = "TCP"
# # target_port = 8080
# target_port = 9091
# }
# }
# }
# resource "kubernetes_ingress_v1" "authelia" {
# metadata {
# name = "authelia"
# namespace = kubernetes_namespace.authelia.metadata[0].name
# annotations = {
# "kubernetes.io/ingress.class" = "nginx"
# # "nginx.ingress.kubernetes.io/affinity" = "cookie"
# # "nginx.ingress.kubernetes.io/auth-tls-verify-client" = "on"
# # "nginx.ingress.kubernetes.io/auth-tls-secret" = "default/ca-secret"
# # "nginx.ingress.kubernetes.io/auth-url" : "https://oauth2.viktorbarzin.me/oauth2/auth"
# # "nginx.ingress.kubernetes.io/auth-signin" : "https://oauth2.viktorbarzin.me/oauth2/start?rd=/redirect/$http_host$escaped_request_uri"
# }
# }
# spec {
# tls {
# hosts = ["auth.viktorbarzin.me"]
# secret_name = var.tls_secret_name
# }
# rule {
# host = "auth.viktorbarzin.me"
# http {
# path {
# path = "/"
# backend {
# service {
# name = "authelia"
# port {
# number = 80
# }
# }
# }
# }
# }
# }
# }
# }


@@ -1,10 +0,0 @@
users:
authelia:
disabled: false
displayname: "Viktor"
# Password is authelia
password: "$6$rounds=50000$BpLnfgDsc2WD8F2q$Zis.ixdg9s/UOJYrs56b5QEZFiZECu0qZVNsIYxBaNJ7ucIL.nlxVCT5tqh8KHG8X4tlwCFm5r6NTOZZ5qRFN/" # yamllint disable-line rule:line-length
email: me@viktorbarzin.me
groups:
- admins
- dev


@@ -1,24 +0,0 @@
configMap:
session:
cookies:
- domain: 'authelia.viktorbarzin.me'
authelia_url: 'https://authelia.viktorbarzin.me'
storage:
local:
path: '/config/db.sqlite3'
theme: light
# Error 1: access_control (The warning)
access_control:
default_policy: 'one_factor' # Change to 'two_factor' once you have 2FA set up
rules:
- domain: "*.viktorbarzin.me"
policy: one_factor
# Error 2: authentication_backend (Where users are stored)
authentication_backend:
file:
path: /config/users.yml


@@ -1,93 +0,0 @@
variable "named_conf_mounts" {}
variable "deployment_name" {}
resource "kubernetes_deployment" "bind" {
metadata {
name = var.deployment_name
namespace = "bind"
labels = {
"app" = "bind"
"kubernetes.io/cluster-service" : "true"
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = "3"
selector {
match_labels = {
"app" = var.deployment_name
}
}
template {
metadata {
labels = {
"app" = var.deployment_name
"kubernetes.io/cluster-service" : "true"
}
}
spec {
container {
name = "bind"
image = "resystit/bind9:latest"
image_pull_policy = "IfNotPresent"
port {
container_port = 53
protocol = "UDP"
}
volume_mount {
mount_path = "/etc/bind/named.conf"
sub_path = "named.conf"
name = "bindconf"
}
dynamic "volume_mount" {
for_each = [for m in var.named_conf_mounts :
{
name = m.name
mount_path = m.mount_path
sub_path = m.sub_path
}]
content {
name = volume_mount.value.name
mount_path = volume_mount.value.mount_path
sub_path = volume_mount.value.sub_path
}
}
volume_mount {
mount_path = "/etc/bind/db.viktorbarzin.me"
sub_path = "db.viktorbarzin.me"
name = "bindconf"
}
volume_mount {
mount_path = "/etc/bind/db.viktorbarzin.lan"
sub_path = "db.viktorbarzin.lan"
name = "bindconf"
}
volume_mount {
mount_path = "/etc/bind/db.181.191.213.in-addr.arpa"
sub_path = "db.181.191.213.in-addr.arpa"
name = "bindconf"
}
}
container {
name = "bind-exporter"
image = "prometheuscommunity/bind-exporter:latest"
image_pull_policy = "IfNotPresent"
port {
container_port = 9119
}
}
volume {
name = "bindconf"
config_map {
name = "bind-configmap"
}
}
}
}
}
}


@@ -1,180 +0,0 @@
; additional bind records added via terraform automation
; entries are usually programmatically added to this file


@@ -1,77 +0,0 @@
variable "db_viktorbarzin_me" {}
variable "db_viktorbarzin_lan" {}
variable "named_conf_options" {}
resource "kubernetes_namespace" "bind" {
metadata {
name = "bind"
}
}
resource "kubernetes_config_map" "bind_configmap" {
metadata {
name = "bind-configmap"
namespace = "bind"
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
"db.viktorbarzin.lan" = var.db_viktorbarzin_lan
"db.viktorbarzin.me" = format("%s%s", var.db_viktorbarzin_me, file("${path.module}/extra/viktorbarzin.me"))
"db.181.191.213.in-addr.arpa" = var.db_ptr
"named.conf" = var.named_conf
"named.conf.local" = var.named_conf_local
"named.conf.options" = var.named_conf_options
"public-named.conf.local" = var.public_named_conf_local
"public-named.conf.options" = var.public_named_conf_options
}
}
module "bind-local-deployment" {
source = "./deployment-factory"
deployment_name = "bind"
named_conf_mounts = [
{
"mount_path" = "/etc/bind/named.conf.local"
"sub_path" = "named.conf.local"
"name" = "bindconf"
},
{
mount_path = "/etc/bind/named.conf.options"
sub_path = "named.conf.options"
name = "bindconf"
}
]
}
module "bind-local-service" {
source = "./service-factory"
service_name = "bind"
port = 5354
}
module "bind-public-deployment" {
source = "./deployment-factory"
deployment_name = "bind-public"
named_conf_mounts = [
{
"mount_path" = "/etc/bind/named.conf.local"
"sub_path" = "public-named.conf.local"
"name" = "bindconf"
},
{
mount_path = "/etc/bind/named.conf.options"
sub_path = "public-named.conf.options"
name = "bindconf"
}
]
}
module "bind-public-service" {
source = "./service-factory"
service_name = "bind-public"
port = 10053
}


@@ -1,28 +0,0 @@
variable "service_name" {}
variable "port" {}
resource "kubernetes_service" "bind" {
metadata {
name = var.service_name
namespace = "bind"
annotations = {
"metallb.universe.tf/allow-shared-ip" = "shared"
}
labels = {
"app" = var.service_name
}
}
spec {
type = "LoadBalancer"
external_traffic_policy = "Cluster"
selector = {
"app" = var.service_name
}
port {
name = "dns"
protocol = "UDP"
port = var.port
target_port = "53"
}
}
}


@@ -1,98 +0,0 @@
variable "named_conf" {
default = <<EOT
// This is the primary configuration file for the BIND DNS server named.
//
// Please read /usr/share/doc/bind9/README.Debian.gz for information on the
// structure of BIND configuration files in Debian, *BEFORE* you customize
// this configuration file.
//
// If you are just adding zones, please do that in /etc/bind/named.conf.local
include "/etc/bind/named.conf.options";
include "/etc/bind/named.conf.local";
//include "/etc/bind/named.conf.default-zones";
EOT
}
variable "named_conf_local" {
default = <<EOT
//
// Do any local configuration here
//
// Consider adding the 1918 zones here, if they are not used in your
// organization
//include "/etc/bind/zones.rfc1918";
zone "viktorbarzin.me" {
type master;
file "/etc/bind/db.viktorbarzin.me";
};
zone "viktorbarzin.lan" {
type master;
file "/etc/bind/db.viktorbarzin.lan";
};
zone "181.191.213.in-addr.arpa" {
type master;
file "/etc/bind/db.181.191.213.in-addr.arpa";
};
EOT
}
variable "public_named_conf_local" {
default = <<EOT
//
// Do any local configuration here
//
// Consider adding the 1918 zones here, if they are not used in your
// organization
//include "/etc/bind/zones.rfc1918";
zone "viktorbarzin.me" {
type master;
file "/etc/bind/db.viktorbarzin.me";
};
zone "181.191.213.in-addr.arpa" {
type master;
file "/etc/bind/db.181.191.213.in-addr.arpa";
};
EOT
}
variable "public_named_conf_options" {
default = <<EOT
options {
querylog yes;
directory "/tmp/";
listen-on {
any;
};
dnssec-validation auto;
allow-recursion {
none;
};
};
EOT
}
variable "db_ptr" {
default = <<EOT
$TTL 86400
181.191.213.in-addr.arpa. IN SOA ns1.viktorbarzin.me. ns2.viktorbarzin.me. (
5 ; Serial
28800 ; Refresh
10 ; Retry
2419200 ; Expire
60 ) ; Negative Cache TTL
181.191.213.in-addr.arpa. IN NS ns1.viktorbarzin.me.
130.181.191.213.in-addr.arpa. IN PTR viktorbarzin.me.
;130 IN PTR viktorbarzin.me.
EOT
}


@@ -1,107 +0,0 @@
variable "tls_secret_name" {}
resource "kubernetes_namespace" "discount-bandit" {
metadata {
name = "discount-bandit"
# labels = {
# "istio-injection" : "enabled"
# }
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.discount-bandit.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "discount-bandit" {
metadata {
name = "discount-bandit"
namespace = kubernetes_namespace.discount-bandit.metadata[0].name
labels = {
app = "discount-bandit"
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
strategy {
type = "Recreate"
}
selector {
match_labels = {
app = "discount-bandit"
}
}
template {
metadata {
labels = {
app = "discount-bandit"
}
}
spec {
container {
image = "cybrarist/discount-bandit:latest-amd64"
name = "discount-bandit"
env {
name = "DB_HOST"
value = "mysql.dbaas"
}
env {
name = "DB_DATABASE"
value = "discountbandit"
}
env {
name = "DB_USERNAME"
value = "discountbandit"
}
env {
name = "DB_PASSWORD"
value = ""
}
env {
name = "APP_URL"
value = "http://discount.viktorbarzin.me:80"
}
port {
container_port = 80
}
}
}
}
}
}
resource "kubernetes_service" "discount-bandit" {
metadata {
name = "discount-bandit"
namespace = kubernetes_namespace.discount-bandit.metadata[0].name
labels = {
"app" = "discount-bandit"
}
}
spec {
selector = {
app = "discount-bandit"
}
port {
name = "http"
target_port = 80
port = 80
protocol = "TCP"
}
}
}
module "ingress" {
source = "../ingress_factory"
namespace = kubernetes_namespace.discount-bandit.metadata[0].name
name = "discount-bandit"
host = "discount"
tls_secret_name = var.tls_secret_name
}


@@ -1,80 +0,0 @@
# variable "tls_secret_name" {}
resource "kubernetes_namespace" "dnscat2" {
metadata {
name = "dnscat2"
labels = {
"istio-injection" : "disabled"
}
}
}
# module "tls_secret" {
# source = "../setup_tls_secret"
# namespace = kubernetes_namespace.dnscat2.metadata[0].name
# tls_secret_name = var.tls_secret_name
# }
resource "kubernetes_deployment" "dnscat2" {
metadata {
name = "dnscat2"
namespace = kubernetes_namespace.dnscat2.metadata[0].name
labels = {
app = "dnscat2"
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "dnscat2"
}
}
template {
metadata {
labels = {
app = "dnscat2"
}
}
spec {
container {
image = "arno0x0x/dnscat2"
name = "dnscat2"
stdin = true
tty = true
port {
name = "dns"
container_port = 53
protocol = "UDP"
}
env {
name = "DOMAIN_NAME"
value = "rp.viktorbarzin.me"
}
}
}
}
}
}
resource "kubernetes_service" "dnscat2" {
metadata {
name = "dnscat2"
namespace = kubernetes_namespace.dnscat2.metadata[0].name
labels = {
"app" = "dnscat2"
}
}
spec {
selector = {
app = "dnscat2"
}
port {
name = "dns"
protocol = "UDP"
port = 53
# target_port = 53
}
}
}


@@ -1,92 +0,0 @@
resource "kubernetes_namespace" "dnscrypt" {
metadata {
name = "dnscrypt"
}
}
resource "kubernetes_config_map" "dnscrypt" {
metadata {
name = "dnscrypt-proxy-configmap"
namespace = kubernetes_namespace.dnscrypt.metadata[0].name
}
data = {
"dnscrypt-proxy.toml" = var.dnscrypt_proxy_toml
}
}
resource "kubernetes_deployment" "dnscrypt" {
metadata {
name = "dnscrypt-proxy"
namespace = kubernetes_namespace.dnscrypt.metadata[0].name
labels = {
app = "dnscrypt-proxy"
"kubernetes.io/cluster-service" = "true"
}
}
spec {
replicas = 3
selector {
match_labels = {
app = "dnscrypt-proxy"
}
}
template {
metadata {
labels = {
app = "dnscrypt-proxy"
"kubernetes.io/cluster-service" = "true"
}
}
spec {
container {
image = "gists/dnscrypt-proxy:latest"
name = "dnscrypt-proxy"
image_pull_policy = "IfNotPresent"
port {
container_port = 53
protocol = "UDP"
}
volume_mount {
name = "config"
mount_path = "/etc/dnscrypt-proxy/"
}
}
volume {
name = "config"
config_map {
name = "dnscrypt-proxy-configmap"
items {
key = "dnscrypt-proxy.toml"
path = "dnscrypt-proxy.toml"
}
}
}
}
}
}
}
resource "kubernetes_service" "dnscrypt" {
metadata {
name = "dnscrypt-proxy"
namespace = kubernetes_namespace.dnscrypt.metadata[0].name
labels = {
"app" = "dnscrypt-proxy"
}
annotations = {
"metallb.universe.tf/allow-shared-ip" = "shared"
}
}
spec {
type = "LoadBalancer"
selector = {
app = "dnscrypt-proxy"
}
port {
name = "dns"
protocol = "UDP"
port = "5353"
target_port = "53"
}
}
}


@@ -1,23 +0,0 @@
variable "namespace" {}
variable "password" {}
variable "dockerhub_creds_secret_name" {
default = "dockerhub-creds"
}
variable "username" {
default = "viktorbarzin"
}
# DO NOT USE until able to store `stringData`
resource "kubernetes_secret" "dockerhub_creds" {
metadata {
name = var.dockerhub_creds_secret_name
namespace = var.namespace
}
# data is additionally base64 encode, no stringData yet :/ https://github.com/hashicorp/terraform-provider-kubernetes/issues/901
data = {
"username" = var.username
"password" = var.password
}
type = "kubernetes.io/basic-auth"
}


@@ -1,315 +0,0 @@
variable "tls_secret_name" {}
variable "prod_graphql_endpoint" {
default = "https://finance.viktorbarzin.me/graphql"
}
variable "graphql_api_secret" {}
variable "db_connection_string" {
}
variable "currency_converter_api_key" {}
variable "gocardless_secret_key" {}
variable "gocardless_secret_id" {}
resource "kubernetes_namespace" "finance_app" {
metadata {
name = "finance-app"
# TLS MiTM fails connecting to auth0
# labels = {
# "istio-injection" : "enabled"
# }
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.finance_app.metadata[0].name
tls_secret_name = var.tls_secret_name
}
# resource "kubernetes_persistent_volume" "finance_app_pv" {
# metadata {
# name = "finance-app-iscsi-pv"
# }
# spec {
# capacity = {
# "storage" = "5G"
# }
# access_modes = ["ReadWriteOnce"]
# persistent_volume_source {
# iscsi {
# target_portal = "iscsi.viktorbarzin.lan:3260"
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:finance-app"
# lun = 0
# fs_type = "ext4"
# }
# }
# }
# }
# resource "kubernetes_persistent_volume_claim" "finance_app_pvc" {
# metadata {
# name = "finance-iscsi-pvc"
# namespace = kubernetes_namespace.finance_app.metadata[0].name
# }
# spec {
# access_modes = ["ReadWriteOnce"]
# resources {
# requests = {
# "storage" = "5Gi"
# }
# }
# }
# }
resource "kubernetes_deployment" "finance_app" {
metadata {
name = "finance-app"
namespace = kubernetes_namespace.finance_app.metadata[0].name
labels = {
app = "finance-app"
}
annotations = {
"prometheus.io/scrape" = "true"
"prometheus.io/path" = "/metrics"
"prometheus.io/port" = 5000
}
}
spec {
replicas = 1
strategy {
type = "Recreate"
}
selector {
match_labels = {
app = "finance-app"
}
}
template {
metadata {
labels = {
app = "finance-app"
}
annotations = {
"prometheus.io/scrape" = "true"
"prometheus.io/path" = "/metrics"
"prometheus.io/port" = 5000
}
}
spec {
container {
image = "viktorbarzin/finance-app:latest"
name = "finance-app"
image_pull_policy = "Always"
# resources {
# limits = {
# cpu = "1"
# memory = "2Gi"
# }
# }
env {
name = "ENVIRONMENT"
value = "prod"
}
env {
name = "DB_CONNECTION_STRING"
value = var.db_connection_string
}
env {
name = "GRAPHQL_API_SECRET"
value = var.graphql_api_secret
}
env {
name = "ENABLE_SCHEDULER"
value = 1
}
env {
name = "DEBUG_METRICS"
value = 1
}
env {
name = "ML_MODEL_PATH"
value = "/data/ml_categorizer.pkl"
}
env {
name = "LABEL_ENCODER_PATH"
value = "/data/label_encoder_categorizer.pkl"
}
env {
name = "VECTORIZER_PATH"
value = "/data/vectorizer_categorizer.pkl"
}
env {
name = "CURRENCY_CONVERTER_API_KEY"
value = var.currency_converter_api_key
}
env {
name = "GOCARDLESS_SECRET_ID"
value = var.gocardless_secret_id
}
env {
name = "GOCARDLESS_SECRET_KEY"
value = var.gocardless_secret_key
}
# volume_mount {
# name = "data"
# mount_path = "/data"
# # sub_path = ""
# }
}
# volume {
# name = "data"
# iscsi {
# target_portal = "iscsi.viktorbarzin.me:3260"
# fs_type = "ext4"
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:finance-app"
# lun = 0
# read_only = false
# }
# }
}
}
}
}
resource "kubernetes_deployment" "finance_app_frontend" {
metadata {
name = "finance-app-frontend"
namespace = kubernetes_namespace.finance_app.metadata[0].name
labels = {
app = "finance-app-frontend"
}
}
spec {
replicas = 1
strategy {
type = "RollingUpdate"
}
selector {
match_labels = {
app = "finance-app-frontend"
}
}
template {
metadata {
labels = {
app = "finance-app-frontend"
}
}
spec {
container {
image = "viktorbarzin/finance-app-frontend:latest"
name = "finance-app-frontend"
image_pull_policy = "Always"
}
}
}
}
}
resource "kubernetes_service" "finance_app" {
metadata {
name = "finance-app"
namespace = kubernetes_namespace.finance_app.metadata[0].name
labels = {
app = "finance-app"
}
}
spec {
selector = {
app = "finance-app"
}
port {
name = "http"
port = "5000"
}
}
}
resource "kubernetes_service" "finance_app_frontend" {
metadata {
name = "finance-app-frontend"
namespace = kubernetes_namespace.finance_app.metadata[0].name
labels = {
app = "finance-app-frontend"
}
}
spec {
selector = {
app = "finance-app-frontend"
}
port {
name = "http"
port = "3000"
}
}
}
resource "kubernetes_ingress_v1" "finance_app" {
metadata {
name = "finance-app"
namespace = kubernetes_namespace.finance_app.metadata[0].name
annotations = {
"traefik.ingress.kubernetes.io/router.middlewares" = "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd"
"traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
}
}
spec {
ingress_class_name = "traefik"
tls {
hosts = ["finance.viktorbarzin.me"]
secret_name = var.tls_secret_name
}
rule {
host = "finance.viktorbarzin.me"
http {
path {
path = "/"
backend {
service {
name = "finance-app-frontend"
port {
number = 3000
}
}
}
}
}
}
rule {
host = "finance.viktorbarzin.me"
http {
path {
path = "/graphql"
backend {
service {
name = "finance-app"
port {
number = 5000
}
}
}
}
}
}
rule {
host = "finance.viktorbarzin.me"
http {
path {
path = "/webhook"
backend {
service {
name = "finance-app"
port {
number = 5000
}
}
}
}
}
}
}
}


@@ -1,74 +0,0 @@
# hostname: home-assistant
ingress:
main:
# -- Enables or disables the ingress
enabled: true
# -- Make this the primary ingress (used in probes, notes, etc...).
# If there is more than 1 ingress, make sure that only 1 ingress is marked as primary.
primary: true
# -- Override the name suffix that is used for this ingress.
nameOverride:
# -- Provide additional annotations which may be required.
annotations: #{}
kubernetes.io/ingress.class : "nginx"
nginx.ingress.kubernetes.io/force-ssl-redirect : "true"
nginx.ingress.kubernetes.io/auth-tls-verify-client : "on"
nginx.ingress.kubernetes.io/auth-tls-secret : ${client_certificate_secret_name}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
# -- Provide additional labels which may be required.
labels: {}
# -- Set the ingressClass that is used for this ingress.
# Requires Kubernetes >=1.19
ingressClassName: # "nginx"
## Configure the hosts for the ingress
hosts:
- # -- Host address. Helm template can be passed.
host: home-assistant.viktorbarzin.me
## Configure the paths for the host
paths:
- # -- Path. Helm template can be passed.
path: /
# -- Ignored if not kubeVersion >= 1.14-0
pathType: Prefix
service:
# -- Overrides the service name reference for this path
name: home-assistant
# -- Overrides the service port reference for this path
port: 8123
# -- Configure TLS for the ingress. Both secretName and hosts can process a Helm template.
tls: #[]
- secretName: ${tls_secret_name}
hosts:
- home-assistant.viktorbarzin.me
# -- Configure persistence for the chart here.
# Additional items can be added by adding a dictionary key similar to the 'config' key.
# [[ref]](http://docs.k8s-at-home.com/our-helm-charts/common-library-storage)
# @default -- See below
persistence:
# -- Default persistence for configuration files.
# @default -- See below
config:
# -- Enables or disables the persistence item
enabled: false
# -- Sets the persistence type
# Valid options are pvc, emptyDir, hostPath, secret, configMap or custom
type: configMap
name: home-assistant-configmap
# -- Where to mount the volume in the main container.
# Defaults to `/<name_of_the_volume>`,
# setting to '-' creates the volume but disables the volumeMount.
mountPath: /config
# -- Specify if the volume should be mounted read-only.
readOnly: true


@@ -1,238 +0,0 @@
variable "tls_secret_name" {}
variable "client_certificate_secret_name" {}
variable "configuration_yaml" {}
resource "kubernetes_namespace" "home_assistant" {
metadata {
name = "home-assistant"
}
}
resource "kubernetes_config_map" "home_assistant_config_map" {
metadata {
name = "home-assistant-configmap"
namespace = kubernetes_namespace.home_assistant.metadata[0].name
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
# "db.viktorbarzin.lan" = var.db_viktorbarzin_lan
# "db.viktorbarzin.me" = format("%s%s", var.db_viktorbarzin_me, file("${path.module}/extra/viktorbarzin.me"))
# "db.181.191.213.in-addr.arpa" = var.db_ptr
"configuration.yaml" = var.configuration_yaml
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.home_assistant.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "helm_release" "home_assistant" {
namespace = kubernetes_namespace.home_assistant.metadata[0].name
create_namespace = true
name = "home-assistant"
repository = "https://k8s-at-home.com/charts/"
chart = "home-assistant"
values = [templatefile("${path.module}/home_assistant_chart_values.tpl", { tls_secret_name = var.tls_secret_name, client_certificate_secret_name = var.client_certificate_secret_name })]
}
resource "kubernetes_deployment" "home_assistant" {
metadata {
name = "home-assistant"
namespace = kubernetes_namespace.home_assistant.metadata[0].name
labels = {
"app.kubernetes.io/instance" = "home-assistant"
"app.kubernetes.io/name" = "home-assistant"
"app.kubernetes.io/version" = "2022.5.4"
}
}
spec {
replicas = 1
selector {
match_labels = {
"app.kubernetes.io/instance" = "home-assistant"
"app.kubernetes.io/name" = "home-assistant"
}
}
template {
metadata {
labels = {
"app.kubernetes.io/instance" = "home-assistant"
"app.kubernetes.io/name" = "home-assistant"
}
}
spec {
container {
name = "home-assistant"
# image = "ghcr.io/home-assistant/home-assistant:2022.5.4"
image = "ghcr.io/home-assistant/home-assistant:2022.5.5"
# image = "ghcr.io/home-assistant/home-assistant"
port {
name = "http"
container_port = 8123
protocol = "TCP"
}
env {
name = "TZ"
value = "UTC+3"
}
volume_mount {
name = "configuration"
mount_path = "/config"
# sub_path = "hackmd"
}
liveness_probe {
tcp_socket {
port = "8123"
}
timeout_seconds = 1
period_seconds = 10
success_threshold = 1
failure_threshold = 3
}
readiness_probe {
tcp_socket {
port = "8123"
}
timeout_seconds = 1
period_seconds = 10
success_threshold = 1
failure_threshold = 3
}
startup_probe {
tcp_socket {
port = "8123"
}
timeout_seconds = 1
period_seconds = 5
success_threshold = 1
failure_threshold = 30
}
termination_message_path = "/dev/termination-log"
image_pull_policy = "IfNotPresent"
}
volume {
name = "configuration"
iscsi {
target_portal = "iscsi.viktorbarzin.lan:3260"
fs_type = "ext4"
iqn = "iqn.2020-12.lan.viktorbarzin:storage:home-assistant"
lun = 0
read_only = false
}
}
restart_policy = "Always"
termination_grace_period_seconds = 30
dns_policy = "ClusterFirst"
service_account_name = "default"
}
}
strategy {
type = "Recreate"
}
revision_history_limit = 3
}
}
resource "kubernetes_service" "home_assistant" {
metadata {
name = "home-assistant"
namespace = kubernetes_namespace.home_assistant.metadata[0].name
labels = {
"app.kubernetes.io/instance" = "home-assistant"
"app.kubernetes.io/managed-by" = "Helm"
"app.kubernetes.io/name" = "home-assistant"
"app.kubernetes.io/version" = "2022.5.4"
"helm.sh/chart" = "home-assistant-13.2.0"
}
annotations = {
"meta.helm.sh/release-name" = "home-assistant"
"meta.helm.sh/release-namespace" = "home-assistant"
}
}
spec {
port {
name = "http"
protocol = "TCP"
port = 8123
target_port = "http"
}
selector = {
"app.kubernetes.io/instance" = "home-assistant"
"app.kubernetes.io/name" = "home-assistant"
}
# cluster_ip = "10.102.20.150"
type = "ClusterIP"
session_affinity = "None"
}
}
resource "kubernetes_ingress_v1" "home-assistant-ui" {
metadata {
name = "home-assistant-ui-ingress"
namespace = kubernetes_namespace.home_assistant.metadata[0].name
annotations = {
"traefik.ingress.kubernetes.io/router.middlewares" = "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd"
"traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
"traefik.ingress.kubernetes.io/router.tls.options" = "traefik-mtls@kubernetescrd"
}
}
spec {
ingress_class_name = "traefik"
tls {
hosts = ["home-assistant.viktorbarzin.me"]
secret_name = var.tls_secret_name
}
rule {
host = "home-assistant.viktorbarzin.me"
http {
path {
path = "/"
backend {
service {
name = "home-assistant"
port {
number = 8123
}
}
}
}
}
}
}
}


@@ -1,12 +0,0 @@
#!/bin/bash
user="user"
pass="pass"
# Get power supply line input voltage (mains side)
curl -s -k -u $user:$pass -H"Content-type: application/json" -X GET https://idrac/redfish/v1/Chassis/System.Embedded.1/Power/PowerSupplies/PSU.Slot.2 |jq .LineInputVoltage
# Power off
curl -s -k -u $user:$pass -X POST -d '{"Action": "Reset", "ResetType": "GracefulShutdown"}' -H"Content-type: application/json" https://idrac/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset
# Power on
curl -s -k -u $user:$pass -X POST -d '{"Action": "Reset", "ResetType": "On"}' -H"Content-type: application/json" https://idrac/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset


@@ -1,40 +0,0 @@
global:
# ImagePullSecrets for control plane ServiceAccount, list of secrets in the same namespace
# to use for pulling any images in pods that reference this ServiceAccount.
# Must be set for any cluster configured with private docker registry.
imagePullSecrets: []
# Used to locate istiod.
istioNamespace: istio-system
istiod:
enableAnalysis: false
configValidation: true
externalIstiod: false
remotePilotAddress: ""
# Platform where Istio is deployed. Possible values are: "openshift", "gcp".
# An empty value means it is a vanilla Kubernetes distribution, therefore no special
# treatment will be considered.
platform: ""
# Setup how istiod Service is configured. See https://kubernetes.io/docs/concepts/services-networking/dual-stack/#services
# This is intended only for use with external istiod.
ipFamilyPolicy: ""
ipFamilies: []
base:
# Used for helm2 to add the CRDs to templates.
enableCRDTemplates: false
# Validation webhook configuration url
# For example: https://$remotePilotAddress:15017/validate
validationURL: ""
# For istioctl usage to disable istio config crds in base
enableIstioConfigCRDs: true
defaultRevision: "default"


@@ -1,520 +0,0 @@
#.Values.pilot for discovery and mesh wide config
## Discovery Settings
pilot:
autoscaleEnabled: true
autoscaleMin: 1
autoscaleMax: 5
autoscaleBehavior: {}
replicaCount: 1
rollingMaxSurge: 100%
rollingMaxUnavailable: 25%
hub: ""
tag: ""
variant: ""
# Can be a full hub/image:tag
image: pilot
traceSampling: 1.0
# Resources for a small pilot install
resources:
requests:
cpu: 500m
memory: 2048Mi
# Set to `type: RuntimeDefault` to use the default profile if available.
seccompProfile: {}
# Additional container arguments
extraContainerArgs: []
env: {}
cpu:
targetAverageUtilization: 80
# Additional volumeMounts to the istiod container
volumeMounts: []
# Additional volumes to the istiod pod
volumes: []
nodeSelector: {}
podAnnotations: {}
serviceAnnotations: {}
topologySpreadConstraints: []
# You can use jwksResolverExtraRootCA to provide a root certificate
# in PEM format. This will then be trusted by pilot when resolving
# JWKS URIs.
jwksResolverExtraRootCA: ""
# This is used to set the source of configuration for
# the associated address in configSource, if nothing is specified
# the default MCP is assumed.
configSource:
subscribedResources: []
plugins: []
# The following is used to limit how long a sidecar can be connected
# to a pilot. It balances out load across pilot instances at the cost of
# increasing system churn.
keepaliveMaxServerConnectionAge: 30m
# Additional labels to apply to the deployment.
deploymentLabels: {}
## Mesh config settings
# Install the mesh config map, generated from values.yaml.
# If false, pilot will use default values (by default) or user-supplied values.
configMap: true
# Additional labels to apply on the pod level for monitoring and logging configuration.
podLabels: {}
# Setup how istiod Service is configured. See https://kubernetes.io/docs/concepts/services-networking/dual-stack/#services
ipFamilyPolicy: ""
ipFamilies: []
sidecarInjectorWebhook:
# You can use the field called alwaysInjectSelector and neverInjectSelector which will always inject the sidecar or
# always skip the injection on pods that match that label selector, regardless of the global policy.
# See https://istio.io/docs/setup/kubernetes/additional-setup/sidecar-injection/#more-control-adding-exceptions
neverInjectSelector: []
alwaysInjectSelector: []
# injectedAnnotations are additional annotations that will be added to the pod spec after injection
# This is primarily to support PSP annotations. For example, if you defined a PSP with the annotations:
#
# annotations:
# apparmor.security.beta.kubernetes.io/allowedProfileNames: runtime/default
# apparmor.security.beta.kubernetes.io/defaultProfileName: runtime/default
#
# The PSP controller would add corresponding annotations to the pod spec for each container. However, this happens before
# the inject adds additional containers, so we must specify them explicitly here. With the above example, we could specify:
# injectedAnnotations:
# container.apparmor.security.beta.kubernetes.io/istio-init: runtime/default
# container.apparmor.security.beta.kubernetes.io/istio-proxy: runtime/default
injectedAnnotations: {}
# This enables injection of sidecar in all namespaces,
# with the exception of namespaces with "istio-injection:disabled" annotation
# Only one environment should have this enabled.
enableNamespacesByDefault: false
# Mutations that occur after the sidecar injector are not handled by default, as the Istio sidecar injector is only run
# once. For example, an OPA sidecar injected after the Istio sidecar will not have its liveness/readiness probes rewritten.
# Setting this to `IfNeeded` will result in the sidecar injector being run again if additional mutations occur.
reinvocationPolicy: Never
rewriteAppHTTPProbe: true
# Templates defines a set of custom injection templates that can be used. For example, defining:
#
# templates:
# hello: |
# metadata:
# labels:
# hello: world
#
# Then starting a pod with the `inject.istio.io/templates: hello` annotation will result in the pod
# being injected with the hello=world labels.
# This is intended for advanced configuration only; most users should use the built-in template
templates: {}
# defaultTemplates specifies the set of default templates that are used in sidecar injection.
# By default, a template `sidecar` is always provided, which contains the template of default sidecar.
# To inject other additional templates, define it using the `templates` option, and add it to
# the default templates list.
# For example:
#
# templates:
# hello: |
# metadata:
# labels:
# hello: world
#
# defaultTemplates: ["sidecar", "hello"]
defaultTemplates: []
istiodRemote:
# Sidecar injector mutating webhook configuration clientConfig.url value.
# For example: https://$remotePilotAddress:15017/inject
# The host should not refer to a service running in the cluster; use a service reference by specifying
# the clientConfig.service field instead.
injectionURL: ""
# Sidecar injector mutating webhook configuration path value for the clientConfig.service field.
# Override to pass env variables, for example: /inject/cluster/remote/net/network2
injectionPath: "/inject"
telemetry:
enabled: true
v2:
# For Null VM case now.
# This also enables metadata exchange.
enabled: true
metadataExchange:
# Indicates whether to enable WebAssembly runtime for metadata exchange filter.
wasmEnabled: false
# Indicates whether the prometheus stats filter is enabled
prometheus:
enabled: true
# Indicates whether to enable WebAssembly runtime for stats filter.
wasmEnabled: false
# overrides stats EnvoyFilter configuration.
configOverride:
gateway: {}
inboundSidecar: {}
outboundSidecar: {}
# stackdriver filter settings.
stackdriver:
enabled: false
logging: false
monitoring: false
topology: false # Deprecated. Setting this to true has no effect, as this option is no longer supported.
disableOutbound: false
# configOverride gives you the ability to override the low-level configuration params passed to the envoy filter.
configOverride: {}
# e.g.
# disable_server_access_logging: false
# disable_host_header_fallback: true
# Access Log Policy Filter Settings. This enables filtering of access logs from stackdriver.
accessLogPolicy:
enabled: false
# To reduce the number of successful logs, default log window duration is
# set to 12 hours.
logWindowDuration: "43200s"
# Revision is set as 'version' label and part of the resource names when installing multiple control planes.
revision: ""
# Revision tags are aliases to Istio control plane revisions
revisionTags: []
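# e.g. (illustrative values):
# revision: "1-20-1"
# revisionTags: ["prod-stable"]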
# For Helm compatibility.
ownerName: ""
# meshConfig defines runtime configuration of components, including Istiod and istio-agent behavior
# See https://istio.io/docs/reference/config/istio.mesh.v1alpha1/ for all available options
meshConfig:
enablePrometheusMerge: true
global:
# Used to locate istiod.
istioNamespace: istio-system
# List of cert-signers to allow "approve" action in the istio cluster role
#
# certSigners:
# - clusterissuers.cert-manager.io/istio-ca
certSigners: []
# enable pod disruption budget for the control plane, which is used to
# ensure Istio control plane components are gradually upgraded or recovered.
defaultPodDisruptionBudget:
enabled: true
# The values aren't mutable due to a current PodDisruptionBudget limitation
# minAvailable: 1
# A minimal set of requested resources to be applied to all deployments so that
# Horizontal Pod Autoscaler will be able to function (if set).
# Each component can overwrite these default values by adding its own resources
# block in the relevant section below and setting the desired resources values.
defaultResources:
requests:
cpu: 10m
# memory: 128Mi
# limits:
# cpu: 100m
# memory: 128Mi
# Default hub for Istio images.
# Releases are published to docker hub under 'istio' project.
# Dev builds from prow are on gcr.io
hub: docker.io/istio
# Default tag for Istio images.
tag: 1.20.1
# Variant of the image to use.
# Currently supported are: [debug, distroless]
variant: ""
# Specify image pull policy if default behavior isn't desired.
# Default behavior: latest images will be Always else IfNotPresent.
imagePullPolicy: ""
# ImagePullSecrets for all ServiceAccount, list of secrets in the same namespace
# to use for pulling any images in pods that reference this ServiceAccount.
# For components that don't use ServiceAccounts (i.e. grafana, servicegraph, tracing)
# ImagePullSecrets will be added to the corresponding Deployment(StatefulSet) objects.
# Must be set for any cluster configured with private docker registry.
imagePullSecrets: []
# - private-registry-key
# Enabled by default in master for maximising testing.
istiod:
enableAnalysis: false
# Output all istio component logs in JSON format by adding the --log_as_json argument to each container
logAsJson: false
# Comma-separated minimum per-scope logging level of messages to output, in the form of <scope>:<level>,<scope>:<level>
# The control plane has different scopes depending on component, but can configure default log level across all components
# If empty, default scope and level will be used as configured in code
logging:
level: "default:info"
omitSidecarInjectorConfigMap: false
# Whether to restrict the application namespaces the controller manages;
# if not set, the controller watches all namespaces
oneNamespace: false
# Configure whether the Operator manages webhook configurations. The current behavior
# of Istiod is to manage its own webhook configurations.
# When this option is set to true, the Istio Operator, instead of the webhooks, manages the
# webhook configurations. When this option is set to false, the webhooks manage their
# own webhook configurations.
operatorManageWebhooks: false
# Custom DNS config for the pod to resolve names of services in other
# clusters. Use this to add additional search domains, and other settings.
# see
# https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#dns-config
# This does not apply to gateway pods as they typically need a different
# set of DNS settings than the normal application pods (e.g., in
# multicluster scenarios).
# NOTE: If using templates, follow the pattern in the commented example below.
#podDNSSearchNamespaces:
#- global
#- "{{ valueOrDefault .DeploymentMeta.Namespace \"default\" }}.global"
# Kubernetes >=v1.11.0 creates two PriorityClasses, system-cluster-critical and
# system-node-critical; it is better to configure this to make sure your Istio pods
# are not killed because of a low priority class.
# Refer to https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
# for more detail.
priorityClassName: ""
proxy:
image: proxyv2
# This controls the 'policy' in the sidecar injector.
autoInject: enabled
# CAUTION: it is important to ensure that all Istio helm charts specify the same
# clusterDomain value. The default value is "cluster.local".
clusterDomain: "cluster.local"
# Per Component log level for proxy, applies to gateways and sidecars. If a component level is
# not set, then the global "logLevel" will be used.
componentLogLevel: "misc:error"
# If set, newly injected sidecars will have core dumps enabled.
enableCoreDump: false
# istio ingress capture allowlist
# examples:
# Redirect only selected ports: --includeInboundPorts="80,8080"
excludeInboundPorts: ""
includeInboundPorts: "*"
# istio egress capture allowlist
# https://istio.io/docs/tasks/traffic-management/egress.html#calling-external-services-directly
# example: includeIPRanges: "172.30.0.0/16,172.20.0.0/16"
# would only capture egress traffic on those two IP Ranges, all other outbound traffic would
# be allowed by the sidecar
includeIPRanges: "*"
excludeIPRanges: ""
includeOutboundPorts: ""
excludeOutboundPorts: ""
# Log level for proxy, applies to gateways and sidecars.
# Expected values are: trace|debug|info|warning|error|critical|off
logLevel: warning
# If set to true, the istio-proxy container will have a privileged securityContext
privileged: false
# The number of successive failed probes before indicating readiness failure.
readinessFailureThreshold: 4
# The initial delay for readiness probes in seconds.
readinessInitialDelaySeconds: 0
# The period between readiness probes.
readinessPeriodSeconds: 15
# Enables or disables a startup probe.
# For optimal startup times, changing this should be tied to the readiness probe values.
#
# If the probe is enabled, it is recommended to have delay=0s,period=15s,failureThreshold=4.
# This ensures the pod is marked ready immediately after the startup probe passes (which has a 1s poll interval),
# and doesn't spam the readiness endpoint too much
#
# If the probe is disabled, it is recommended to have delay=1s,period=2s,failureThreshold=30.
# This keeps startup reasonably fast (polling every 2s). A 1s delay is used since startup is rarely ready instantly.
startupProbe:
enabled: true
failureThreshold: 600 # 10 minutes
# Resources for the sidecar.
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 2000m
memory: 1024Mi
# Default port for Pilot agent health checks. A value of 0 will disable health checking.
statusPort: 15020
# Specify which tracer to use. One of: zipkin, lightstep, datadog, stackdriver.
# If using stackdriver tracer outside GCP, set env GOOGLE_APPLICATION_CREDENTIALS to the GCP credential file.
tracer: "zipkin"
proxy_init:
# Base name for the proxy_init container, used to configure iptables.
image: proxyv2
# configure remote pilot and istiod service and endpoint
remotePilotAddress: ""
##############################################################################################
# The following values are found in other charts. To effectively modify these values, #
# make sure they are consistent across your Istio helm charts #
##############################################################################################
# The customized CA address to retrieve certificates for the pods in the cluster.
# CSR clients such as the Istio Agent and ingress gateways can use this to specify the CA endpoint.
# If not set explicitly, default to the Istio discovery address.
caAddress: ""
# Configure a remote cluster data plane controlled by an external istiod.
# When set to true, istiod is not deployed locally and only a subset of the other
# discovery charts are enabled.
externalIstiod: false
# Configure a remote cluster as the config cluster for an external istiod.
configCluster: false
# Configure the policy for validating JWT.
# Currently, two options are supported: "third-party-jwt" and "first-party-jwt".
jwtPolicy: "third-party-jwt"
# Mesh ID means Mesh Identifier. It should be unique within the scope where
# meshes will interact with each other, but it is not required to be
# globally/universally unique. For example, if any of the following are true,
# then two meshes must have different Mesh IDs:
# - Meshes will have their telemetry aggregated in one place
# - Meshes will be federated together
# - Policy will be written referencing one mesh from the other
#
# If an administrator expects that any of these conditions may become true in
# the future, they should ensure their meshes have different Mesh IDs
# assigned.
#
# Within a multicluster mesh, each cluster must be (manually or auto)
# configured to have the same Mesh ID value. If an existing cluster 'joins' a
# multicluster mesh, it will need to be migrated to the new mesh ID. Details
# of migration TBD, and it may be a disruptive operation to change the Mesh
# ID post-install.
#
# If the mesh admin does not specify a value, Istio will use the value of the
# mesh's Trust Domain. The best practice is to select a proper Trust Domain
# value.
meshID: ""
# Configure the mesh networks to be used by the Split Horizon EDS.
#
# The following example defines two networks with different endpoints association methods.
# For `network1`, all endpoints whose IP belongs to the provided CIDR range will be
# mapped to network1. The gateway for this network example is specified by its public IP
# address and port.
# The second network, `network2`, in this example is defined differently with all endpoints
# retrieved through the specified Multi-Cluster registry being mapped to network2. The
# gateway is also defined differently with the name of the gateway service on the remote
# cluster. The public IP for the gateway will be determined from that remote service (only
# the LoadBalancer gateway service type is currently supported; a NodePort-type gateway service
# still needs to be configured manually).
#
# meshNetworks:
# network1:
# endpoints:
# - fromCidr: "192.168.0.1/24"
# gateways:
# - address: 1.1.1.1
# port: 80
# network2:
# endpoints:
# - fromRegistry: reg1
# gateways:
# - registryServiceName: istio-ingressgateway.istio-system.svc.cluster.local
# port: 443
#
meshNetworks: {}
# Use the user-specified, secret volume mounted key and certs for Pilot and workloads.
mountMtlsCerts: false
multiCluster:
# Set to true to connect two kubernetes clusters via their respective
# ingressgateway services when pods in each cluster cannot directly
# talk to one another. All clusters should be using Istio mTLS and must
# have a shared root CA for this model to work.
enabled: false
# Should be set to the name of the cluster this installation will run in. This is required for sidecar injection
# to properly label proxies
clusterName: ""
# Network defines the network this cluster belongs to. This name
# corresponds to the networks in the map of mesh networks.
network: ""
# Configure the certificate provider for control plane communication.
# Currently, two providers are supported: "kubernetes" and "istiod".
# As some platforms may not have kubernetes signing APIs,
# Istiod is the default
pilotCertProvider: istiod
sds:
# The JWT token for SDS and the aud field of such JWT. See RFC 7519, section 4.1.3.
# When a CSR is sent from Istio Agent to the CA (e.g. Istiod), this aud is to make sure the
# JWT is intended for the CA.
token:
aud: istio-ca
sts:
# The service port used by Security Token Service (STS) server to handle token exchange requests.
# Setting this port to a non-zero value enables STS server.
servicePort: 0
# The name of the CA for workload certificates.
# For example, when caName=GkeWorkloadCertificate, GKE workload certificates
# will be used as the certificates for workloads.
# The default value is "" and when caName="", the CA will be configured by other
# mechanisms (e.g., environmental variable CA_PROVIDER).
caName: ""
# whether to use autoscaling/v2 template for HPA settings
# for internal usage only, not to be configured by users.
autoscalingv2API: true
base:
# For istioctl usage to disable istio config crds in base
enableIstioConfigCRDs: true
# If enabled, gateway-api types will be validated using the standard upstream validation logic.
# This is an alternative to deploying the standalone validation server the project provides.
# This is disabled by default, as the cluster may already have a validation server; while technically
# it works to have multiple redundant validations, this adds complexity and operational risks.
# Users should consider enabling this if they want full gateway-api validation but don't have other validation servers.
validateGateway: false
# keep in sync with settings used when installing the Istio CNI chart
istio_cni:
enabled: false
chained: true


@ -1,122 +0,0 @@
nameOverride: ""
fullnameOverride: ""
image: # see: https://quay.io/repository/kiali/kiali-operator?tab=tags
repo: quay.io/kiali/kiali-operator # quay.io/kiali/kiali-operator
tag: v1.78.0 # version string like v1.39.0 or a digest hash
digest: "" # use "sha256" if tag is a sha256 hash (do NOT prefix this value with a "@")
pullPolicy: Always
pullSecrets: []
# Deployment options for the operator pod.
nodeSelector: {}
podAnnotations: {}
podLabels: {}
env: []
tolerations: []
resources:
requests:
cpu: "10m"
memory: "64Mi"
affinity: {}
replicaCount: 1
priorityClassName: ""
securityContext: {}
# metrics.enabled: set to true if you want Prometheus to collect metrics from the operator
metrics:
enabled: true
# debug.enabled: when true the full ansible logs are dumped after each reconciliation run
# debug.verbosity: defines the amount of details the operator will log (higher numbers are more noisy)
# debug.enableProfiler: when true (regardless of debug.enabled), timings for the most expensive tasks will be logged after each reconciliation loop
debug:
enabled: true
verbosity: "1"
enableProfiler: false
# Defines where the operator will look for Kiali CR resources. "" means "all namespaces".
watchNamespace: ""
# Set to true if you want the operator to be able to create cluster roles. This is necessary
# if you want to support Kiali CRs with spec.deployment.accessible_namespaces of '**'.
# Setting this to "true" requires allowAllAccessibleNamespaces to be "true" also.
# Note that this will be overridden to "true" if cr.create is true and cr.spec.deployment.accessible_namespaces is ['**'].
clusterRoleCreator: true
# Set to a list of secrets in the cluster that the operator will be allowed to read. This is necessary if you want to
# support Kiali CRs with spec.kiali_feature_flags.certificates_information_indicators.enabled=true.
# The secrets in this list will be the only ones allowed to be specified in any Kiali CR (in the setting
# spec.kiali_feature_flags.certificates_information_indicators.secrets).
# If you set this to an empty list, the operator will not be given permission to read any additional secrets
# found in the cluster, and thus will only support a value of "false" in the Kiali CR setting
# spec.kiali_feature_flags.certificates_information_indicators.enabled.
secretReader: ["cacerts", "istio-ca-secret"]
# Set to true if you want to allow the operator to only be able to install Kiali in view-only-mode.
# The purpose for this setting is to allow you to restrict the permissions given to the operator itself.
onlyViewOnlyMode: false
# allowAdHocKialiNamespace tells the operator to allow a user to be able to install a Kiali CR in one namespace but
# be able to install Kiali in another namespace. In other words, it will allow the Kiali CR spec.deployment.namespace
# to be something other than the namespace where the CR is installed. You may want to disable this if you are
# running in a multi-tenant scenario in which you only want a user to be able to install Kiali in the same namespace
# where the user has permissions to install a Kiali CR.
allowAdHocKialiNamespace: true
# allowAdHocKialiImage tells the operator to allow a user to be able to install a custom Kiali image as opposed
# to the image the operator will install by default. In other words, it will allow the
# Kiali CR spec.deployment.image_name and spec.deployment.image_version to be configured by the user.
# You may want to disable this if you do not want users to install their own Kiali images.
allowAdHocKialiImage: false
# allowAdHocOSSMConsoleImage tells the operator to allow a user to be able to install a custom OSSMC image as opposed
# to the image the operator will install by default. In other words, it will allow the
# OSSMConsole CR spec.deployment.imageName and spec.deployment.imageVersion to be configured by the user.
# You may want to disable this if you do not want users to install their own OSSMC images.
# This is only applicable when running on OpenShift.
allowAdHocOSSMConsoleImage: false
# allowSecurityContextOverride tells the operator to allow a user to be able to fully override the Kiali
# container securityContext. If this is false, certain securityContext settings must exist on the Kiali
# container and any attempt to override them will be ignored.
allowSecurityContextOverride: false
# allowAllAccessibleNamespaces tells the operator to allow a user to be able to configure Kiali
# to access all namespaces in the cluster via spec.deployment.accessible_namespaces=['**'].
# If this is false, the user must specify an explicit list of namespaces in the Kiali CR.
# Setting this to "true" requires clusterRoleCreator to be "true" also.
# Note that this will be overridden to "true" if cr.create is true and cr.spec.deployment.accessible_namespaces is ['**'].
allowAllAccessibleNamespaces: true
# accessibleNamespacesLabel restricts the namespaces that a user can add to the Kiali CR spec.deployment.accessible_namespaces.
# This value is either an empty string (which disables this feature) or a label name with an optional label value
# (e.g. "mylabel" or "mylabel=myvalue"). Only namespaces that have that label will be permitted in
# spec.deployment.accessible_namespaces. Any namespace not labeled properly but specified in accessible_namespaces will cause
# the operator to abort the Kiali installation.
# If just a label name (but no label value) is specified, the label value the operator will look for is the value of
# the Kiali CR's spec.istio_namespace. In other words, the operator will look for the named label whose value must be the name
# of the Istio control plane namespace (which is typically, but not necessarily, "istio-system").
accessibleNamespacesLabel: ""
# For what a Kiali CR spec can look like, see:
# https://github.com/kiali/kiali-operator/blob/master/deploy/kiali/kiali_cr.yaml
cr:
create: false
name: kiali
# If you elect to create a Kiali CR (--set cr.create=true)
# and the operator is watching all namespaces (--set watchNamespace="")
# then this is the namespace where the CR will be created (the default will be the operator namespace).
namespace: ""
# Annotations to place in the Kiali CR metadata.
annotations: {}
spec:
deployment:
accessible_namespaces:
- "**"
external_services:
prometheus:
# Prometheus is exposed by the "prometheus-server" service in the "monitoring" namespace
url: "http://prometheus-server.monitoring:80/"


@ -1,116 +0,0 @@
variable "tls_secret_name" {}
resource "kubernetes_namespace" "istio" {
metadata {
name = "istio-system"
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.istio.metadata[0].name
tls_secret_name = var.tls_secret_name
}
# To delete all CRDs: kubectl get crd -oname | grep --color=never 'istio.io' | xargs kubectl delete
resource "helm_release" "istio-base" {
namespace = kubernetes_namespace.istio.metadata[0].name
create_namespace = false
name = "istio-base"
atomic = true
repository = "https://istio-release.storage.googleapis.com/charts"
chart = "base"
depends_on = [kubernetes_namespace.istio]
}
resource "helm_release" "istiod" {
namespace = kubernetes_namespace.istio.metadata[0].name
create_namespace = false
name = "istiod"
atomic = true
repository = "https://istio-release.storage.googleapis.com/charts"
chart = "istiod"
depends_on = [kubernetes_namespace.istio]
}
resource "helm_release" "istio-gateway" {
namespace = kubernetes_namespace.istio.metadata[0].name
create_namespace = false
name = "istio-gateway"
atomic = true
repository = "https://istio-release.storage.googleapis.com/charts"
chart = "gateway"
depends_on = [kubernetes_namespace.istio]
}
# Kiali dashboard
resource "helm_release" "kiali" {
namespace = kubernetes_namespace.istio.metadata[0].name
create_namespace = false
name = "kiali"
atomic = true
repository = "https://kiali.org/helm-charts"
chart = "kiali-operator"
set {
name = "cr.create"
value = "true"
}
set {
name = "cr.namespace"
value = "istio-system"
}
values = [templatefile("${path.module}/kiali.yaml", {})]
depends_on = [kubernetes_namespace.istio]
}
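# Long-lived token for the kiali-service-account: Kubernetes populates this secret
# because of the service-account.name annotation; the token can be used to log in
# to the Kiali dashboard.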
resource "kubernetes_secret" "kiali-token" {
metadata {
name = "kiali-secret"
namespace = kubernetes_namespace.istio.metadata[0].name
annotations = {
"kubernetes.io/service-account.name" : "kiali-service-account"
}
}
type = "kubernetes.io/service-account-token"
}
# Gets auto-removed; revisit after finishing the power consumption analysis
# resource "kubernetes_ingress_v1" "kiali" {
# metadata {
# name = "kiali"
# namespace = kubernetes_namespace.istio.metadata[0].name
# annotations = {
# "kubernetes.io/ingress.class" = "nginx"
# "nginx.ingress.kubernetes.io/auth-url" : "https://oauth2.viktorbarzin.me/oauth2/auth"
# "nginx.ingress.kubernetes.io/auth-signin" : "https://oauth2.viktorbarzin.me/oauth2/start?rd=/redirect/$http_host$escaped_request_uri"
# }
# }
# spec {
# tls {
# hosts = ["kiali.viktorbarzin.me"]
# secret_name = var.tls_secret_name
# }
# rule {
# host = "kiali.viktorbarzin.me"
# http {
# path {
# path = "/"
# backend {
# service {
# name = "kiali"
# port {
# number = 20001
# }
# }
# }
# }
# }
# }
# }
# }


@ -1,117 +0,0 @@
variable "tls_secret_name" {}
resource "kubernetes_namespace" "jellyfin" {
metadata {
name = "jellyfin"
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.jellyfin.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "jellyfin" {
metadata {
name = "jellyfin"
namespace = kubernetes_namespace.jellyfin.metadata[0].name
labels = {
app = "jellyfin"
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
strategy {
type = "Recreate"
}
selector {
match_labels = {
app = "jellyfin"
}
}
template {
metadata {
labels = {
app = "jellyfin"
}
}
spec {
container {
image = "jellyfin/jellyfin"
name = "jellyfin"
port {
container_port = 8096
}
volume_mount {
name = "media"
mount_path = "/media"
}
volume_mount {
name = "config"
mount_path = "/config"
}
volume_mount {
name = "cache"
mount_path = "/cache"
}
}
volume {
name = "media"
nfs {
path = "/mnt/main/jellyfin/media"
server = "10.0.10.15"
}
}
volume {
name = "config"
nfs {
path = "/mnt/main/jellyfin/config"
server = "10.0.10.15"
}
}
volume {
name = "cache"
nfs {
path = "/mnt/main/jellyfin/cache"
server = "10.0.10.15"
}
}
}
}
}
}
resource "kubernetes_service" "jellyfin" {
metadata {
name = "jellyfin"
namespace = kubernetes_namespace.jellyfin.metadata[0].name
labels = {
"app" = "jellyfin"
}
}
spec {
selector = {
app = "jellyfin"
}
port {
name = "http"
target_port = 8096
port = 80
protocol = "TCP"
}
}
}
module "ingress" {
source = "../ingress_factory"
namespace = kubernetes_namespace.jellyfin.metadata[0].name
name = "jellyfin"
tls_secret_name = var.tls_secret_name
}


@ -1,9 +0,0 @@
metrics:
kafka:
enabled: true
persistence:
enabled: false
zookeeper:
persistence:
enabled: false
replicaCount: 3


@ -1,142 +0,0 @@
variable "tls_secret_name" {}
variable "client_certificate_secret_name" {}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.kafka.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "helm_release" "kafka" {
namespace = kubernetes_namespace.kafka.metadata[0].name
create_namespace = true
name = "kafka"
repository = "https://charts.bitnami.com/bitnami"
chart = "kafka"
values = [templatefile("${path.module}/kafka_chart_values.tpl", {})]
}
resource "kubernetes_deployment" "kafka-ui" {
metadata {
name = "kafka-ui"
namespace = kubernetes_namespace.kafka.metadata[0].name
labels = {
run = "kafka-ui"
}
}
spec {
replicas = 1
selector {
match_labels = {
run = "kafka-ui"
}
}
template {
metadata {
labels = {
run = "kafka-ui"
}
}
spec {
container {
image = "provectuslabs/kafka-ui:latest"
name = "kafka-ui"
resources {
limits = {
cpu = "0.5"
memory = "512Mi"
}
requests = {
cpu = "250m"
memory = "50Mi"
}
}
port {
container_port = 8080
}
env {
name = "KAFKA_CLUSTERS_0_NAME"
value = "local"
}
env {
name = "KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS"
value = "kafka:9092"
}
env {
name = "KAFKA_CLUSTERS_0_ZOOKEEPER"
value = "kafka-zookeeper:2181"
}
}
}
}
}
}
resource "kubernetes_service" "kafka-ui" {
metadata {
name = "kafka-ui"
namespace = kubernetes_namespace.kafka.metadata[0].name
labels = {
"run" = "kafka-ui"
}
# annotations = {
# "prometheus.io/scrape" = "true"
# "prometheus.io/path" = "/metrics"
# "prometheus.io/port" = "9113"
# }
}
spec {
selector = {
run = "kafka-ui"
}
port {
name = "http"
port = "80"
target_port = "8080"
}
# port {
# name = "prometheus"
# port = "9113"
# target_port = "9113"
# }
}
}
resource "kubernetes_ingress_v1" "kafka-ui" {
metadata {
name = "kafka-ui-ingress"
namespace = kubernetes_namespace.kafka.metadata[0].name
annotations = {
"traefik.ingress.kubernetes.io/router.middlewares" = "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd"
"traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
"traefik.ingress.kubernetes.io/router.tls.options" = "traefik-mtls@kubernetescrd"
}
}
spec {
ingress_class_name = "traefik"
tls {
hosts = ["kafka.viktorbarzin.me"]
secret_name = var.tls_secret_name
}
rule {
host = "kafka.viktorbarzin.me"
http {
path {
path = "/"
backend {
service {
name = "kafka-ui"
port {
number = 80
}
}
}
}
}
}
}
}


@ -1,73 +0,0 @@
This describes the setup of a remote machine that serves a key file used to decrypt a LUKS volume.
1. Install nginx
```
sudo apt update
sudo apt install nginx apache2-utils -y
```
2. Create User for basic auth
```
sudo htpasswd -c /etc/nginx/.htpasswd truenas
```
3. Create secure directory and key file
```
sudo mkdir -p /srv/keys
head -c 128 /dev/urandom | sudo tee /srv/keys/truenas.key >/dev/null
```
4. Create rate limit zone
```
# /etc/nginx/conf.d/ratelimit.conf
# Allow only 3 key requests per minute per IP
limit_req_zone $binary_remote_addr zone=keylimit:10m rate=3r/m;
```
5. Configure nginx virtual host
```
# /etc/nginx/sites-available/keyserver.conf
server {
listen 443 ssl;
server_name <ip address here>;
# TLS certificate and key (we will set these in the next step)
ssl_certificate /etc/ssl/certs/keyserver.crt;
ssl_certificate_key /etc/ssl/private/keyserver.key;
# Enforce strong TLS
ssl_protocols TLSv1.2 TLSv1.3;
ssl_prefer_server_ciphers on;
# Rate limiting zone created earlier
limit_req zone=keylimit burst=2 nodelay;
location /keys/ {
alias /srv/keys/;
# Basic auth
auth_basic "Restricted";
auth_basic_user_file /etc/nginx/.htpasswd;
# Disable directory listing
autoindex off;
# Prevent caching
add_header Cache-Control "no-store, no-cache, must-revalidate, max-age=0" always;
}
}
```
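6. Generate the TLS certificate and key referenced in the config above. A minimal sketch using a self-signed certificate (substitute a CA-issued certificate if you have one):
```
sudo openssl req -x509 -nodes -newkey rsa:4096 -days 365 \
  -keyout /etc/ssl/private/keyserver.key \
  -out /etc/ssl/certs/keyserver.crt \
  -subj "/CN=<ip address here>"
```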
7. Enable the host:
```
sudo ln -s /etc/nginx/sites-available/keyserver.conf /etc/nginx/sites-enabled/
```
8. Disable the default host:
```
sudo rm /etc/nginx/sites-enabled/default
```
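9. Reload nginx and verify the key is served (a sketch, assuming the `truenas` basic-auth user from step 2; `-k` accepts the self-signed certificate):
```
sudo nginx -t && sudo systemctl reload nginx
curl -k -u truenas https://<ip address here>/keys/truenas.key -o truenas.key
```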


@ -1,2 +0,0 @@
[keyserver]
130.162.165.220 ansible_user=ubuntu ansible_ssh_private_key_file=~/.ssh/id_ed25519


@ -1,31 +0,0 @@
variable "tls_secret_name" {}
variable "notify_url" {}
resource "kubernetes_namespace" "kured" {
metadata {
name = "kured"
labels = {
"istio-injection" : "disabled"
}
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.kured.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "helm_release" "kured" {
namespace = kubernetes_namespace.kured.metadata[0].name
create_namespace = false
name = "kured"
repository = "https://kubereboot.github.io/charts"
chart = "kured"
values = [templatefile("${path.module}/values.yaml", { notify_url : var.notify_url })]
atomic = true
depends_on = [kubernetes_namespace.kured]
}


@ -1,12 +0,0 @@
window_start: "22:00"
window_end: "06:00"
reboot_days: "mon,tue,wed,thu,fri"
service:
annotations:
prometheus.io/scrape: "true"
prometheus.io/path: "/metrics"
prometheus.io/port: "8080"
configuration:
notifyUrl: "${notify_url}"


@ -1,93 +0,0 @@
replicaCount: 1
deployment:
image: quay.io/go-skynet/local-ai:latest
env:
threads: 4
context_size: 512
modelsPath: "/models"
resources:
{}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
# Prompt templates to include
# Note: the keys of this map will be the names of the prompt template files
promptTemplates:
{}
# ggml-gpt4all-j.tmpl: |
# The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
# ### Prompt:
# {{.Input}}
# ### Response:
# Models to download at runtime
models:
# Whether to force download models even if they already exist
forceDownload: false
# The list of URLs to download models from
# Note: the name of the file will be the name of the loaded model
list:
- url:
"https://gpt4all.io/models/ggml-gpt4all-j.bin"
# basicAuth: base64EncodedCredentials
# Persistent storage for models and prompt templates.
# PVC and HostPath are mutually exclusive. If both are enabled,
# PVC configuration takes precedence. If neither is enabled, ephemeral
# storage is used.
persistence:
pvc:
enabled: false
size: 2Gi
accessModes:
- ReadWriteOnce
annotations: {}
# Optional
storageClass: ~
hostPath:
enabled: false
path: "/models"
service:
type: ClusterIP
port: 80
annotations: {}
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
ingress:
enabled: true
className: "nginx"
annotations:
{}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: ai.viktorbarzin.me
paths:
- path: /
pathType: ImplementationSpecific
tls:
- secretName: "${tls_secret}"
hosts:
- ai.viktorbarzin.me
nodeSelector: {}
tolerations: []
affinity: {}


@ -1,21 +0,0 @@
variable "tls_secret_name" {}
resource "helm_release" "prometheus" {
namespace = "localai"
create_namespace = true
name = "localai"
repository = "https://go-skynet.github.io/helm-charts/"
chart = "local-ai"
# version = "15.0.2"
# atomic = true
# cleanup_on_fail = true
values = [templatefile("${path.module}/chart_values.tpl", { tls_secret = var.tls_secret_name })]
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = "localai"
tls_secret_name = var.tls_secret_name
}


@ -1,3 +0,0 @@
# All service modules have been migrated to individual Terragrunt stacks under stacks/.
# See stacks/<service>/main.tf for each service's configuration.
# This file is no longer used.


@ -1,310 +0,0 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "postgresql_password" {}
variable "cookie_secret" {}
variable "captcha_salt" {}
locals {
domain = "mcaptcha.viktorbarzin.me"
port = 7000
}
resource "kubernetes_namespace" "mcaptcha" {
metadata {
name = "mcaptcha"
labels = {
"istio-injection" : "disabled"
tier = var.tier
}
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.mcaptcha.metadata[0].name
tls_secret_name = var.tls_secret_name
}
# mCaptcha requires a special Redis with the mcaptcha/cache module loaded
resource "kubernetes_deployment" "mcaptcha_redis" {
metadata {
name = "mcaptcha-redis"
namespace = kubernetes_namespace.mcaptcha.metadata[0].name
labels = {
app = "mcaptcha-redis"
tier = var.tier
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "mcaptcha-redis"
}
}
strategy {
type = "Recreate"
}
template {
metadata {
labels = {
app = "mcaptcha-redis"
}
}
spec {
container {
image = "mcaptcha/cache:latest"
name = "redis"
port {
container_port = 6379
}
resources {
requests = {
memory = "64Mi"
cpu = "25m"
}
limits = {
memory = "128Mi"
cpu = "200m"
}
}
liveness_probe {
tcp_socket {
port = 6379
}
initial_delay_seconds = 10
period_seconds = 10
}
readiness_probe {
tcp_socket {
port = 6379
}
initial_delay_seconds = 5
period_seconds = 5
}
}
}
}
}
}
resource "kubernetes_service" "mcaptcha_redis" {
metadata {
name = "mcaptcha-redis"
namespace = kubernetes_namespace.mcaptcha.metadata[0].name
labels = {
app = "mcaptcha-redis"
}
}
spec {
selector = {
app = "mcaptcha-redis"
}
port {
name = "redis"
port = 6379
target_port = 6379
}
}
}
resource "kubernetes_deployment" "mcaptcha" {
metadata {
name = "mcaptcha"
namespace = kubernetes_namespace.mcaptcha.metadata[0].name
labels = {
app = "mcaptcha"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "mcaptcha"
}
}
strategy {
type = "Recreate"
}
template {
metadata {
labels = {
app = "mcaptcha"
}
annotations = {
"diun.enable" = "true"
"diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
}
}
spec {
container {
image = "mcaptcha/mcaptcha:latest"
name = "mcaptcha"
port {
container_port = local.port
}
# Required configuration
env {
name = "MCAPTCHA_server_DOMAIN"
value = local.domain
}
env {
name = "MCAPTCHA_server_COOKIE_SECRET"
value = var.cookie_secret
}
env {
name = "MCAPTCHA_captcha_SALT"
value = var.captcha_salt
}
# Server configuration
env {
name = "PORT"
value = tostring(local.port)
}
env {
name = "MCAPTCHA_server_IP"
value = "0.0.0.0"
}
env {
name = "MCAPTCHA_server_PROXY_HAS_TLS"
value = "true"
}
# Database configuration (PostgreSQL)
env {
name = "DATABASE_URL"
value = "postgres://mcaptcha:${var.postgresql_password}@postgresql.dbaas.svc.cluster.local:5432/mcaptcha"
}
# Redis configuration (using mcaptcha/cache module)
env {
name = "MCAPTCHA_redis_URL"
value = "redis://mcaptcha-redis.mcaptcha.svc.cluster.local:6379"
}
# Feature flags
env {
name = "MCAPTCHA_allow_registration"
# value = "true"
value = "false"
}
env {
name = "MCAPTCHA_allow_demo"
value = "false"
}
env {
name = "MCAPTCHA_commercial"
value = "false"
}
env {
name = "MCAPTCHA_captcha_ENABLE_STATS"
value = "true"
}
env {
name = "MCAPTCHA_captcha_GC"
value = "30"
}
env {
name = "MCAPTCHA_debug"
value = "false"
}
env {
name = "RUST_BACKTRACE"
value = "1"
}
resources {
requests = {
memory = "64Mi"
cpu = "50m"
}
limits = {
memory = "256Mi"
cpu = "500m"
}
}
# Health checks
liveness_probe {
http_get {
path = "/"
port = local.port
}
initial_delay_seconds = 30
period_seconds = 10
timeout_seconds = 5
failure_threshold = 3
}
readiness_probe {
http_get {
path = "/"
port = local.port
}
initial_delay_seconds = 10
period_seconds = 5
timeout_seconds = 3
failure_threshold = 3
}
}
}
}
}
}
resource "kubernetes_service" "mcaptcha" {
metadata {
name = "mcaptcha"
namespace = kubernetes_namespace.mcaptcha.metadata[0].name
labels = {
"app" = "mcaptcha"
}
}
spec {
selector = {
app = "mcaptcha"
}
port {
name = "http"
port = 80
target_port = local.port
}
}
}
module "ingress" {
source = "../ingress_factory"
namespace = kubernetes_namespace.mcaptcha.metadata[0].name
name = "mcaptcha"
tls_secret_name = var.tls_secret_name
}


@ -1,400 +0,0 @@
# variable "host" {
# type = string
# }
resource "kubernetes_namespace" "oauth2" {
metadata {
name = "oauth2"
# the cookie does not seem to be set and auth fails
# labels = {
# "istio-injection" : "enabled"
# }
}
}
variable "tls_secret_name" {
type = string
}
variable "oauth2_proxy_client_secret" {
type = string
}
variable "oauth2_proxy_client_id" {
type = string
}
variable "authenticated_emails" {
type = string
default = ""
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = "oauth2"
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_config_map" "config" {
metadata {
name = "oauth2-proxy-nginx"
namespace = "oauth2"
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
"nginx.conf" = <<-EOT
worker_processes 5;
events {
}
http {
server {
listen 80 default_server;
location = /healthcheck {
add_header Content-Type text/plain;
return 200 'ok';
}
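# Pairs with oauth2-proxy's "--upstream=http://localhost/redirect/" below:
# after authentication, requests to /redirect/<host><uri> are bounced back
# to the original https URL.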
location ~ /redirect/(.*) {
return 307 https://$1$is_args$args;
}
}
}
EOT
}
}
resource "kubernetes_config_map" "authorized-emails" {
metadata {
name = "authorized-emails"
namespace = "oauth2"
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
"authorized_emails.txt" = var.authenticated_emails
}
}
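# oauth2-proxy requires the cookie secret to be 16, 24, or 32 bytes long.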
resource "random_password" "cookie" {
length = 16
special = true
override_special = "_%@"
}
resource "kubernetes_deployment" "oauth2-proxy" {
metadata {
name = "oauth2-proxy"
namespace = "oauth2"
labels = {
app = "oauth2"
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "oauth2"
}
}
template {
metadata {
labels = {
app = "oauth2"
}
}
spec {
container {
image = "nginx:latest"
name = "nginx"
port {
name = "http"
container_port = 80
protocol = "TCP"
}
volume_mount {
name = "config"
mount_path = "/etc/nginx/"
}
liveness_probe {
http_get {
path = "/healthcheck"
port = 80
}
}
}
container {
image = "quay.io/pusher/oauth2_proxy:latest"
name = "oauth2-proxy"
args = ["--provider=google", "--upstream=file:///dev/null", "--upstream=http://localhost/redirect/", "--http-address=0.0.0.0:4180", "--cookie-domain=.viktorbarzin.me", "--footer=-", "--authenticated-emails-file=/etc/authorized_emails/authorized_emails.txt"]
# args = ["--provider=google", "--upstream=file:///dev/null", "--upstream=http://localhost/redirect/", "--http-address=0.0.0.0:4180", "--cookie-domain=.viktorbarzin.me", "--footer=-", "--email-domain=*", "--google-group=barzini-lab-admins@googlegroups.com", "--google-admin-email=vbarzin@gmail.com", "--google-service-account-json=/etc/google_service_account/google_service_account.json"]
# args = ["--provider=google", "--upstream=file:///dev/null", "--upstream=http://localhost/redirect/", "--http-address=0.0.0.0:4180", "--cookie-domain=.viktorbarzin.me", "--footer=-", "--email-domain=*", "--google-group=barzini-lab-admins", "--google-admin-email=533122798643-compute@developer.gserviceaccount.com", "--google-service-account-json=/etc/google_service_account/google_service_account.json"]
env {
name = "OAUTH2_PROXY_CLIENT_ID"
value = var.oauth2_proxy_client_id
}
env {
name = "OAUTH2_PROXY_CLIENT_SECRET"
value = var.oauth2_proxy_client_secret
}
env {
name = "OAUTH2_PROXY_COOKIE_SECRET"
value = random_password.cookie.result
}
port {
name = "oauth"
container_port = 4180
protocol = "TCP"
}
volume_mount {
name = "authorized-emails"
mount_path = "/etc/authorized_emails"
}
# volume_mount {
# name = "sa-json"
# mount_path = "/etc/google_service_account/"
# }
}
volume {
name = "config"
config_map {
name = "oauth2-proxy-nginx"
}
}
volume {
name = "authorized-emails"
config_map {
name = "authorized-emails"
}
}
# volume {
# name = "sa-json"
# config_map {
# name = "google-service-account"
# }
# }
}
}
}
}
resource "kubernetes_service" "oauth_proxy" {
metadata {
name = "oauth2"
namespace = "oauth2"
labels = {
app = "oauth2"
}
}
spec {
selector = {
app = "oauth2"
}
port {
name = "http"
port = "80"
target_port = 4180
}
}
}
module "ingress" {
source = "../ingress_factory"
namespace = "oauth2"
name = "oauth2"
tls_secret_name = var.tls_secret_name
}
# variable "svc_name" {
# type = string
# }
# variable "client_id" {}
# variable "client_secret" {}
# resource "kubernetes_deployment" "oauth_proxy" {
# metadata {
# name = "oauth-proxy"
# namespace = var.namespace
# labels = {
# run = "oauth-proxy"
# }
# }
# spec {
# replicas = 1
# selector {
# match_labels = {
# run = "oauth-proxy"
# }
# }
# template {
# metadata {
# labels = {
# run = "oauth-proxy"
# }
# }
# spec {
# container {
# image = "quay.io/oauth2-proxy/oauth2-proxy:latest"
# args              = ["--provider=google", "--email-domain=*", "--upstream=file:///dev/null", "--http-address=0.0.0.0:4180"]
# name = "oauth-proxy"
# image_pull_policy = "IfNotPresent"
# resources {
# limits = {
# cpu = "0.5"
# memory = "512Mi"
# }
# requests = {
# cpu = "250m"
# memory = "50Mi"
# }
# }
# port {
# container_port = 4180
# }
# env {
# name = "OAUTH2_PROXY_CLIENT_ID"
# value = var.client_id
# }
# env {
# name = "OAUTH2_PROXY_CLIENT_SECRET"
# value = var.client_secret
# }
# env {
# name = "OAUTH2_PROXY_COOKIE_SECRET"
# value = random_password.cookie.result
# }
# }
# }
# }
# }
# }
# resource "kubernetes_service" "oauth_proxy" {
# metadata {
# name = var.svc_name
# namespace = var.namespace
# labels = {
# run = "oauth-proxy"
# }
# }
# spec {
# selector = {
# run = "oauth-proxy"
# }
# port {
# name = "http"
# port = "80"
# target_port = "4180"
# }
# }
# }
# resource "kubernetes_ingress_v1" "oauth" {
# metadata {
# name = "oauth-ingress"
# namespace = var.namespace
# annotations = {
# "kubernetes.io/ingress.class" = "nginx"
# "nginx.ingress.kubernetes.io/use-regex" = "true"
# }
# }
# spec {
# tls {
# hosts = [var.host]
# secret_name = var.tls_secret_name
# }
# rule {
# host = var.host
# http {
# path {
# path = "/oauth2/.*"
# backend {
# service {
# name = var.svc_name
# port {
# number = 80
# }
# }
# }
# }
# }
# }
# }
# }
# apiVersion: apps/v1
# kind: Deployment
# metadata:
# labels:
# k8s-app: oauth2-proxy
# name: oauth2-proxy
# namespace: kube-system
# spec:
# replicas: 1
# selector:
# matchLabels:
# k8s-app: oauth2-proxy
# template:
# metadata:
# labels:
# k8s-app: oauth2-proxy
# spec:
# containers:
# - args:
# - --provider=github
# - --email-domain=*
# - --upstream=file:///dev/null
# - --http-address=0.0.0.0:4180
# # Register a new application
# # https://github.com/settings/applications/new
# env:
# - name: OAUTH2_PROXY_CLIENT_ID
# value: <Client ID>
# - name: OAUTH2_PROXY_CLIENT_SECRET
# value: <Client Secret>
# # docker run -ti --rm python:3-alpine python -c 'import secrets,base64; print(base64.b64encode(base64.b64encode(secrets.token_bytes(16))));'
# - name: OAUTH2_PROXY_COOKIE_SECRET
# value: SECRET
# image: quay.io/oauth2-proxy/oauth2-proxy:latest
# imagePullPolicy: Always
# name: oauth2-proxy
# ports:
# - containerPort: 4180
# protocol: TCP
# ---
# apiVersion: v1
# kind: Service
# metadata:
# labels:
# k8s-app: oauth2-proxy
# name: oauth2-proxy
# namespace: kube-system
# spec:
# ports:
# - name: http
# port: 4180
# protocol: TCP
# targetPort: 4180
# selector:
# k8s-app: oauth2-proxy


@ -1,87 +0,0 @@
variable "tls_secret_name" {}
resource "kubernetes_namespace" "openid_help_page" {
metadata {
name = "openid-help-page"
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = "openid-help-page"
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "openid_help_page" {
metadata {
name = "openid-help-page"
namespace = "openid-help-page"
labels = {
app = "openid-help-page"
}
}
spec {
replicas = 3
selector {
match_labels = {
app = "openid-help-page"
}
}
template {
metadata {
labels = {
app = "openid-help-page"
}
}
spec {
container {
image = "viktorbarzin/openid-create-account-help-webpage:latest"
name = "openid-help-page"
resources {
limits = {
cpu = "0.5"
memory = "512Mi"
}
requests = {
cpu = "250m"
memory = "50Mi"
}
}
port {
container_port = 80
}
}
}
}
}
}
resource "kubernetes_service" "openid_help_page" {
metadata {
name = "openid-help-page"
namespace = "openid-help-page"
}
spec {
port {
name = "service-port"
protocol = "TCP"
port = 80
target_port = "80"
}
selector = {
app = "openid-help-page"
}
type = "ClusterIP"
session_affinity = "None"
}
}
module "ingress" {
source = "../ingress_factory"
namespace = "openid-help-page"
name = "openid-help-page"
host = "kubectl"
tls_secret_name = var.tls_secret_name
}


@ -1,201 +0,0 @@
variable "tls_secret_name" {}
variable "web_password" {}
resource "kubernetes_namespace" "pihole" {
metadata {
name = "pihole"
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.pihole.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_config_map" "external_conf" {
metadata {
name = "external-conf"
namespace = kubernetes_namespace.pihole.metadata[0].name
labels = {
app = "pihole"
}
}
data = {
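# lighttpd vhost override: serve the admin UI at the document root for pihole.viktorbarzin.me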
"external.conf" = "$HTTP[\"host\"] == \"pihole.viktorbarzin.me\" {\n server.document-root = \"/var/www/html/admin/\"\n}\n"
}
}
resource "kubernetes_deployment" "pihole" {
metadata {
name = "pihole"
namespace = kubernetes_namespace.pihole.metadata[0].name
labels = {
app = "pihole"
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "pihole"
}
}
template {
metadata {
labels = {
app = "pihole"
}
}
spec {
container {
image = "pihole/pihole:latest"
name = "pihole"
resources {
limits = {
cpu = "1"
memory = "1Gi"
}
requests = {
cpu = "1"
memory = "1Gi"
}
}
port {
container_port = 80
}
env {
name = "DNS1"
value = "10.0.20.200#5354" # bind
}
env {
name = "VIRTUAL_HOST"
value = "pihole.viktorbarzin.me"
}
env {
name = "WEBPASSWORD"
value = var.web_password
}
env {
name = "TZ"
value = "Europe/Sofia"
}
volume_mount {
name = "external-conf"
mount_path = "/tmp/external.conf"
sub_path = "external.conf"
}
volume_mount {
name = "pihole-local-etc-volume"
mount_path = "/etc/pihole"
}
volume_mount {
name = "pihole-local-dnsmasq-volume"
mount_path = "/etc/dnsmasq.d"
}
}
volume {
name = "external-conf"
config_map {
name = "external-conf"
}
}
volume {
name = "pihole-local-etc-volume"
empty_dir {} # no hard dependency on TrueNAS, which itself needs DNS
}
volume {
name = "pihole-local-dnsmasq-volume"
empty_dir {} # no hard dependency on TrueNAS, which itself needs DNS
}
}
}
}
}
resource "kubernetes_service" "pihole-dns" {
metadata {
name = "pihole-dns"
namespace = kubernetes_namespace.pihole.metadata[0].name
labels = {
"app" = "pihole"
}
annotations = {
"metallb.universe.tf/allow-shared-ip" : "shared"
}
}
spec {
# type = "LoadBalancer"
# external_traffic_policy = "Cluster"
selector = {
app = "pihole"
}
port {
name = "dns-udp"
port = "53"
protocol = "UDP"
}
}
}
resource "kubernetes_service" "pihole-web" {
metadata {
name = "pihole-web"
namespace = kubernetes_namespace.pihole.metadata[0].name
labels = {
"app" = "pihole"
}
annotations = {
"metallb.universe.tf/allow-shared-ip" : "shared"
}
}
spec {
selector = {
app = "pihole"
}
port {
name = "dns-web"
port = "80"
}
}
}
resource "kubernetes_ingress_v1" "pihole" {
metadata {
name = "pihole-ingress"
namespace = kubernetes_namespace.pihole.metadata[0].name
annotations = {
"traefik.ingress.kubernetes.io/router.middlewares" = "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd"
"traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
"traefik.ingress.kubernetes.io/router.tls.options" = "traefik-mtls@kubernetescrd"
}
}
spec {
ingress_class_name = "traefik"
tls {
hosts = ["pihole.viktorbarzin.me"]
secret_name = var.tls_secret_name
}
rule {
host = "pihole.viktorbarzin.me"
http {
path {
path = "/"
backend {
service {
name = "pihole-web"
port {
number = 80
}
}
}
}
}
}
}
}


@ -1,23 +0,0 @@
global:
namespace: "vault"
image:
repository: "hashicorp/vault-k8s"
tag: "1.7.0"
agentImage:
repository: "hashicorp/vault"
tag: "1.20.4"
injector:
metrics:
enabled: true
server:
image:
repository: "hashicorp/vault"
tag: "1.20.4"
enabled: true
volumes:
- name: data
emptyDir: {}
ingress:
enabled: false
ui:
enabled: true


@ -1,61 +0,0 @@
variable "tls_secret_name" {}
variable "host" {
default = "vault.viktorbarzin.me"
}
variable "tier" { type = string }
resource "kubernetes_namespace" "vault" {
metadata {
name = "vault"
labels = {
tier = var.tier
}
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.vault.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_persistent_volume" "vault_data" {
metadata {
name = "vault-data-pv"
}
spec {
capacity = {
"storage" = "10Gi"
}
access_modes = ["ReadWriteOnce"]
persistent_volume_source {
nfs {
server = "10.0.10.15"
path = "/mnt/main/vault"
}
}
}
}
resource "helm_release" "vault" {
namespace = kubernetes_namespace.vault.metadata[0].name
name = "vault"
atomic = true
repository = "https://helm.releases.hashicorp.com"
chart = "vault"
values = [templatefile("${path.module}/chart_values.tpl", { host = var.host, tls_secret_name = var.tls_secret_name })]
depends_on = [kubernetes_persistent_volume.vault_data]
}
module "ingress" {
source = "../ingress_factory"
namespace = kubernetes_namespace.vault.metadata[0].name
name = "vault"
service_name = "vault-ui"
port = 8200
tls_secret_name = var.tls_secret_name
protected = true
}


@ -1,8 +0,0 @@
terraform {
required_providers {
kubernetes = {
source = "hashicorp/kubernetes"
version = "3.0.1"
}
}
}


@ -1,216 +0,0 @@
variable "tls_secret_name" {}
resource "kubernetes_namespace" "vikunja" {
metadata {
name = "vikunja"
}
}
module "tls_secret" {
source = "../setup_tls_secret"
namespace = kubernetes_namespace.vikunja.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "vikunja" {
metadata {
name = "vikunja"
namespace = kubernetes_namespace.vikunja.metadata[0].name
labels = {
app = "vikunja"
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
strategy {
type = "Recreate"
}
selector {
match_labels = {
app = "vikunja"
}
}
template {
metadata {
labels = {
app = "vikunja"
}
}
spec {
container {
image = "vikunja/api"
name = "api"
# General settings
env {
name = "VIKUNJA_SERVICE_TIMEZONE"
value = "Europe/London"
}
env {
name = "VIKUNJA_SERVICE_ENABLEREGISTRATION"
value = "true"
}
env {
name = "VIKUNJA_LOG_LEVEL"
value = "DEBUG"
}
# Frontend Settings
env {
name = "VIKUNJA_SERVICE_JWTSECRET"
value = "vikunja"
}
env {
name = "VIKUNJA_SERVICE_FRONTENDURL"
value = "https://todo.viktorbarzin.me/"
}
# DB Settings
env {
name = "VIKUNJA_DATABASE_HOST"
value = "mysql.dbaas.svc.cluster.local"
}
env {
name = "VIKUNJA_DATABASE_PASSWORD"
value = "" # ADD ME
}
env {
name = "VIKUNJA_DATABASE_TYPE"
value = "mysql"
}
env {
name = "VIKUNJA_DATABASE_USER"
value = "vikunja"
}
env {
name = "VIKUNJA_DATABASE_DATABASE"
value = "vikunja"
}
env {
name = "VIKUNJA_LOG_DATABASE"
value = "true"
}
env {
name = "VIKUNJA_LOG_DATABASELEVEL"
value = "DEBUG"
}
# Mailer settings
env {
name = "VIKUNJA_MAILER_ENABLED"
value = "true"
}
env {
name = "VIKUNJA_MAILER_HOST"
value = "mailserver.mailserver.svc.cluster.local"
}
env {
name = "VIKUNJA_MAILER_USERNAME"
value = "me@viktorbarzin.me"
}
env {
name = "VIKUNJA_MAILER_PASSWORD"
value = "" # TODO: add me
}
env {
name = "VIKUNJA_MAILER_FROMEMAIL"
value = "todo@viktorbarzin.me"
}
# TODOIST settings
env {
name = "VIKUNJA_MIGRATION_TODOIST_ENABLE"
value = "true"
}
env {
name = "VIKUNJA_MIGRATION_TODOIST_CLIENTID"
value = "" # TODO: add me
}
env {
name = "VIKUNJA_MIGRATION_TODOIST_CLIENTSECRET"
value = "" # TODO: add me
}
env {
name = "VIKUNJA_MIGRATION_TODOIST_REDIRECTURL"
value = "https://todo.viktorbarzin.me/migrate/todoist"
}
port {
name = "api"
container_port = 3456
}
}
container {
image = "vikunja/frontend"
name = "frontend"
port {
name = "http"
container_port = 80
}
}
}
}
}
}
resource "kubernetes_service" "vikunja" {
metadata {
name = "vikunja"
namespace = kubernetes_namespace.vikunja.metadata[0].name
labels = {
"app" = "vikunja"
}
}
spec {
selector = {
app = "vikunja"
}
port {
name = "http"
target_port = 80
port = 80
protocol = "TCP"
}
}
}
resource "kubernetes_service" "api" {
metadata {
name = "api"
namespace = kubernetes_namespace.vikunja.metadata[0].name
labels = {
"app" = "vikunja"
}
}
spec {
selector = {
app = "vikunja"
}
port {
name = "api"
target_port = 3456
port = 3456
protocol = "TCP"
}
}
}
module "ingress" {
source = "../ingress_factory"
namespace = kubernetes_namespace.vikunja.metadata[0].name
name = "vikunja"
host = "todo"
tls_secret_name = var.tls_secret_name
}
module "ingress-api" {
source = "../ingress_factory"
namespace = kubernetes_namespace.vikunja.metadata[0].name
name = "vikunja-api"
host = "todo"
service_name = "api"
port = 3456
ingress_path = ["/api/"]
tls_secret_name = var.tls_secret_name
}


@ -10,8 +10,6 @@ locals {
}
}
# variable "dockerhub_password" {}
resource "kubernetes_namespace" "website" {
metadata {
name = "website"
@ -28,12 +26,6 @@ module "tls_secret" {
tls_secret_name = var.tls_secret_name
}
# module "dockerhub_creds" {
# source = "../../modules/kubernetes/dockerhub_secret"
# namespace = kubernetes_namespace.website.metadata[0].name
# password = var.dockerhub_password
# }
resource "kubernetes_deployment" "blog" {
metadata {
name = "blog"


@ -91,26 +91,6 @@ resource "kubernetes_service" "city-guesser" {
}
}
}
# resource "kubernetes_service" "city-guesser-oauth" {
# metadata {
# name = "city-guesser-oauth"
# namespace = "city-guesser"
# labels = {
# "run" = "city-guesser-oauth"
# }
# }
# spec {
# type = "ExternalName"
# external_name = "oauth-proxy.oauth.svc.cluster.local"
# # port {
# # name = "tcp"
# # port = "80"
# # target_port = "80"
# # }
# }
# }
module "ingress" {
source = "../../modules/kubernetes/ingress_factory"
@ -119,45 +99,3 @@ module "ingress" {
tls_secret_name = var.tls_secret_name
protected = true
}
# resource "kubernetes_ingress_v1" "city-guesser-oauth" {
# metadata {
# name = "city-guesser-ingress-oauth"
# namespace = "city-guesser"
# annotations = {
# "kubernetes.io/ingress.class" = "nginx"
# }
# }
# spec {
# tls {
# hosts = ["city-guesser.viktorbarzin.me"]
# secret_name = var.tls_secret_name
# }
# rule {
# host = "city-guesser.viktorbarzin.me"
# http {
# path {
# path = "/oauth2"
# backend {
# service_name = "city-guesser-oauth"
# service_port = "80"
# }
# }
# }
# }
# }
# }
# module "oauth" {
# source = "../../modules/kubernetes/oauth-proxy"
# # oauth_client_id = "3d8ce4bf7b893899d967"
# # oauth_client_secret = "REDACTED_OAUTH_SECRET"
# client_id = "3d8ce4bf7b893899d967"
# client_secret = "REDACTED_OAUTH_SECRET"
# namespace = "city-guesser"
# host = "city-guesser.viktorbarzin.me"
# tls_secret_name = var.tls_secret_name
# svc_name = "city-guesser-oauth"
# }


@ -26,12 +26,6 @@ module "tls_secret" {
tls_secret_name = var.tls_secret_name
}
# module "dockerhub_creds" {
# source = "../../modules/kubernetes/dockerhub_secret"
# namespace = kubernetes_namespace.travel.metadata[0].name
# password = var.dockerhub_password
# }
resource "kubernetes_deployment" "blog" {
metadata {
name = "travel-blog"


@ -1,49 +0,0 @@
# terraform {
# required_providers {
# kubernetes = {
# source = "hashicorp/kubernetes"
# }
# kubectl = {
# source = "gavinbunney/kubectl"
# version = ">= 1.10.0"
# }
# }
# required_version = ">= 0.13"
# }
# terraform {
# required_providers {
# proxmox = {
# source = "telmate/proxmox"
# version = "2.9.14"
# }
# }
# }
# provides more resources
# terraform {
# required_providers {
# proxmox = {
# source = "bpg/proxmox"
# version = "0.39.0"
# }
# }
# }
# terraform {
# required_providers {
# cloudflare = {
# source = "cloudflare/cloudflare"
# version = "~> 4.0"
# }
# }
# }
terraform {
required_providers {
proxmox = {
source = "telmate/proxmox"
version = "3.0.2-rc07"
}
}
}