[ci skip] Move Terraform modules into stack directories

Move all 88 service modules (66 individual + 22 platform) from
modules/kubernetes/<service>/ into their corresponding stack directories:

- Service stacks: stacks/<service>/module/
- Platform stack: stacks/platform/modules/<service>/

This collocates module source code with its Terragrunt definition.
Only shared utility modules remain in modules/kubernetes/:
ingress_factory, setup_tls_secret, dockerhub_secret, oauth-proxy.

All cross-references to shared modules updated to use correct
relative paths. Verified with terragrunt run --all -- plan:
0 adds, 0 destroys across all 68 stacks.
This commit is contained in:
Viktor Barzin 2026-02-22 14:38:14 +00:00
parent 73cb696f12
commit e225e81ebf
No known key found for this signature in database
GPG key ID: 0EB088298288D958
614 changed files with 12075 additions and 352 deletions

View file

@ -0,0 +1,72 @@
variable "tls_secret_name" {}
variable "secret_key" {}
variable "postgres_password" {}
variable "tier" { type = string }
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.authentik.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_namespace" "authentik" {
metadata {
name = "authentik"
labels = {
tier = var.tier
"resource-governance/custom-quota" = "true"
}
}
}
resource "kubernetes_resource_quota" "authentik" {
metadata {
name = "authentik-quota"
namespace = kubernetes_namespace.authentik.metadata[0].name
}
spec {
hard = {
"requests.cpu" = "8"
"requests.memory" = "8Gi"
"limits.cpu" = "24"
"limits.memory" = "48Gi"
pods = "30"
}
}
}
resource "helm_release" "authentik" {
namespace = kubernetes_namespace.authentik.metadata[0].name
create_namespace = true
name = "goauthentik"
repository = "https://charts.goauthentik.io/"
chart = "authentik"
# version = "2025.8.1"
version = "2025.10.3"
atomic = true
timeout = 6000
values = [templatefile("${path.module}/values.yaml", { postgres_password = var.postgres_password, secret_key = var.secret_key })]
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.authentik.metadata[0].name
name = "authentik"
service_name = "goauthentik-server"
tls_secret_name = var.tls_secret_name
}
module "ingress-outpost" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.authentik.metadata[0].name
name = "authentik-outpost"
host = "authentik"
service_name = "ak-outpost-authentik-embedded-outpost"
port = 9000
ingress_path = ["/outpost.goauthentik.io"]
tls_secret_name = var.tls_secret_name
}

View file

@ -0,0 +1,14 @@
[databases]
authentik = host=postgresql.dbaas port=5432 dbname=authentik user=authentik password=${password}
[pgbouncer]
listen_addr = 0.0.0.0
listen_port = 6432
auth_type = md5
auth_file = /etc/pgbouncer/userlist.txt
pool_mode = transaction
max_client_conn = 200
default_pool_size = 20
reserve_pool_size = 5
reserve_pool_timeout = 5
ignore_startup_parameters = extra_float_digits

View file

@ -0,0 +1,134 @@
resource "kubernetes_config_map" "pgbouncer_config" {
metadata {
name = "pgbouncer-config"
namespace = "authentik"
}
data = {
"pgbouncer.ini" = templatefile("${path.module}/pgbouncer.ini", { password = var.postgres_password })
}
}
# --- 2 Secret for user credentials ---
resource "kubernetes_secret" "pgbouncer_auth" {
metadata {
name = "pgbouncer-auth"
namespace = "authentik"
}
data = {
"userlist.txt" = templatefile("${path.module}/userlist.txt", { password = var.postgres_password })
}
type = "Opaque"
}
# --- 3 Deployment ---
resource "kubernetes_deployment" "pgbouncer" {
metadata {
name = "pgbouncer"
namespace = "authentik"
labels = {
app = "pgbouncer"
tier = var.tier
}
}
spec {
replicas = 3
selector {
match_labels = {
app = "pgbouncer"
}
}
template {
metadata {
labels = {
app = "pgbouncer"
}
}
spec {
affinity {
pod_anti_affinity {
required_during_scheduling_ignored_during_execution {
label_selector {
match_expressions {
key = "component"
operator = "In"
values = ["server"]
}
}
topology_key = "kubernetes.io/hostname"
}
}
}
container {
name = "pgbouncer"
image = "edoburu/pgbouncer:latest"
image_pull_policy = "IfNotPresent"
port {
container_port = 6432
}
volume_mount {
name = "config"
mount_path = "/etc/pgbouncer/pgbouncer.ini"
sub_path = "pgbouncer.ini"
}
volume_mount {
name = "auth"
mount_path = "/etc/pgbouncer/userlist.txt"
sub_path = "userlist.txt"
}
env {
name = "DATABASES_AUTHENTIK"
value = "host=postgres port=5432 dbname=authentik user=authentik password=${var.postgres_password}"
}
}
volume {
name = "config"
config_map {
name = kubernetes_config_map.pgbouncer_config.metadata[0].name
}
}
volume {
name = "auth"
secret {
secret_name = kubernetes_secret.pgbouncer_auth.metadata[0].name
}
}
}
}
}
depends_on = [kubernetes_secret.pgbouncer_auth]
}
# --- 4 Service ---
resource "kubernetes_service" "pgbouncer" {
metadata {
name = "pgbouncer"
namespace = "authentik"
}
spec {
selector = {
app = "pgbouncer"
}
port {
port = 6432
target_port = 6432
protocol = "TCP"
}
type = "ClusterIP"
}
}

View file

@ -0,0 +1 @@
"authentik" "${password}"

View file

@ -0,0 +1,31 @@
authentik:
log_level: warning
# log_level: trace
secret_key: "${secret_key}"
# This sends anonymous usage-data, stack traces on errors and
# performance data to authentik.error-reporting.a7k.io, and is fully opt-in
error_reporting:
enabled: true
postgresql:
# host: postgresql.dbaas
host: pgbouncer.authentik
port: 6432
user: authentik
password: ${postgres_password}
redis:
host: redis.redis
server:
replicas: 3
ingress:
enabled: false
# hosts:
# - authentik.viktorbarzin.me
podAnnotations:
diun.enable: true
diun.include_tags: "^202[0-9].[0-9]+.*$" # no need to annotate the worker as it uses the same image
global:
addPrometheusAnnotations: true
worker:
replicas: 3

View file

@ -0,0 +1,159 @@
# Contents for cloudflare account
variable "cloudflare_api_key" {}
variable "cloudflare_email" {}
variable "cloudflare_proxied_names" { type = list(string) }
variable "cloudflare_non_proxied_names" { type = list(string) }
variable "cloudflare_zone_id" {
description = "Zone ID for your domain"
type = string
}
variable "cloudflare_account_id" {
type = string
sensitive = true
}
variable "cloudflare_tunnel_id" {
type = string
sensitive = true
}
variable "public_ip" {
type = string
}
terraform {
required_providers {
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
}
}
provider "cloudflare" {
api_key = var.cloudflare_api_key # I gave up on getting the permissions on the token...
email = var.cloudflare_email
}
locals {
cloudflare_proxied_names_map = {
for h in var.cloudflare_proxied_names :
h => h
}
cloudflare_non_proxied_names_map = {
for h in var.cloudflare_non_proxied_names :
h => h
}
}
resource "cloudflare_zero_trust_tunnel_cloudflared_config" "sof" {
account_id = var.cloudflare_account_id
tunnel_id = var.cloudflare_tunnel_id
config {
warp_routing {
enabled = true
}
dynamic "ingress_rule" {
for_each = toset(var.cloudflare_proxied_names)
content {
hostname = ingress_rule.value == "viktorbarzin.me" ? ingress_rule.value : "${ingress_rule.value}.viktorbarzin.me"
path = "/"
service = "https://10.0.20.202:443"
origin_request {
no_tls_verify = true
}
}
}
ingress_rule {
service = "http_status:404"
}
}
}
resource "cloudflare_record" "dns_record" {
# count = length(var.cloudflare_proxied_names)
# name = var.cloudflare_proxied_names[count.index]
for_each = local.cloudflare_proxied_names_map
name = each.key
content = "${var.cloudflare_tunnel_id}.cfargotunnel.com"
proxied = true
ttl = 1
type = "CNAME"
zone_id = var.cloudflare_zone_id
}
resource "cloudflare_record" "non_proxied_dns_record" {
# count = length(var.cloudflare_non_proxied_names)
# name = var.cloudflare_non_proxied_names[count.index]
for_each = local.cloudflare_non_proxied_names_map
name = each.key
# content = var.non_proxied_names[count.index].ip
content = var.public_ip
proxied = false
ttl = 1
type = "A"
zone_id = var.cloudflare_zone_id
}
resource "cloudflare_record" "mail" {
content = "mail.viktorbarzin.me"
name = "viktorbarzin.me"
proxied = false
ttl = 1
type = "MX"
priority = 1
zone_id = var.cloudflare_zone_id
}
resource "cloudflare_record" "mail_domainkey" {
content = "\"k=rsa; p=MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDIDLB8mhAHNqs1s6GeZMQHOxWweoNKIrqo5tqRM3yFilgfPUX34aTIXNZg9xAmlK+2S/xXO1ymt127ZGMjnoFKOEP8/uZ54iHTCnioHaPZWMfJ7o6TYIXjr+9ShKfoJxZLv7lHJ2wKQK3yOw4lg4cvja5nxQ6fNoGRwo+mQ/mgJQIDAQAB\""
name = "s1._domainkey.viktorbarzin.me"
proxied = false
ttl = 1
type = "TXT"
priority = 1
zone_id = var.cloudflare_zone_id
}
resource "cloudflare_record" "mail_spf" {
content = "\"v=spf1 include:mailgun.org ~all\""
name = "viktorbarzin.me"
proxied = false
ttl = 1
type = "TXT"
priority = 1
zone_id = var.cloudflare_zone_id
}
resource "cloudflare_record" "mail_dmarc" {
content = "\"v=DMARC1; p=none; pct=100; fo=1; ri=3600; sp=none; adkim=r; aspf=r; rua=mailto:e21c0ff8@dmarc.mailgun.org,mailto:adb84997@inbox.ondmarc.com; ruf=mailto:e21c0ff8@dmarc.mailgun.org,mailto:adb84997@inbox.ondmarc.com,mailto:postmaster@viktorbarzin.me;\""
name = "_dmarc.viktorbarzin.me"
proxied = false
ttl = 1
type = "TXT"
priority = 1
zone_id = var.cloudflare_zone_id
}
resource "cloudflare_record" "keyserver" {
content = "130.162.165.220" # Oracle VPS
name = "keyserver.viktorbarzin.me"
proxied = false
ttl = 3600
type = "A"
priority = 1
zone_id = var.cloudflare_zone_id
}
# Enable HTTP/3 (QUIC) for Cloudflare-proxied domains
resource "cloudflare_zone_settings_override" "http3" {
zone_id = var.cloudflare_zone_id
settings {
http3 = "on"
}
}

View file

@ -0,0 +1,90 @@
# Contents for cloudflare tunnel
variable "tls_secret_name" {}
variable "cloudflare_tunnel_token" {}
resource "kubernetes_namespace" "cloudflared" {
metadata {
name = "cloudflared"
labels = {
tier = var.tier
}
}
}
variable "tier" { type = string }
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.cloudflared.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "cloudflared" {
metadata {
name = "cloudflared"
namespace = kubernetes_namespace.cloudflared.metadata[0].name
labels = {
app = "cloudflared"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 3
strategy {
type = "RollingUpdate"
}
selector {
match_labels = {
app = "cloudflared"
}
}
template {
metadata {
labels = {
app = "cloudflared"
}
}
spec {
container {
# image = "wisdomsky/cloudflared-web:latest"
image = "cloudflare/cloudflared"
name = "cloudflared"
command = ["cloudflared", "tunnel", "run"]
env {
name = "TUNNEL_TOKEN"
value = var.cloudflare_tunnel_token
}
port {
container_port = 14333
}
}
}
}
}
}
resource "kubernetes_service" "cloudflared" {
metadata {
name = "cloudflared"
namespace = kubernetes_namespace.cloudflared.metadata[0].name
labels = {
"app" = "cloudflared"
}
}
spec {
selector = {
app = "cloudflared"
}
port {
name = "http"
target_port = 14333
port = 80
protocol = "TCP"
}
}
}

View file

@ -0,0 +1,44 @@
controller:
extraVolumes:
- name: crowdsec-bouncer-plugin
emptyDir: {}
extraInitContainers:
- name: init-clone-crowdsec-bouncer
image: crowdsecurity/lua-bouncer-plugin
imagePullPolicy: IfNotPresent
env:
- name: API_URL
value: "http://crowdsec-service.crowdsec.svc.cluster.local:8080" # crowdsec lapi service-name
- name: API_KEY
value: "<API KEY>" # generated with `cscli bouncers add -n <bouncer_name>
- name: BOUNCER_CONFIG
value: "/crowdsec/crowdsec-bouncer.conf"
- name: CAPTCHA_PROVIDER
value: "recaptcha" # valid providers are recaptcha, hcaptcha, turnstile
- name: SECRET_KEY
value: "<your-captcha-secret-key>" # If you want captcha support otherwise remove this ENV VAR
- name: SITE_KEY
value: "<your-captcha-site-key>" # If you want captcha support otherwise remove this ENV VAR
- name: BAN_TEMPLATE_PATH
value: /etc/nginx/lua/plugins/crowdsec/templates/ban.html
- name: CAPTCHA_TEMPLATE_PATH
value: /etc/nginx/lua/plugins/crowdsec/templates/captcha.html
command:
[
"sh",
"-c",
"sh /docker_start.sh; mkdir -p /lua_plugins/crowdsec/; cp -R /crowdsec/* /lua_plugins/crowdsec/",
]
volumeMounts:
- name: crowdsec-bouncer-plugin
mountPath: /lua_plugins
extraVolumeMounts:
- name: crowdsec-bouncer-plugin
mountPath: /etc/nginx/lua/plugins/crowdsec
subPath: crowdsec
config:
plugins: "crowdsec"
lua-shared-dicts: "crowdsec_cache: 50m"
server-snippet: |
lua_ssl_trusted_certificate "/etc/ssl/certs/ca-certificates.crt"; # If you want captcha support otherwise remove this line
resolver local=on ipv6=off;

View file

@ -0,0 +1,353 @@
variable "tls_secret_name" {}
variable "homepage_username" {}
variable "homepage_password" {}
variable "db_password" {}
variable "enroll_key" {}
variable "crowdsec_dash_api_key" { type = string } # used for web dash
variable "crowdsec_dash_machine_id" { type = string } # used for web dash
variable "crowdsec_dash_machine_password" { type = string } # used for web dash
variable "tier" { type = string }
variable "slack_webhook_url" { type = string }
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_namespace" "crowdsec" {
metadata {
name = "crowdsec"
labels = {
tier = var.tier
"resource-governance/custom-quota" = "true"
}
}
}
resource "kubernetes_config_map" "crowdsec_custom_scenarios" {
metadata {
name = "crowdsec-custom-scenarios"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
labels = {
"app.kubernetes.io/name" = "crowdsec"
}
}
data = {
"http-403-abuse.yaml" = <<-YAML
type: leaky
name: crowdsecurity/http-403-abuse
description: "Detect IPs triggering too many HTTP 403s in NGINX ingress logs"
filter: "evt.Meta.log_type == 'http_access-log' && evt.Parsed.status == '403'"
groupby: "evt.Meta.source_ip"
leakspeed: "2s"
capacity: 10
blackhole: 5m
labels:
service: http
behavior: abusive_403
remediation: true
YAML
"http-429-abuse.yaml" : <<-YAML
type: leaky
name: crowdsecurity/http-429-abuse
description: "Detect IPs repeatedly triggering rate-limit (HTTP 429)"
filter: "evt.Meta.log_type == 'http_access-log' && evt.Parsed.status == '429'"
groupby: "evt.Meta.source_ip"
leakspeed: "10s"
capacity: 5
blackhole: 1m
labels:
service: http
behavior: rate_limit_abuse
remediation: true
YAML
}
}
# Whitelist for trusted IPs that should never be blocked
resource "kubernetes_config_map" "crowdsec_whitelist" {
metadata {
name = "crowdsec-whitelist"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
labels = {
"app.kubernetes.io/name" = "crowdsec"
}
}
data = {
"whitelist.yaml" = <<-YAML
name: crowdsecurity/whitelist-trusted-ips
description: "Whitelist for trusted IPs that should never be blocked"
whitelist:
reason: "Trusted IP - never block"
ip:
- "176.12.22.76"
YAML
}
}
resource "helm_release" "crowdsec" {
namespace = kubernetes_namespace.crowdsec.metadata[0].name
create_namespace = true
name = "crowdsec"
atomic = true
version = "0.21.0"
repository = "https://crowdsecurity.github.io/helm-charts"
chart = "crowdsec"
values = [templatefile("${path.module}/values.yaml", { homepage_username = var.homepage_username, homepage_password = var.homepage_password, DB_PASSWORD = var.db_password, ENROLL_KEY = var.enroll_key, SLACK_WEBHOOK_URL = var.slack_webhook_url })]
timeout = 3600
}
# Deployment for my custom dashboard that helps me unblock myself when I blocklist myself
resource "kubernetes_deployment" "crowdsec-web" {
metadata {
name = "crowdsec-web"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
labels = {
app = "crowdsec_web"
"kubernetes.io/cluster-service" = "true"
tier = var.tier
}
}
spec {
replicas = 1
strategy {
type = "RollingUpdate"
}
selector {
match_labels = {
app = "crowdsec_web"
}
}
template {
metadata {
labels = {
app = "crowdsec_web"
"kubernetes.io/cluster-service" = "true"
}
}
spec {
priority_class_name = "tier-1-cluster"
container {
name = "crowdsec-web"
image = "viktorbarzin/crowdsec_web"
env {
name = "CS_API_URL"
value = "http://crowdsec-service.crowdsec.svc.cluster.local:8080/v1"
}
env {
name = "CS_API_KEY"
value = var.crowdsec_dash_api_key
}
env {
name = "CS_MACHINE_ID"
value = var.crowdsec_dash_machine_id
}
env {
name = "CS_MACHINE_PASSWORD"
value = var.crowdsec_dash_machine_password
}
port {
name = "http"
container_port = 8000
protocol = "TCP"
}
}
}
}
}
}
resource "kubernetes_service" "crowdsec-web" {
metadata {
name = "crowdsec-web"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
labels = {
"app" = "crowdsec_web"
}
}
spec {
selector = {
app = "crowdsec_web"
}
port {
port = "80"
target_port = "8000"
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
name = "crowdsec-web"
protected = true
tls_secret_name = var.tls_secret_name
exclude_crowdsec = true
rybbit_site_id = "d09137795ccc"
}
# CronJob to import public blocklists into CrowdSec
# https://github.com/wolffcatskyy/crowdsec-blocklist-import
# Uses kubectl exec to run in an existing CrowdSec agent pod that's already registered
resource "kubernetes_cron_job_v1" "crowdsec_blocklist_import" {
metadata {
name = "crowdsec-blocklist-import"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
labels = {
app = "crowdsec-blocklist-import"
tier = var.tier
}
}
spec {
# Run daily at 4 AM
schedule = "0 4 * * *"
timezone = "Europe/London"
concurrency_policy = "Forbid"
successful_jobs_history_limit = 3
failed_jobs_history_limit = 3
job_template {
metadata {
labels = {
app = "crowdsec-blocklist-import"
}
}
spec {
backoff_limit = 3
template {
metadata {
labels = {
app = "crowdsec-blocklist-import"
}
}
spec {
service_account_name = kubernetes_service_account.blocklist_import.metadata[0].name
restart_policy = "OnFailure"
container {
name = "blocklist-import"
image = "bitnami/kubectl:latest"
command = ["/bin/bash", "-c"]
args = [
<<-EOF
set -e
echo "Finding CrowdSec agent pod..."
AGENT_POD=$(kubectl get pods -n crowdsec -l k8s-app=crowdsec,type=agent -o jsonpath='{.items[0].metadata.name}')
if [ -z "$AGENT_POD" ]; then
echo "ERROR: Could not find CrowdSec agent pod"
exit 1
fi
echo "Using agent pod: $AGENT_POD"
# Download the import script
echo "Downloading blocklist import script..."
curl -fsSL -o /tmp/import.sh \
https://raw.githubusercontent.com/wolffcatskyy/crowdsec-blocklist-import/main/import.sh
chmod +x /tmp/import.sh
# Copy script to agent pod and execute
echo "Copying script to agent pod and executing..."
kubectl cp /tmp/import.sh crowdsec/$AGENT_POD:/tmp/import.sh
kubectl exec -n crowdsec "$AGENT_POD" -- /bin/bash -c '
set -e
# Run with native mode since we are inside the CrowdSec container
export MODE=native
export DECISION_DURATION=24h
export FETCH_TIMEOUT=60
export LOG_LEVEL=INFO
/tmp/import.sh
# Cleanup
rm -f /tmp/import.sh
'
echo "Blocklist import completed successfully!"
EOF
]
}
}
}
}
}
}
}
# Service account for the blocklist import job (needs kubectl exec permissions)
resource "kubernetes_service_account" "blocklist_import" {
metadata {
name = "crowdsec-blocklist-import"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
}
}
resource "kubernetes_role" "blocklist_import" {
metadata {
name = "crowdsec-blocklist-import"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
}
rule {
api_groups = [""]
resources = ["pods"]
verbs = ["get", "list"]
}
rule {
api_groups = [""]
resources = ["pods/exec"]
verbs = ["create"]
}
}
resource "kubernetes_role_binding" "blocklist_import" {
metadata {
name = "crowdsec-blocklist-import"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = kubernetes_role.blocklist_import.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.blocklist_import.metadata[0].name
namespace = kubernetes_namespace.crowdsec.metadata[0].name
}
}
# Custom ResourceQuota for CrowdSec needs more than default 1-cluster quota
# because it runs DaemonSet agents (1 per worker node) + 3 LAPI replicas + web UI
resource "kubernetes_resource_quota" "crowdsec" {
metadata {
name = "crowdsec-quota"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
}
spec {
hard = {
"requests.cpu" = "8"
"requests.memory" = "8Gi"
"limits.cpu" = "16"
"limits.memory" = "16Gi"
pods = "30"
}
}
}

View file

@ -0,0 +1,196 @@
# values from - https://github.com/crowdsecurity/helm-charts/blob/main/charts/crowdsec/values.yaml
container_runtime: containerd
agent:
priorityClassName: "tier-1-cluster"
# To specify each pod you want to process it logs (pods present in the node)
acquisition:
# The namespace where the pod is located
- namespace: traefik
# The pod name
podName: traefik-*
# as in crowdsec configuration, we need to specify the program name so the parser will match and parse logs
program: traefik
# Those are ENV variables
env:
# As it's a test, we don't want to share signals with CrowdSec so disable the Online API.
# - name: DISABLE_ONLINE_API
# value: "true"
# As we are running Traefik, we want to install the Traefik collection
- name: COLLECTIONS
value: "crowdsecurity/traefik crowdsecurity/base-http-scenarios crowdsecurity/http-cve"
- name: SCENARIOS
value: ""
# value: "crowdsecurity/http-crawl-aggressive"
# Mount custom scenarios into /etc/crowdsec/scenarios
extraVolumeMounts:
- name: custom-scenarios
mountPath: /etc/crowdsec/scenarios/http-403-abuse.yaml
subPath: "http-403-abuse.yaml"
readonly: true
- name: custom-scenarios
mountPath: /etc/crowdsec/scenarios/http-429-abuse.yaml
subPath: "http-429-abuse.yaml"
readonly: true
- name: whitelist
mountPath: /etc/crowdsec/parsers/s02-enrich/whitelist.yaml
subPath: "whitelist.yaml"
readonly: true
extraVolumes:
- name: custom-scenarios
configMap:
name: crowdsec-custom-scenarios
- name: whitelist
configMap:
name: crowdsec-whitelist
lapi:
priorityClassName: "tier-1-cluster"
replicas: 3
extraSecrets:
dbPassword: "${DB_PASSWORD}"
storeCAPICredentialsInSecret: true
persistentVolume:
config:
enabled: false
data:
enabled: false
env:
- name: ENROLL_KEY
value: "${ENROLL_KEY}"
- name: ENROLL_INSTANCE_NAME
value: "k8s-cluster"
- name: ENROLL_TAGS
value: "k8s linux"
- name: DB_PASSWORD
valueFrom:
secretKeyRef:
name: crowdsec-lapi-secrets
key: dbPassword
# As it's a test, we don't want to share signals with CrowdSec, so disable the Online API.
# - name: DISABLE_ONLINE_API
# value: "true"
dashboard:
enabled: true
env:
- name: MB_DB_TYPE
value: "mysql"
- name: MB_DB_DBNAME
value: crowdsec-metabase
- name: MB_DB_USER
value: "crowdsec"
- name: MB_DB_PASS
value: "${DB_PASSWORD}"
- name: MB_DB_HOST
value: "mysql.dbaas.svc.cluster.local"
- name: MB_EMAIL_SMTP_USERNAME
value: "info@viktorbarzin.me"
- name: MB_EMAIL_FROM_ADDRESS
value: "info@viktorbarzin.me"
- name: MB_EMAIL_SMTP_HOST
value: "mailserver.mailserver.svc.cluster.local"
- name: MB_EMAIL_SMTP_PASSWORD
value: "" # Ignore for now as it's unclear what notifications we can get
- name: MB_EMAIL_SMTP_PORT
value: "587"
- name: MB_EMAIL_SMTP_SECURITY
value: "starttls"
ingress:
enabled: true
annotations:
nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
#nginx.ingress.kubernetes.io/auth-url: "https://oauth2.viktorbarzin.me/oauth2/auth"
nginx.ingress.kubernetes.io/auth-url: "http://ak-outpost-authentik-embedded-outpost.authentik.svc.cluster.local:9000/outpost.goauthentik.io/auth/nginx"
# nginx.ingress.kubernetes.io/auth-signin: "https://oauth2.viktorbarzin.me/oauth2/start?rd=/redirect/$http_host$escaped_request_uri"
nginx.ingress.kubernetes.io/auth-signin: "https://authentik.viktorbarzin.me/outpost.goauthentik.io/start?rd=$scheme%3A%2F%2F$host$escaped_request_uri"
nginx.ingress.kubernetes.io/auth-response-headers: "Set-Cookie,X-authentik-username,X-authentik-groups,X-authentik-email,X-authentik-name,X-authentik-uid"
nginx.ingress.kubernetes.io/auth-snippet: "proxy_set_header X-Forwarded-Host $http_host;"
gethomepage.dev/enabled: "true"
gethomepage.dev/description: "Web Application Firewall"
gethomepage.dev/icon: "crowdsec.png"
gethomepage.dev/name: "CrowdSec"
gethomepage.dev/widget.type: "crowdsec"
gethomepage.dev/widget.url: "http://crowdsec-service.crowdsec.svc.cluster.local:8080"
gethomepage.dev/widget.username: "${homepage_username}"
gethomepage.dev/widget.password: "${homepage_password}"
gethomepage.dev/pod-selector: ""
ingressClassName: "nginx"
host: "crowdsec.viktorbarzin.me"
tls:
- hosts:
- crowdsec.viktorbarzin.me
secretName: "tls-secret"
metrics:
enabled: true
strategy:
type: RollingUpdate
config:
# Custom profiles: captcha for rate limiting, ban for attacks
profiles.yaml: |
# Captcha for rate limiting and 403 abuse (user can unblock themselves)
name: captcha_remediation
filters:
- Alert.Remediation == true && Alert.GetScope() == "Ip" && Alert.GetScenario() in ["crowdsecurity/http-429-abuse", "crowdsecurity/http-403-abuse", "crowdsecurity/http-crawl-non_statics", "crowdsecurity/http-sensitive-files"]
decisions:
- type: captcha
duration: 4h
notifications:
- slack_alerts
on_success: break
---
# Default: Ban for serious attacks (CVE exploits, scanners, brute force)
name: default_ip_remediation
filters:
- Alert.Remediation == true && Alert.GetScope() == "Ip"
decisions:
- type: ban
duration: 4h
notifications:
- slack_alerts
on_success: break
---
name: default_range_remediation
filters:
- Alert.Remediation == true && Alert.GetScope() == "Range"
decisions:
- type: ban
duration: 4h
notifications:
- slack_alerts
on_success: break
config.yaml.local: |
db_config:
type: mysql
user: crowdsec
password: ${DB_PASSWORD}
db_name: crowdsec
host: mysql.dbaas.svc.cluster.local
port: 3306
api:
server:
auto_registration: # Activate if not using TLS for authentication
enabled: true
token: "$${REGISTRATION_TOKEN}" # /!\ do not change
allowed_ranges: # /!\ adapt to the pod IP ranges used by your cluster
- "127.0.0.1/32"
- "192.168.0.0/16"
- "10.0.0.0/8"
- "172.16.0.0/12"
notifications:
slack.yaml: |
type: slack
name: slack_alerts
log_level: info
format: |
:rotating_light: *CrowdSec Alert*
{{range .}}
*Scenario:* {{.Alert.Scenario}}
*Source IP:* {{.Alert.Source.IP}} ({{.Alert.Source.Cn}})
*Decisions:*
{{range .Alert.Decisions}} - {{.Type}} for {{.Duration}} (scope: {{.Scope}}, value: {{.Value}})
{{end}}
{{end}}
webhook: ${SLACK_WEBHOOK_URL}

View file

@ -0,0 +1,17 @@
tls:
useSelfSigned: true
credentials:
root:
password: ${root_password}
user: root
serverInstances: 1
podSpec:
containers:
- name: mysql
resources:
requests:
memory: "1024Mi" # adapt to your needs
cpu: "1800m" # adapt to your needs
limits:
memory: "2048Mi" # adapt to your needs
cpu: "3600m" # adapt to your needs

View file

@ -0,0 +1,30 @@
apiVersion: mysql.presslabs.org/v1alpha1
kind: MysqlCluster
metadata:
name: mysql-cluster
namespace: dbaas
spec:
mysqlVersion: "5.7"
replicas: 1
secretName: cluster-secret
mysqlConf:
# read_only: 0 # mysql forms a single transaction for each sql statement, autocommit for each statement
# automatic_sp_privileges: "ON" # automatically grants the EXECUTE and ALTER ROUTINE privileges to the creator of a stored routine
# auto_generate_certs: "ON" # Auto Generation of Certificate
# auto_increment_increment: 1 # Auto Incrementing value from +1
# auto_increment_offset: 1 # Auto Increment Offset
# binlog-format: "STATEMENT" # contains various options such ROW(SLOW,SAFE) STATEMENT(FAST,UNSAFE), MIXED(combination of both)
# wait_timeout: 31536000 # 28800 number of seconds the server waits for activity on a non-interactive connection before closing it, You might encounter MySQL server has gone away error, you then tweak this value acccordingly
# interactive_timeout: 28800 # The number of seconds the server waits for activity on an interactive connection before closing it.
# max_allowed_packet: "512M" # Maximum size of MYSQL Network protocol packet that the server can create or read 4MB, 8MB, 16MB, 32MB
# max-binlog-size: 1073741824 # binary logs contains the events that describe database changes, this parameter describe size for the bin_log file.
# log_output: "TABLE" # Format in which the logout will be dumped
# master-info-repository: "TABLE" # Format in which the master info will be dumped
# relay_log_info_repository: "TABLE" # Format in which the relay info will be dumped
volumeSpec:
persistentVolumeClaim:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi

View file

@ -0,0 +1,916 @@
# DB as a service. Installs MySQL operator
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "dbaas_root_password" {}
variable "cluster_master_service" {
default = "mysql"
}
variable "postgresql_root_password" {}
variable "pgadmin_password" {}
variable "prod" {
default = false
type = bool
}
resource "kubernetes_namespace" "dbaas" {
metadata {
name = "dbaas"
labels = {
tier = var.tier
}
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.dbaas.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_config_map" "mycnf" {
metadata {
name = "mycnf"
namespace = kubernetes_namespace.dbaas.metadata[0].name
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
"my.cnf" = <<-EOT
# For advice on how to change settings please see
# http://dev.mysql.com/doc/refman/8.2/en/server-configuration-defaults.html
[mysqld]
#
# Remove leading # and set to the amount of RAM for the most important data
# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%.
# innodb_buffer_pool_size = 128M
#
# Remove leading # to turn on a very important data integrity option: logging
# changes to the binary log between backups.
# log_bin
#
# Remove leading # to set options mainly useful for reporting servers.
# The server defaults are faster for transactions and fast SELECTs.
# Adjust sizes as needed, experiment to find the optimal values.
# join_buffer_size = 128M
# sort_buffer_size = 2M
# read_rnd_buffer_size = 2M
# Remove leading # to revert to previous value for default_authentication_plugin,
# this will increase compatibility with older clients. For background, see:
# https://dev.mysql.com/doc/refman/8.2/en/server-system-variables.html#sysvar_default_authentication_plugin
# default-authentication-plugin=mysql_native_password
#skip-host-cache
skip-name-resolve
datadir=/var/lib/mysql
socket=/var/run/mysqld/mysqld.sock
secure-file-priv=/var/lib/mysql-files
user=mysql
#innodb_force_recovery = 6
#log_error_verbosity = 6
pid-file=/var/run/mysqld/mysqld.pid
[client]
socket=/var/run/mysqld/mysqld.sock
!includedir /etc/mysql/conf.d/
EOT
}
}
resource "kubernetes_service" "mysql" {
metadata {
name = var.cluster_master_service
namespace = kubernetes_namespace.dbaas.metadata[0].name
}
spec {
selector = {
app = "mysql"
}
port {
port = 3306
}
}
}
resource "kubernetes_deployment" "mysql" {
metadata {
name = "mysql"
namespace = kubernetes_namespace.dbaas.metadata[0].name
annotations = {
"reloader.stakater.com/search" = "true"
}
labels = {
tier = var.tier
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "mysql"
}
}
strategy {
type = "Recreate"
}
template {
metadata {
labels = {
app = "mysql"
}
annotations = {
"diun.enable" = "false"
"diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
}
}
spec {
container {
image = "mysql:9.2.0"
name = "mysql"
env {
name = "MYSQL_ROOT_PASSWORD"
value = var.dbaas_root_password
}
port {
container_port = 3306
name = "mysql"
}
volume_mount {
name = "mysql-persistent-storage"
mount_path = "/var/lib/mysql"
}
volume_mount {
name = "mycnf"
mount_path = "/etc/my.cnf"
sub_path = "my.cnf"
}
}
volume {
name = "mysql-persistent-storage"
nfs {
path = "/mnt/main/mysql"
server = "10.0.10.15"
}
}
volume {
name = "mycnf"
config_map {
name = "mycnf"
}
}
}
}
}
}
resource "kubernetes_cron_job_v1" "mysql-backup" {
metadata {
name = "mysql-backup"
namespace = kubernetes_namespace.dbaas.metadata[0].name
}
spec {
concurrency_policy = "Replace"
failed_jobs_history_limit = 5
schedule = "0 0 * * *"
# schedule = "* * * * *"
starting_deadline_seconds = 10
successful_jobs_history_limit = 10
job_template {
metadata {}
spec {
backoff_limit = 3
ttl_seconds_after_finished = 10
template {
metadata {}
spec {
container {
name = "mysql-backup"
image = "mysql"
# TODO: would be nice to rotate at some point... Current size is 11MB so not really needed atm
command = ["/bin/bash", "-c", <<-EOT
set -euxo pipefail
export now=$(date +"%Y_%m_%d_%H_%M")
mysqldump --all-databases -u root -p${var.dbaas_root_password} --host mysql.dbaas.svc.cluster.local > /backup/dump_$now.sql
# Rotate - delete last log file
cd /backup
find . -name "dump_*.sql" -type f -mtime +14 -delete # 14 day retention of backups
echo Done
EOT
]
# To restore (from outside of the cluster):
# run kubectl port-forward to pod e.g.:
# > kb port-forward mysql-647cfd4969-46rmw --address 0.0.0.0 3307:3306
# run mysql import (and specify non-localhost address to avoid using unix socket): (password is in tfvars)
# > mysql -u root -p --host 10.0.10.10 --port 3307 < /mnt/nfs/2024_01_06_13_54.sql
volume_mount {
name = "mysql-backup"
mount_path = "/backup"
}
}
volume {
name = "mysql-backup"
nfs {
path = "/mnt/main/mysql-backup"
server = "10.0.10.15"
}
}
}
}
}
}
}
}
# resource "kubernetes_persistent_volume" "mysql" {
# metadata {
# name = "mysql-pv"
# }
# spec {
# capacity = {
# "storage" = "10Gi"
# }
# access_modes = ["ReadWriteOnce"]
# persistent_volume_source {
# iscsi {
# target_portal = "iscsi.viktorbarzin.lan:3260"
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:dbaas:mysql"
# lun = 0
# fs_type = "ext4"
# }
# }
# }
# }
# resource "helm_release" "mysql" {
# namespace = kubernetes_namespace.dbaas.metadata[0].name
# create_namespace = false
# name = "mysql"
# repository = "https://presslabs.github.io/charts"
# chart = "mysql-operator"
# # version = "v0.5.0-rc.3"
# values = [templatefile("${path.module}/mysql_chart_values.yaml", { secretName = var.tls_secret_name })]
# atomic = true
# depends_on = [kubernetes_namespace.dbaas]
# }
# # resource "helm_release" "mysql" {
# # namespace = kubernetes_namespace.dbaas.metadata[0].name
# # create_namespace = false
# # name = "mysql-operator"
# # repository = "https://mysql.github.io/mysql-operator/"
# # chart = "mysql-operator"
# # atomic = true
# # depends_on = [kubernetes_namespace.dbaas]
# # }
# # resource "helm_release" "innodb-cluster" {
# # namespace = kubernetes_namespace.dbaas.metadata[0].name
# # create_namespace = false
# # name = var.cluster_master_service
# # repository = "https://mysql.github.io/mysql-operator/"
# # chart = "mysql-innodbcluster"
# # atomic = true
# # depends_on = [kubernetes_namespace.dbaas]
# # values = [templatefile("${path.module}/chart_values.tpl", { root_password = var.dbaas_root_password })]
# # }
# resource "kubernetes_persistent_volume" "mysql-operator" {
# metadata {
# name = "mysql-operator-pv"
# }
# spec {
# capacity = {
# "storage" = "1Gi"
# }
# access_modes = ["ReadWriteOnce"]
# persistent_volume_source {
# iscsi {
# target_portal = "iscsi.viktorbarzin.lan:3260"
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:dbaas:operator"
# lun = 0
# fs_type = "ext4"
# }
# }
# }
# }
resource "kubernetes_secret" "cluster-password" {
metadata {
name = "cluster-secret"
namespace = kubernetes_namespace.dbaas.metadata[0].name
annotations = {
"reloader.stakater.com/match" = "true"
}
}
type = "Opaque"
data = {
"ROOT_PASSWORD" = var.dbaas_root_password
}
}
# resource "kubernetes_ingress_v1" "dbaas" {
# metadata {
# name = "orchestrator-ingress"
# namespace = kubernetes_namespace.dbaas.metadata[0].name
# annotations = {
# "kubernetes.io/ingress.class" = "nginx"
# "nginx.ingress.kubernetes.io/auth-tls-verify-client" = "on"
# "nginx.ingress.kubernetes.io/auth-tls-secret" = "default/ca-secret"
# }
# }
# spec {
# tls {
# hosts = ["db.viktorbarzin.me"]
# secret_name = var.tls_secret_name
# }
# rule {
# host = "db.viktorbarzin.me"
# http {
# path {
# path = "/"
# backend {
# service {
# name = "mysql-mysql-operator"
# port {
# number = 80
# }
# }
# }
# }
# }
# }
# }
# }
# PHPMyAdmin instance
resource "kubernetes_deployment" "phpmyadmin" {
metadata {
name = "phpmyadmin"
namespace = kubernetes_namespace.dbaas.metadata[0].name
labels = {
"app" = "phpmyadmin"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = "1"
selector {
match_labels = {
"app" = "phpmyadmin"
}
}
template {
metadata {
labels = {
"app" = "phpmyadmin"
}
}
spec {
container {
name = "phpmyadmin"
image = "phpmyadmin/phpmyadmin:5.2.3"
port {
container_port = 80
}
env {
name = "PMA_HOST"
value = var.cluster_master_service
}
env {
name = "PMA_PORT"
value = "3306"
}
env {
name = "MYSQL_ROOT_PASSWORD"
value_from {
secret_key_ref {
name = "cluster-secret"
key = "ROOT_PASSWORD"
}
}
}
env {
name = "UPLOAD_LIMIT"
value = "300M"
}
}
}
}
}
}
resource "kubernetes_service" "phpmyadmin" {
metadata {
name = "pma"
namespace = kubernetes_namespace.dbaas.metadata[0].name
}
spec {
selector = {
"app" = "phpmyadmin"
}
port {
name = "web"
port = 80
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.dbaas.metadata[0].name
name = "pma"
tls_secret_name = var.tls_secret_name
protected = true
extra_annotations = {}
rybbit_site_id = "942c76b8bd4d"
custom_content_security_policy = "script-src 'self' 'unsafe-inline' 'unsafe-eval' 'wasm-unsafe-eval' https://rybbit.viktorbarzin.me"
}
# resource "kubectl_manifest" "mysql-cluster" {
# yaml_body = <<-YAML
# apiVersion: mysql.presslabs.org/v1alpha1
# kind: MysqlCluster
# metadata:
# name: mysql-cluster
# namespace = kubernetes_namespace.dbaas.metadata[0].name
# spec:
# mysqlVersion: "5.7"
# replicas: 1
# secretName: cluster-secret
# mysqlConf:
# # read_only: 0 # mysql forms a single transaction for each sql statement, autocommit for each statement
# # automatic_sp_privileges: "ON" # automatically grants the EXECUTE and ALTER ROUTINE privileges to the creator of a stored routine
# # auto_generate_certs: "ON" # Auto Generation of Certificate
# # auto_increment_increment: 1 # Auto Incrementing value from +1
# # auto_increment_offset: 1 # Auto Increment Offset
# # binlog-format: "STATEMENT" # contains various options such ROW(SLOW,SAFE) STATEMENT(FAST,UNSAFE), MIXED(combination of both)
# # wait_timeout: 31536000 # 28800 number of seconds the server waits for activity on a non-interactive connection before closing it, You might encounter MySQL server has gone away error, you then tweak this value acccordingly
# # interactive_timeout: 28800 # The number of seconds the server waits for activity on an interactive connection before closing it.
# # max_allowed_packet: "512M" # Maximum size of MYSQL Network protocol packet that the server can create or read 4MB, 8MB, 16MB, 32MB
# # max-binlog-size: 1073741824 # binary logs contains the events that describe database changes, this parameter describe size for the bin_log file.
# # log_output: "TABLE" # Format in which the logout will be dumped
# # master-info-repository: "TABLE" # Format in which the master info will be dumped
# # relay_log_info_repository: "TABLE" # Format in which the relay info will be dumped
# volumeSpec:
# persistentVolumeClaim:
# accessModes:
# - ReadWriteOnce
# resources:
# requests:
# storage: 10Gi
# YAML
# depends_on = [helm_release.mysql]
# # manifest = {
# # apiVersion = "mysql.presslabs.org/v1alpha1"
# # kind = "MysqlCluster"
# # metadata = {
# # name = "mysql-cluster"
# # namespace = kubernetes_namespace.dbaas.metadata[0].name
# # }
# # spec = {
# # mysqlVersion = "5.7"
# # replicas = 1
# # secretName = "cluster-secret"
# # mysqlConf = {
# # read_only = 0
# # }
# # volumeSpec = {
# # persistentVolumeClaim = {
# # resources = {
# # requests = {
# # storage = "10Gi"
# # }
# # }
# # }
# # }
# # }
# # }
# }
# For some unknwown reason not all CRDs are installed. Add them manually
# resource "kubectl_manifest" "mysql-user" {
# yaml_body = <<-EOF
# apiVersion: apiextensions.k8s.io/v1
# kind: CustomResourceDefinition
# metadata:
# annotations:
# controller-gen.kubebuilder.io/version: v0.5.0
# helm.sh/hook: crd-install
# name: mysqlusers.mysql.presslabs.org
# labels:
# app: mysql-operator
# spec:
# group: mysql.presslabs.org
# names:
# kind: MysqlUser
# listKind: MysqlUserList
# plural: mysqlusers
# singular: mysqluser
# scope:namespace = kubernetes_namespace.dbaas.metadata[0].name
# versions:
# - additionalPrinterColumns:
# - description: The user status
# jsonPath: .status.conditions[?(@.type == 'Ready')].status
# name: Ready
# type: string
# - jsonPath: .spec.clusterRef.name
# name: Cluster
# type: string
# - jsonPath: .spec.user
# name: UserName
# type: string
# - jsonPath: .metadata.creationTimestamp
# name: Age
# type: date
# name: v1alpha1
# schema:
# openAPIV3Schema:
# description: MysqlUser is the Schema for the MySQL User API
# properties:
# apiVersion:
# description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
# type: string
# kind:
# description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
# type: string
# metadata:
# type: object
# spec:
# description: MysqlUserSpec defines the desired state of MysqlUserSpec
# properties:
# allowedHosts:
# description: AllowedHosts is the allowed host to connect from.
# items:
# type: string
# type: array
# clusterRef:
# description: ClusterRef represents a reference to the MySQL cluster. This field should be immutable.
# properties:
# name:
# description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
# type: string
# namespace = kubernetes_namespace.dbaas.metadata[0].name
# description:namespace = kubernetes_namespace.dbaas.metadata[0].name
# type: string
# type: object
# password:
# description: Password is the password for the user.
# properties:
# key:
# description: The key of the secret to select from. Must be a valid secret key.
# type: string
# name:
# description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
# type: string
# optional:
# description: Specify whether the Secret or its key must be defined
# type: boolean
# required:
# - key
# type: object
# permissions:
# description: Permissions is the list of roles that user has in the specified database.
# items:
# description: MysqlPermission defines a MySQL schema permission
# properties:
# permissions:
# description: Permissions represents the permissions granted on the schema/tables
# items:
# type: string
# type: array
# schema:
# description: Schema represents the schema to which the permission applies
# type: string
# tables:
# description: Tables represents the tables inside the schema to which the permission applies
# items:
# type: string
# type: array
# required:
# - permissions
# - schema
# - tables
# type: object
# type: array
# resourceLimits:
# additionalProperties:
# anyOf:
# - type: integer
# - type: string
# pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
# x-kubernetes-int-or-string: true
# description: 'ResourceLimits allow settings limit per mysql user as defined here: https://dev.mysql.com/doc/refman/5.7/en/user-resources.html'
# type: object
# user:
# description: User is the name of the user that will be created with will access the specified database. This field should be immutable.
# type: string
# required:
# - allowedHosts
# - clusterRef
# - password
# - user
# type: object
# status:
# description: MysqlUserStatus defines the observed state of MysqlUser
# properties:
# allowedHosts:
# description: AllowedHosts contains the list of hosts that the user is allowed to connect from.
# items:
# type: string
# type: array
# conditions:
# description: Conditions represents the MysqlUser resource conditions list.
# items:
# description: MySQLUserCondition defines the condition struct for a MysqlUser resource
# properties:
# lastTransitionTime:
# description: Last time the condition transitioned from one status to another.
# format: date-time
# type: string
# lastUpdateTime:
# description: The last time this condition was updated.
# format: date-time
# type: string
# message:
# description: A human readable message indicating details about the transition.
# type: string
# reason:
# description: The reason for the condition's last transition.
# type: string
# status:
# description: Status of the condition, one of True, False, Unknown.
# type: string
# type:
# description: Type of MysqlUser condition.
# type: string
# required:
# - lastTransitionTime
# - message
# - reason
# - status
# - type
# type: object
# type: array
# type: object
# type: object
# served: true
# storage: true
# subresources:
# status: {}
# EOF
# }
resource "kubernetes_deployment" "postgres" {
metadata {
name = "postgresql"
namespace = kubernetes_namespace.dbaas.metadata[0].name
annotations = {
"reloader.stakater.com/search" = "true"
}
labels = {
tier = var.tier
}
}
spec {
selector {
match_labels = {
app = "postgresql"
}
}
strategy {
type = "Recreate"
}
template {
metadata {
labels = {
app = "postgresql"
}
annotations = {
"diun.enable" = "false"
"diun.include_tags" = "^\\d+(?:\\.\\d+)?-bullseye$"
}
}
spec {
container {
# image = "postgis/postgis:16-master"
image = "viktorbarzin/postgres:16-master" # mix of postgis + pgvector
# image = "postgres:17.2-bullseye" # needs pg_upgrade to data dir
name = "postgresql"
env {
name = "POSTGRES_PASSWORD"
value = var.postgresql_root_password
}
env {
name = "POSTGRES_USER"
value = "root"
}
port {
container_port = 5432
protocol = "TCP"
name = "postgresql"
}
volume_mount {
name = "postgresql-persistent-storage"
mount_path = "/var/lib/postgresql/data"
}
# volume_mount {
# name = "mycnf"
# mount_path = "/etc/my.cnf"
# sub_path = "my.cnf"
# }
}
volume {
name = "postgresql-persistent-storage"
nfs {
path = "/mnt/main/postgresql/data"
server = "10.0.10.15"
}
}
# volume {
# name = "mycnf"
# config_map {
# name = "mycnf"
# }
# }
}
}
}
}
resource "kubernetes_service" "postgresql" {
metadata {
name = "postgresql"
namespace = kubernetes_namespace.dbaas.metadata[0].name
}
spec {
selector = {
"app" = "postgresql"
}
port {
name = "postgresql"
port = 5432
target_port = 5432
}
}
}
#### PGADMIN
resource "kubernetes_deployment" "pgadmin" {
metadata {
name = "pgadmin"
namespace = kubernetes_namespace.dbaas.metadata[0].name
annotations = {
"reloader.stakater.com/search" = "true"
}
labels = {
tier = var.tier
}
}
spec {
selector {
match_labels = {
app = "pgadmin"
}
}
template {
metadata {
labels = {
app = "pgadmin"
}
}
spec {
container {
image = "dpage/pgadmin4"
name = "pgadmin"
env {
name = "PGADMIN_DEFAULT_EMAIL"
value = "me@viktorbarzin.me"
}
env {
name = "PGADMIN_DEFAULT_PASSWORD"
# Changed at startup
value = var.pgadmin_password
}
port {
container_port = 80
name = "web"
}
volume_mount {
name = "pgadmin"
mount_path = "/var/lib/pgadmin/"
}
}
volume {
name = "pgadmin"
# config_map {
# name = "pgadmin-config"
# }
nfs {
path = "/mnt/main/postgresql/pgadmin"
server = "10.0.10.15"
}
}
}
}
}
}
resource "kubernetes_service" "pgadmin" {
metadata {
name = "pgadmin"
namespace = kubernetes_namespace.dbaas.metadata[0].name
}
spec {
selector = {
"app" = "pgadmin"
}
port {
name = "pgadmin"
port = 80
}
}
}
module "ingress-pgadmin" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.dbaas.metadata[0].name
name = "pgadmin"
tls_secret_name = var.tls_secret_name
protected = true
rybbit_site_id = "7cef78e30485"
}
resource "kubernetes_cron_job_v1" "postgresql-backup" {
metadata {
name = "postgresql-backup"
namespace = kubernetes_namespace.dbaas.metadata[0].name
}
spec {
concurrency_policy = "Replace"
failed_jobs_history_limit = 5
schedule = "0 0 * * *"
# schedule = "* * * * *"
starting_deadline_seconds = 10
successful_jobs_history_limit = 10
job_template {
metadata {}
spec {
backoff_limit = 3
ttl_seconds_after_finished = 10
template {
metadata {}
spec {
container {
name = "postgresql-backup"
image = "postgres:16.4-bullseye"
command = ["/bin/bash", "-c", <<-EOT
set -euxo pipefail
export now=$(date +"%Y_%m_%d_%H_%M")
PGPASSWORD=${var.postgresql_root_password} pg_dumpall -h postgresql.dbaas -U root > /backup/dump_$now.sql
# Rotate - delete last log file
cd /backup
find . -name "dump_*.sql" -type f -mtime +7 -delete # 7 day retention of backups
echo Done
EOT
]
volume_mount {
name = "postgresql-backup"
mount_path = "/backup"
}
}
volume {
name = "postgresql-backup"
nfs {
path = "/mnt/main/postgresql-backup"
server = "10.0.10.15"
}
}
}
}
}
}
}
}

View file

@ -0,0 +1,14 @@
---
orchestrator:
# persistence:
# enabled: false
ingress:
enable: false
hosts:
- host: db.viktorbarzin.me
paths:
- path: /
tls:
- secretName: ${secretName}
hosts:
- db.viktorbarzin.me

View file

@ -0,0 +1,30 @@
# Use the PostGIS image as the base
FROM pgvector/pgvector:0.8.0-pg16 as binary
FROM postgis/postgis:16-master
COPY --from=binary /pgvecto-rs-binary-release.deb /tmp/vectors.deb
RUN apt-get install -y /tmp/vectors.deb && rm -f /tmp/vectors.deb
# Install necessary packages
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
build-essential \
libpq-dev \
wget \
git \
postgresql-server-dev-16 \
postgresql-16-pgvector \
# Clean up to reduce layer size
&& rm -rf /var/lib/apt/lists/* \
&& cd /tmp \
&& git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git \
&& cd pgvector \
&& make \
&& make install \
# Clean up unnecessary files
&& cd - \
&& apt-get purge -y --auto-remove build-essential postgresql-server-dev-16 libpq-dev wget git \
&& rm -rf /tmp/pgvector
# Copy initialization scripts
#COPY ./docker-entrypoint-initdb.d/ /docker-entrypoint-initdb.d/
CMD ["postgres", "-c" ,"shared_preload_libraries=vectors.so", "-c", "search_path=\"$user\", public, vectors", "-c", "logging_collector=on"]

View file

@ -0,0 +1,9 @@
# terraform {
# required_providers {
# kubectl = {
# source = "gavinbunney/kubectl"
# version = ">= 1.10.0"
# }
# }
# required_version = ">= 0.13"
# }

View file

@ -0,0 +1,254 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "headscale_config" {}
variable "headscale_acl" {}
resource "kubernetes_namespace" "headscale" {
metadata {
name = "headscale"
labels = {
tier = var.tier
}
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.headscale.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "headscale" {
metadata {
name = "headscale"
namespace = kubernetes_namespace.headscale.metadata[0].name
labels = {
app = "headscale"
tier = var.tier
# scare to try but probably non-http will fail
# "istio-injection" : "enabled"
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
strategy {
type = "Recreate"
}
selector {
match_labels = {
app = "headscale"
}
}
template {
metadata {
labels = {
app = "headscale"
}
annotations = {
# "diun.enable" = "true"
"diun.enable" = "false"
"diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
}
}
spec {
container {
image = "headscale/headscale:0.23.0"
# image = "headscale/headscale:0.23.0-debug" # -debug is for debug images
name = "headscale"
command = ["headscale", "serve"]
port {
container_port = 8080
}
port {
container_port = 9090
}
port {
container_port = 41641
}
volume_mount {
name = "config-volume"
mount_path = "/etc/headscale"
}
volume_mount {
mount_path = "/mnt"
name = "nfs-config"
}
}
volume {
name = "config-volume"
config_map {
name = "headscale-config"
items {
key = "config.yaml"
path = "config.yaml"
}
items {
key = "acl.yaml"
path = "acl.yaml"
}
}
}
volume {
name = "nfs-config"
nfs {
path = "/mnt/main/headscale"
server = "10.0.10.15"
}
}
# container {
# image = "simcu/headscale-ui:0.1.4"
# name = "headscale-ui"
# port {
# container_port = 80
# }
# }
container {
image = "ghcr.io/gurucomputing/headscale-ui:latest"
# image = "ghcr.io/tale/headplane:0.3.2"
name = "headscale-ui"
port {
container_port = 8081
# container_port = 3000
}
env {
name = "HTTP_PORT"
value = "8081"
}
# env {
# name = "HTTPS_PORT"
# value = "8082"
# }
env {
name = "HEADSCALE_URL"
value = "http://localhost:8080"
}
env {
name = "COOKIE_SECRET"
value = "kekekekke"
}
env {
name = "ROOT_API_KEY"
value = "kekekekeke"
}
}
}
}
}
}
resource "kubernetes_service" "headscale" {
metadata {
name = "headscale"
namespace = kubernetes_namespace.headscale.metadata[0].name
labels = {
"app" = "headscale"
}
annotations = {
"prometheus.io/scrape" = "true"
"prometheus.io/port" = "9090"
}
# annotations = {
# "metallb.universe.tf/allow-shared-ip" : "shared"
# }
}
spec {
# type = "LoadBalancer"
# external_traffic_policy = "Cluster"
selector = {
app = "headscale"
}
port {
name = "headscale"
port = "8080"
protocol = "TCP"
}
port {
name = "headscale-ui"
port = "80"
target_port = 8081
# target_port = 3000
protocol = "TCP"
}
port {
name = "metrics"
port = "9090"
protocol = "TCP"
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.headscale.metadata[0].name
name = "headscale"
port = 8080
tls_secret_name = var.tls_secret_name
}
module "ingress-ui" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.headscale.metadata[0].name
name = "headscale-ui"
host = "headscale"
service_name = "headscale"
port = 8081
ingress_path = ["/web"]
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_service" "headscale-server" {
metadata {
name = "headscale-server"
namespace = kubernetes_namespace.headscale.metadata[0].name
labels = {
"app" = "headscale"
}
annotations = {
"metallb.universe.tf/allow-shared-ip" : "shared"
}
}
spec {
type = "LoadBalancer"
external_traffic_policy = "Cluster"
selector = {
app = "headscale"
}
# port {
# name = "headscale-tcp"
# port = "41641"
# protocol = "TCP"
# }
port {
name = "headscale-udp"
port = "41641"
protocol = "UDP"
}
}
}
resource "kubernetes_config_map" "headscale-config" {
metadata {
name = "headscale-config"
namespace = kubernetes_namespace.headscale.metadata[0].name
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
"config.yaml" = var.headscale_config
"acl.yaml" = var.headscale_acl
}
}

View file

@ -0,0 +1,212 @@
# Module to run some infra-specific things like updating the public ip
variable "git_user" {}
variable "git_token" {}
variable "technitium_username" {}
variable "technitium_password" {}
# DISABLED WHILST USING CLOUDFLARE NS
# resource "kubernetes_cron_job_v1" "update-public-ip" {
# metadata {
# name = "update-public-ip"
# namespace = "default"
# }
# spec {
# schedule = "*/5 * * * *"
# successful_jobs_history_limit = 1
# failed_jobs_history_limit = 1
# concurrency_policy = "Forbid"
# job_template {
# metadata {
# name = "update-public-ip"
# }
# spec {
# template {
# metadata {
# name = "update-public-ip"
# }
# spec {
# priority_class_name = "system-cluster-critical"
# container {
# name = "update-public-ip"
# image = "viktorbarzin/infra"
# command = ["./infra_cli"]
# args = ["-use-case", "update-public-ip"]
# env {
# name = "GIT_USER"
# value = var.git_user
# }
# env {
# name = "GIT_TOKEN"
# value = var.git_token
# }
# env {
# name = "TECHNITIUM_USERNAME"
# value = var.technitium_username
# }
# env {
# name = "TECHNITIUM_PASSWORD"
# value = var.technitium_password
# }
# }
# restart_policy = "Never"
# # service_account_name = "descheduler-sa"
# # volume {
# # name = "policy-volume"
# # config_map {
# # name = "policy-configmap"
# # }
# # }
# }
# }
# }
# }
# }
# }
# # backup etcd
resource "kubernetes_cron_job_v1" "backup-etcd" {
metadata {
name = "backup-etcd"
namespace = "default"
}
spec {
schedule = "0 0 * * *"
successful_jobs_history_limit = 1
failed_jobs_history_limit = 1
concurrency_policy = "Forbid"
job_template {
metadata {
name = "backup-etcd"
}
spec {
template {
metadata {
name = "backup-etcd"
}
spec {
node_name = "k8s-master"
priority_class_name = "system-cluster-critical"
host_network = true
container {
name = "backup-etcd"
image = "k8s.gcr.io/etcd-amd64:3.3.15"
command = ["/bin/sh"]
args = ["-c", "etcdctl --endpoints=https://127.0.0.1:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key snapshot save /backup/etcd-snapshot-$(date +%Y_%m_%d_%H:%M:%S_%Z).db"]
env {
name = "ETCDCTL_API"
value = "3"
}
volume_mount {
mount_path = "/backup"
name = "backup"
}
volume_mount {
mount_path = "/etc/kubernetes/pki/etcd"
name = "etcd-certs"
read_only = true
}
}
container {
name = "backup-purge"
image = "busybox:1.31.1"
command = ["/bin/sh"]
args = ["-c", "find /backup -type f -mtime +30 -name '*.db' -exec rm -- '{}' \\;"]
volume_mount {
mount_path = "/backup"
name = "backup"
}
}
volume {
name = "backup"
nfs {
path = "/mnt/main/etcd-backup"
server = "10.0.10.15"
}
}
volume {
name = "etcd-certs"
host_path {
path = "/etc/kubernetes/pki/etcd"
type = "DirectoryOrCreate"
}
}
restart_policy = "Never"
}
}
}
}
}
}
# Clean up evicted/failed pods cluster-wide daily
resource "kubernetes_cron_job_v1" "cleanup-failed-pods" {
metadata {
name = "cleanup-failed-pods"
namespace = "default"
}
spec {
schedule = "0 2 * * *"
successful_jobs_history_limit = 1
failed_jobs_history_limit = 1
concurrency_policy = "Forbid"
job_template {
metadata {
name = "cleanup-failed-pods"
}
spec {
template {
metadata {
name = "cleanup-failed-pods"
}
spec {
service_account_name = kubernetes_service_account.cleanup_sa.metadata[0].name
container {
name = "cleanup"
image = "bitnami/kubectl:latest"
command = ["/bin/sh", "-c", "kubectl delete pods -A --field-selector=status.phase=Failed --ignore-not-found"]
}
restart_policy = "Never"
}
}
}
}
}
}
resource "kubernetes_service_account" "cleanup_sa" {
metadata {
name = "failed-pod-cleanup"
namespace = "default"
}
}
resource "kubernetes_cluster_role" "cleanup_role" {
metadata {
name = "failed-pod-cleanup"
}
rule {
api_groups = [""]
resources = ["pods"]
verbs = ["list", "delete"]
}
}
resource "kubernetes_cluster_role_binding" "cleanup_binding" {
metadata {
name = "failed-pod-cleanup"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = kubernetes_cluster_role.cleanup_role.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.cleanup_sa.metadata[0].name
namespace = "default"
}
}

View file

@ -0,0 +1,23 @@
node_modules
# Output
.output
.vercel
.netlify
.wrangler
/.svelte-kit
/build
# OS
.DS_Store
Thumbs.db
# Env
.env
.env.*
!.env.example
!.env.test
# Vite
vite.config.js.timestamp-*
vite.config.ts.timestamp-*

View file

@ -0,0 +1 @@
engine-strict=true

View file

@ -0,0 +1,15 @@
FROM node:22-alpine AS build
WORKDIR /app
COPY package*.json ./
RUN npm ci
COPY . .
RUN npm run build
FROM node:22-alpine
WORKDIR /app
COPY --from=build /app/build ./build
COPY --from=build /app/package.json ./
COPY --from=build /app/node_modules ./node_modules
ENV PORT=3000
EXPOSE 3000
CMD ["node", "build"]

View file

@ -0,0 +1,42 @@
# sv
Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli).
## Creating a project
If you're seeing this, you've probably already done this step. Congrats!
```sh
# create a new project
npx sv create my-app
```
To recreate this project with the same configuration:
```sh
# recreate this project
npx sv create --template minimal --types ts --install npm .
```
## Developing
Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
```sh
npm run dev
# or start the server and open the app in a new browser tab
npm run dev -- --open
```
## Building
To create a production version of your app:
```sh
npm run build
```
You can preview the production build with `npm run preview`.
> To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,24 @@
{
"name": "files",
"private": true,
"version": "0.0.1",
"type": "module",
"scripts": {
"dev": "vite dev",
"build": "vite build",
"preview": "vite preview",
"prepare": "svelte-kit sync || echo ''",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch"
},
"devDependencies": {
"@sveltejs/adapter-auto": "^7.0.0",
"@sveltejs/adapter-node": "^5.5.3",
"@sveltejs/kit": "^2.50.2",
"@sveltejs/vite-plugin-svelte": "^6.2.4",
"svelte": "^5.49.2",
"svelte-check": "^4.3.6",
"typescript": "^5.9.3",
"vite": "^7.3.1"
}
}

View file

@ -0,0 +1,13 @@
// See https://svelte.dev/docs/kit/types#app.d.ts
// for information about these interfaces
declare global {
namespace App {
// interface Error {}
// interface Locals {}
// interface PageData {}
// interface PageState {}
// interface Platform {}
}
}
export {};

View file

@ -0,0 +1,11 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
%sveltekit.head%
</head>
<body data-sveltekit-preload-data="hover">
<div style="display: contents">%sveltekit.body%</div>
</body>
</html>

View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="107" height="128" viewBox="0 0 107 128"><title>svelte-logo</title><path d="M94.157 22.819c-10.4-14.885-30.94-19.297-45.792-9.835L22.282 29.608A29.92 29.92 0 0 0 8.764 49.65a31.5 31.5 0 0 0 3.108 20.231 30 30 0 0 0-4.477 11.183 31.9 31.9 0 0 0 5.448 24.116c10.402 14.887 30.942 19.297 45.791 9.835l26.083-16.624A29.92 29.92 0 0 0 98.235 78.35a31.53 31.53 0 0 0-3.105-20.232 30 30 0 0 0 4.474-11.182 31.88 31.88 0 0 0-5.447-24.116" style="fill:#ff3e00"/><path d="M45.817 106.582a20.72 20.72 0 0 1-22.237-8.243 19.17 19.17 0 0 1-3.277-14.503 18 18 0 0 1 .624-2.435l.49-1.498 1.337.981a33.6 33.6 0 0 0 10.203 5.098l.97.294-.09.968a5.85 5.85 0 0 0 1.052 3.878 6.24 6.24 0 0 0 6.695 2.485 5.8 5.8 0 0 0 1.603-.704L69.27 76.28a5.43 5.43 0 0 0 2.45-3.631 5.8 5.8 0 0 0-.987-4.371 6.24 6.24 0 0 0-6.698-2.487 5.7 5.7 0 0 0-1.6.704l-9.953 6.345a19 19 0 0 1-5.296 2.326 20.72 20.72 0 0 1-22.237-8.243 19.17 19.17 0 0 1-3.277-14.502 17.99 17.99 0 0 1 8.13-12.052l26.081-16.623a19 19 0 0 1 5.3-2.329 20.72 20.72 0 0 1 22.237 8.243 19.17 19.17 0 0 1 3.277 14.503 18 18 0 0 1-.624 2.435l-.49 1.498-1.337-.98a33.6 33.6 0 0 0-10.203-5.1l-.97-.294.09-.968a5.86 5.86 0 0 0-1.052-3.878 6.24 6.24 0 0 0-6.696-2.485 5.8 5.8 0 0 0-1.602.704L37.73 51.72a5.42 5.42 0 0 0-2.449 3.63 5.79 5.79 0 0 0 .986 4.372 6.24 6.24 0 0 0 6.698 2.486 5.8 5.8 0 0 0 1.602-.704l9.952-6.342a19 19 0 0 1 5.295-2.328 20.72 20.72 0 0 1 22.237 8.242 19.17 19.17 0 0 1 3.277 14.503 18 18 0 0 1-8.13 12.053l-26.081 16.622a19 19 0 0 1-5.3 2.328" style="fill:#fff"/></svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

View file

@ -0,0 +1 @@
// place files you want to import through the `$lib` alias in this folder.

View file

@ -0,0 +1,11 @@
<script lang="ts">
import favicon from '$lib/assets/favicon.svg';
let { children } = $props();
</script>
<svelte:head>
<link rel="icon" href={favicon} />
</svelte:head>
{@render children()}

View file

@ -0,0 +1,33 @@
import type { PageServerLoad } from './$types';
import { readFileSync } from 'fs';
interface UserRole {
role: string;
namespaces: string[];
}
export const load: PageServerLoad = async ({ request }) => {
const email = request.headers.get('x-authentik-email') || 'unknown';
const username = request.headers.get('x-authentik-username') || 'unknown';
const groups = request.headers.get('x-authentik-groups') || '';
// Read user roles from ConfigMap-mounted file
let userRole: UserRole = { role: 'unknown', namespaces: [] };
try {
const usersJson = readFileSync('/config/users.json', 'utf-8');
const users = JSON.parse(usersJson);
if (users[email]) {
userRole = users[email];
}
} catch {
// ConfigMap not mounted or parse error
}
return {
email,
username,
groups: groups.split('|').filter(Boolean),
role: userRole.role,
namespaces: userRole.namespaces
};
};

View file

@ -0,0 +1,42 @@
<script lang="ts">
let { data } = $props();
</script>
<main>
<h1>Kubernetes Access Portal</h1>
<section>
<h2>Your Identity</h2>
<p><strong>Username:</strong> {data.username}</p>
<p><strong>Email:</strong> {data.email}</p>
<p><strong>Role:</strong> {data.role}</p>
{#if data.namespaces.length > 0}
<p><strong>Namespaces:</strong> {data.namespaces.join(', ')}</p>
{/if}
</section>
<section>
<h2>Get Started</h2>
<ol>
<li><a href="/setup">Install kubectl and kubelogin</a></li>
<li><a href="/download">Download your kubeconfig</a></li>
<li>Run <code>kubectl get pods</code> to verify access</li>
</ol>
</section>
</main>
<style>
main {
max-width: 640px;
margin: 2rem auto;
font-family: system-ui;
}
code {
background: #f0f0f0;
padding: 2px 6px;
border-radius: 3px;
}
section {
margin: 2rem 0;
}
</style>

View file

@ -0,0 +1,58 @@
import type { RequestHandler } from './$types';
import { readFileSync } from 'fs';
const CLUSTER_SERVER = 'https://10.0.20.100:6443';
const OIDC_ISSUER = 'https://authentik.viktorbarzin.me/application/o/kubernetes/';
const OIDC_CLIENT_ID = 'kubernetes';
export const GET: RequestHandler = async ({ request }) => {
const email = request.headers.get('x-authentik-email') || 'user';
// Read CA cert from mounted ConfigMap
let caCert = '';
try {
caCert = readFileSync('/config/ca.crt', 'utf-8');
} catch {
// CA cert not available
}
const caCertBase64 = Buffer.from(caCert).toString('base64');
const sanitizedEmail = email.replace(/[^a-zA-Z0-9@._-]/g, '');
const kubeconfig = `apiVersion: v1
kind: Config
clusters:
- cluster:
server: ${CLUSTER_SERVER}
certificate-authority-data: ${caCertBase64}
name: home-cluster
contexts:
- context:
cluster: home-cluster
user: oidc-${sanitizedEmail}
name: home-cluster
current-context: home-cluster
users:
- name: oidc-${sanitizedEmail}
user:
exec:
apiVersion: client.authentication.k8s.io/v1beta1
command: kubectl
args:
- oidc-login
- get-token
- --oidc-issuer-url=${OIDC_ISSUER}
- --oidc-client-id=${OIDC_CLIENT_ID}
- --oidc-extra-scope=email
- --oidc-extra-scope=profile
- --oidc-extra-scope=groups
interactiveMode: IfAvailable
`;
return new Response(kubeconfig, {
headers: {
'Content-Type': 'application/yaml',
'Content-Disposition': `attachment; filename="kubeconfig-home-cluster.yaml"`
}
});
};

View file

@ -0,0 +1,69 @@
<main>
<h1>Setup Instructions</h1>
<section>
<h2>Quick Setup (one command)</h2>
<p>Run this in your terminal to install everything and configure kubectl automatically:</p>
<h3>macOS</h3>
<pre>bash &lt;(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=mac)</pre>
<h3>Linux</h3>
<pre>bash &lt;(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)</pre>
</section>
<section>
<h2>Manual Setup</h2>
<h3>1. Install kubectl</h3>
<h4>macOS</h4>
<pre>brew install kubectl</pre>
<h4>Linux</h4>
<pre>curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
chmod +x kubectl && sudo mv kubectl /usr/local/bin/</pre>
<h3>2. Install kubelogin (OIDC plugin)</h3>
<h4>macOS</h4>
<pre>brew install int128/kubelogin/kubelogin</pre>
<h4>Linux</h4>
<pre>curl -LO https://github.com/int128/kubelogin/releases/latest/download/kubelogin_linux_amd64.zip
unzip kubelogin_linux_amd64.zip && sudo mv kubelogin /usr/local/bin/kubectl-oidc_login
rm kubelogin_linux_amd64.zip</pre>
<h3>3. Download and use your kubeconfig</h3>
<pre>
mkdir -p ~/.kube
# Download from the portal (requires auth cookie from browser)
# Or use the download button on the portal homepage
# Set the KUBECONFIG environment variable
export KUBECONFIG=~/.kube/config-home
# Test access (opens browser for login)
kubectl get namespaces
</pre>
</section>
<p><a href="/">&#8592; Back to portal</a></p>
</main>
<style>
main {
max-width: 640px;
margin: 2rem auto;
font-family: system-ui;
}
pre {
background: #1e1e1e;
color: #d4d4d4;
padding: 1rem;
border-radius: 6px;
overflow-x: auto;
}
section {
margin: 2rem 0;
}
h4 {
margin: 0.5rem 0 0.25rem;
color: #666;
}
</style>

View file

@ -0,0 +1,181 @@
import type { RequestHandler } from './$types';
import { readFileSync } from 'fs';
const CLUSTER_SERVER = 'https://10.0.20.100:6443';
const OIDC_ISSUER = 'https://authentik.viktorbarzin.me/application/o/kubernetes/';
const OIDC_CLIENT_ID = 'kubernetes';
const PORTAL_URL = 'https://k8s-portal.viktorbarzin.me';
export const GET: RequestHandler = async ({ url }) => {
const os = url.searchParams.get('os') || 'mac';
let caCert = '';
try {
caCert = readFileSync('/config/ca.crt', 'utf-8');
} catch {
// CA cert not available
}
const caCertBase64 = Buffer.from(caCert).toString('base64');
const kubeconfigContent = `apiVersion: v1
kind: Config
clusters:
- cluster:
server: ${CLUSTER_SERVER}
certificate-authority-data: ${caCertBase64}
name: home-cluster
contexts:
- context:
cluster: home-cluster
user: oidc-user
name: home-cluster
current-context: home-cluster
users:
- name: oidc-user
user:
exec:
apiVersion: client.authentication.k8s.io/v1beta1
command: kubectl
args:
- oidc-login
- get-token
- --oidc-issuer-url=${OIDC_ISSUER}
- --oidc-client-id=${OIDC_CLIENT_ID}
- --oidc-extra-scope=email
- --oidc-extra-scope=profile
- --oidc-extra-scope=groups
interactiveMode: IfAvailable`;
const escapedKubeconfig = kubeconfigContent.replace(/'/g, "'\\''");
let script: string;
if (os === 'linux') {
script = `#!/bin/bash
set -e
echo "=== Kubernetes Cluster Setup ==="
echo ""
# Use sudo if available, otherwise install directly (e.g. in containers running as root)
SUDO=""
if [ "$(id -u)" -ne 0 ] && command -v sudo &>/dev/null; then
SUDO="sudo"
fi
# Determine install directory
INSTALL_DIR="/usr/local/bin"
if [ ! -w "\$INSTALL_DIR" ] && [ -z "\$SUDO" ]; then
INSTALL_DIR="\$HOME/.local/bin"
mkdir -p "\$INSTALL_DIR"
export PATH="\$INSTALL_DIR:\$PATH"
fi
# Install kubectl
if command -v kubectl &>/dev/null; then
echo "[OK] kubectl already installed"
else
echo "[..] Installing kubectl..."
KUBECTL_VERSION=\$(curl -L -s https://dl.k8s.io/release/stable.txt)
curl -fsSLO "https://dl.k8s.io/release/\${KUBECTL_VERSION}/bin/linux/amd64/kubectl"
chmod +x kubectl && \$SUDO mv kubectl "\$INSTALL_DIR/"
echo "[OK] kubectl installed"
fi
# Install kubelogin
if command -v kubectl-oidc_login &>/dev/null; then
echo "[OK] kubelogin already installed"
else
echo "[..] Installing kubelogin..."
KUBELOGIN_VERSION=\$(curl -fsSL -o /dev/null -w "%{url_effective}" https://github.com/int128/kubelogin/releases/latest | grep -o '[^/]*\$')
curl -fsSLO "https://github.com/int128/kubelogin/releases/download/\${KUBELOGIN_VERSION}/kubelogin_linux_amd64.zip"
unzip -o kubelogin_linux_amd64.zip kubelogin -d /tmp
\$SUDO mv /tmp/kubelogin "\$INSTALL_DIR/kubectl-oidc_login"
rm -f kubelogin_linux_amd64.zip
echo "[OK] kubelogin installed"
fi
# Write kubeconfig
mkdir -p ~/.kube
cat > ~/.kube/config-home << 'KUBECONFIG_EOF'
${escapedKubeconfig}
KUBECONFIG_EOF
echo "[OK] Kubeconfig written to ~/.kube/config-home"
# Add KUBECONFIG to shell profile
SHELL_RC=~/.bashrc
[ -f ~/.zshrc ] && SHELL_RC=~/.zshrc
if ! grep -q 'config-home' "\$SHELL_RC" 2>/dev/null; then
echo 'export KUBECONFIG=~/.kube/config-home' >> "\$SHELL_RC"
echo "[OK] Added KUBECONFIG to \$SHELL_RC"
fi
export KUBECONFIG=~/.kube/config-home
echo ""
echo "=== Setup complete! ==="
echo ""
echo "Run 'kubectl get namespaces' to test (opens browser for login)."
echo "You may need to restart your shell or run: export KUBECONFIG=~/.kube/config-home"
`;
} else {
script = `#!/bin/bash
set -e
echo "=== Kubernetes Cluster Setup ==="
echo ""
# Check for Homebrew
if ! command -v brew &>/dev/null; then
echo "[!!] Homebrew not found. Install it first:"
echo ' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
exit 1
fi
# Install kubectl
if command -v kubectl &>/dev/null; then
echo "[OK] kubectl already installed ($(kubectl version --client -o json 2>/dev/null | grep -o '"gitVersion":"[^"]*"' | cut -d'"' -f4))"
else
echo "[..] Installing kubectl..."
brew install kubectl
echo "[OK] kubectl installed"
fi
# Install kubelogin
if command -v kubectl-oidc_login &>/dev/null; then
echo "[OK] kubelogin already installed"
else
echo "[..] Installing kubelogin..."
brew install int128/kubelogin/kubelogin
echo "[OK] kubelogin installed"
fi
# Write kubeconfig
mkdir -p ~/.kube
cat > ~/.kube/config-home << 'KUBECONFIG_EOF'
${escapedKubeconfig}
KUBECONFIG_EOF
echo "[OK] Kubeconfig written to ~/.kube/config-home"
# Add KUBECONFIG to shell profile
SHELL_RC=~/.zshrc
[ ! -f ~/.zshrc ] && SHELL_RC=~/.bashrc
if ! grep -q 'config-home' "\$SHELL_RC" 2>/dev/null; then
echo 'export KUBECONFIG=~/.kube/config-home' >> "\$SHELL_RC"
echo "[OK] Added KUBECONFIG to \$SHELL_RC"
fi
export KUBECONFIG=~/.kube/config-home
echo ""
echo "=== Setup complete! ==="
echo ""
echo "Run 'kubectl get namespaces' to test (opens browser for login)."
echo "You may need to restart your shell or run: export KUBECONFIG=~/.kube/config-home"
`;
}
return new Response(script, {
headers: {
'Content-Type': 'text/plain; charset=utf-8'
}
});
};

View file

@ -0,0 +1,3 @@
# allow crawling everything by default
User-agent: *
Disallow:

View file

@ -0,0 +1,10 @@
import adapter from '@sveltejs/adapter-node';
/** @type {import('@sveltejs/kit').Config} */
const config = {
kit: {
adapter: adapter()
}
};
export default config;

View file

@ -0,0 +1,20 @@
{
"extends": "./.svelte-kit/tsconfig.json",
"compilerOptions": {
"rewriteRelativeImportExtensions": true,
"allowJs": true,
"checkJs": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true,
"skipLibCheck": true,
"sourceMap": true,
"strict": true,
"moduleResolution": "bundler"
}
// Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias
// except $lib which is handled by https://svelte.dev/docs/kit/configuration#files
//
// To make changes to top-level options such as include and exclude, we recommend extending
// the generated config; see https://svelte.dev/docs/kit/configuration#typescript
}

View file

@ -0,0 +1,6 @@
import { sveltekit } from '@sveltejs/kit/vite';
import { defineConfig } from 'vite';
export default defineConfig({
plugins: [sveltekit()]
});

View file

@ -0,0 +1,117 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
resource "kubernetes_namespace" "k8s_portal" {
metadata {
name = "k8s-portal"
labels = {
tier = var.tier
}
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.k8s_portal.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_config_map" "k8s_portal_config" {
metadata {
name = "k8s-portal-config"
namespace = kubernetes_namespace.k8s_portal.metadata[0].name
}
data = {
# CA cert extracted from kubeconfig will be populated with cluster CA cert
"ca.crt" = ""
}
}
resource "kubernetes_deployment" "k8s_portal" {
metadata {
name = "k8s-portal"
namespace = kubernetes_namespace.k8s_portal.metadata[0].name
labels = {
app = "k8s-portal"
tier = var.tier
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "k8s-portal"
}
}
template {
metadata {
labels = {
app = "k8s-portal"
}
}
spec {
container {
name = "portal"
image = "viktorbarzin/k8s-portal:latest"
port {
container_port = 3000
}
volume_mount {
name = "config"
mount_path = "/config"
read_only = true
}
}
volume {
name = "config"
config_map {
name = kubernetes_config_map.k8s_portal_config.metadata[0].name
}
}
}
}
}
}
resource "kubernetes_service" "k8s_portal" {
metadata {
name = "k8s-portal"
namespace = kubernetes_namespace.k8s_portal.metadata[0].name
}
spec {
selector = {
app = "k8s-portal"
}
port {
port = 80
target_port = 3000
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.k8s_portal.metadata[0].name
name = "k8s-portal"
tls_secret_name = var.tls_secret_name
protected = true # Require Authentik login
}
# Unprotected ingress for the setup script (needs to be curl-able without auth)
module "ingress_setup_script" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.k8s_portal.metadata[0].name
name = "k8s-portal-setup"
host = "k8s-portal"
service_name = "k8s-portal"
ingress_path = ["/setup/script"]
tls_secret_name = var.tls_secret_name
protected = false
}

View file

@ -0,0 +1,120 @@
resource "kubernetes_namespace" "kyverno" {
metadata {
name = "kyverno"
labels = {
"istio-injection" : "disabled"
}
}
}
resource "helm_release" "kyverno" {
namespace = kubernetes_namespace.kyverno.metadata[0].name
create_namespace = false
name = "kyverno"
atomic = true
repository = "https://kyverno.github.io/kyverno/"
chart = "kyverno"
# values = [templatefile("${path.module}/grafana_chart_values.yaml", { db_password = var.grafana_db_password })]
}
# To unlabel all:
# kubectl label deployment,statefulset,daemonset --all-namespaces -l tier tier-
resource "kubernetes_manifest" "mutate_tier_from_namespace" {
manifest = {
apiVersion = "kyverno.io/v1"
kind = "ClusterPolicy"
metadata = {
name = "sync-tier-label-from-namespace"
}
spec = {
rules = [
{
name = "lookup-and-add-tier"
match = {
any = [
{
resources = {
kinds = ["Deployment", "StatefulSet", "DaemonSet"]
}
}
]
}
exclude = {
any = [
{
resources = {
namespaces = ["kube-system", "metallb-system", "n8n"]
}
}
]
}
# Context allows us to perform an API call to get Namespace metadata
context = [
{
name = "namespaceLabel"
apiCall = {
urlPath = "/api/v1/namespaces/{{request.namespace}}"
jmesPath = "metadata.labels.tier || 'default'"
}
}
]
mutate = {
patchStrategicMerge = {
metadata = {
labels = {
# Injects the variable discovered in the context above
"+(tier)" = "{{namespaceLabel}}"
}
}
}
}
}
]
}
}
}
# resource "kubernetes_manifest" "enforce_pod_tier_label" {
# manifest = {
# apiVersion = "kyverno.io/v1"
# kind = "ClusterPolicy"
# metadata = {
# name = "enforce-pod-tier-label"
# annotations = {
# "policies.kyverno.io/description" = "Rejects any pod that does not have a tier label."
# }
# }
# spec = {
# # 'Enforce' blocks the creation. 'Audit' just reports it.
# validationFailureAction = "Enforce"
# background = true
# rules = [
# {
# name = "check-for-tier-label"
# match = {
# any = [
# {
# resources = {
# kinds = ["Pod"]
# }
# }
# ]
# }
# validate = {
# message = "The label 'tier' is required for all pods in this cluster."
# pattern = {
# metadata = {
# labels = {
# "tier" = "?*" # The "?*" syntax means the value must not be empty
# }
# }
# }
# }
# }
# ]
# }
# }
# }

View file

@ -0,0 +1,809 @@
# =============================================================================
# Tier-Based Resource Governance
# =============================================================================
# Four layers of protection against noisy neighbor issues:
# 1. PriorityClasses - critical services survive resource pressure
# 2. LimitRange defaults (Kyverno generate) - auto-inject defaults for containers without resources
# 3. ResourceQuotas (Kyverno generate) - hard ceiling on namespace resource consumption
# 4. Priority injection (Kyverno mutate) - set priorityClassName based on namespace tier label
# -----------------------------------------------------------------------------
# Layer 1: PriorityClasses
# -----------------------------------------------------------------------------
# Values stay well below system-cluster-critical (2,000,000,000)
resource "kubernetes_priority_class" "tier_0_core" {
metadata {
name = "tier-0-core"
}
value = 1000000
global_default = false
preemption_policy = "PreemptLowerPriority"
description = "Critical infrastructure: ingress, DNS, VPN, auth, monitoring"
}
resource "kubernetes_priority_class" "tier_1_cluster" {
metadata {
name = "tier-1-cluster"
}
value = 800000
global_default = false
preemption_policy = "PreemptLowerPriority"
description = "Cluster services: Redis, metrics, security"
}
resource "kubernetes_priority_class" "tier_2_gpu" {
metadata {
name = "tier-2-gpu"
}
value = 600000
global_default = false
preemption_policy = "PreemptLowerPriority"
description = "GPU workloads: Immich, Ollama, Frigate"
}
resource "kubernetes_priority_class" "tier_3_edge" {
metadata {
name = "tier-3-edge"
}
value = 400000
global_default = false
preemption_policy = "PreemptLowerPriority"
description = "User-facing services: mail, file sync, dashboards"
}
resource "kubernetes_priority_class" "tier_4_aux" {
metadata {
name = "tier-4-aux"
}
value = 200000
global_default = false
preemption_policy = "Never"
description = "Optional services: blogs, tools, experiments. Will not preempt other aux services."
}
# -----------------------------------------------------------------------------
# Layer 2: LimitRange Defaults (Kyverno Generate)
# -----------------------------------------------------------------------------
# Creates a LimitRange in each namespace based on its tier label.
# Only affects containers WITHOUT explicit resource requests/limits.
resource "kubernetes_manifest" "generate_limitrange_by_tier" {
manifest = {
apiVersion = "kyverno.io/v1"
kind = "ClusterPolicy"
metadata = {
name = "generate-limitrange-by-tier"
annotations = {
"policies.kyverno.io/title" = "Generate LimitRange by Tier"
"policies.kyverno.io/description" = "Creates tier-appropriate LimitRange defaults in namespaces based on their tier label. Only affects containers without explicit resource specifications."
}
}
spec = {
generateExisting = true
rules = [
# Tier 0-core
{
name = "limitrange-tier-0-core"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "0-core"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "LimitRange"
name = "tier-defaults"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
limits = [
{
type = "Container"
default = {
cpu = "2"
memory = "4Gi"
}
defaultRequest = {
cpu = "100m"
memory = "128Mi"
}
max = {
cpu = "8"
memory = "16Gi"
}
}
]
}
}
}
},
# Tier 1-cluster
{
name = "limitrange-tier-1-cluster"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "1-cluster"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "LimitRange"
name = "tier-defaults"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
limits = [
{
type = "Container"
default = {
cpu = "2"
memory = "4Gi"
}
defaultRequest = {
cpu = "100m"
memory = "128Mi"
}
max = {
cpu = "4"
memory = "8Gi"
}
}
]
}
}
}
},
# Tier 2-gpu
{
name = "limitrange-tier-2-gpu"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "2-gpu"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "LimitRange"
name = "tier-defaults"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
limits = [
{
type = "Container"
default = {
cpu = "4"
memory = "8Gi"
}
defaultRequest = {
cpu = "100m"
memory = "256Mi"
}
max = {
cpu = "8"
memory = "16Gi"
}
}
]
}
}
}
},
# Tier 3-edge
{
name = "limitrange-tier-3-edge"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "3-edge"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "LimitRange"
name = "tier-defaults"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
limits = [
{
type = "Container"
default = {
cpu = "1"
memory = "2Gi"
}
defaultRequest = {
cpu = "50m"
memory = "128Mi"
}
max = {
cpu = "4"
memory = "8Gi"
}
}
]
}
}
}
},
# Tier 4-aux
{
name = "limitrange-tier-4-aux"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "4-aux"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "LimitRange"
name = "tier-defaults"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
limits = [
{
type = "Container"
default = {
cpu = "500m"
memory = "1Gi"
}
defaultRequest = {
cpu = "25m"
memory = "64Mi"
}
max = {
cpu = "2"
memory = "4Gi"
}
}
]
}
}
}
},
# Fallback: namespaces without a tier label get aux-level defaults
{
name = "limitrange-no-tier-fallback"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
}
}
]
}
exclude = {
any = [
{
resources = {
selector = {
matchExpressions = [
{
key = "tier"
operator = "Exists"
}
]
}
}
},
{
resources = {
namespaces = ["kube-system", "metallb-system", "kyverno", "calico-system", "calico-apiserver"]
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "LimitRange"
name = "tier-defaults"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
limits = [
{
type = "Container"
default = {
cpu = "500m"
memory = "1Gi"
}
defaultRequest = {
cpu = "25m"
memory = "64Mi"
}
max = {
cpu = "2"
memory = "4Gi"
}
}
]
}
}
}
},
]
}
}
}
# -----------------------------------------------------------------------------
# Layer 3: ResourceQuotas (Kyverno Generate)
# -----------------------------------------------------------------------------
# Creates a ResourceQuota in each namespace based on its tier label.
# Sets hard ceiling on total namespace resource consumption.
# Namespaces with label resource-governance/custom-quota=true are excluded.
#
# IMPORTANT: LimitRange (Layer 2) must exist before ResourceQuota takes effect,
# because ResourceQuota requires all pods to have resource requests set.
resource "kubernetes_manifest" "generate_resourcequota_by_tier" {
depends_on = [kubernetes_manifest.generate_limitrange_by_tier]
manifest = {
apiVersion = "kyverno.io/v1"
kind = "ClusterPolicy"
metadata = {
name = "generate-resourcequota-by-tier"
annotations = {
"policies.kyverno.io/title" = "Generate ResourceQuota by Tier"
"policies.kyverno.io/description" = "Creates tier-appropriate ResourceQuota in namespaces based on their tier label. Excludes namespaces with resource-governance/custom-quota label."
}
}
spec = {
generateExisting = true
rules = [
# Tier 0-core
{
name = "quota-tier-0-core"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "0-core"
}
}
}
}
]
}
exclude = {
any = [
{
resources = {
selector = {
matchLabels = {
"resource-governance/custom-quota" = "true"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "ResourceQuota"
name = "tier-quota"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
hard = {
"requests.cpu" = "8"
"requests.memory" = "8Gi"
"limits.cpu" = "32"
"limits.memory" = "64Gi"
pods = "100"
}
}
}
}
},
# Tier 1-cluster
{
name = "quota-tier-1-cluster"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "1-cluster"
}
}
}
}
]
}
exclude = {
any = [
{
resources = {
selector = {
matchLabels = {
"resource-governance/custom-quota" = "true"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "ResourceQuota"
name = "tier-quota"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
hard = {
"requests.cpu" = "4"
"requests.memory" = "4Gi"
"limits.cpu" = "16"
"limits.memory" = "32Gi"
pods = "30"
}
}
}
}
},
# Tier 2-gpu
{
name = "quota-tier-2-gpu"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "2-gpu"
}
}
}
}
]
}
exclude = {
any = [
{
resources = {
selector = {
matchLabels = {
"resource-governance/custom-quota" = "true"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "ResourceQuota"
name = "tier-quota"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
hard = {
"requests.cpu" = "8"
"requests.memory" = "8Gi"
"limits.cpu" = "48"
"limits.memory" = "96Gi"
pods = "40"
}
}
}
}
},
# Tier 3-edge
{
name = "quota-tier-3-edge"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "3-edge"
}
}
}
}
]
}
exclude = {
any = [
{
resources = {
selector = {
matchLabels = {
"resource-governance/custom-quota" = "true"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "ResourceQuota"
name = "tier-quota"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
hard = {
"requests.cpu" = "4"
"requests.memory" = "4Gi"
"limits.cpu" = "16"
"limits.memory" = "32Gi"
pods = "30"
}
}
}
}
},
# Tier 4-aux
{
name = "quota-tier-4-aux"
match = {
any = [
{
resources = {
kinds = ["Namespace"]
selector = {
matchLabels = {
tier = "4-aux"
}
}
}
}
]
}
exclude = {
any = [
{
resources = {
selector = {
matchLabels = {
"resource-governance/custom-quota" = "true"
}
}
}
}
]
}
generate = {
synchronize = true
apiVersion = "v1"
kind = "ResourceQuota"
name = "tier-quota"
namespace = "{{request.object.metadata.name}}"
data = {
spec = {
hard = {
"requests.cpu" = "2"
"requests.memory" = "2Gi"
"limits.cpu" = "8"
"limits.memory" = "16Gi"
pods = "20"
}
}
}
}
},
]
}
}
}
# -----------------------------------------------------------------------------
# Layer 4: PriorityClassName Injection (Kyverno Mutate)
# -----------------------------------------------------------------------------
# Automatically sets priorityClassName on Pods based on their namespace's tier label.
# Skips pods that already have a priorityClassName set.
resource "kubernetes_manifest" "mutate_priority_from_tier" {
manifest = {
apiVersion = "kyverno.io/v1"
kind = "ClusterPolicy"
metadata = {
name = "inject-priority-class-from-tier"
annotations = {
"policies.kyverno.io/title" = "Inject PriorityClass from Tier"
"policies.kyverno.io/description" = "Sets priorityClassName on Pods based on the namespace tier label. Skips pods that already have a priorityClassName."
}
}
spec = {
rules = [
{
name = "inject-priority-class"
match = {
any = [
{
resources = {
kinds = ["Pod"]
}
}
]
}
exclude = {
any = [
{
resources = {
namespaces = ["kube-system", "metallb-system", "kyverno", "calico-system", "calico-apiserver"]
}
}
]
}
context = [
{
name = "tierLabel"
apiCall = {
urlPath = "/api/v1/namespaces/{{request.namespace}}"
jmesPath = "metadata.labels.tier || ''"
}
}
]
preconditions = {
all = [
{
key = "{{request.object.spec.priorityClassName || ''}}"
operator = "Equals"
value = ""
},
{
key = "{{tierLabel}}"
operator = "NotEquals"
value = ""
}
]
}
mutate = {
patchesJson6902 = yamlencode([
{
op = "remove"
path = "/spec/priority"
},
{
op = "remove"
path = "/spec/preemptionPolicy"
},
{
op = "add"
path = "/spec/priorityClassName"
value = "tier-{{tierLabel}}"
}
])
}
}
]
}
}
}
# --- ndots:2 injection ---
# Kubernetes defaults to ndots:5, which causes 4 wasted NxDomain queries per
# external DNS lookup (search domain expansion). This policy injects ndots:2
# on all pods to reduce NxDomain flood while still allowing short-name service
# resolution (e.g. "redis.redis" has 1 dot, so it still expands).
resource "kubernetes_manifest" "mutate_ndots" {
manifest = {
apiVersion = "kyverno.io/v1"
kind = "ClusterPolicy"
metadata = {
name = "inject-ndots"
annotations = {
"policies.kyverno.io/title" = "Inject ndots:2 DNS Config"
"policies.kyverno.io/description" = "Sets ndots:2 on all Pods to reduce NxDomain query flood from search domain expansion. Skips pods that already have ndots configured."
}
}
spec = {
rules = [
{
name = "inject-ndots-2"
match = {
any = [
{
resources = {
kinds = ["Pod"]
}
}
]
}
exclude = {
any = [
{
resources = {
namespaces = ["kube-system", "metallb-system", "kyverno", "calico-system", "calico-apiserver"]
}
}
]
}
preconditions = {
all = [
{
key = "{{ request.object.spec.dnsConfig.options || `[]` | [?name == 'ndots'] | length(@) }}"
operator = "Equals"
value = "0"
}
]
}
mutate = {
patchStrategicMerge = {
spec = {
dnsConfig = {
options = [
{
name = "ndots"
value = "2"
}
]
}
}
}
}
}
]
}
}
}

View file

@ -0,0 +1,5 @@
firmly-gerardo-generated@viktorbarzin.me me@viktorbarzin.me
closely-keith-generated@viktorbarzin.me vbarzin@gmail.com
literally-paolo-generated@viktorbarzin.me viktorbarzin@fb.com
hastily-stefanie-generated@viktorbarzin.me elliestamenova@gmail.com
vaultwarden@viktorbarzin.me me@viktorbarzin.me

View file

@ -0,0 +1,444 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "mailserver_accounts" {}
variable "postfix_account_aliases" {}
variable "opendkim_key" {}
variable "sasl_passwd" {} # For sendgrid i.e relayhost
resource "kubernetes_namespace" "mailserver" {
metadata {
name = "mailserver"
labels = {
tier = var.tier
}
# connecting via localhost does not seem to work?
# labels = {
# "istio-injection" : "enabled"
# }
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.mailserver.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_config_map" "mailserver_env_config" {
metadata {
name = "mailserver.env.config"
namespace = kubernetes_namespace.mailserver.metadata[0].name
labels = {
app = "mailserver"
}
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
DMS_DEBUG = "0"
# LOG_LEVEL = "debug"
ENABLE_CLAMAV = "0"
ENABLE_AMAVIS = "0"
ENABLE_FAIL2BAN = "0"
ENABLE_FETCHMAIL = "0"
ENABLE_POSTGREY = "0"
ENABLE_SASLAUTHD = "0"
ENABLE_SPAMASSASSIN = "0"
ENABLE_SRS = "1"
FETCHMAIL_POLL = "120"
ONE_DIR = "1"
OVERRIDE_HOSTNAME = "mail.viktorbarzin.me"
POSTFIX_MESSAGE_SIZE_LIMIT = 1024 * 1024 * 200 # 200 MB
POSTFIX_REJECT_UNKNOWN_CLIENT_HOSTNAME = "1"
# TLS_LEVEL = "intermediate"
# DEFAULT_RELAY_HOST = "[smtp.sendgrid.net]:587"
DEFAULT_RELAY_HOST = "[smtp.eu.mailgun.org]:587"
SPOOF_PROTECTION = "1"
SSL_TYPE = "manual"
SSL_CERT_PATH = "/tmp/ssl/tls.crt"
SSL_KEY_PATH = "/tmp/ssl/tls.key"
}
}
resource "kubernetes_config_map" "mailserver_config" {
metadata {
name = "mailserver.config"
namespace = kubernetes_namespace.mailserver.metadata[0].name
labels = {
app = "mailserver"
}
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
# Actual mail settings
"postfix-accounts.cf" = join("\n", [for user, pass in var.mailserver_accounts : "${user}|${bcrypt(pass, 6)}"])
"postfix-main.cf" = var.postfix_cf
"postfix-virtual.cf" = format("%s%s", var.postfix_account_aliases, file("${path.module}/extra/aliases.txt"))
KeyTable = "mail._domainkey.viktorbarzin.me viktorbarzin.me:mail:/etc/opendkim/keys/viktorbarzin.me-mail.key\n"
SigningTable = "*@viktorbarzin.me mail._domainkey.viktorbarzin.me\n"
TrustedHosts = "127.0.0.1\nlocalhost\n"
"sasl_passwd" = var.sasl_passwd
fail2ban_conf = <<-EOF
[DEFAULT]
#logtarget = /var/log/fail2ban.log
logtarget = SYSOUT
EOF
}
# Password hashes are different each time and avoid changing secret constantly.
# Either 1.Create consistent hashes or 2.Find a way to ignore_changes on per password
lifecycle {
ignore_changes = [data["postfix-accounts.cf"]]
}
}
# resource "kubernetes_config_map" "user_patches" {
# metadata {
# name = "user-patches"
# namespace = kubernetes_namespace.mailserver.metadata[0].name
# labels = {
# "app" = "mailserver"
# }
# }
# data = {
# user_patches = <<EOF
# #!/bin/bash
# cp -f /tmp/dovecot.key /etc/dovecot/ssl/dovecot.key
# cp -f /tmp/dovecot.crt /etc/dovecot/ssl/dovecot.pem
# EOF
# }
# }
resource "kubernetes_secret" "opendkim_key" {
metadata {
name = "mailserver.opendkim.key"
namespace = kubernetes_namespace.mailserver.metadata[0].name
labels = {
"app" = "mailserver"
}
}
type = "Opaque"
data = {
"viktorbarzin.me-mail.key" = var.opendkim_key
}
}
resource "kubernetes_deployment" "mailserver" {
metadata {
name = "mailserver"
namespace = kubernetes_namespace.mailserver.metadata[0].name
labels = {
"app" = "mailserver"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = "1"
strategy {
type = "Recreate"
}
selector {
match_labels = {
"app" = "mailserver"
}
}
template {
metadata {
annotations = {
# "diun.enable" = "true"
}
labels = {
"app" = "mailserver"
"role" = "mail"
}
}
spec {
container {
name = "docker-mailserver"
image = "docker.io/mailserver/docker-mailserver:15.0.0"
image_pull_policy = "IfNotPresent"
security_context {
capabilities {
add = ["NET_ADMIN"]
}
}
lifecycle {
post_start {
exec {
command = [
"postmap",
"/etc/postfix/sasl/passwd"
# "/bin/sh",
# "-c",
# "cp -f /tmp/user-patches.sh /tmp/docker-mailserver/user-patches.sh && chown root:root /var/log/mail && chmod 755 /var/log/mail",
]
}
}
}
volume_mount {
name = "config-tls"
mount_path = "/tmp/ssl/tls.key"
sub_path = "tls.key"
read_only = true
}
volume_mount {
name = "config-tls"
mount_path = "/tmp/ssl/tls.crt"
sub_path = "tls.crt"
read_only = true
}
volume_mount {
name = "config"
mount_path = "/tmp/docker-mailserver/postfix-accounts.cf"
sub_path = "postfix-accounts.cf"
read_only = true
}
volume_mount {
name = "config"
mount_path = "/tmp/docker-mailserver/postfix-main.cf"
sub_path = "postfix-main.cf"
read_only = true
}
volume_mount {
name = "config"
mount_path = "/tmp/docker-mailserver/postfix-virtual.cf"
sub_path = "postfix-virtual.cf"
read_only = true
}
volume_mount {
name = "config"
mount_path = "/tmp/docker-mailserver/fetchmail.cf"
sub_path = "fetchmail.cf"
read_only = true
}
# volume_mount {
# name = "config"
# mount_path = "/tmp/docker-mailserver/dovecot.cf"
# sub_path = "dovecot.cf"
# read_only = true
# }
# volume_mount {
# name = "user-patches"
# mount_path = "/tmp/user-patches.sh"
# sub_path = "user-patches.sh"
# read_only = true
# }
volume_mount {
name = "config"
mount_path = "/tmp/docker-mailserver/opendkim/SigningTable"
sub_path = "SigningTable"
read_only = true
}
volume_mount {
name = "config"
mount_path = "/tmp/docker-mailserver/opendkim/KeyTable"
sub_path = "KeyTable"
read_only = true
}
volume_mount {
name = "config"
mount_path = "/tmp/docker-mailserver/opendkim/TrustedHosts"
sub_path = "TrustedHosts"
read_only = true
}
volume_mount {
name = "opendkim-key"
mount_path = "/tmp/docker-mailserver/opendkim/keys"
read_only = true
}
volume_mount {
name = "data"
mount_path = "/var/mail"
sub_path = "data"
}
volume_mount {
name = "data"
mount_path = "/var/mail-state"
sub_path = "state"
}
volume_mount {
name = "data"
mount_path = "/var/log/mail"
sub_path = "log"
}
volume_mount {
name = "var-run-dovecot"
mount_path = "/var/run/dovecot"
}
volume_mount {
name = "config"
mount_path = "/etc/postfix/sasl/passwd"
sub_path = "sasl_passwd"
read_only = true
}
volume_mount {
name = "config"
mount_path = "/etc/fail2ban/fail2ban.local"
sub_path = "fail2ban_conf"
read_only = true
}
port {
name = "smtp"
container_port = 25
protocol = "TCP"
}
port {
name = "smtp-secure"
container_port = 465
protocol = "TCP"
}
port {
name = "smtp-auth"
container_port = 587
protocol = "TCP"
}
port {
name = "imap-secure"
container_port = 993
protocol = "TCP"
}
env_from {
config_map_ref {
name = "mailserver.env.config"
}
}
}
container {
name = "dovecot-exporter"
image = "viktorbarzin/dovecot_exporter:latest"
command = [
"/dovecot_exporter/exporter",
"--dovecot.socket-path=/var/run/dovecot/stats-reader"
]
image_pull_policy = "IfNotPresent"
port {
name = "dovecotexporter"
container_port = 9166
protocol = "TCP"
}
volume_mount {
name = "var-run-dovecot"
mount_path = "/var/run/dovecot"
}
}
volume {
name = "config"
config_map {
name = "mailserver.config"
}
}
volume {
name = "config-tls"
secret {
secret_name = var.tls_secret_name
}
}
volume {
name = "opendkim-key"
secret {
secret_name = "mailserver.opendkim.key"
}
}
volume {
name = "data"
nfs {
path = "/mnt/main/mailserver"
server = "10.0.10.15"
}
# iscsi {
# target_portal = "iscsi.viktorbarzin.lan:3260"
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:mailserver"
# lun = 0
# fs_type = "ext4"
# }
}
# volume {
# name = "user-patches"
# config_map {
# name = "user-patches"
# }
# }
volume {
name = "var-run-dovecot"
empty_dir {}
}
}
}
}
}
resource "kubernetes_service" "mailserver" {
metadata {
name = "mailserver"
namespace = kubernetes_namespace.mailserver.metadata[0].name
labels = {
app = "mailserver"
}
annotations = {
"metallb.universe.tf/allow-shared-ip" = "shared"
}
}
spec {
type = "LoadBalancer"
# external_traffic_policy = "Cluster"
external_traffic_policy = "Local"
selector = {
app = "mailserver"
}
port {
name = "smtp"
protocol = "TCP"
port = 25
target_port = "smtp"
}
port {
name = "smtp-secure"
protocol = "TCP"
port = 465
target_port = "smtp-secure"
}
port {
name = "smtp-auth"
protocol = "TCP"
port = 587
target_port = "smtp-auth"
}
port {
name = "imap-secure"
protocol = "TCP"
port = 993
target_port = "imap-secure"
}
port {
name = "roundcube"
protocol = "TCP"
port = 80
}
}
}

View file

@ -0,0 +1,196 @@
variable "roundcube_db_password" { type = string }
# If you want to override settings mount this in /var/roundcube/config
# more info in https://github.com/roundcube/roundcubemail-docker?tab=readme-ov-file
# resource "kubernetes_config_map" "roundcubemail_config" {
# metadata {
# name = "roundcubemail.config"
# namespace = "mailserver"
# labels = {
# app = "mailserver"
# }
# annotations = {
# "reloader.stakater.com/match" = "true"
# }
# }
# data = {
# # if you want to override things see https://github.com/roundcube/roundcubemail/blob/master/config/defaults.inc.php
# "imap.php" = <<-EOF
# <?php
# $config['imap_host'] = 'ssl://mail.viktorbarzin.me:993';
# ?>
# EOF
# }
# }
resource "kubernetes_deployment" "roundcubemail" {
metadata {
name = "roundcubemail"
namespace = "mailserver"
labels = {
"app" = "roundcubemail"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = "1"
strategy {
type = "RollingUpdate"
}
selector {
match_labels = {
"app" = "roundcubemail"
}
}
template {
metadata {
labels = {
"app" = "roundcubemail"
}
}
spec {
container {
name = "roundcube"
image = "roundcube/roundcubemail:latest"
# Uncomment me to mount additional settings
# volume_mount {
# name = "imap-config"
# mount_path = "/var/roundcube/config/imap.php"
# sub_path = "imap.php"
# }
env {
name = "ROUNDCUBEMAIL_DEFAULT_HOST"
value = "ssl://mail.viktorbarzin.me" # tls cert must be valid!
}
env {
name = "ROUNDCUBEMAIL_DEFAULT_PORT"
value = "993"
}
env {
name = "ROUNDCUBEMAIL_SMTP_SERVER"
value = "tls://mail.viktorbarzin.me" # tls cert must be valid!
}
env {
name = "ROUNDCUBEMAIL_SMTP_PORT"
value = 587
}
# DB Settings
env {
name = "ROUNDCUBEMAIL_DB_TYPE"
value = "mysql"
}
env {
name = "ROUNDCUBEMAIL_DB_HOST"
value = "mysql.dbaas"
}
env {
name = "ROUNDCUBEMAIL_DB_USER"
value = "roundcubemail"
}
env {
name = "ROUNDCUBEMAIL_DB_PASSWORD"
value = var.roundcube_db_password
}
# Plugins
env {
name = "ROUNDCUBEMAIL_COMPOSER_PLUGINS"
value = "mmvi/twofactor_webauthn,texxasrulez/persistent_login,dsoares/rcguard"
}
env {
name = "ROUNDCUBEMAIL_PLUGINS"
value = "attachment_reminder,database_attachments,enigma,twofactor_webauthn,persistent_login,rcguard"
}
env {
name = "ROUNDCUBEMAIL_SMTP_DEBUG"
value = "true"
}
env {
name = "ROUNDCUBEMAIL_DEBUG_LEVEL"
value = "6"
}
env {
name = "ROUNDCUBEMAIL_LOG_DRIVER"
# value = "file"
value = "syslog"
}
port {
name = "web"
container_port = 80
protocol = "TCP"
}
volume_mount {
name = "html"
mount_path = "/var/www/html"
}
volume_mount {
name = "enigma"
mount_path = "/var/roundcube/enigma"
}
}
# volume {
# name = "imap-config"
# config_map {
# name = "roundcubemail.config"
# }
# }
volume {
name = "html"
nfs {
path = "/mnt/main/roundcubemail/html"
server = "10.0.10.15"
}
}
volume {
name = "enigma"
nfs {
path = "/mnt/main/roundcubemail/enigma"
server = "10.0.10.15"
}
}
}
}
}
}
resource "kubernetes_service" "roundcubemail" {
metadata {
name = "roundcubemail"
namespace = "mailserver"
labels = {
app = "roundcubemail"
}
}
spec {
selector = {
app = "roundcubemail"
}
port {
name = "roundcube"
protocol = "TCP"
port = 80
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = "mailserver"
name = "mail"
service_name = "roundcubemail"
tls_secret_name = var.tls_secret_name
rybbit_site_id = "082f164faa7d"
}

View file

@ -0,0 +1,158 @@
# this is appended and merged to the main postfix.cf
# see defaults - https://github.com/docker-mailserver/docker-mailserver/blob/master/target/postfix/main.cf
variable "postfix_cf" {
default = <<EOT
#relayhost = [smtp.sendgrid.net]:587
relayhost = [smtp.eu.mailgun.org]:587
smtp_sasl_auth_enable = yes
smtp_sasl_password_maps = hash:/etc/postfix/sasl/passwd
smtp_sasl_security_options = noanonymous
smtp_sasl_tls_security_options = noanonymous
smtp_tls_security_level = encrypt
smtpd_tls_cert_file=/tmp/ssl/tls.crt
smtpd_tls_key_file=/tmp/ssl/tls.key
smtpd_use_tls=yes
header_size_limit = 4096000
# Debug mail tls
smtpd_tls_loglevel = 1
#smtpd_tls_ciphers = TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:DHE-RSA-AES128-SHA256:DHE-RSA-AES256-SHA256:!aNULL:!SEED:!CAMELLIA:!RSA+AES:!SHA1
#tls_medium_cipherlist = ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:DHE-RSA-AES128-SHA256:DHE-RSA-AES256-SHA256:!aNULL:!SEED:!CAMELLIA:!RSA+AES:!SHA1
EOT
}
variable "postfix_cf_reference_DO_NOT_USE" {
default = <<EOT
# See /usr/share/postfix/main.cf.dist for a commented, more complete version
smtpd_banner = $myhostname ESMTP $mail_name (Debian)
biff = no
append_dot_mydomain = no
readme_directory = no
# Basic configuration
# myhostname =
alias_maps = hash:/etc/aliases
alias_database = hash:/etc/aliases
mydestination = $myhostname, localhost.$mydomain, localhost
mynetworks = 127.0.0.0/8 [::1]/128 [fe80::]/64
mailbox_size_limit = 0
recipient_delimiter = +
inet_interfaces = all
inet_protocols = ipv4
# TLS parameters
smtpd_tls_cert_file=/tmp/ssl/tls.crt
smtpd_tls_key_file=/tmp/ssl/tls.key
#smtpd_tls_CAfile=
#smtp_tls_CAfile=
smtpd_tls_security_level = may
smtpd_use_tls=yes
smtpd_tls_loglevel = 1
smtp_tls_loglevel = 1
tls_ssl_options = NO_COMPRESSION
tls_high_cipherlist = ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA:ECDHE-RSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-RSA-AES256-SHA256:DHE-RSA-AES256-SHA:ECDHE-ECDSA-DES-CBC3-SHA:ECDHE-RSA-DES-CBC3-SHA:EDH-RSA-DES-CBC3-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:DES-CBC3-SHA:!DSS
tls_preempt_cipherlist = yes
smtpd_tls_protocols = !SSLv2,!SSLv3
smtp_tls_protocols = !SSLv2,!SSLv3
smtpd_tls_mandatory_ciphers = high
smtpd_tls_mandatory_protocols = !SSLv2,!SSLv3
smtpd_tls_exclude_ciphers = aNULL, LOW, EXP, MEDIUM, ADH, AECDH, MD5, DSS, ECDSA, CAMELLIA128, 3DES, CAMELLIA256, RSA+AES, eNULL
smtpd_tls_dh1024_param_file = /etc/postfix/dhparams.pem
smtpd_tls_CApath = /etc/ssl/certs
smtp_tls_CApath = /etc/ssl/certs
# Settings to prevent SPAM early
smtpd_helo_required = yes
smtpd_delay_reject = yes
smtpd_helo_restrictions = permit_mynetworks, reject_invalid_helo_hostname, permit
#smtpd_relay_restrictions = permit_mynetworks permit_sasl_authenticated defer_unauth_destination
#smtpd_relay_restrictions = reject_sender_login_mismatch permit_sasl_authenticated permit_mynetworks defer_unauth_destination
smtpd_relay_restrictions = reject_sender_login_mismatch permit_sasl_authenticated permit_mynetworks defer_unauth_destination
smtpd_recipient_restrictions = permit_sasl_authenticated, reject_unauth_destination, reject_unauth_pipelining, reject_invalid_helo_hostname, reject_non_fqdn_helo_hostname, reject_unknown_recipient_domain, reject_rbl_client bl.spamcop.net, permit_mynetworks
smtpd_client_restrictions = permit_mynetworks, permit_sasl_authenticated, reject_unauth_destination, reject_unauth_pipelining
#smtpd_sender_restrictions = reject_sender_login_mismatch, permit_sasl_authenticated, permit_mynetworks, reject_unknown_sender_domain
smtpd_sender_restrictions = reject_sender_login_mismatch, reject_authenticated_sender_login_mismatch, reject_unknown_sender_domain, permit_sasl_authenticated, permit_mynetworks
disable_vrfy_command = yes
# Postscreen settings to drop zombies/open relays/spam early
#postscreen_dnsbl_action = enforce
postscreen_dnsbl_action = ignore
postscreen_dnsbl_sites = zen.spamhaus.org*2
bl.mailspike.net
b.barracudacentral.org*2
bl.spameatingmonkey.net
bl.spamcop.net
dnsbl.sorbs.net
psbl.surriel.com
list.dnswl.org=127.0.[0..255].0*-2
list.dnswl.org=127.0.[0..255].1*-3
list.dnswl.org=127.0.[0..255].[2..3]*-4
postscreen_dnsbl_threshold = 3
postscreen_dnsbl_whitelist_threshold = -1
postscreen_greet_action = enforce
postscreen_bare_newline_action = enforce
# SASL
smtpd_sasl_auth_enable = no
#smtpd_sasl_auth_enable = yes
##smtpd_sasl_path = /var/spool/postfix/private/auth
#smtpd_sasl_path = /var/spool/postfix/private/smtpd
##smtpd_sasl_type = dovecot
#smtpd_sasl_type = dovecot
##smtpd_sasl_security_options = noanonymous
#smtpd_sasl_security_options = noanonymous
##smtpd_sasl_local_domain = $mydomain
##broken_sasl_auth_clients = yes
#broken_sasl_auth_clients = yes
# SMTP configuration
smtp_sasl_auth_enable = yes
smtp_sasl_password_maps = hash:/etc/postfix/sasl/passwd
smtp_sasl_security_options = noanonymous
smtp_sasl_tls_security_options = noanonymous
smtp_tls_security_level = encrypt
header_size_limit = 4096000
relayhost = [smtp.sendgrid.net]:587
# Mail directory
virtual_transport = lmtp:unix:/var/run/dovecot/lmtp
virtual_mailbox_domains = /etc/postfix/vhost
virtual_mailbox_maps = texthash:/etc/postfix/vmailbox
virtual_alias_maps = texthash:/etc/postfix/virtual
# Additional option for filtering
content_filter = smtp-amavis:[127.0.0.1]:10024
# Milters used by DKIM
milter_protocol = 6
milter_default_action = accept
dkim_milter = inet:localhost:8891
dmarc_milter = inet:localhost:8893
smtpd_milters = $dkim_milter,$dmarc_milter
non_smtpd_milters = $dkim_milter
# SPF policy settings
policyd-spf_time_limit = 3600
# Header checks for content inspection on receiving
header_checks = pcre:/etc/postfix/maps/header_checks.pcre
# Remove unwanted headers that reveail our privacy
smtp_header_checks = pcre:/etc/postfix/maps/sender_header_filter.pcre
myhostname = mail.viktorbarzin.me
mydomain = viktorbarzin.me
smtputf8_enable = no
message_size_limit = 20480000
sender_canonical_maps = tcp:localhost:10001
sender_canonical_classes = envelope_sender
recipient_canonical_maps = tcp:localhost:10002
recipient_canonical_classes = envelope_recipient,header_recipient
compatibility_level = 2
# enable_original_recipient = no # b4 uncommenting see https://serverfault.com/questions/661615/how-to-drop-orig-to-using-postfix-virtual-domains
always_add_missing_headers = yes
anvil_status_update_time = 5s
EOT
}

View file

@ -0,0 +1,40 @@
# Creates namespace and everythin needed
# Do not use until https://github.com/colinwilson/terraform-kubernetes-metallb/issues/5 is solved
# module "metallb" {
# source = "colinwilson/metallb/kubernetes"
# version = "0.1.7"
# }
variable "tier" { type = string }
resource "kubernetes_namespace" "metallb" {
metadata {
name = "metallb-system"
labels = {
app = "metallb"
# "istio-injection" : "disabled"
# tier = var.tier
}
}
}
module "metallb" {
source = "ViktorBarzin/metallb/kubernetes"
version = "0.1.5"
depends_on = [kubernetes_namespace.metallb]
}
resource "kubernetes_config_map" "config" {
metadata {
name = "config"
namespace = kubernetes_namespace.metallb.metadata[0].name
}
data = {
config = <<EOT
address-pools:
- name: default
protocol: layer2
addresses:
- 10.0.20.200-10.0.20.220
EOT
}
}

View file

@ -0,0 +1,29 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
resource "kubernetes_namespace" "metrics-server" {
metadata {
name = "metrics-server"
labels = {
tier = var.tier
}
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.metrics-server.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "helm_release" "metrics-server" {
namespace = kubernetes_namespace.metrics-server.metadata[0].name
create_namespace = false
name = "metrics-server"
atomic = true
repository = "https://kubernetes-sigs.github.io/metrics-server/"
chart = "metrics-server"
values = [templatefile("${path.module}/values.yaml", {})]
}

View file

@ -0,0 +1,2 @@
args:
- "--kubelet-insecure-tls"

View file

@ -0,0 +1,27 @@
# dockerhub: viktorbarzin/redfish-exporter
# repo: https://pkg.go.dev/github.com/jenningsloy318/redfish_exporter#section-readme
FROM golang:rc-bullseye AS builder
LABEL maintainer="Viktor Barzin <me@viktorbarzin.me>"
ARG ARCH=amd64
ENV GOROOT /usr/local/go
ENV GOPATH /go
ENV PATH "$GOROOT/bin:$GOPATH/bin:$PATH"
ENV GO_VERSION 1.15.2
ENV GO111MODULE=on
# Build dependencies
RUN mkdir -p /go/src/github.com/ && \
git clone https://github.com/jenningsloy318/redfish_exporter /go/src/github.com/jenningsloy318/redfish_exporter && \
cd /go/src/github.com/jenningsloy318/redfish_exporter && \
make build
FROM golang:rc-bullseye
COPY --from=builder /go/src/github.com/jenningsloy318/redfish_exporter/build/redfish_exporter /usr/local/bin/redfish_exporter
RUN mkdir /etc/prometheus
# config file mounter at runtime
CMD ["/usr/local/bin/redfish_exporter", "--config.file", "/etc/prometheus/redfish_exporter.yml"]

View file

@ -0,0 +1,131 @@
alloy:
configMap:
content: |-
// Write your Alloy config here:
logging {
level = "info"
format = "logfmt"
}
loki.write "default" {
endpoint {
url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push"
}
}
// discovery.kubernetes allows you to find scrape targets from Kubernetes resources.
// It watches cluster state and ensures targets are continually synced with what is currently running in your cluster.
discovery.kubernetes "pod" {
role = "pod"
}
// discovery.relabel rewrites the label set of the input targets by applying one or more relabeling rules.
// If no rules are defined, then the input targets are exported as-is.
discovery.relabel "pod_logs" {
targets = discovery.kubernetes.pod.targets
// Label creation - "namespace" field from "__meta_kubernetes_namespace"
rule {
source_labels = ["__meta_kubernetes_namespace"]
action = "replace"
target_label = "namespace"
}
// Label creation - "pod" field from "__meta_kubernetes_pod_name"
rule {
source_labels = ["__meta_kubernetes_pod_name"]
action = "replace"
target_label = "pod"
}
// Label creation - "container" field from "__meta_kubernetes_pod_container_name"
rule {
source_labels = ["__meta_kubernetes_pod_container_name"]
action = "replace"
target_label = "container"
}
// Label creation - "app" field from "__meta_kubernetes_pod_label_app_kubernetes_io_name"
rule {
source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
action = "replace"
target_label = "app"
}
// Label creation - "job" field from "__meta_kubernetes_namespace" and "__meta_kubernetes_pod_container_name"
// Concatenate values __meta_kubernetes_namespace/__meta_kubernetes_pod_container_name
rule {
source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"]
action = "replace"
target_label = "job"
separator = "/"
replacement = "$1"
}
// Label creation - "container" field from "__meta_kubernetes_pod_uid" and "__meta_kubernetes_pod_container_name"
// Concatenate values __meta_kubernetes_pod_uid/__meta_kubernetes_pod_container_name.log
rule {
source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
action = "replace"
target_label = "__path__"
separator = "/"
replacement = "/var/log/pods/*$1/*.log"
}
// Label creation - "container_runtime" field from "__meta_kubernetes_pod_container_id"
rule {
source_labels = ["__meta_kubernetes_pod_container_id"]
action = "replace"
target_label = "container_runtime"
regex = "^(\\S+):\\/\\/.+$"
replacement = "$1"
}
}
// loki.source.kubernetes tails logs from Kubernetes containers using the Kubernetes API.
loki.source.kubernetes "pod_logs" {
targets = discovery.relabel.pod_logs.output
forward_to = [loki.process.pod_logs.receiver]
}
// loki.process receives log entries from other Loki components, applies one or more processing stages,
// and forwards the results to the list of receivers in the component's arguments.
loki.process "pod_logs" {
stage.static_labels {
values = {
cluster = "default",
}
}
forward_to = [loki.write.default.receiver]
}
// Kubernetes audit log collection from /var/log/kubernetes/audit.log
// Requires alloy.mounts.varlog=true to mount /var/log from the host
local.file_match "audit_logs" {
path_targets = [{
__path__ = "/var/log/kubernetes/audit.log",
job = "kubernetes-audit",
node = env("HOSTNAME"),
}]
}
loki.source.file "audit_logs" {
targets = local.file_match.audit_logs.targets
forward_to = [loki.write.default.receiver]
}
# Mount /var/log from the host for file-based log collection (audit logs)
mounts:
varlog: true
# Resource limits for DaemonSet pods
# Alloy tails logs from all containers on the node via K8s API and batches
# them to Loki. Memory scales with number of active log streams (~30-50 per node).
# 128Mi was OOMKilled; steady-state usage is ~400-450Mi per pod.
resources:
requests:
cpu: 50m
memory: 256Mi
limits:
cpu: 200m
memory: 768Mi

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,204 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "datasource", "uid": "grafana" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Kubernetes API server audit logs from Loki",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 0,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 100,
"panels": [],
"title": "Recent Activity",
"type": "row"
},
{
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
"description": "Recent Kubernetes API actions from audit logs",
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"custom": {
"align": "auto",
"cellOptions": { "type": "auto" },
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "green", "value": null }]
}
},
"overrides": []
},
"gridPos": { "h": 12, "w": 24, "x": 0, "y": 1 },
"id": 1,
"options": {
"cellHeight": "sm",
"footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false },
"showHeader": true,
"sortBy": [{ "desc": true, "displayName": "Time" }]
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
"editorMode": "code",
"expr": "{job=\"kubernetes-audit\"} | json | line_format \"{{.user.username}} {{.verb}} {{.objectRef.resource}} {{.objectRef.namespace}}\"",
"legendFormat": "",
"queryType": "range",
"refId": "A"
}
],
"title": "Recent Actions",
"type": "table"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 },
"id": 101,
"panels": [],
"title": "Request Rates",
"type": "row"
},
{
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
"description": "API request count by user over time",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "green", "value": null }]
},
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 14 },
"id": 2,
"options": {
"legend": { "calcs": ["sum", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
"editorMode": "code",
"expr": "sum by (user_username) (count_over_time({job=\"kubernetes-audit\"} | json [5m]))",
"legendFormat": "{{user_username}}",
"queryType": "range",
"refId": "A"
}
],
"title": "Request Count by User",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 24 },
"id": 102,
"panels": [],
"title": "Denied Requests",
"type": "row"
},
{
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
"description": "API requests denied with HTTP 403+ status codes",
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"custom": {
"align": "auto",
"cellOptions": { "type": "auto" },
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "red", "value": 403 }
]
}
},
"overrides": []
},
"gridPos": { "h": 12, "w": 24, "x": 0, "y": 25 },
"id": 3,
"options": {
"cellHeight": "sm",
"footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false },
"showHeader": true,
"sortBy": [{ "desc": true, "displayName": "Time" }]
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
"editorMode": "code",
"expr": "{job=\"kubernetes-audit\"} | json | responseStatus_code >= 403",
"legendFormat": "",
"queryType": "range",
"refId": "A"
}
],
"title": "Denied Requests (403+)",
"type": "table"
}
],
"preload": false,
"refresh": "30s",
"schemaVersion": 42,
"tags": ["kubernetes", "audit", "security"],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Kubernetes Audit Logs",
"uid": "k8s-audit",
"version": 1
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,288 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Logs collected from Kubernetes, stored in Loki",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 15141,
"graphTooltip": 0,
"id": 25,
"links": [],
"panels": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"editorMode": "code",
"expr": "sum(count_over_time({namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\" [$__interval]))",
"instant": false,
"legendFormat": "Log count",
"queryType": "range",
"range": true,
"refId": "A"
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"description": "Logs from services running in Kubernetes",
"gridPos": {
"h": 25,
"w": 24,
"x": 0,
"y": 4
},
"id": 2,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"editorMode": "code",
"expr": "{namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\"",
"queryType": "range",
"refId": "A"
}
],
"type": "logs"
}
],
"refresh": "5s",
"schemaVersion": 39,
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"description": "String to search for",
"hide": 0,
"label": "Search Query",
"name": "query",
"options": [
{
"selected": true,
"text": "",
"value": ""
}
],
"query": "",
"skipUrlSync": false,
"type": "textbox"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": [
"dbaas"
],
"value": [
"dbaas"
]
},
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"definition": "label_values(namespace)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "namespace",
"options": [],
"query": "label_values(namespace)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"definition": "label_values(stream)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "stream",
"options": [],
"query": "label_values(stream)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"definition": "label_values(container)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "container",
"options": [],
"query": "label_values(container)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Loki Kubernetes Logs",
"uid": "o6-BGgnnk",
"version": 2,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,816 @@
{
"annotations": {
"list": [
{
"$$hashKey": "object:192",
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "This dashboard is to display the metrics from DCGM Exporter on a Kubernetes (1.13+) cluster",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 0,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 18,
"x": 0,
"y": 0
},
"id": 12,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP",
"instant": false,
"interval": "",
"legendFormat": "GPU 0",
"refId": "A"
}
],
"title": "GPU Temperature",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "#EAB839",
"value": 70
},
{
"color": "red",
"value": 80
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 18,
"y": 0
},
"id": 14,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto"
},
"pluginVersion": "12.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP",
"interval": "",
"legendFormat": "",
"range": true,
"refId": "A"
}
],
"title": "GPU Current Temp",
"type": "gauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 18,
"x": 0,
"y": 8
},
"id": 10,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_POWER_USAGE",
"interval": "",
"legendFormat": "GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "GPU Power Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 2400,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "#EAB839",
"value": 1800
},
{
"color": "red",
"value": 2200
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 18,
"y": 8
},
"id": 16,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto"
},
"pluginVersion": "12.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(nvidia_tesla_t4_DCGM_FI_DEV_POWER_USAGE)",
"instant": true,
"interval": "",
"legendFormat": "",
"range": false,
"refId": "A"
}
],
"title": "GPU Power Total",
"type": "gauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_UTIL",
"interval": "",
"legendFormat": "GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "GPU Utilization",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "decmbytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 18,
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_FB_USED",
"interval": "",
"legendFormat": "GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "GPU Framebuffer Mem Used",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "hertz"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"id": 2,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_SM_CLOCK* 1000000",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "GPU SM Clocks",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"id": 19,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "sum by (namespace) (gpu_pod_memory_used_bytes)",
"instant": false,
"legendFormat": "{{namespace}}",
"range": true,
"refId": "A"
}
],
"title": "GPU Memory per Application",
"type": "timeseries"
}
],
"preload": false,
"refresh": "auto",
"schemaVersion": 42,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-12h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "NVIDIA DCGM Exporter Dashboard",
"uid": "Oxed_c6Wz",
"version": 9
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,976 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "datasource", "uid": "grafana" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 0,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 100,
"panels": [],
"title": "Scraping Overview",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Total listings discovered during scrape runs",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 },
"id": 1,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "scrape_listings_found_total{job=\"realestate-crawler-celery\"}",
"legendFormat": "Found",
"range": true,
"refId": "A"
}
],
"title": "Total Listings Found",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Total listings successfully processed",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 },
"id": 2,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "scrape_listings_processed_total{job=\"realestate-crawler-celery\"}",
"legendFormat": "Processed",
"range": true,
"refId": "A"
}
],
"title": "Total Listings Processed",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Total listings that failed processing",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 },
"id": 3,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "scrape_listings_failed_total{job=\"realestate-crawler-celery\"}",
"legendFormat": "Failed",
"range": true,
"refId": "A"
}
],
"title": "Total Listings Failed",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Total API pages fetched during scraping",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "purple", "value": null }] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 },
"id": 4,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "scrape_pages_fetched_total{job=\"realestate-crawler-celery\"}",
"legendFormat": "Pages",
"range": true,
"refId": "A"
}
],
"title": "Total Pages Fetched",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Total subqueries executed after query splitting",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "orange", "value": null }] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 },
"id": 5,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "scrape_subqueries_total{job=\"realestate-crawler-celery\"}",
"legendFormat": "Subqueries",
"range": true,
"refId": "A"
}
],
"title": "Total Subqueries",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Circuit breaker state: 0=closed (healthy), 1=half-open, 2=open (tripped)",
"fieldConfig": {
"defaults": {
"mappings": [
{ "options": { "0": { "color": "green", "text": "Closed" } }, "type": "value" },
{ "options": { "1": { "color": "yellow", "text": "Half-Open" } }, "type": "value" },
{ "options": { "2": { "color": "red", "text": "Open" } }, "type": "value" }
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 2 }
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 },
"id": 6,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "circuit_breaker_state{job=\"realestate-crawler-celery\"}",
"legendFormat": "State",
"range": true,
"refId": "A"
}
],
"title": "Circuit Breaker",
"type": "stat"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
"id": 101,
"panels": [],
"title": "Scraping Activity",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Rate of listings found, processed, and failed",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "short"
},
"overrides": [
{
"matcher": { "id": "byName", "options": "Failed" },
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
}
]
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
"id": 10,
"options": {
"legend": { "calcs": ["sum", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "increase(scrape_listings_found_total{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "Found",
"range": true,
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "increase(scrape_listings_processed_total{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "Processed",
"range": true,
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "increase(scrape_listings_failed_total{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "Failed",
"range": true,
"refId": "C"
}
],
"title": "Listing Activity (5m increase)",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Duration of full scrape runs",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "auto",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "s"
},
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
"id": 11,
"options": {
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "rate(scrape_duration_seconds_sum{job=\"realestate-crawler-celery\"}[5m]) / rate(scrape_duration_seconds_count{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "Avg Duration",
"range": true,
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "histogram_quantile(0.95, rate(scrape_duration_seconds_bucket{job=\"realestate-crawler-celery\"}[30m]))",
"legendFormat": "p95 Duration",
"range": true,
"refId": "B"
}
],
"title": "Scrape Duration",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
"id": 102,
"panels": [],
"title": "Throttling & Errors",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Throttle events by type: rate_limit, service_unavailable, ip_blocked, slow_response, empty_response, invalid_response",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"min": 0,
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "short"
},
"overrides": [
{ "matcher": { "id": "byName", "options": "rate_limit" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "ip_blocked" }, "properties": [{ "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "service_unavailable" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "slow_response" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }
]
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 },
"id": 20,
"options": {
"legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "increase(throttle_events_total{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "{{ type }}",
"range": true,
"refId": "A"
}
],
"title": "Throttle Events (5m increase)",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Circuit breaker state over time: 0=closed (healthy), 1=half-open, 2=open (tripped)",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": {
"mode": "line+area",
"thresholds": [
{ "color": "red", "value": 2 }
]
}
},
"decimals": 0,
"mappings": [
{ "options": { "0": { "text": "Closed" } }, "type": "value" },
{ "options": { "1": { "text": "Half-Open" } }, "type": "value" },
{ "options": { "2": { "text": "Open" } }, "type": "value" }
],
"max": 2,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 2 }
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 },
"id": 21,
"options": {
"legend": { "calcs": ["lastNotNull"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "single", "sort": "none" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "circuit_breaker_state{job=\"realestate-crawler-celery\"}",
"legendFormat": "Circuit Breaker",
"range": true,
"refId": "A"
}
],
"title": "Circuit Breaker State",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 },
"id": 103,
"panels": [],
"title": "Cache & OCR",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "GeoJSON cache hit ratio",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "green", "value": 0.8 }
]
},
"max": 1,
"min": 0,
"unit": "percentunit"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 4, "x": 0, "y": 24 },
"id": 30,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "rate(geojson_cache_operations_total{job=\"realestate-crawler-api\",result=\"hit\"}[15m]) / (rate(geojson_cache_operations_total{job=\"realestate-crawler-api\",result=\"hit\"}[15m]) + rate(geojson_cache_operations_total{job=\"realestate-crawler-api\",result=\"miss\"}[15m]))",
"legendFormat": "Hit Rate",
"range": true,
"refId": "A"
}
],
"title": "GeoJSON Cache Hit Rate",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "GeoJSON cache hits and misses over time",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"min": 0,
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "ops"
},
"overrides": [
{ "matcher": { "id": "byName", "options": "miss" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "hit" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }
]
},
"gridPos": { "h": 6, "w": 8, "x": 4, "y": 24 },
"id": 31,
"options": {
"legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "rate(geojson_cache_operations_total{job=\"realestate-crawler-api\"}[5m])",
"legendFormat": "{{ result }}",
"range": true,
"refId": "A"
}
],
"title": "GeoJSON Cache Operations",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "OCR detection attempts and successes for floorplan square meter extraction",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"min": 0,
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 24 },
"id": 32,
"options": {
"legend": { "calcs": ["sum", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "increase(ocr_attempts_total{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "Attempts",
"range": true,
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "increase(ocr_successes_total{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "Successes",
"range": true,
"refId": "B"
}
],
"title": "OCR Floorplan Detection (5m increase)",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 },
"id": 104,
"panels": [],
"title": "Celery Tasks",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Celery task completions by task name and status",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"min": 0,
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 31 },
"id": 40,
"options": {
"legend": { "calcs": ["sum"], "displayMode": "table", "placement": "right", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "increase(celery_tasks_total{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "{{ task_name }} ({{ status }})",
"range": true,
"refId": "A"
}
],
"title": "Celery Tasks by Name & Status (5m increase)",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Average and p95 task durations by task name",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "auto",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "s"
},
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 31 },
"id": 41,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "rate(celery_task_duration_seconds_sum{job=\"realestate-crawler-celery\"}[5m]) / rate(celery_task_duration_seconds_count{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "{{ task_name }} avg",
"range": true,
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "histogram_quantile(0.95, rate(celery_task_duration_seconds_bucket{job=\"realestate-crawler-celery\"}[30m]))",
"legendFormat": "{{ task_name }} p95",
"range": true,
"refId": "B"
}
],
"title": "Celery Task Duration",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Currently active (in-flight) Celery tasks",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"min": 0,
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 39 },
"id": 42,
"options": {
"legend": { "calcs": ["max", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "celery_tasks_active{job=\"realestate-crawler-celery\"}",
"legendFormat": "{{ task_name }}",
"range": true,
"refId": "A"
}
],
"title": "Active Celery Tasks",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"description": "Pages fetched and subqueries executed during scraping",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"min": 0,
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 39 },
"id": 43,
"options": {
"legend": { "calcs": ["sum", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "12.3.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "increase(scrape_pages_fetched_total{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "Pages Fetched",
"range": true,
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"editorMode": "code",
"expr": "increase(scrape_subqueries_total{job=\"realestate-crawler-celery\"}[5m])",
"legendFormat": "Subqueries",
"range": true,
"refId": "B"
}
],
"title": "Scraping Pagination (5m increase)",
"type": "timeseries"
}
],
"preload": false,
"refresh": "30s",
"schemaVersion": 42,
"tags": ["realestate-crawler", "celery", "scraping"],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Real Estate Crawler",
"uid": "realestate-crawler",
"version": 1
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,488 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "datasource", "uid": "grafana" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Technitium DNS query logs from MySQL",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"panels": [
{
"title": "Total Queries",
"type": "stat",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"thresholds": {
"steps": [
{ "color": "green", "value": null }
]
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"textMode": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
},
"targets": [
{
"rawSql": "SELECT COUNT(*) as total_queries FROM dns_logs WHERE $__timeFilter(timestamp)",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Cached %",
"type": "stat",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"unit": "percentunit",
"thresholds": {
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 0.3 },
{ "color": "green", "value": 0.5 }
]
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"textMode": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
},
"targets": [
{
"rawSql": "SELECT SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) / COUNT(*) as cached_pct FROM dns_logs WHERE $__timeFilter(timestamp)",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Blocked %",
"type": "stat",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"unit": "percentunit",
"thresholds": {
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.1 },
{ "color": "red", "value": 0.3 }
]
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"textMode": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
},
"targets": [
{
"rawSql": "SELECT SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) / COUNT(*) as blocked_pct FROM dns_logs WHERE $__timeFilter(timestamp)",
"format": "table",
"refId": "A"
}
]
},
{
"title": "NxDomain %",
"type": "stat",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"unit": "percentunit",
"thresholds": {
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.2 },
{ "color": "red", "value": 0.5 }
]
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"textMode": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
},
"targets": [
{
"rawSql": "SELECT SUM(CASE WHEN rcode = 3 THEN 1 ELSE 0 END) / COUNT(*) as nxdomain_pct FROM dns_logs WHERE $__timeFilter(timestamp)",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Avg Response Time",
"type": "stat",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"unit": "ms",
"thresholds": {
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 50 },
{ "color": "red", "value": 200 }
]
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"textMode": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
},
"targets": [
{
"rawSql": "SELECT AVG(response_rtt) as avg_rtt_ms FROM dns_logs WHERE $__timeFilter(timestamp) AND response_rtt IS NOT NULL",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Queries by Protocol",
"type": "stat",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" }
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"textMode": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true }
},
"targets": [
{
"rawSql": "SELECT SUM(CASE WHEN protocol = 0 THEN 1 ELSE 0 END) as UDP, SUM(CASE WHEN protocol = 1 THEN 1 ELSE 0 END) as TCP, SUM(CASE WHEN protocol = 3 THEN 1 ELSE 0 END) as DoH, SUM(CASE WHEN protocol = 4 THEN 1 ELSE 0 END) as DoT FROM dns_logs WHERE $__timeFilter(timestamp)",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Queries Over Time",
"type": "timeseries",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 4 },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 50,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" }
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom" },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"rawSql": "SELECT $__timeGroup(timestamp, $__interval) as time, SUM(CASE WHEN response_type = 1 THEN 1 ELSE 0 END) as Authoritative, SUM(CASE WHEN response_type = 2 THEN 1 ELSE 0 END) as Recursive, SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) as Cached, SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) as Blocked, SUM(CASE WHEN response_type = 5 THEN 1 ELSE 0 END) as Dropped FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY time ORDER BY time",
"format": "time_series",
"refId": "A"
}
]
},
{
"title": "Response Codes",
"type": "piechart",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" }
},
"overrides": [
{ "matcher": { "id": "byName", "options": "NOERROR" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "NXDOMAIN" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "SERVFAIL" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "REFUSED" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] }
]
},
"options": {
"legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] },
"pieType": "donut",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true },
"tooltip": { "mode": "single" }
},
"targets": [
{
"rawSql": "SELECT SUM(CASE WHEN rcode = 0 THEN 1 ELSE 0 END) as NOERROR, SUM(CASE WHEN rcode = 2 THEN 1 ELSE 0 END) as SERVFAIL, SUM(CASE WHEN rcode = 3 THEN 1 ELSE 0 END) as NXDOMAIN, SUM(CASE WHEN rcode = 5 THEN 1 ELSE 0 END) as REFUSED, SUM(CASE WHEN rcode NOT IN (0,2,3,5) THEN 1 ELSE 0 END) as Other FROM dns_logs WHERE $__timeFilter(timestamp)",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Response Types",
"type": "piechart",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" }
},
"overrides": [
{ "matcher": { "id": "byName", "options": "Cached" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "Blocked" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "Recursive" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "Authoritative" }, "properties": [{ "id": "color", "value": { "fixedColor": "purple", "mode": "fixed" } }] }
]
},
"options": {
"legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] },
"pieType": "donut",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true },
"tooltip": { "mode": "single" }
},
"targets": [
{
"rawSql": "SELECT SUM(CASE WHEN response_type = 1 THEN 1 ELSE 0 END) as Authoritative, SUM(CASE WHEN response_type = 2 THEN 1 ELSE 0 END) as Recursive, SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) as Cached, SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) as Blocked, SUM(CASE WHEN response_type = 5 THEN 1 ELSE 0 END) as Dropped FROM dns_logs WHERE $__timeFilter(timestamp)",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Query Types",
"type": "piechart",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" }
},
"overrides": []
},
"options": {
"legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] },
"pieType": "donut",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true },
"tooltip": { "mode": "single" }
},
"targets": [
{
"rawSql": "SELECT SUM(CASE WHEN qtype = 1 THEN 1 ELSE 0 END) as A, SUM(CASE WHEN qtype = 28 THEN 1 ELSE 0 END) as AAAA, SUM(CASE WHEN qtype = 5 THEN 1 ELSE 0 END) as CNAME, SUM(CASE WHEN qtype = 15 THEN 1 ELSE 0 END) as MX, SUM(CASE WHEN qtype = 16 THEN 1 ELSE 0 END) as TXT, SUM(CASE WHEN qtype = 33 THEN 1 ELSE 0 END) as SRV, SUM(CASE WHEN qtype = 12 THEN 1 ELSE 0 END) as PTR, SUM(CASE WHEN qtype = 6 THEN 1 ELSE 0 END) as SOA, SUM(CASE WHEN qtype = 2 THEN 1 ELSE 0 END) as NS, SUM(CASE WHEN qtype = 65 THEN 1 ELSE 0 END) as HTTPS, SUM(CASE WHEN qtype NOT IN (1,2,5,6,12,15,16,28,33,65) THEN 1 ELSE 0 END) as Other FROM dns_logs WHERE $__timeFilter(timestamp)",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Top 20 Queried Domains",
"type": "table",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
"fieldConfig": {
"defaults": {
"custom": { "filterable": true }
},
"overrides": [
{ "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
]
},
"options": {
"showHeader": true,
"sortBy": [{ "desc": true, "displayName": "count" }]
},
"targets": [
{
"rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY qname ORDER BY count DESC LIMIT 20",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Top 20 Clients",
"type": "table",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
"fieldConfig": {
"defaults": {
"custom": { "filterable": true }
},
"overrides": [
{ "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
]
},
"options": {
"showHeader": true,
"sortBy": [{ "desc": true, "displayName": "count" }]
},
"targets": [
{
"rawSql": "SELECT client_ip, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY client_ip ORDER BY count DESC LIMIT 20",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Average Response Time Over Time",
"type": "timeseries",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 30 },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"unit": "ms",
"custom": {
"axisBorderShow": false,
"axisLabel": "Response Time (ms)",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"lineWidth": 2,
"pointSize": 5,
"showPoints": "never",
"spanNulls": true
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom" },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"rawSql": "SELECT $__timeGroup(timestamp, $__interval) as time, AVG(response_rtt) as avg_rtt, MAX(response_rtt) as max_rtt FROM dns_logs WHERE $__timeFilter(timestamp) AND response_rtt IS NOT NULL GROUP BY time ORDER BY time",
"format": "time_series",
"refId": "A"
}
]
},
{
"title": "Top 20 NxDomain Domains",
"type": "table",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 38 },
"fieldConfig": {
"defaults": {
"custom": { "filterable": true }
},
"overrides": [
{ "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
]
},
"options": {
"showHeader": true,
"sortBy": [{ "desc": true, "displayName": "count" }]
},
"targets": [
{
"rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) AND rcode = 3 GROUP BY qname ORDER BY count DESC LIMIT 20",
"format": "table",
"refId": "A"
}
]
},
{
"title": "Top 20 Blocked Domains",
"type": "table",
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 38 },
"fieldConfig": {
"defaults": {
"custom": { "filterable": true }
},
"overrides": [
{ "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
]
},
"options": {
"showHeader": true,
"sortBy": [{ "desc": true, "displayName": "count" }]
},
"targets": [
{
"rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) AND response_type = 4 GROUP BY qname ORDER BY count DESC LIMIT 20",
"format": "table",
"refId": "A"
}
]
}
],
"refresh": "5m",
"schemaVersion": 39,
"tags": ["dns", "technitium", "mysql"],
"templating": { "list": [] },
"time": { "from": "now-24h", "to": "now" },
"timepicker": {},
"timezone": "",
"title": "Technitium DNS",
"uid": "technitium-dns",
"version": 1
}

View file

@ -0,0 +1,303 @@
# HELP snmpEnableAuthenTraps Indicates whether the SNMP entity is permitted to generate authenticationFailure traps - 1.3.6.1.2.1.11.30
# TYPE snmpEnableAuthenTraps gauge
snmpEnableAuthenTraps 2
# HELP snmpInASNParseErrs The total number of ASN.1 or BER errors encountered by the SNMP entity when decoding received SNMP messages. - 1.3.6.1.2.1.11.6
# TYPE snmpInASNParseErrs counter
snmpInASNParseErrs 0
# HELP snmpInBadCommunityNames The total number of community-based SNMP messages (for example, SNMPv1) delivered to the SNMP entity which used an SNMP community name not known to said entity - 1.3.6.1.2.1.11.4
# TYPE snmpInBadCommunityNames counter
snmpInBadCommunityNames 184
# HELP snmpInBadCommunityUses The total number of community-based SNMP messages (for example, SNMPv1) delivered to the SNMP entity which represented an SNMP operation that was not allowed for the SNMP community named in the message - 1.3.6.1.2.1.11.5
# TYPE snmpInBadCommunityUses counter
snmpInBadCommunityUses 0
# HELP snmpInBadValues The total number of SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `badValue'. - 1.3.6.1.2.1.11.10
# TYPE snmpInBadValues counter
snmpInBadValues 0
# HELP snmpInBadVersions The total number of SNMP messages which were delivered to the SNMP entity and were for an unsupported SNMP version. - 1.3.6.1.2.1.11.3
# TYPE snmpInBadVersions counter
snmpInBadVersions 0
# HELP snmpInGenErrs The total number of SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `genErr'. - 1.3.6.1.2.1.11.12
# TYPE snmpInGenErrs counter
snmpInGenErrs 0
# HELP snmpInGetNexts The total number of SNMP Get-Next PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.16
# TYPE snmpInGetNexts counter
snmpInGetNexts 2940
# HELP snmpInGetRequests The total number of SNMP Get-Request PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.15
# TYPE snmpInGetRequests counter
snmpInGetRequests 9
# HELP snmpInGetResponses The total number of SNMP Get-Response PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.18
# TYPE snmpInGetResponses counter
snmpInGetResponses 0
# HELP snmpInNoSuchNames The total number of SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `noSuchName'. - 1.3.6.1.2.1.11.9
# TYPE snmpInNoSuchNames counter
snmpInNoSuchNames 0
# HELP snmpInPkts The total number of messages delivered to the SNMP entity from the transport service. - 1.3.6.1.2.1.11.1
# TYPE snmpInPkts counter
snmpInPkts 5928
# HELP snmpInReadOnlys The total number valid SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `readOnly' - 1.3.6.1.2.1.11.11
# TYPE snmpInReadOnlys counter
snmpInReadOnlys 0
# HELP snmpInSetRequests The total number of SNMP Set-Request PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.17
# TYPE snmpInSetRequests counter
snmpInSetRequests 0
# HELP snmpInTooBigs The total number of SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `tooBig'. - 1.3.6.1.2.1.11.8
# TYPE snmpInTooBigs counter
snmpInTooBigs 0
# HELP snmpInTotalReqVars The total number of MIB objects which have been retrieved successfully by the SNMP protocol entity as the result of receiving valid SNMP Get-Request and Get-Next PDUs. - 1.3.6.1.2.1.11.13
# TYPE snmpInTotalReqVars counter
snmpInTotalReqVars 72699
# HELP snmpInTotalSetVars The total number of MIB objects which have been altered successfully by the SNMP protocol entity as the result of receiving valid SNMP Set-Request PDUs. - 1.3.6.1.2.1.11.14
# TYPE snmpInTotalSetVars counter
snmpInTotalSetVars 0
# HELP snmpInTraps The total number of SNMP Trap PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.19
# TYPE snmpInTraps counter
snmpInTraps 0
# HELP snmpOutBadValues The total number of SNMP PDUs which were generated by the SNMP protocol entity and for which the value of the error-status field was `badValue'. - 1.3.6.1.2.1.11.22
# TYPE snmpOutBadValues counter
snmpOutBadValues 0
# HELP snmpOutGenErrs The total number of SNMP PDUs which were generated by the SNMP protocol entity and for which the value of the error-status field was `genErr'. - 1.3.6.1.2.1.11.24
# TYPE snmpOutGenErrs counter
snmpOutGenErrs 0
# HELP snmpOutGetNexts The total number of SNMP Get-Next PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.26
# TYPE snmpOutGetNexts counter
snmpOutGetNexts 0
# HELP snmpOutGetRequests The total number of SNMP Get-Request PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.25
# TYPE snmpOutGetRequests counter
snmpOutGetRequests 0
# HELP snmpOutGetResponses The total number of SNMP Get-Response PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.28
# TYPE snmpOutGetResponses counter
snmpOutGetResponses 5740
# HELP snmpOutNoSuchNames The total number of SNMP PDUs which were generated by the SNMP protocol entity and for which the value of the error-status was `noSuchName'. - 1.3.6.1.2.1.11.21
# TYPE snmpOutNoSuchNames counter
snmpOutNoSuchNames 0
# HELP snmpOutPkts The total number of SNMP Messages which were passed from the SNMP protocol entity to the transport service. - 1.3.6.1.2.1.11.2
# TYPE snmpOutPkts counter
snmpOutPkts 5739
# HELP snmpOutSetRequests The total number of SNMP Set-Request PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.27
# TYPE snmpOutSetRequests counter
snmpOutSetRequests 0
# HELP snmpOutTooBigs The total number of SNMP PDUs which were generated by the SNMP protocol entity and for which the value of the error-status field was `tooBig.' - 1.3.6.1.2.1.11.20
# TYPE snmpOutTooBigs counter
snmpOutTooBigs 0
# HELP snmpOutTraps The total number of SNMP Trap PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.29
# TYPE snmpOutTraps counter
snmpOutTraps 0
# HELP snmpProxyDrops The total number of Confirmed Class PDUs (such as GetRequest-PDUs, GetNextRequest-PDUs, GetBulkRequest-PDUs, SetRequest-PDUs, and InformRequest-PDUs) delivered to the SNMP entity which were silently dropped because the transmission of the (possibly translated) message to a proxy target failed in a manner (other than a time-out) such that no Response Class PDU (such as a Response-PDU) could be returned. - 1.3.6.1.2.1.11.32
# TYPE snmpProxyDrops counter
snmpProxyDrops 0
# HELP snmpSilentDrops The total number of Confirmed Class PDUs (such as GetRequest-PDUs, GetNextRequest-PDUs, GetBulkRequest-PDUs, SetRequest-PDUs, and InformRequest-PDUs) delivered to the SNMP entity which were silently dropped because the size of a reply containing an alternate Response Class PDU (such as a Response-PDU) with an empty variable-bindings field was greater than either a local constraint or the maximum message size associated with the originator of the request. - 1.3.6.1.2.1.11.31
# TYPE snmpSilentDrops counter
snmpSilentDrops 0
# HELP snmp_scrape_duration_seconds Total SNMP time scrape took (walk and processing).
# TYPE snmp_scrape_duration_seconds gauge
snmp_scrape_duration_seconds{module="huawei"} 0.39253882
# HELP snmp_scrape_packets_retried Packets retried for get, bulkget, and walk.
# TYPE snmp_scrape_packets_retried gauge
snmp_scrape_packets_retried{module="huawei"} 0
# HELP snmp_scrape_packets_sent Packets sent for get, bulkget, and walk; including retries.
# TYPE snmp_scrape_packets_sent gauge
snmp_scrape_packets_sent{module="huawei"} 6
# HELP snmp_scrape_pdus_returned PDUs returned from get, bulkget, and walk.
# TYPE snmp_scrape_pdus_returned gauge
snmp_scrape_pdus_returned{module="huawei"} 104
# HELP snmp_scrape_walk_duration_seconds Time SNMP walk/bulkwalk took.
# TYPE snmp_scrape_walk_duration_seconds gauge
snmp_scrape_walk_duration_seconds{module="huawei"} 0.391760524
# HELP sysContact The textual identification of the contact person for this managed node, together with information on how to contact this person - 1.3.6.1.2.1.1.4
# TYPE sysContact gauge
sysContact{sysContact="Not Configure System Contact"} 1
# HELP sysDescr A textual description of the entity - 1.3.6.1.2.1.1.1
# TYPE sysDescr gauge
sysDescr{sysDescr="Linux GSE200M 2.6.27-SPEAr310 #80 Fri Jan 13 11:22:09 CST 2017 armv5tejl"} 1
# HELP sysLocation The physical location of this node (e.g., 'telephone closet, 3rd floor') - 1.3.6.1.2.1.1.6
# TYPE sysLocation gauge
sysLocation{sysLocation="Garage G03"} 1
# HELP sysName An administratively-assigned name for this managed node - 1.3.6.1.2.1.1.5
# TYPE sysName gauge
sysName{sysName="ups2000"} 1
# HELP sysORDescr A textual description of the capabilities identified by the corresponding instance of sysORID. - 1.3.6.1.2.1.1.9.1.3
# TYPE sysORDescr gauge
sysORDescr{sysORDescr="The MIB for Message Processing and Dispatching.",sysORIndex="3"} 1
sysORDescr{sysORDescr="The MIB module for SNMPv2 entities",sysORIndex="1"} 1
sysORDescr{sysORDescr="The SNMP Management Architecture MIB.",sysORIndex="5"} 1
sysORDescr{sysORDescr="The management information definitions for the SNMP User-based Security Model.",sysORIndex="4"} 1
sysORDescr{sysORDescr="View-based Access Control Model for SNMP.",sysORIndex="2"} 1
# HELP sysORID An authoritative identification of a capabilities statement with respect to various MIB modules supported by the local SNMP application acting as a command responder. - 1.3.6.1.2.1.1.9.1.2
# TYPE sysORID gauge
sysORID{sysORID="1.3.6.1.6.3.1",sysORIndex="1"} 1
sysORID{sysORID="1.3.6.1.6.3.10.3.1.1",sysORIndex="5"} 1
sysORID{sysORID="1.3.6.1.6.3.11.3.1.1",sysORIndex="3"} 1
sysORID{sysORID="1.3.6.1.6.3.15.2.1.1",sysORIndex="4"} 1
sysORID{sysORID="1.3.6.1.6.3.16.2.2.1",sysORIndex="2"} 1
# HELP sysORLastChange The value of sysUpTime at the time of the most recent change in state or value of any instance of sysORID. - 1.3.6.1.2.1.1.8
# TYPE sysORLastChange gauge
sysORLastChange 8
# HELP sysORUpTime The value of sysUpTime at the time this conceptual row was last instantiated. - 1.3.6.1.2.1.1.9.1.4
# TYPE sysORUpTime gauge
sysORUpTime{sysORIndex="1"} 7
sysORUpTime{sysORIndex="2"} 8
sysORUpTime{sysORIndex="3"} 8
sysORUpTime{sysORIndex="4"} 8
sysORUpTime{sysORIndex="5"} 8
# HELP sysObjectID The vendor's authoritative identification of the network management subsystem contained in the entity - 1.3.6.1.2.1.1.2
# TYPE sysObjectID gauge
sysObjectID{sysObjectID="1.3.6.1.4.1.8072.3.2.10"} 1
# HELP sysUpTime The time (in hundredths of a second) since the network management portion of the system was last re-initialized. - 1.3.6.1.2.1.1.3
# TYPE sysUpTime gauge
sysUpTime 5.3264032e+07
# HELP upsAlarmsPresent The present number of active alarm conditions. - 1.3.6.1.2.1.33.1.6.1
# TYPE upsAlarmsPresent gauge
upsAlarmsPresent 0
# HELP upsAutoRestart Setting this object to 'on' will cause the UPS system to restart after a shutdown if the shutdown occurred during a power loss as a result of either a upsShutdownAfterDelay or an internal battery depleted condition - 1.3.6.1.2.1.33.1.8.5
# TYPE upsAutoRestart gauge
upsAutoRestart 0
# HELP upsBatteryCurrent The present battery current. - 1.3.6.1.2.1.33.1.2.6
# TYPE upsBatteryCurrent gauge
upsBatteryCurrent 2.147483647e+09
# HELP upsBatteryStatus The indication of the capacity remaining in the UPS system's batteries - 1.3.6.1.2.1.33.1.2.1
# TYPE upsBatteryStatus gauge
upsBatteryStatus 2
# HELP upsBatteryTemperature The ambient temperature at or near the UPS Battery casing. - 1.3.6.1.2.1.33.1.2.7
# TYPE upsBatteryTemperature gauge
upsBatteryTemperature 2.147483647e+09
# HELP upsBatteryVoltage The magnitude of the present battery voltage. - 1.3.6.1.2.1.33.1.2.5
# TYPE upsBatteryVoltage gauge
upsBatteryVoltage 821
# HELP upsBypassFrequency The present bypass frequency. - 1.3.6.1.2.1.33.1.5.1
# TYPE upsBypassFrequency gauge
upsBypassFrequency 500
# HELP upsBypassLineIndex The bypass line identifier. - 1.3.6.1.2.1.33.1.5.3.1.1
# TYPE upsBypassLineIndex gauge
upsBypassLineIndex{upsBypassLineIndex="1"} 1
# HELP upsBypassNumLines The number of bypass lines utilized in this device - 1.3.6.1.2.1.33.1.5.2
# TYPE upsBypassNumLines gauge
upsBypassNumLines 1
# HELP upsBypassVoltage The present bypass voltage. - 1.3.6.1.2.1.33.1.5.3.1.2
# TYPE upsBypassVoltage gauge
upsBypassVoltage{upsBypassLineIndex="1"} 220
# HELP upsConfigAudibleStatus The requested state of the audible alarm - 1.3.6.1.2.1.33.1.9.8
# TYPE upsConfigAudibleStatus gauge
upsConfigAudibleStatus 0
# HELP upsConfigHighVoltageTransferPoint The maximum line voltage allowed before the UPS system transfers to battery backup. - 1.3.6.1.2.1.33.1.9.10
# TYPE upsConfigHighVoltageTransferPoint gauge
upsConfigHighVoltageTransferPoint 0
# HELP upsConfigInputFreq The nominal input frequency - 1.3.6.1.2.1.33.1.9.2
# TYPE upsConfigInputFreq gauge
upsConfigInputFreq 0
# HELP upsConfigInputVoltage The magnitude of the nominal input voltage - 1.3.6.1.2.1.33.1.9.1
# TYPE upsConfigInputVoltage gauge
upsConfigInputVoltage 0
# HELP upsConfigLowBattTime The value of upsEstimatedMinutesRemaining at which a lowBattery condition is declared - 1.3.6.1.2.1.33.1.9.7
# TYPE upsConfigLowBattTime gauge
upsConfigLowBattTime 0
# HELP upsConfigLowVoltageTransferPoint The minimum input line voltage allowed before the UPS system transfers to battery backup. - 1.3.6.1.2.1.33.1.9.9
# TYPE upsConfigLowVoltageTransferPoint gauge
upsConfigLowVoltageTransferPoint 0
# HELP upsConfigOutputFreq The nominal output frequency - 1.3.6.1.2.1.33.1.9.4
# TYPE upsConfigOutputFreq gauge
upsConfigOutputFreq 0
# HELP upsConfigOutputPower The magnitude of the nominal true power rating. - 1.3.6.1.2.1.33.1.9.6
# TYPE upsConfigOutputPower gauge
upsConfigOutputPower 0
# HELP upsConfigOutputVA The magnitude of the nominal Volt-Amp rating. - 1.3.6.1.2.1.33.1.9.5
# TYPE upsConfigOutputVA gauge
upsConfigOutputVA 0
# HELP upsConfigOutputVoltage The magnitude of the nominal output voltage - 1.3.6.1.2.1.33.1.9.3
# TYPE upsConfigOutputVoltage gauge
upsConfigOutputVoltage 0
# HELP upsEstimatedChargeRemaining An estimate of the battery charge remaining expressed as a percent of full charge. - 1.3.6.1.2.1.33.1.2.4
# TYPE upsEstimatedChargeRemaining gauge
upsEstimatedChargeRemaining 91
# HELP upsEstimatedMinutesRemaining An estimate of the time to battery charge depletion under the present load conditions if the utility power is off and remains off, or if it were to be lost and remain off. - 1.3.6.1.2.1.33.1.2.3
# TYPE upsEstimatedMinutesRemaining gauge
upsEstimatedMinutesRemaining 34
# HELP upsIdentAgentSoftwareVersion The UPS agent software version - 1.3.6.1.2.1.33.1.1.4
# TYPE upsIdentAgentSoftwareVersion gauge
upsIdentAgentSoftwareVersion{upsIdentAgentSoftwareVersion="V200R001C31B016"} 1
# HELP upsIdentAttachedDevices A string identifying the devices attached to the output(s) of the UPS - 1.3.6.1.2.1.33.1.1.6
# TYPE upsIdentAttachedDevices gauge
upsIdentAttachedDevices{upsIdentAttachedDevices="None"} 1
# HELP upsIdentManufacturer The name of the UPS manufacturer. - 1.3.6.1.2.1.33.1.1.1
# TYPE upsIdentManufacturer gauge
upsIdentManufacturer{upsIdentManufacturer="HUAWEI"} 1
# HELP upsIdentModel The UPS Model designation. - 1.3.6.1.2.1.33.1.1.2
# TYPE upsIdentModel gauge
upsIdentModel{upsIdentModel="UPS2000 2kVA"} 1
# HELP upsIdentName A string identifying the UPS - 1.3.6.1.2.1.33.1.1.5
# TYPE upsIdentName gauge
upsIdentName{upsIdentName="ups2000"} 1
# HELP upsIdentUPSSoftwareVersion The UPS firmware/software version(s) - 1.3.6.1.2.1.33.1.1.3
# TYPE upsIdentUPSSoftwareVersion gauge
upsIdentUPSSoftwareVersion{upsIdentUPSSoftwareVersion="V2R1C1SPC40"} 1
# HELP upsInputFrequency The present input frequency. - 1.3.6.1.2.1.33.1.3.3.1.2
# TYPE upsInputFrequency gauge
upsInputFrequency{upsInputLineIndex="1"} 500
# HELP upsInputLineBads A count of the number of times the input entered an out-of-tolerance condition as defined by the manufacturer - 1.3.6.1.2.1.33.1.3.1
# TYPE upsInputLineBads counter
upsInputLineBads 0
# HELP upsInputLineIndex The input line identifier. - 1.3.6.1.2.1.33.1.3.3.1.1
# TYPE upsInputLineIndex gauge
upsInputLineIndex{upsInputLineIndex="1"} 1
# HELP upsInputNumLines The number of input lines utilized in this device - 1.3.6.1.2.1.33.1.3.2
# TYPE upsInputNumLines gauge
upsInputNumLines 1
# HELP upsInputVoltage The magnitude of the present input voltage. - 1.3.6.1.2.1.33.1.3.3.1.3
# TYPE upsInputVoltage gauge
upsInputVoltage{upsInputLineIndex="1"} 218
# HELP upsOutputCurrent The present output current. - 1.3.6.1.2.1.33.1.4.4.1.3
# TYPE upsOutputCurrent gauge
upsOutputCurrent{upsOutputLineIndex="1"} 56
# HELP upsOutputFrequency The present output frequency. - 1.3.6.1.2.1.33.1.4.2
# TYPE upsOutputFrequency gauge
upsOutputFrequency 500
# HELP upsOutputLineIndex The output line identifier. - 1.3.6.1.2.1.33.1.4.4.1.1
# TYPE upsOutputLineIndex gauge
upsOutputLineIndex{upsOutputLineIndex="1"} 1
# HELP upsOutputNumLines The number of output lines utilized in this device - 1.3.6.1.2.1.33.1.4.3
# TYPE upsOutputNumLines gauge
upsOutputNumLines 1
# HELP upsOutputPercentLoad The percentage of the UPS power capacity presently being used on this output line, i.e., the greater of the percent load of true power capacity and the percent load of VA. - 1.3.6.1.2.1.33.1.4.4.1.5
# TYPE upsOutputPercentLoad gauge
upsOutputPercentLoad{upsOutputLineIndex="1"} 66
# HELP upsOutputPower The present output true power. - 1.3.6.1.2.1.33.1.4.4.1.4
# TYPE upsOutputPower gauge
upsOutputPower{upsOutputLineIndex="1"} 1
# HELP upsOutputSource The present source of output power - 1.3.6.1.2.1.33.1.4.1
# TYPE upsOutputSource gauge
upsOutputSource 3
# HELP upsOutputVoltage The present output voltage. - 1.3.6.1.2.1.33.1.4.4.1.2
# TYPE upsOutputVoltage gauge
upsOutputVoltage{upsOutputLineIndex="1"} 230
# HELP upsRebootWithDuration Setting this object will immediately shutdown (i.e., turn off) either the UPS output or the UPS system (as determined by the value of upsShutdownType at the time of shutdown) for a period equal to the indicated number of seconds, after which time the output will be started, including starting the UPS, if necessary - 1.3.6.1.2.1.33.1.8.4
# TYPE upsRebootWithDuration gauge
upsRebootWithDuration 0
# HELP upsSecondsOnBattery If the unit is on battery power, the elapsed time since the UPS last switched to battery power, or the time since the network management subsystem was last restarted, whichever is less - 1.3.6.1.2.1.33.1.2.2
# TYPE upsSecondsOnBattery gauge
upsSecondsOnBattery 0
# HELP upsShutdownAfterDelay Setting this object will shutdown (i.e., turn off) either the UPS output or the UPS system (as determined by the value of upsShutdownType at the time of shutdown) after the indicated number of seconds, or less if the UPS batteries become depleted - 1.3.6.1.2.1.33.1.8.2
# TYPE upsShutdownAfterDelay gauge
upsShutdownAfterDelay 0
# HELP upsShutdownType This object determines the nature of the action to be taken at the time when the countdown of the upsShutdownAfterDelay and upsRebootWithDuration objects reaches zero - 1.3.6.1.2.1.33.1.8.1
# TYPE upsShutdownType gauge
upsShutdownType 0
# HELP upsStartupAfterDelay Setting this object will start the output after the indicated number of seconds, including starting the UPS, if necessary - 1.3.6.1.2.1.33.1.8.3
# TYPE upsStartupAfterDelay gauge
upsStartupAfterDelay 0
# HELP upsTestElapsedTime The amount of time, in TimeTicks, since the test in progress was initiated, or, if no test is in progress, the previous test took to complete - 1.3.6.1.2.1.33.1.7.6
# TYPE upsTestElapsedTime gauge
upsTestElapsedTime 0
# HELP upsTestId The test is named by an OBJECT IDENTIFIER which allows a standard mechanism for the initiation of tests, including the well known tests identified in this document as well as those introduced by a particular implementation, i.e., as documented in the private enterprise MIB definition for the device - 1.3.6.1.2.1.33.1.7.1
# TYPE upsTestId gauge
upsTestId{upsTestId="0"} 1
# HELP upsTestResultsDetail Additional information about upsTestResultsSummary - 1.3.6.1.2.1.33.1.7.4
# TYPE upsTestResultsDetail gauge
upsTestResultsDetail{upsTestResultsDetail="0"} 1
# HELP upsTestResultsSummary The results of the current or last UPS diagnostics test performed - 1.3.6.1.2.1.33.1.7.3
# TYPE upsTestResultsSummary gauge
upsTestResultsSummary 0
# HELP upsTestSpinLock A spin lock on the test subsystem - 1.3.6.1.2.1.33.1.7.2
# TYPE upsTestSpinLock gauge
upsTestSpinLock 0
# HELP upsTestStartTime The value of sysUpTime at the time the test in progress was initiated, or, if no test is in progress, the time the previous test was initiated - 1.3.6.1.2.1.33.1.7.5
# TYPE upsTestStartTime gauge
upsTestStartTime 0

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,69 @@
# resource "kubernetes_persistent_volume" "prometheus_grafana_pv" {
# metadata {
# name = "grafana-pv"
# }
# spec {
# capacity = {
# "storage" = "2Gi"
# }
# access_modes = ["ReadWriteOnce"]
# persistent_volume_source {
# nfs {
# path = "/mnt/main/grafana"
# server = "10.0.10.15"
# }
# # iscsi {
# # target_portal = "iscsi.viktorbarzin.lan:3260"
# # iqn = "iqn.2020-12.lan.viktorbarzin:storage:monitoring:grafana"
# # lun = 0
# # fs_type = "ext4"
# # }
# }
# }
# }
resource "kubernetes_persistent_volume" "alertmanager_pv" {
metadata {
name = "alertmanager-pv"
}
spec {
capacity = {
"storage" = "2Gi"
}
access_modes = ["ReadWriteOnce"]
persistent_volume_source {
nfs {
path = "/mnt/main/alertmanager"
server = "10.0.10.15"
}
}
}
}
# resource "kubernetes_persistent_volume_claim" "grafana_pvc" {
# metadata {
# name = "grafana-pvc"
# namespace = kubernetes_namespace.monitoring.metadata[0].name
# }
# spec {
# access_modes = ["ReadWriteOnce"]
# resources {
# requests = {
# "storage" = "2Gi"
# }
# }
# }
# }
resource "helm_release" "grafana" {
namespace = kubernetes_namespace.monitoring.metadata[0].name
create_namespace = true
name = "grafana"
atomic = true
timeout = 600
repository = "https://grafana.github.io/helm-charts"
chart = "grafana"
values = [templatefile("${path.module}/grafana_chart_values.yaml", { db_password = var.grafana_db_password, grafana_admin_password = var.grafana_admin_password })]
}

View file

@ -0,0 +1,78 @@
deploymentStrategy:
type: RollingUpdate
replicas: 3
adminPassword: "${grafana_admin_password}"
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
persistence:
enabled: false # using external mysql
existingClaim: "grafana-pvc"
ingress:
enabled: "true"
ingressClassName: "traefik"
annotations:
traefik.ingress.kubernetes.io/router.middlewares: "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd"
traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
tls:
- secretName: "tls-secret"
hosts:
- "grafana.viktorbarzin.me"
hosts:
- "grafana.viktorbarzin.me"
sidecar:
datasources:
enabled: "true"
dashboards:
enabled: true
label: "grafana_dashboard"
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
name: default
ordId: 1
# folder: ""
type: "file"
# disableDeletion: "false"
# editable: "true"
options:
path: "/var/lib/grafana/dashboards/default"
env:
GF_DATABASE_PASSWORD: "${db_password}"
GF_SERVER_ROOT_URL: https://grafana.viktorbarzin.me
grafana.ini:
database:
type: mysql
host: mysql.dbaas.svc.cluster.local:3306
name: grafana
user: grafana
password: $__env{GF_DATABASE_PASSWORD}
ssl_mode: disable
auth.anonymous:
enabled: true
org_role: Viewer
# auth.google:
# enabled: true
analytics:
check_for_updates: "true"
grafana_net:
url: "https://grafana.net"
log:
mode: "console"
paths:
data: "/var/lib/grafana/data"
logs: "/var/log/grafana"
plugins: "/var/lib/grafana/plugins"
provisioning: "/etc/grafana/provisioning"
security:
allow_embedding: true # Allow to be iframed
# url: https://grafana.com/api/dashboards/11074/revisions/2/download
# datasources:
# - name: Prometheus
# url: http://prometheus-server

View file

@ -0,0 +1,123 @@
resource "kubernetes_config_map" "redfish-config" {
metadata {
name = "redfish-exporter-config"
namespace = kubernetes_namespace.monitoring.metadata[0].name
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
"config.yml" = <<-EOF
address: 0.0.0.0
port: 9610
hosts:
${var.idrac_host}:
username: ${var.idrac_username}
password: ${var.idrac_password}
default:
username: root
password: calvin
metrics:
all: true
# system: true
# sensors: true
# power: true
# sel: false # Disable SEL - often slow
# storage: true # Disable storage - slowest endpoint
# memory: true
# network: false # Disable network adapters
# firmware: false # Don't need this frequently
EOF
}
}
resource "kubernetes_deployment" "idrac-redfish" {
metadata {
name = "idrac-redfish-exporter"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
app = "idrac-redfish-exporter"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "idrac-redfish-exporter"
}
}
template {
metadata {
labels = {
app = "idrac-redfish-exporter"
}
}
spec {
priority_class_name = "tier-1-cluster"
container {
# https://github.com/mrlhansen/idrac_exporter?tab=readme-ov-file
image = "ghcr.io/mrlhansen/idrac_exporter:latest"
name = "redfish-exporter"
port {
container_port = 9610
}
volume_mount {
name = "redfish-exporter-config"
mount_path = "/etc/prometheus/idrac.yml"
sub_path = "config.yml"
}
}
volume {
name = "redfish-exporter-config"
config_map {
name = "redfish-exporter-config"
}
}
}
}
}
}
resource "kubernetes_service" "idrac-redfish-exporter" {
metadata {
name = "idrac-redfish-exporter"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
"app" = "idrac-redfish-exporter"
}
# annotations = {
# "prometheus.io/scrape" = "true"
# "prometheus.io/path" = "/metrics"
# "prometheus.io/port" = "9090"
# }
}
spec {
selector = {
"app" = "idrac-redfish-exporter"
}
port {
name = "http"
port = "9090"
target_port = "9610"
}
}
}
module "idrac-redfish-exporter-ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.monitoring.metadata[0].name
name = "idrac-redfish-exporter"
root_domain = "viktorbarzin.lan"
tls_secret_name = var.tls_secret_name
allow_local_access_only = true
ssl_redirect = false
port = 9090
}

View file

@ -0,0 +1,78 @@
---
cluster:
name: default
destinations:
- name: loki
type: loki
url: http://loki-gateway.monitoring.svc.cluster.local/loki/api/v1/push
clusterEvents:
enabled: false
collector: alloy-logs
namespaces:
- dbaas
- immich
- authentik
- mailserver
- crowdsec
- descheduler
- calibre
- monitoring
- ingress-nginx
- vaultwarden
nodeLogs:
enabled: false
podLogs:
enabled: true
gatherMethod: kubernetesApi
collector: alloy-logs
labelsToKeep:
[
"app_kubernetes_io_name",
"container",
"instance",
"job",
"level",
"namespace",
"service_name",
"service_namespace",
"deployment_environment",
"deployment_environment_name",
]
structuredMetadata:
pod: pod # Set structured metadata "pod" from label "pod"
namespaces:
- dbaas
- immich
- authentik
- mailserver
- crowdsec
- descheduler
- calibre
- monitoring
- ingress-nginx
- vaultwarden
# Collectors
alloy-singleton:
enabled: false
alloy-metrics:
enabled: false
alloy-logs:
enabled: true
# Required when using the Kubernetes API to pod logs
alloy:
mounts:
varlog: false
clustering:
enabled: true
alloy-profiles:
enabled: false
alloy-receiver:
enabled: false

View file

@ -0,0 +1,186 @@
resource "helm_release" "loki" {
namespace = kubernetes_namespace.monitoring.metadata[0].name
create_namespace = true
name = "loki"
repository = "https://grafana.github.io/helm-charts"
chart = "loki"
values = [templatefile("${path.module}/loki.yaml", {})]
timeout = 600
depends_on = [kubernetes_config_map.loki_alert_rules]
}
resource "kubernetes_persistent_volume" "loki" {
metadata {
name = "loki"
}
spec {
capacity = {
storage = "15Gi"
}
access_modes = ["ReadWriteOnce"]
persistent_volume_source {
nfs {
path = "/mnt/main/loki/loki"
server = "10.0.10.15"
}
}
persistent_volume_reclaim_policy = "Retain"
volume_mode = "Filesystem"
}
}
# https://grafana.com/docs/alloy/latest/configure/kubernetes/
resource "helm_release" "alloy" {
namespace = kubernetes_namespace.monitoring.metadata[0].name
create_namespace = true
name = "alloy"
repository = "https://grafana.github.io/helm-charts"
chart = "alloy"
values = [file("${path.module}/alloy.yaml")]
atomic = true
depends_on = [helm_release.loki]
}
resource "kubernetes_daemon_set_v1" "sysctl-inotify" {
metadata {
name = "sysctl-inotify"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
app = "sysctl-inotify"
}
}
spec {
selector {
match_labels = {
app = "sysctl-inotify"
}
}
template {
metadata {
labels = {
app = "sysctl-inotify"
}
}
spec {
init_container {
name = "sysctl"
image = "busybox:1.37"
command = [
"sh", "-c",
"sysctl -w fs.inotify.max_user_watches=1048576 && sysctl -w fs.inotify.max_user_instances=8192 && sysctl -w fs.inotify.max_queued_events=1048576"
]
security_context {
privileged = true
}
}
container {
name = "pause"
image = "registry.k8s.io/pause:3.10"
resources {
requests = {
cpu = "1m"
memory = "4Mi"
}
limits = {
cpu = "1m"
memory = "4Mi"
}
}
}
host_pid = true
toleration {
operator = "Exists"
}
}
}
}
}
# resource "helm_release" "k8s-monitoring" {
# namespace = kubernetes_namespace.monitoring.metadata[0].name
# create_namespace = true
# name = "k8s-monitoring"
# repository = "https://grafana.github.io/helm-charts"
# chart = "k8s-monitoring"
# values = [templatefile("${path.module}/k8s-monitoring-values.yaml", {})]
# atomic = true
# }
resource "kubernetes_config_map" "loki_alert_rules" {
metadata {
name = "loki-alert-rules"
namespace = kubernetes_namespace.monitoring.metadata[0].name
}
data = {
"rules.yaml" = yamlencode({
groups = [{
name = "log-alerts"
rules = [
{
alert = "HighErrorRate"
expr = "sum(rate({namespace=~\".+\"} |= \"error\" [5m])) by (namespace) > 10"
for = "5m"
labels = {
severity = "warning"
}
annotations = {
summary = "High error rate in {{ $labels.namespace }}"
}
},
{
alert = "PodCrashLoopBackOff"
expr = "count_over_time({namespace=~\".+\"} |= \"CrashLoopBackOff\" [5m]) > 0"
for = "1m"
labels = {
severity = "critical"
}
annotations = {
summary = "CrashLoopBackOff detected in {{ $labels.namespace }}"
}
},
{
alert = "OOMKilled"
expr = "count_over_time({namespace=~\".+\"} |= \"OOMKilled\" [5m]) > 0"
for = "1m"
labels = {
severity = "critical"
}
annotations = {
summary = "OOMKilled detected in {{ $labels.namespace }}"
}
}
]
}]
})
}
}
resource "kubernetes_config_map" "grafana_loki_datasource" {
metadata {
name = "grafana-loki-datasource"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
grafana_datasource = "1"
}
}
data = {
"loki-datasource.yaml" = yamlencode({
apiVersion = 1
datasources = [{
name = "Loki"
type = "loki"
access = "proxy"
url = "http://loki.monitoring.svc.cluster.local:3100"
isDefault = false
}]
})
}
}

View file

@ -0,0 +1,110 @@
loki:
commonConfig:
replication_factor: 1
schemaConfig:
configs:
- from: "2025-04-01"
store: tsdb
object_store: filesystem
schema: v13
index:
prefix: loki_index_
period: 24h
ingester:
chunk_idle_period: 12h
max_chunk_age: 24h
chunk_retain_period: 1m
chunk_target_size: 1572864
wal:
dir: /loki-wal
pattern_ingester:
enabled: true
limits_config:
allow_structured_metadata: true
volume_enabled: true
retention_period: 168h
compactor:
retention_enabled: true
working_directory: /var/loki/compactor
compaction_interval: 1h
delete_request_store: filesystem
ruler:
enable_api: true
storage:
type: local
local:
directory: /loki/rules
alertmanager_url: http://prometheus-alertmanager.monitoring.svc.cluster.local:9093
ring:
kvstore:
store: inmemory
rule_path: /var/loki/scratch
storage:
type: "filesystem"
auth_enabled: false
minio:
enabled: false
deploymentMode: SingleBinary
singleBinary:
replicas: 1
persistence:
enabled: true
size: 15Gi
storageClass: ""
extraVolumes:
- name: wal
emptyDir:
medium: Memory
sizeLimit: 2Gi
- name: rules
configMap:
name: loki-alert-rules
extraVolumeMounts:
- name: wal
mountPath: /loki-wal
- name: rules
mountPath: /loki/rules/fake
resources:
requests:
cpu: 250m
memory: 4Gi
limits:
cpu: "1"
memory: 6Gi
# Zero out replica counts of other deployment modes
backend:
replicas: 0
read:
replicas: 0
write:
replicas: 0
ingester:
replicas: 0
querier:
replicas: 0
queryFrontend:
replicas: 0
queryScheduler:
replicas: 0
distributor:
replicas: 0
compactor:
replicas: 0
indexGateway:
replicas: 0
bloomCompactor:
replicas: 0
bloomGateway:
replicas: 0
# Disable optional components for single binary mode
gateway:
enabled: false
chunksCache:
enabled: false
resultsCache:
enabled: false

View file

@ -0,0 +1,202 @@
variable "tls_secret_name" {}
variable "alertmanager_account_password" {}
variable "idrac_host" {
default = "192.168.1.4"
}
variable "idrac_username" {
default = "root"
}
variable "idrac_password" {
default = "calvin"
}
variable "alertmanager_slack_api_url" {}
variable "tiny_tuya_service_secret" { type = string }
variable "haos_api_token" { type = string }
variable "pve_password" { type = string }
variable "grafana_db_password" { type = string }
variable "grafana_admin_password" { type = string }
variable "tier" { type = string }
resource "kubernetes_namespace" "monitoring" {
metadata {
name = "monitoring"
labels = {
"istio-injection" : "disabled"
tier = var.tier
"resource-governance/custom-quota" = "true"
}
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.monitoring.metadata[0].name
tls_secret_name = var.tls_secret_name
}
# Terraform get angry with the 30k values file :/ use ansible until solved
# resource "helm_release" "ups_prometheus_snmp_exporter" {
# namespace = kubernetes_namespace.monitoring.metadata[0].name
# create_namespace = true
# name = "ups_prometheus_exporter"
# repository = "https://prometheus-community.github.io/helm-charts"
# chart = "prometheus-snmp-exporter"
# values = [file("${path.module}/ups_snmp_values.yaml")]
# }
resource "kubernetes_cron_job_v1" "monitor_prom" {
metadata {
name = "monitor-prometheus"
}
spec {
concurrency_policy = "Replace"
failed_jobs_history_limit = 5
schedule = "*/30 * * * *"
job_template {
metadata {
}
spec {
template {
metadata {
}
spec {
container {
name = "monitor-prometheus"
image = "alpine"
command = ["/bin/sh", "-c", "apk add --update curl && curl --connect-timeout 2 prometheus-server.monitoring.svc.cluster.local || curl https://webhook.viktorbarzin.me/fb/message-viktor -d 'Prometheus is down!'"]
}
}
}
}
}
}
}
resource "kubernetes_manifest" "status_redirect_middleware" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "status-redirect"
namespace = kubernetes_namespace.monitoring.metadata[0].name
}
spec = {
redirectRegex = {
regex = ".*"
replacement = "https://hetrixtools.com/r/38981b548b5d38b052aca8d01285a3f3/"
permanent = true
}
}
}
}
resource "kubernetes_ingress_v1" "status" {
metadata {
name = "hetrix-redirect-ingress"
namespace = kubernetes_namespace.monitoring.metadata[0].name
annotations = {
"traefik.ingress.kubernetes.io/router.middlewares" = "monitoring-status-redirect@kubernetescrd"
"traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
}
}
spec {
ingress_class_name = "traefik"
tls {
hosts = ["status.viktorbarzin.me"]
secret_name = var.tls_secret_name
}
rule {
host = "status.viktorbarzin.me"
http {
path {
path = "/"
backend {
service {
name = "not-used"
port {
number = 80 # redirected by middleware
}
}
}
}
}
}
}
}
resource "kubernetes_manifest" "yotovski_redirect_middleware" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "yotovski-redirect"
namespace = kubernetes_namespace.monitoring.metadata[0].name
}
spec = {
redirectRegex = {
regex = ".*"
replacement = "https://hetrixtools.com/r/2ba9d7a5e017794db0fd91f0115a8b3b/"
permanent = true
}
}
}
}
resource "kubernetes_ingress_v1" "status_yotovski" {
metadata {
name = "hetrix-yotovski-redirect-ingress"
namespace = kubernetes_namespace.monitoring.metadata[0].name
annotations = {
"traefik.ingress.kubernetes.io/router.middlewares" = "monitoring-yotovski-redirect@kubernetescrd"
"traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
}
}
spec {
ingress_class_name = "traefik"
tls {
hosts = ["yotovski-status.viktorbarzin.me"]
secret_name = var.tls_secret_name
}
rule {
host = "yotovski-status.viktorbarzin.me"
http {
path {
path = "/"
backend {
service {
name = "not-used" # redirected by middleware
port {
number = 80
}
}
}
}
}
}
}
}
# Custom ResourceQuota for monitoring larger than the default 1-cluster tier quota
# because monitoring runs 29+ pods (Prometheus, Grafana, Loki, Alloy, exporters, etc.)
resource "kubernetes_resource_quota" "monitoring" {
metadata {
name = "monitoring-quota"
namespace = kubernetes_namespace.monitoring.metadata[0].name
}
spec {
hard = {
"requests.cpu" = "16"
"requests.memory" = "16Gi"
"limits.cpu" = "80"
"limits.memory" = "160Gi"
pods = "100"
}
}
}

View file

@ -0,0 +1,58 @@
resource "kubernetes_persistent_volume_claim" "prometheus_server_pvc" {
metadata {
name = "prometheus-iscsi-pvc"
namespace = kubernetes_namespace.monitoring.metadata[0].name
}
spec {
access_modes = ["ReadWriteOnce"]
resources {
requests = {
storage = "15Gi"
}
}
# storage_class_name = "standard"
volume_name = "prometheus-iscsi-pv"
}
}
resource "kubernetes_persistent_volume" "prometheus_server_pvc" {
metadata {
name = "prometheus-iscsi-pv"
}
spec {
capacity = {
storage = "15Gi"
}
access_modes = ["ReadWriteOnce"]
persistent_volume_source {
nfs {
path = "/mnt/main/prometheus"
server = "10.0.10.15"
}
# iscsi {
# fs_type = "ext4"
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:monitoring:prometheus"
# lun = 0
# target_portal = "iscsi.viktorbarzin.me:3260"
# }
}
persistent_volume_reclaim_policy = "Retain"
volume_mode = "Filesystem"
}
}
resource "helm_release" "prometheus" {
namespace = kubernetes_namespace.monitoring.metadata[0].name
create_namespace = true
name = "prometheus"
repository = "https://prometheus-community.github.io/helm-charts"
chart = "prometheus"
# version = "15.0.2"
version = "25.8.2"
values = [templatefile("${path.module}/prometheus_chart_values.tpl", { alertmanager_mail_pass = var.alertmanager_account_password, alertmanager_slack_api_url = var.alertmanager_slack_api_url, tuya_api_key = var.tiny_tuya_service_secret, haos_api_token = var.haos_api_token })]
}

View file

@ -0,0 +1,815 @@
# Helm values
# all values - https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus/values.yaml
alertmanager:
persistentVolume:
enabled: true
existingClaim: alertmanager-pvc
#existingClaim: alertmanager-iscsi-pvc
# storageClass: rook-cephfs
strategy:
type: Recreate
baseURL: "https://alertmanager.viktorbarzin.me"
ingress:
enabled: true
ingressClassName: "traefik"
annotations:
traefik.ingress.kubernetes.io/router.middlewares: "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd,traefik-authentik-forward-auth@kubernetescrd"
traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
tls:
- secretName: "tls-secret"
hosts:
- "alertmanager.viktorbarzin.me"
hosts:
# - alertmanager.viktorbarzin.me
- host: alertmanager.viktorbarzin.me
paths:
- path: /
pathType: Prefix
serviceName: prometheus-server
servicePort: 80
config:
enabled: true
global:
smtp_from: "alertmanager@viktorbarzin.me"
# smtp_smarthost: "smtp.viktorbarzin.me:587"
smtp_smarthost: "mailserver.mailserver.svc.cluster.local:587"
smtp_auth_username: "alertmanager@viktorbarzin.me"
smtp_auth_password: "${alertmanager_mail_pass}"
smtp_require_tls: true
slack_api_url: "${alertmanager_slack_api_url}"
# templates:
# - "/etc/alertmanager/template/*.tmpl"
route:
group_by: ["alertname"]
group_wait: 30s
group_interval: 5m
repeat_interval: 4h
receiver: slack-warning
routes:
- receiver: slack-critical
group_wait: 10s
group_interval: 1m
repeat_interval: 1h
matchers:
- severity = critical
continue: false
- receiver: slack-info
group_wait: 5m
group_interval: 30m
repeat_interval: 12h
matchers:
- severity = info
continue: false
inhibit_rules:
# Node down makes node-condition alerts redundant
- source_matchers:
- alertname = NodeDown
target_matchers:
- alertname =~ "NodeNotReady|NodeConditionBad"
# Traefik down makes service-level alerts noise
- source_matchers:
- alertname = TraefikDown
target_matchers:
- alertname =~ "HighServiceErrorRate|HighService4xxRate|HighServiceLatency|TraefikHighOpenConnections"
# Power outage makes on-battery alert redundant
- source_matchers:
- alertname = PowerOutage
target_matchers:
- alertname = OnBattery
receivers:
- name: slack-critical
slack_configs:
- send_resolved: true
channel: "#alerts"
color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}'
title: '{{ if eq .Status "firing" }}[CRITICAL]{{ else }}[RESOLVED]{{ end }} {{ .GroupLabels.alertname }} ({{ .Alerts | len }})'
text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ "\n" }}{{ end }}'
- name: slack-warning
slack_configs:
- send_resolved: true
channel: "#alerts"
color: '{{ if eq .Status "firing" }}warning{{ else }}good{{ end }}'
title: '{{ if eq .Status "firing" }}[WARNING]{{ else }}[RESOLVED]{{ end }} {{ .GroupLabels.alertname }} ({{ .Alerts | len }})'
text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ "\n" }}{{ end }}'
- name: slack-info
slack_configs:
- send_resolved: true
channel: "#alerts"
color: '{{ if eq .Status "firing" }}#439FE0{{ else }}good{{ end }}'
title: '[INFO] {{ .GroupLabels.alertname }}'
text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ "\n" }}{{ end }}'
# web.external-url seems to be hardcoded, edited deployment manually
# extraArgs:
# web.external-url: "https://prometheus.viktorbarzin.me"
# prometheus-node-exporter:
# enabled: true
server:
# Enable me to delete metrics
extraFlags:
# - "web.enable-admin-api"
- "web.enable-lifecycle"
- "storage.tsdb.allow-overlapping-blocks"
- "storage.tsdb.retention.size=45GB"
- "storage.tsdb.wal-compression"
persistentVolume:
# enabled: false
existingClaim: prometheus-iscsi-pvc
# storageClass: rook-cephfs
retention: "52w"
strategy:
type: Recreate
baseURL: "https://prometheus.viktorbarzin.me"
extraVolumes:
- name: prometheus-wal-tmpfs
emptyDir:
medium: Memory
sizeLimit: 2Gi
# 2. Mount it over the WAL directory
extraVolumeMounts:
- name: prometheus-wal-tmpfs
mountPath: /data/wal # Standard path for the chart
ingress:
enabled: true
ingressClassName: "traefik"
annotations:
traefik.ingress.kubernetes.io/router.middlewares: "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd,traefik-authentik-forward-auth@kubernetescrd"
traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
gethomepage.dev/enabled: "true"
gethomepage.dev/description: "Prometheus"
gethomepage.dev/icon: "prometheus.png"
gethomepage.dev/name: "Prometheus"
gethomepage.dev/widget.type: "prometheus"
gethomepage.dev/widget.url: "http://prometheus-server.monitoring.svc.cluster.local:80"
gethomepage.dev/pod-selector: ""
tls:
- secretName: "tls-secret"
hosts:
- "prometheus.viktorbarzin.me"
hosts:
- "prometheus.viktorbarzin.me"
alertmanagers:
- static_configs:
- targets:
- "prometheus-alertmanager.monitoring.svc.cluster.local:9093"
# - "alertmanager.viktorbarzin.me"
tls_config:
insecure_skip_verify: true
serverFiles:
# prometheus.yml:
# storage:
# tsdb:
# # no_lockfile: true
# # max_blocks_in_cache: 100000
# # max_lookback_duration: 0s
# # min_block_duration: 2h
# # retention: 15d
# # chunk_encoding: 1
# # chunk_range: 1h
# # max_chunks_to_persist: 4800
# # chunks_to_persist: 4800
# cache:
# entries: 5000
# head:
# chunk_bytes: 1048576
# # wal:
# # compressions: 1
# # flush_after_seconds: 30
# # segment_size: 1073741824
# series_file:
# # no_sync: true
# # max_concurrent_writes: 256
# # block_size: 262144
# cache:
# max_size: 1073741824
# alertingaaa:
# alertmanagers:
# - static_configs:
# targets: "alertmanager.viktorbarzin.lan"
alerting_rules.yml:
groups:
- name: R730 Host
rules:
- alert: HighCPUTemperature
expr: node_hwmon_temp_celsius{instance="pve-node-r730"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance="pve-node-r730"} > 75
for: 30m
labels:
severity: warning
annotations:
summary: "CPU temp: {{ $value | printf \"%.0f\" }}°C (threshold: 75°C)"
- alert: SSDHighWriteRate
expr: rate(node_disk_written_bytes_total{job="proxmox-host", device="sdb"}[2m]) / 1024 / 1024 > 2 # sdb is SSD; value in MB
for: 10m
labels:
severity: info
annotations:
summary: "SSD write rate: {{ $value | printf \"%.1f\" }} MB/s (threshold: 2 MB/s)"
- alert: HDDHighWriteRate
expr: rate(node_disk_written_bytes_total{job="proxmox-host", device="sdc"}[2m]) / 1024 / 1024 > 10 # sdc is 11TB HDD; value in MB
for: 20m
labels:
severity: info
annotations:
summary: "HDD write rate: {{ $value | printf \"%.1f\" }} MB/s (threshold: 10 MB/s)"
- alert: NoiDRACData
expr: (max(r730_idrac_idrac_system_health + 1) or on() vector(0)) == 0
for: 30m
labels:
severity: info
annotations:
summary: "No iDRAC data for 30m - check Prometheus scraping"
- alert: HighSystemLoad
expr: scalar(node_load1{instance="pve-node-r730"}) * 100 / count(count(node_cpu_seconds_total{instance="pve-node-r730"}) by (cpu)) > 50
for: 30m
labels:
severity: info
annotations:
summary: "System load: {{ $value | printf \"%.0f\" }}% (threshold: 50%)"
- alert: FanFailure
expr: r730_idrac_redfish_chassis_fan_health != 1
for: 5m
labels:
severity: warning
annotations:
summary: "Fan unhealthy on R730 - check iDRAC"
- name: Nvidia Tesla T4 GPU
rules:
- alert: HighGPUTemp
expr: nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP > 65
for: 1m
labels:
severity: warning
annotations:
summary: "GPU temp: {{ $value | printf \"%.0f\" }}°C (threshold: 65°C)"
- alert: HighPowerUsage
expr: nvidia_tesla_t4_DCGM_FI_DEV_POWER_USAGE > 50
for: 30m
labels:
severity: info
annotations:
summary: "GPU power: {{ $value | printf \"%.0f\" }}W (threshold: 50W)"
- alert: HighUtilization
expr: nvidia_tesla_t4_DCGM_FI_DEV_GPU_UTIL > 50
for: 30m
labels:
severity: info
annotations:
summary: "GPU util: {{ $value | printf \"%.0f\" }}% (threshold: 50%)"
- alert: HighMemoryUsage
expr: nvidia_tesla_t4_DCGM_FI_DEV_FB_USED / 1024 > 12
for: 5m
labels:
severity: info
annotations:
summary: "VRAM used: {{ $value | printf \"%.1f\" }} GB (threshold: 12 GB)"
- name: Power
rules:
- alert: OnBattery
expr: ups_upsSecondsOnBattery > 0
for: 30m
labels:
severity: critical
annotations:
summary: "UPS on battery: {{ $value | printf \"%.0f\" }}s"
- alert: LowUPSBattery
expr: ups_upsEstimatedMinutesRemaining < 25 and on(instance) ups_upsInputVoltage < 150
for: 1m
labels:
severity: critical
annotations:
summary: "UPS battery low: {{ $value | printf \"%.0f\" }} min remaining (threshold: 25 min)"
- alert: PowerOutage
expr: ups_upsInputVoltage < 150
labels:
severity: critical
annotations:
summary: "Power outage - input voltage: {{ $value | printf \"%.0f\" }}V (threshold: <150V)"
- alert: HighPowerUsage
expr: r730_idrac_idrac_power_control_consumed_watts > 200
for: 60m
labels:
severity: info
annotations:
summary: "Server power: {{ $value | printf \"%.0f\" }}W (threshold: 200W)"
- alert: UsingInverterEnergyForTooLong
expr: automatic_transfer_switch_power_mode > 0 # 1 = Inverter; 0 = Grid
for: 24h
labels:
severity: info
annotations:
summary: "On inverter for >24h - check grid switchover"
- name: Storage
rules:
- alert: NodeFilesystemFull
expr: (node_filesystem_avail_bytes{fstype!~"tmpfs|fuse.*"} / node_filesystem_size_bytes) * 100 < 10
for: 15m
labels:
severity: warning
annotations:
summary: "Disk {{ $labels.mountpoint }} on {{ $labels.instance }}: {{ $value | printf \"%.1f\" }}% free (threshold: 10%)"
- alert: PVFillingUp
expr: (kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes) * 100 > 85
for: 30m
labels:
severity: warning
annotations:
summary: "PV {{ $labels.persistentvolumeclaim }} in {{ $labels.namespace }}: {{ $value | printf \"%.0f\" }}% used (threshold: 85%)"
- name: K8s Health
rules:
- alert: PodCrashLooping
expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
for: 10m
labels:
severity: warning
annotations:
summary: "{{ $labels.namespace }}/{{ $labels.pod }}: {{ $value | printf \"%.0f\" }} restarts in 1h"
- alert: ContainerOOMKilled
expr: increase(container_oom_events_total{container!=""}[15m]) > 0
labels:
severity: warning
annotations:
summary: "{{ $labels.namespace }}/{{ $labels.pod }}/{{ $labels.container }}: OOM killed"
- alert: NodeNotReady
expr: kube_node_status_condition{condition="Ready",status="true"} == 0
for: 5m
labels:
severity: critical
annotations:
summary: "Node {{ $labels.node }} is NotReady"
- alert: NodeConditionBad
expr: kube_node_status_condition{condition=~"MemoryPressure|DiskPressure|PIDPressure",status="true"} == 1
for: 5m
labels:
severity: warning
annotations:
summary: "Node {{ $labels.node }}: {{ $labels.condition }}"
- alert: JobFailed
expr: kube_job_status_failed > 0
for: 15m
labels:
severity: warning
annotations:
summary: "Job {{ $labels.namespace }}/{{ $labels.job_name }}: {{ $value | printf \"%.0f\" }} failure(s)"
- name: Infrastructure Health
rules:
- alert: HomeAssistantDown
expr: up{job="haos"} == 0
for: 5m
labels:
severity: warning
annotations:
summary: "Home Assistant down: {{ $labels.instance }}"
- alert: CoreDNSErrors
expr: rate(coredns_dns_responses_total{rcode="SERVFAIL"}[5m]) > 1
for: 10m
labels:
severity: warning
annotations:
summary: "CoreDNS SERVFAIL rate: {{ $value | printf \"%.1f\" }}/s (threshold: 1/s)"
- alert: ScrapeTargetDown
expr: up{job!~"istiod|envoy-stats|openwrt"} == 0
for: 15m
labels:
severity: warning
annotations:
summary: "Scrape target down: {{ $labels.job }}/{{ $labels.instance }}"
- alert: PrometheusStorageFull
expr: (prometheus_tsdb_storage_blocks_bytes / (1024*1024*1024)) > 50
for: 30m
labels:
severity: warning
annotations:
summary: "Prometheus TSDB: {{ $value | printf \"%.0f\" }} GiB (threshold: 50 GiB)"
- alert: PrometheusNotificationsFailing
expr: rate(prometheus_notifications_errors_total[5m]) > 0
for: 10m
labels:
severity: warning
annotations:
summary: "Prometheus notification errors: {{ $value | printf \"%.2f\" }}/s"
- name: Cluster
rules:
- alert: NodeDown
expr: (up{job="kubernetes-nodes"} or on() vector(0)) == 0
for: 1m
labels:
severity: critical
annotations:
summary: "Node down: {{ $labels.instance }}"
- alert: DockerRegistryDown
expr: (registry_process_start_time_seconds or on() vector(0)) == 0
for: 10m
labels:
severity: warning
annotations:
summary: "Docker registry down for 10m"
- alert: RegistryLowCacheHitRate
expr: (sum by (job) (rate(registry_registry_storage_cache_total{type="Hit"}[15m]))) / (sum by (job) (rate(registry_registry_storage_cache_total{type="Request"}[15m]))) * 100 < 25
for: 12h
labels:
severity: info
annotations:
summary: "Registry cache hit rate: {{ $value | printf \"%.0f\" }}% (threshold: 25%)"
- alert: NodeHighCPUUsage
expr: pve_cpu_usage_ratio * 100 > 30
for: 6h
labels:
severity: info
annotations:
summary: "CPU usage on {{ $labels.node }}: {{ $value | printf \"%.0f\" }}% (threshold: 30%)"
- alert: NodeLowFreeMemory
expr: ((1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) or on() vector(1)) * 100 > 95
for: 10m
labels:
severity: warning
annotations:
summary: "Memory usage on {{ $labels.node }}: {{ $value | printf \"%.0f\" }}% (threshold: 95%)"
# - name: PodStuckNotReady
# rules:
# - alert: PodStuckNotReady
# expr: kube_pod_status_ready{condition="true"} == 0
# for: 5m
# labels:
# severity: page
# annotations:
# summary: Pod stuck not ready.
- alert: DeploymentReplicasMismatch
expr: |
(
kube_deployment_spec_replicas
- on(namespace, deployment) kube_deployment_status_replicas_available
) > 0
for: 15m
labels:
severity: warning
annotations:
summary: "{{ $labels.namespace }}/{{ $labels.deployment }}: {{ $value | printf \"%.0f\" }} replica(s) unavailable"
- alert: StatefulSetReplicasMismatch
expr: |
(
kube_statefulset_replicas
- on(namespace, statefulset) kube_statefulset_status_replicas_ready
) > 0
for: 15m
labels:
severity: warning
annotations:
summary: "{{ $labels.namespace }}/{{ $labels.statefulset }}: {{ $value | printf \"%.0f\" }} replica(s) unavailable"
- alert: DaemonSetMissingPods
expr: |
(
kube_daemonset_status_desired_number_scheduled
- on(namespace, daemonset) kube_daemonset_status_number_ready
) > 0
for: 15m
labels:
severity: warning
annotations:
summary: "{{ $labels.namespace }}/{{ $labels.daemonset }}: {{ $value | printf \"%.0f\" }} pod(s) missing"
- alert: NoNodeLoadData
expr: (node_load1 OR on() vector(0)) == 0
for: 10m
labels:
severity: info
annotations:
summary: "No node load data for 10m - check Prometheus scraping"
- name: "Traefik Ingress"
rules:
- alert: TraefikDown
expr: up{job="traefik"} == 0
for: 2m
labels:
severity: critical
annotations:
summary: "Traefik pod {{ $labels.instance }} is down"
- alert: HighServiceErrorRate
expr: |
(
sum(rate(traefik_service_requests_total{code=~"5.."}[5m])) by (service)
/ sum(rate(traefik_service_requests_total[5m])) by (service)
* 100
) > 10
and sum(rate(traefik_service_requests_total[5m])) by (service) > 0.1
for: 5m
labels:
severity: warning
annotations:
summary: "5xx rate on {{ $labels.service }}: {{ $value | printf \"%.1f\" }}% (threshold: 10%)"
- alert: HighService4xxRate
expr: |
(
sum(rate(traefik_service_requests_total{code=~"4..", service!~".*nextcloud.*|.*grafana.*"}[5m])) by (service)
/ sum(rate(traefik_service_requests_total{service!~".*nextcloud.*|.*grafana.*"}[5m])) by (service)
* 100
) > 30
and sum(rate(traefik_service_requests_total{service!~".*nextcloud.*|.*grafana.*"}[5m])) by (service) > 0.1
for: 10m
labels:
severity: warning
annotations:
summary: "4xx rate on {{ $labels.service }}: {{ $value | printf \"%.1f\" }}% (threshold: 30%)"
- alert: HighServiceLatency
expr: |
histogram_quantile(0.99,
sum(rate(traefik_service_request_duration_seconds_bucket[5m])) by (service, le)
) > 10
for: 5m
labels:
severity: warning
annotations:
summary: "p99 latency on {{ $labels.service }}: {{ $value | printf \"%.1f\" }}s (threshold: 10s)"
- alert: TLSCertExpiringSoon
expr: (traefik_tls_certs_not_after - time()) / 86400 < 7
for: 1h
labels:
severity: warning
annotations:
summary: "TLS cert {{ $labels.cn }} expires in {{ $value | printf \"%.0f\" }} days"
- alert: TraefikHighOpenConnections
expr: sum(traefik_service_open_connections) by (service) > 500
for: 5m
labels:
severity: warning
annotations:
summary: "{{ $labels.service }} has {{ $value | printf \"%.0f\" }} open connections (threshold: 500)"
# - alert: OpenWRT High Memory Usage
# expr: 100 - ((openwrt_node_memory_MemAvailable_bytes * 100) / openwrt_node_memory_MemTotal_bytes) > 90
# for: 10m
# labels:
# severity: page
# annotations:
# summary: OpenWRT high memory usage. Can cause services getting stuck.
# - alert: Mail server has no replicas available
# expr: (kube_deployment_status_replicas_available{namespace="mailserver"} or on() vector(0)) < 1
# for: 10m
# labels:
# severity: page
# annotations:
# summary: Mail server has no available replicas. This means mail may not be received.
# - alert: Hackmd has no replicas available
# expr: (kube_deployment_status_replicas_available{namespace="hackmd"} or on() vector(0)) < 1
# for: 1m
# labels:
# severity: page
# annotations:
# summary: Hackmd has no available replicas.
# - alert: Privatebin has no replicas available
# expr: (kube_deployment_status_replicas_available{namespace="privatebin"} or on() vector(0)) < 1
# for: 10m
# labels:
# severity: page
# annotations:
# summary: Privatebin has no available replicas.
# - name: London OpenWRT Down
# rules:
# - alert: OpenWRT client unreachable
# expr: (openwrt_node_openwrt_info or on() vector(0)) == 0
# for: 10m
# labels:
# severity: page
# annotations:
# summary: London OpenWRT router unreachable through VPN
# - alert: OpenWRT high system load
# expr: openwrt_node_load1 > 0.9
# for: 15m
# labels:
# severity: page
# annotations:
# summary: High system load on OpenWRT
# - alert: Finance app webhook exceptions
# expr: changes(webhook_failure_total[5m]) >= 1
# for: 1m
# labels:
# severity: page
# annotations:
# summary: Finance app webhook exceptions
# - alert: Finance app unhandled exceptions
# expr: changes(flask_http_request_exceptions_total[5m]) >= 1
# for: 1m
# labels:
# severity: page
# annotations:
# summary: Finance app unhandled exceptions
- alert: New Tailscale client
expr: irate(headscale_machine_registrations_total{action="reauth"}[5m]) > 0
labels:
severity: info
annotations:
summary: "New Tailscale client registered"
extraScrapeConfigs: |
- job_name: 'proxmox-host'
static_configs:
- targets:
- "192.168.1.127:9100"
labels:
node: 'pve-node-r730'
metrics_path: '/metrics'
relabel_configs:
- source_labels: [__address__]
target_label: instance
replacement: 'pve-node-r730' # Giving it a friendly name
- job_name: 'istiod'
kubernetes_sd_configs:
- role: endpoints
namespaces:
names:
- istio-system
relabel_configs:
- source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: istiod;http-monitoring
- job_name: 'envoy-stats'
metrics_path: /stats/prometheus
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_container_port_name]
action: keep
regex: '.*-envoy-prom'
- job_name: 'crowdsec'
static_configs:
- targets:
- "crowdsec-service.crowdsec.svc.cluster.local:6060"
metrics_path: '/metrics'
- job_name: 'snmp-idrac'
scrape_interval: 1m
scrape_timeout: 45s
static_configs:
- targets:
- "idrac.viktorbarzin.lan.:161"
metrics_path: '/snmp'
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 'snmp-exporter.monitoring.svc.cluster.local:9116'
metric_relabel_configs:
- source_labels: [ __name__ ]
target_label: '__name__'
action: replace
regex: '(.*)'
replacement: 'r730_idrac_$${1}'
- job_name: 'redfish-idrac'
scrape_interval: 3m
scrape_timeout: 45s
metrics_path: /metrics
static_configs:
- targets:
- 192.168.1.4
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: idrac-redfish-exporter.monitoring.svc.cluster.local:9090
metric_relabel_configs:
- source_labels: [ __name__ ]
target_label: '__name__'
action: replace
regex: '(.*)'
replacement: 'r730_idrac_$${1}'
- job_name: 'openwrt'
static_configs:
- targets:
#- "home.viktorbarzin.lan:9100"
#- "10.0.20.100:9100"
- "192.168.2.1:9100"
metrics_path: '/metrics'
#relabel_configs:
# - source_labels: [__address__]
# target_label: __param_target
# - source_labels: [__param_target]
# target_label: instance
# - target_label: __address__
# #replacement: 'home.viktorbarzin.lan:9100'
# #replacement: '10.0.20.100:9100'
metric_relabel_configs:
- source_labels: [ __name__ ]
target_label: '__name__'
action: replace
regex: '(.*)'
replacement: 'openwrt_$${1}'
- job_name: 'snmp-ups'
params:
module: [huawei]
static_configs:
- targets:
- "ups.viktorbarzin.lan.:161"
metrics_path: '/snmp'
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 'snmp-exporter.monitoring.svc.cluster.local:9116'
metric_relabel_configs:
- source_labels: [ __name__ ]
target_label: '__name__'
action: replace
regex: '(.*)'
replacement: 'ups_$${1}'
- job_name: 'registry'
static_configs:
- targets:
#- "192.168.1.10:5001" # rpi
#- "10.0.10.10:5001" # devvm
- "10.0.20.10:5001" # registry-vm
metrics_path: '/metrics'
metric_relabel_configs:
- source_labels: [ __name__ ]
target_label: '__name__'
action: replace
regex: '(.*)'
replacement: 'registry_$${1}'
- job_name: 'automatic-transfer-switch'
static_configs:
- targets:
- "tuya-bridge.tuya-bridge.svc.cluster.local:80"
metrics_path: '/metrics/bfe98afa941d5a1e2def8s'
params:
api-key: ['${tuya_api_key}']
metric_relabel_configs:
- source_labels: [ __name__ ]
target_label: '__name__'
action: replace
regex: '(.*)'
replacement: 'automatic_transfer_switch_$${1}'
- job_name: 'fuse-garage'
static_configs:
- targets:
- "tuya-bridge.tuya-bridge.svc.cluster.local:80"
metrics_path: '/metrics/bf62301ef04e38d881ugcu'
params:
api-key: ['${tuya_api_key}']
metric_relabel_configs:
- source_labels: [ __name__ ]
target_label: '__name__'
action: replace
regex: '(.*)'
replacement: 'fuse_garage_$${1}'
- job_name: 'fuse-main'
static_configs:
- targets:
- "tuya-bridge.tuya-bridge.svc.cluster.local:80"
metrics_path: '/metrics/bf1a684e80ae942e4dji6b'
params:
api-key: ['${tuya_api_key}']
metric_relabel_configs:
- source_labels: [ __name__ ]
target_label: '__name__'
action: replace
regex: '(.*)'
replacement: 'fuse_main_$${1}'
- job_name: 'haos'
static_configs:
- targets:
- "ha-sofia.viktorbarzin.lan.:8123"
metrics_path: '/api/prometheus'
bearer_token: "${haos_api_token}"
- job_name: 'nvidia'
static_configs:
- targets:
- "nvidia-exporter.nvidia.svc.cluster.local"
metrics_path: '/metrics'
metric_relabel_configs:
- source_labels: [ __name__ ]
target_label: '__name__'
action: replace
regex: '(.*)'
replacement: 'nvidia_tesla_t4_$${1}'
- job_name: 'gpu-pod-memory'
static_configs:
- targets:
- "gpu-pod-exporter.nvidia.svc.cluster.local"
metrics_path: '/metrics'
- job_name: 'traefik'
kubernetes_sd_configs:
- role: pod
namespaces:
names:
- traefik
relabel_configs:
- source_labels: [__meta_kubernetes_pod_container_port_name]
action: keep
regex: metrics
- source_labels: [__meta_kubernetes_pod_name]
target_label: instance
- job_name: 'realestate-crawler-api'
static_configs:
- targets:
- "realestate-crawler-api.realestate-crawler.svc.cluster.local:80"
metrics_path: '/metrics'
- job_name: 'realestate-crawler-celery'
static_configs:
- targets:
- "realestate-crawler-celery-metrics.realestate-crawler.svc.cluster.local:9090"
metrics_path: '/metrics'

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,106 @@
resource "kubernetes_secret" "pve_exporter_config" {
metadata {
name = "pve-exporter-config"
namespace = kubernetes_namespace.monitoring.metadata[0].name
}
data = {
"pve.yml" = <<-EOF
default:
user: "root@pam"
password: ${var.pve_password}
verify_ssl: false
timeout: 30
EOF
}
}
resource "kubernetes_deployment" "pve_exporter" {
metadata {
name = "proxmox-exporter"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
tier = var.tier
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "proxmox-exporter"
}
}
template {
metadata {
labels = {
app = "proxmox-exporter"
}
}
spec {
container {
name = "proxmox-exporter"
image = "prompve/prometheus-pve-exporter:latest"
port {
container_port = 9221
}
# Mount the file into the container
volume_mount {
name = "config-volume"
mount_path = "/etc/prometheus"
read_only = true
}
}
volume {
name = "config-volume"
secret {
secret_name = kubernetes_secret.pve_exporter_config.metadata[0].name
items {
key = "pve.yml"
path = "pve.yml" # This results in /etc/prometheus/pve.yml
}
}
}
}
}
}
}
resource "kubernetes_service" "proxmox-exporter" {
metadata {
name = "proxmox-exporter"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
"app" = "proxmox-exporter"
}
annotations = {
"prometheus.io/scrape" = "true"
"prometheus.io/port" = 9221
"prometheus.io/path" = "/pve"
"prometheus.io/param_target" = "192.168.1.127"
"prometheus.io/param_node" = "1"
"prometheus.io/param_cluster" = "1"
}
}
spec {
selector = {
"app" = "proxmox-exporter"
}
port {
name = "http"
port = 9221
target_port = 9221
}
}
}
# To monitor the pve node, use the node exporter and the playbook in this repo. from the root run:
# ansible-playbook -i ./playbooks/inventory.ini ./playbooks/deploy_node_exporter.yaml
# This installs the exporter binary

View file

@ -0,0 +1,51 @@
import asyncio
import logging
import os
import signal
import sys
import time
import aiohttp
iDRAC_HOST = 'idrac'
iDRAC_USER_ENV_VAR = 'idrac_user'
iDRAC_PASSWORD_ENV_VAR = 'idrac_password'
SHOULD_RUN = True
def signal_handler(sig, frame):
logging.warning(f'signal {sig} received. shutting down gracefully...')
global SHOULD_RUN
SHOULD_RUN = False
time.sleep(60)
sys.exit(0)
async def main() -> None:
# define signal handlers
signal.signal(signal.SIGINT, signal_handler)
user = os.environ.get(iDRAC_USER_ENV_VAR)
if user is None:
logging.critical('missing environment variable for idrac user'
f' please set {iDRAC_USER_ENV_VAR}')
return
password = os.environ.get(iDRAC_PASSWORD_ENV_VAR)
if password is None:
logging.critical('missing environment variable for idrac password'
f' please set {iDRAC_PASSWORD_ENV_VAR}')
return
logging.info('service initiated with credentials')
return await monitor(user, password)
async def monitor(user: str, password: str) -> None:
while SHOULD_RUN:
pass
if __name__ == '__main__':
# abandoned bc server cannot start itself when it's off :/
asyncio.run(main())

View file

@ -0,0 +1,66 @@
#!/bin/sh
tag=server-power-cycle-script
logger -t $tag start $(date '+%F-%R')
if [ -f /tmp/server-power-cycle-lock ]; then
logger -t $tag 'Script already running. exiting'
exit 0
fi
touch /tmp/server-power-cycle-lock
if [ -f /root/server-power-cycle/state.off ]; then
logger -t $tag 'Server state set to off'
while true; do
sleep 60 # sleep 1 minute
logger -t $tag 'Trying to connect to idrac system...'
curl --connect-timeout 5 -s -k -u root:calvin -H"Content-type: application/json" -X GET https://192.168.1.4/redfish/v1/Chassis/System.Embedded.1/Power/PowerSupplies/PSU.Slot.2
if [[ $? -eq 0 ]]; then
logger -t $tag "Connected to idrac, assuming power is back on"
logger -t $tag "Power supply restored, sending power on command"
curl -s -k -u root:calvin -X POST -d '{"Action": "Reset", "ResetType": "On"}' -H"Content-type: application/json" https://192.168.1.4/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset
rm /root/server-power-cycle/state.off
logger -t $tag end $(date '+%F-%R')
rm /tmp/server-power-cycle-lock
exit 0
fi
done
fi
voltage=$(curl -s -k -u root:calvin -H"Content-type: application/json" -X GET https://192.168.1.4/redfish/v1/Chassis/System.Embedded.1/Power/PowerSupplies/PSU.Slot.2 |jq .LineInputVoltage)
# check input voltage on the pwoer supply connected to the outer system
if [[ $voltage -gt 0 ]]; then
logger -t $tag "power supply is on. exiting"
logger -t $tag end $(date '+%F-%R')
rm /tmp/server-power-cycle-lock
exit 0
fi
to_wait=30
echo "Continuously checking power supply for the next $to_wait minutes"
for i in $(seq 30); do
logger -t $tag "Sleeping a minute..Minute $i"
sleep 60
# check input voltage on the pwoer supply connected to the outer system
voltage=$(curl -s -k -u root:calvin -H"Content-type: application/json" -X GET https://192.168.1.4/redfish/v1/Chassis/System.Embedded.1/Power/PowerSupplies/PSU.Slot.2 |jq .LineInputVoltage)
if [[ $voltage -gt 0 ]]; then
logger -t $tag "power supply is on. exiting"
logger -t $tag end $(date '+%F-%R')
rm /tmp/server-power-cycle-lock
exit 0
fi
done
logger -t $tag "Power supply did not come back, sending graceful shutdown signal"
curl -s -k -u root:calvin -X POST -d '{"Action": "Reset", "ResetType": "GracefulShutdown"}' -H"Content-type: application/json" https://192.168.1.4/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset
touch /root/server-power-cycle/state.off
rm /tmp/server-power-cycle-lock
logger -t $tag end $(date '+%F-%R')

View file

@ -0,0 +1,113 @@
/**
1. clone snmp exporter
2. update generator.yaml to include only interesting modules
3. make generate
4. cp snmp.yml to whereever is used
5. scrape service with curl 'http://snmp-exporter.monitoring.svc.cluster.local:9116/snmp?auth=public_v2&module=huawei&target=192.168.1.5%3A161'
generate reference - https://github.com/prometheus/snmp_exporter/tree/main/generator
https://sbcode.net/prometheus/snmp-generate-huawei/
*/
resource "kubernetes_config_map" "snmp-exporter-yaml" {
metadata {
name = "snmp-exporter-yaml"
namespace = kubernetes_namespace.monitoring.metadata[0].name
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
"snmp.yml" = file("${path.module}/ups_snmp_values.yaml")
}
}
resource "kubernetes_deployment" "snmp-exporter" {
metadata {
name = "snmp-exporter"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
app = "snmp-exporter"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "snmp-exporter"
}
}
template {
metadata {
labels = {
app = "snmp-exporter"
}
}
spec {
container {
image = "prom/snmp-exporter"
name = "snmp-exporter"
# command = ["/usr/local/bin/redfish_exporter", "--config.file", "/app/config.yml"]
port {
container_port = 9116
}
volume_mount {
name = "config-volume"
mount_path = "/etc/snmp_exporter/"
}
}
volume {
name = "config-volume"
config_map {
name = "snmp-exporter-yaml"
}
}
}
}
}
}
resource "kubernetes_service" "snmp-exporter" {
metadata {
name = "snmp-exporter"
namespace = kubernetes_namespace.monitoring.metadata[0].name
labels = {
"app" = "snmp-exporter"
}
# annotations = {
# "prometheus.io/scrape" = "true"
# "prometheus.io/path" = "/snmp?auth=Public0&target=tcp%3A%2F%2F192.%3A161"
# "prometheus.io/port" = "9116"
# }
}
spec {
selector = {
"app" = "snmp-exporter"
}
port {
name = "http"
port = "9116"
target_port = "9116"
}
}
}
module "snmp-exporter-ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.monitoring.metadata[0].name
name = "snmp-exporter"
root_domain = "viktorbarzin.lan"
tls_secret_name = var.tls_secret_name
allow_local_access_only = true
ssl_redirect = false
port = 9116
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,27 @@
# GPU container
FROM ubuntu
ENV DEBIAN_FRONTEND=noninteractive
# Install Python and pip
RUN apt-get update && \
apt-get install -y --no-install-recommends \
python3 \
python3-pip \
python3-venv
# Deps
RUN apt-get install -y ffmpeg espeak-ng
# Set a working directory
WORKDIR /app
RUN python3 -m venv audiblez && ./audiblez/bin/pip install audiblez
# RUN python3 -m venv audiblez
CMD ["/usr/bin/sleep", "86400"]
# RUN pip install audiblez
# # Default command
# CMD ["/usr/bin/sleep", "86400"]

View file

@ -0,0 +1,654 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.nvidia.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_namespace" "nvidia" {
metadata {
name = "nvidia"
labels = {
"istio-injection" : "disabled"
tier = var.tier
}
}
}
# Apply GPU taint to ensure only GPU workloads run on GPU node
resource "null_resource" "gpu_node_taint" {
provisioner "local-exec" {
command = "kubectl taint nodes k8s-node1 nvidia.com/gpu=true:NoSchedule --overwrite"
}
# Re-run if namespace changes (proxy for cluster changes)
triggers = {
namespace = kubernetes_namespace.nvidia.metadata[0].name
}
}
# [not needed anymore; part of the chart values] Apply to operator with:
# kubectl patch clusterpolicies.nvidia.com/cluster-policy -n gpu-operator --type merge -p '{"spec": {"devicePlugin": {"config": {"name": "time-slicing-config", "default": "any"}}}}'
resource "kubernetes_config_map" "time_slicing_config" {
metadata {
name = "time-slicing-config"
namespace = kubernetes_namespace.nvidia.metadata[0].name
}
data = {
any = <<-EOF
flags:
migStrategy: none
sharing:
timeSlicing:
renameByDefault: false
failRequestsGreaterThanOne: false
resources:
- name: nvidia.com/gpu
replicas: 100
EOF
}
depends_on = [kubernetes_namespace.nvidia]
}
resource "helm_release" "nvidia-gpu-operator" {
namespace = kubernetes_namespace.nvidia.metadata[0].name
name = "nvidia-gpu-operator"
repository = "https://helm.ngc.nvidia.com/nvidia"
chart = "gpu-operator"
atomic = true
# version = "0.9.3"
timeout = 6000
values = [templatefile("${path.module}/values.yaml", {})]
depends_on = [kubernetes_config_map.time_slicing_config]
}
resource "kubernetes_deployment" "nvidia-exporter" {
metadata {
name = "nvidia-exporter"
namespace = kubernetes_namespace.nvidia.metadata[0].name
labels = {
app = "nvidia-exporter"
tier = var.tier
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "nvidia-exporter"
}
}
template {
metadata {
labels = {
app = "nvidia-exporter"
}
}
spec {
node_selector = {
"gpu" : "true"
}
toleration {
key = "nvidia.com/gpu"
operator = "Equal"
value = "true"
effect = "NoSchedule"
}
container {
image = "nvidia/dcgm-exporter:latest"
name = "nvidia-exporter"
port {
container_port = 9400
}
security_context {
privileged = true
capabilities {
add = ["SYS_ADMIN"]
}
}
resources {
limits = {
"nvidia.com/gpu" = "1"
}
}
}
}
}
}
depends_on = [helm_release.nvidia-gpu-operator]
}
resource "kubernetes_service" "nvidia-exporter" {
metadata {
name = "nvidia-exporter"
namespace = kubernetes_namespace.nvidia.metadata[0].name
labels = {
"app" = "nvidia-exporter"
}
}
spec {
selector = {
app = "nvidia-exporter"
}
port {
name = "http"
port = 80
target_port = 9400
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.nvidia.metadata[0].name
name = "nvidia-exporter"
root_domain = "viktorbarzin.lan"
tls_secret_name = var.tls_secret_name
allow_local_access_only = true
ssl_redirect = false
}
# resource "kubernetes_ingress_v1" "nvidia-exporter" {
# metadata {
# name = "nvidia-exporter"
# namespace = kubernetes_namespace.nvidia.metadata[0].name
# annotations = {
# "kubernetes.io/ingress.class" = "nginx"
# "nginx.ingress.kubernetes.io/whitelist-source-range" : "192.168.1.0/24, 10.0.0.0/8"
# "nginx.ingress.kubernetes.io/ssl-redirect" : "false" # used only in LAN
# }
# }
# spec {
# tls {
# hosts = ["nvidia-exporter.viktorbarzin.lan"]
# secret_name = var.tls_secret_name
# }
# rule {
# host = "nvidia-exporter.viktorbarzin.lan"
# http {
# path {
# backend {
# service {
# name = "nvidia-exporter"
# port {
# number = 80
# }
# }
# }
# }
# }
# }
# }
# }
# resource "kubernetes_deployment" "gpu-container" {
# metadata {
# name = "gpu-container"
# namespace = kubernetes_namespace.nvidia.metadata[0].name
# labels = {
# app = "gpu-container"
# }
# }
# spec {
# replicas = 1
# selector {
# match_labels = {
# app = "gpu-container"
# }
# }
# template {
# metadata {
# labels = {
# app = "gpu-container"
# }
# }
# spec {
# node_selector = {
# "gpu" : "true"
# }
# container {
# image = "ubuntu"
# name = "gpu-container"
# command = ["/usr/bin/sleep", "3600"]
# # security_context {
# # privileged = true
# # capabilities {
# # add = ["SYS_ADMIN"]
# # }
# # }
# resources {
# limits = {
# "nvidia.com/gpu" = "1"
# }
# }
# }
# }
# }
# }
# depends_on = [helm_release.nvidia-gpu-operator]
# }
# GPU Pod Memory Exporter - exposes per-pod GPU memory usage as Prometheus metrics
resource "kubernetes_config_map" "gpu_pod_exporter_script" {
metadata {
name = "gpu-pod-exporter-script"
namespace = kubernetes_namespace.nvidia.metadata[0].name
}
data = {
"exporter.py" = <<-EOF
#!/usr/bin/env python3
"""GPU Pod Memory Exporter - Collects per-pod GPU memory usage."""
import subprocess
import time
import re
import os
import json
import urllib.request
import ssl
from http.server import HTTPServer, BaseHTTPRequestHandler
METRICS_PORT = 9401
SCRAPE_INTERVAL = 15
# Kubernetes API configuration
K8S_API = "https://kubernetes.default.svc"
TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"
CA_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
# Cache for container ID to pod info mapping
container_cache = {}
cache_refresh_time = 0
CACHE_TTL = 60 # Refresh cache every 60 seconds
def get_k8s_token():
"""Read Kubernetes service account token."""
try:
with open(TOKEN_PATH, 'r') as f:
return f.read().strip()
except:
return None
def refresh_container_cache():
"""Refresh the container ID to pod mapping from Kubernetes API."""
global container_cache, cache_refresh_time
token = get_k8s_token()
if not token:
return
try:
# Create SSL context with K8s CA
ctx = ssl.create_default_context()
if os.path.exists(CA_PATH):
ctx.load_verify_locations(CA_PATH)
# Get all pods on this node
node_name = os.environ.get('NODE_NAME', '')
url = f"{K8S_API}/api/v1/pods?fieldSelector=spec.nodeName={node_name}"
req = urllib.request.Request(url, headers={
'Authorization': f'Bearer {token}',
'Accept': 'application/json'
})
with urllib.request.urlopen(req, context=ctx, timeout=10) as resp:
data = json.loads(resp.read().decode())
new_cache = {}
for pod in data.get('items', []):
pod_name = pod['metadata']['name']
namespace = pod['metadata']['namespace']
# Get container statuses
for status in pod.get('status', {}).get('containerStatuses', []):
container_id = status.get('containerID', '')
# Extract the ID part (e.g., "containerd://abc123..." -> "abc123")
if '://' in container_id:
container_id = container_id.split('://')[-1]
if container_id:
short_id = container_id[:12]
new_cache[short_id] = {
'pod': pod_name,
'namespace': namespace,
'container': status.get('name', 'unknown')
}
container_cache = new_cache
cache_refresh_time = time.time()
print(f"Refreshed container cache: {len(new_cache)} containers")
except Exception as e:
print(f"Error refreshing container cache: {e}")
def get_pod_info(container_id):
"""Look up pod info for a container ID."""
global cache_refresh_time
# Refresh cache if stale
if time.time() - cache_refresh_time > CACHE_TTL:
refresh_container_cache()
return container_cache.get(container_id, {
'pod': 'unknown',
'namespace': 'unknown',
'container': 'unknown'
})
def get_gpu_processes():
"""Run nvidia-smi to get GPU process info."""
try:
result = subprocess.run(
["nvidia-smi", "--query-compute-apps=pid,used_memory,process_name", "--format=csv,noheader,nounits"],
capture_output=True, text=True, timeout=10
)
if result.returncode != 0:
print(f"nvidia-smi error: {result.stderr}")
return []
processes = []
for line in result.stdout.strip().split('\n'):
if not line.strip():
continue
parts = [p.strip() for p in line.split(',')]
if len(parts) >= 3:
pid, memory_mib, process_name = parts[0], parts[1], parts[2]
processes.append({
'pid': pid,
'memory_bytes': int(memory_mib) * 1024 * 1024,
'process_name': process_name
})
return processes
except Exception as e:
print(f"Error running nvidia-smi: {e}")
return []
def get_container_id(pid):
"""Map PID to container ID via cgroup."""
cgroup_path = f"/host_proc/{pid}/cgroup"
try:
with open(cgroup_path, 'r') as f:
for line in f:
# Match container ID patterns (docker, containerd, cri-o)
match = re.search(r'[:/]([a-f0-9]{64})', line)
if match:
return match.group(1)[:12]
match = re.search(r'cri-containerd-([a-f0-9]{64})', line)
if match:
return match.group(1)[:12]
except (FileNotFoundError, PermissionError):
pass
return "host"
# Global metrics storage
current_metrics = []
def collect_metrics():
"""Collect GPU memory metrics."""
global current_metrics
metrics = []
processes = get_gpu_processes()
for proc in processes:
container_id = get_container_id(proc['pid'])
pod_info = get_pod_info(container_id)
metrics.append({
'container_id': container_id,
'pid': proc['pid'],
'process_name': proc['process_name'],
'memory_bytes': proc['memory_bytes'],
'pod': pod_info['pod'],
'namespace': pod_info['namespace'],
'container': pod_info['container']
})
current_metrics = metrics
def format_metrics():
"""Format metrics in Prometheus exposition format."""
lines = [
"# HELP gpu_pod_memory_used_bytes GPU memory used by pod",
"# TYPE gpu_pod_memory_used_bytes gauge"
]
for m in current_metrics:
labels = ','.join([
f'namespace="{m["namespace"]}"',
f'pod="{m["pod"]}"',
f'container="{m["container"]}"',
f'process_name="{m["process_name"]}"',
f'pid="{m["pid"]}"'
])
lines.append(f'gpu_pod_memory_used_bytes{{{labels}}} {m["memory_bytes"]}')
return '\n'.join(lines) + '\n'
class MetricsHandler(BaseHTTPRequestHandler):
def do_GET(self):
if self.path == '/metrics':
content = format_metrics()
self.send_response(200)
self.send_header('Content-Type', 'text/plain; charset=utf-8')
self.end_headers()
self.wfile.write(content.encode())
elif self.path == '/health':
self.send_response(200)
self.end_headers()
self.wfile.write(b'ok')
else:
self.send_response(404)
self.end_headers()
def log_message(self, format, *args):
pass # Suppress request logging
def background_collector():
"""Background thread to collect metrics periodically."""
import threading
def run():
while True:
collect_metrics()
time.sleep(SCRAPE_INTERVAL)
thread = threading.Thread(target=run, daemon=True)
thread.start()
if __name__ == '__main__':
print(f"Starting GPU Pod Memory Exporter on port {METRICS_PORT}")
refresh_container_cache() # Initial cache load
collect_metrics() # Initial collection
background_collector()
server = HTTPServer(('', METRICS_PORT), MetricsHandler)
server.serve_forever()
EOF
}
}
resource "kubernetes_service_account" "gpu_pod_exporter" {
metadata {
name = "gpu-pod-exporter"
namespace = kubernetes_namespace.nvidia.metadata[0].name
}
}
resource "kubernetes_cluster_role" "gpu_pod_exporter" {
metadata {
name = "gpu-pod-exporter"
}
rule {
api_groups = [""]
resources = ["pods"]
verbs = ["list"]
}
}
resource "kubernetes_cluster_role_binding" "gpu_pod_exporter" {
metadata {
name = "gpu-pod-exporter"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = kubernetes_cluster_role.gpu_pod_exporter.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.gpu_pod_exporter.metadata[0].name
namespace = kubernetes_namespace.nvidia.metadata[0].name
}
}
resource "kubernetes_daemonset" "gpu_pod_exporter" {
metadata {
name = "gpu-pod-exporter"
namespace = kubernetes_namespace.nvidia.metadata[0].name
labels = {
app = "gpu-pod-exporter"
tier = var.tier
}
}
spec {
selector {
match_labels = {
app = "gpu-pod-exporter"
}
}
template {
metadata {
labels = {
app = "gpu-pod-exporter"
}
}
spec {
host_pid = true
service_account_name = kubernetes_service_account.gpu_pod_exporter.metadata[0].name
node_selector = {
"gpu" : "true"
}
toleration {
key = "nvidia.com/gpu"
operator = "Equal"
value = "true"
effect = "NoSchedule"
}
container {
name = "exporter"
image = "python:3.11-slim"
command = ["/bin/bash", "-c"]
args = [
"python3 /scripts/exporter.py"
]
env {
name = "NODE_NAME"
value_from {
field_ref {
field_path = "spec.nodeName"
}
}
}
port {
container_port = 9401
name = "metrics"
}
volume_mount {
name = "scripts"
mount_path = "/scripts"
read_only = true
}
volume_mount {
name = "host-proc"
mount_path = "/host_proc"
read_only = true
}
resources {
requests = {
cpu = "50m"
memory = "128Mi"
}
limits = {
cpu = "200m"
memory = "256Mi"
"nvidia.com/gpu" = "1"
}
}
liveness_probe {
http_get {
path = "/health"
port = 9401
}
initial_delay_seconds = 30
period_seconds = 30
timeout_seconds = 5
}
}
volume {
name = "scripts"
config_map {
name = kubernetes_config_map.gpu_pod_exporter_script.metadata[0].name
default_mode = "0755"
}
}
volume {
name = "host-proc"
host_path {
path = "/proc"
type = "Directory"
}
}
}
}
}
depends_on = [helm_release.nvidia-gpu-operator]
}
resource "kubernetes_service" "gpu_pod_exporter" {
metadata {
name = "gpu-pod-exporter"
namespace = kubernetes_namespace.nvidia.metadata[0].name
labels = {
app = "gpu-pod-exporter"
}
}
spec {
selector = {
app = "gpu-pod-exporter"
}
port {
name = "metrics"
port = 80
target_port = 9401
}
}
}

View file

@ -0,0 +1,27 @@
driver:
enabled: true
# repository: nvcr.io/nvidia/driver
# choose a driver version compatible with your GPU + CUDA 12.x (example)
# NVIDIA GPU driver - https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/platform-support.html#known-issue
# https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/
# 13.x >= 580
# 12.x >= 525, <580
# 11.x >= 450, <525
#
# Delete the cluster policy before each change
# version: "575.57.08" # CUDA 12.9
version: "570.195.03" # CUDA 12.8
upgradePolicy:
autoUpgrade: false
devicePlugin:
config:
name: time-slicing-config
# Tolerate GPU node taint for all GPU operator components
daemonsets:
tolerations:
- key: "nvidia.com/gpu"
operator: "Equal"
value: "true"
effect: "NoSchedule"

View file

@ -0,0 +1,55 @@
# Configure kube-apiserver for OIDC authentication
# This SSHs to k8s-master and adds OIDC flags to the static pod manifest.
# Kubelet auto-restarts the API server when the manifest changes.
variable "k8s_master_host" {
type = string
default = "10.0.20.100"
}
variable "ssh_private_key" {
type = string
sensitive = true
}
variable "oidc_issuer_url" {
type = string
default = "https://authentik.viktorbarzin.me/application/o/kubernetes/"
}
variable "oidc_client_id" {
type = string
default = "kubernetes"
}
resource "null_resource" "apiserver_oidc_config" {
connection {
type = "ssh"
user = "wizard"
host = var.k8s_master_host
private_key = var.ssh_private_key
}
provisioner "remote-exec" {
inline = [
# Check if OIDC flags already present
"if grep -q 'oidc-issuer-url' /etc/kubernetes/manifests/kube-apiserver.yaml; then echo 'OIDC flags already configured'; exit 0; fi",
# Backup the manifest
"sudo cp /etc/kubernetes/manifests/kube-apiserver.yaml /etc/kubernetes/manifests/kube-apiserver.yaml.bak",
# Add OIDC flags after the last --tls-private-key-file flag (safe insertion point)
"sudo sed -i '/- --tls-private-key-file/a\\ - --oidc-issuer-url=${var.oidc_issuer_url}\\n - --oidc-client-id=${var.oidc_client_id}\\n - --oidc-username-claim=email\\n - --oidc-groups-claim=groups' /etc/kubernetes/manifests/kube-apiserver.yaml",
# Wait for API server to restart (kubelet watches the manifest)
"echo 'Waiting for API server to restart...'",
"sleep 30",
"sudo kubectl --kubeconfig=/etc/kubernetes/admin.conf get nodes || echo 'API server still restarting, check manually'",
]
}
triggers = {
oidc_issuer_url = var.oidc_issuer_url
oidc_client_id = var.oidc_client_id
}
}

View file

@ -0,0 +1,95 @@
# Deploy audit policy to k8s-master and configure kube-apiserver to use it.
# Audit logs are written to /var/log/kubernetes/audit.log on the master node.
# Alloy (log collector DaemonSet) will pick them up and ship to Loki.
resource "null_resource" "audit_policy" {
connection {
type = "ssh"
user = "wizard"
host = var.k8s_master_host
private_key = var.ssh_private_key
}
# Upload audit policy file
provisioner "file" {
content = yamlencode({
apiVersion = "audit.k8s.io/v1"
kind = "Policy"
rules = [
{
# Don't log requests to the API discovery endpoints (very noisy)
level = "None"
resources = [{
group = ""
resources = ["endpoints", "services", "services/status"]
}]
users = ["system:kube-proxy"]
},
{
# Don't log watch requests (very noisy)
level = "None"
verbs = ["watch"]
},
{
# Don't log health checks
level = "None"
nonResourceURLs = ["/healthz*", "/readyz*", "/livez*"]
},
{
# Log secret access at Metadata level only (no request/response bodies)
level = "Metadata"
resources = [{
group = ""
resources = ["secrets"]
}]
},
{
# Log all other mutating requests at RequestResponse level
level = "RequestResponse"
verbs = ["create", "update", "patch", "delete"]
},
{
# Log read requests at Metadata level
level = "Metadata"
verbs = ["get", "list"]
},
]
})
destination = "/tmp/audit-policy.yaml"
}
provisioner "remote-exec" {
inline = [
# Move audit policy to proper location
"sudo mkdir -p /etc/kubernetes/policies",
"sudo mv /tmp/audit-policy.yaml /etc/kubernetes/policies/audit-policy.yaml",
"sudo chown root:root /etc/kubernetes/policies/audit-policy.yaml",
# Create audit log directory
"sudo mkdir -p /var/log/kubernetes",
# Check if audit flags already present
"if grep -q 'audit-policy-file' /etc/kubernetes/manifests/kube-apiserver.yaml; then echo 'Audit flags already configured'; exit 0; fi",
# Add audit flags to kube-apiserver manifest
"sudo sed -i '/- --oidc-groups-claim/a\\ - --audit-policy-file=/etc/kubernetes/policies/audit-policy.yaml\\n - --audit-log-path=/var/log/kubernetes/audit.log\\n - --audit-log-maxage=7\\n - --audit-log-maxbackup=3\\n - --audit-log-maxsize=100' /etc/kubernetes/manifests/kube-apiserver.yaml",
# Add volume mount for audit policy (hostPath)
# The kube-apiserver pod needs access to the policy file and log directory
"sudo sed -i '/volumes:/a\\ - hostPath:\\n path: /etc/kubernetes/policies\\n type: DirectoryOrCreate\\n name: audit-policy\\n - hostPath:\\n path: /var/log/kubernetes\\n type: DirectoryOrCreate\\n name: audit-log' /etc/kubernetes/manifests/kube-apiserver.yaml",
"sudo sed -i '/volumeMounts:/a\\ - mountPath: /etc/kubernetes/policies\\n name: audit-policy\\n readOnly: true\\n - mountPath: /var/log/kubernetes\\n name: audit-log' /etc/kubernetes/manifests/kube-apiserver.yaml",
# Wait for API server to restart
"echo 'Waiting for API server to restart with audit logging...'",
"sleep 30",
"sudo kubectl --kubeconfig=/etc/kubernetes/admin.conf get nodes || echo 'API server still restarting'",
]
}
triggers = {
policy_version = "v1" # Bump to re-apply
}
depends_on = [null_resource.apiserver_oidc_config]
}

View file

@ -0,0 +1,252 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "k8s_users" {
type = map(object({
role = string # "admin", "power-user", "namespace-owner"
email = string # OIDC email claim
namespaces = optional(list(string), []) # for namespace-owners
quota = optional(object({
cpu_requests = optional(string, "2")
memory_requests = optional(string, "4Gi")
cpu_limits = optional(string, "4")
memory_limits = optional(string, "8Gi")
pods = optional(string, "20")
}), {})
}))
default = {}
}
# --- Admin role ---
# Binds to built-in cluster-admin ClusterRole
resource "kubernetes_cluster_role_binding" "admin_users" {
for_each = { for name, user in var.k8s_users : name => user if user.role == "admin" }
metadata {
name = "oidc-admin-${each.key}"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "cluster-admin"
}
subject {
kind = "User"
name = each.value.email
api_group = "rbac.authorization.k8s.io"
}
}
# --- Power-user role ---
# Can manage workloads cluster-wide but cannot modify RBAC, nodes, or persistent volumes
resource "kubernetes_cluster_role" "power_user" {
metadata {
name = "oidc-power-user"
}
# Core resources
rule {
api_groups = [""]
resources = ["pods", "pods/log", "pods/exec", "services", "endpoints", "configmaps", "secrets", "persistentvolumeclaims", "events", "namespaces"]
verbs = ["get", "list", "watch"]
}
rule {
api_groups = [""]
resources = ["pods", "services", "configmaps", "secrets", "persistentvolumeclaims"]
verbs = ["create", "update", "patch", "delete"]
}
# Apps
rule {
api_groups = ["apps"]
resources = ["deployments", "statefulsets", "daemonsets", "replicasets"]
verbs = ["get", "list", "watch", "create", "update", "patch", "delete"]
}
# Batch
rule {
api_groups = ["batch"]
resources = ["jobs", "cronjobs"]
verbs = ["get", "list", "watch", "create", "update", "patch", "delete"]
}
# Networking
rule {
api_groups = ["networking.k8s.io"]
resources = ["ingresses", "networkpolicies"]
verbs = ["get", "list", "watch", "create", "update", "patch", "delete"]
}
# Autoscaling
rule {
api_groups = ["autoscaling"]
resources = ["horizontalpodautoscalers"]
verbs = ["get", "list", "watch", "create", "update", "patch", "delete"]
}
# Read-only on cluster-level resources
rule {
api_groups = [""]
resources = ["nodes"]
verbs = ["get", "list", "watch"]
}
rule {
api_groups = ["storage.k8s.io"]
resources = ["storageclasses"]
verbs = ["get", "list", "watch"]
}
rule {
api_groups = ["rbac.authorization.k8s.io"]
resources = ["clusterroles", "clusterrolebindings", "roles", "rolebindings"]
verbs = ["get", "list", "watch"]
}
}
resource "kubernetes_cluster_role_binding" "power_users" {
for_each = { for name, user in var.k8s_users : name => user if user.role == "power-user" }
metadata {
name = "oidc-power-user-${each.key}"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = kubernetes_cluster_role.power_user.metadata[0].name
}
subject {
kind = "User"
name = each.value.email
api_group = "rbac.authorization.k8s.io"
}
}
# --- Namespace-owner role ---
# Full admin within assigned namespaces + read-only cluster-wide
locals {
# Flatten user->namespace pairs for iteration
namespace_owner_pairs = flatten([
for name, user in var.k8s_users : [
for ns in user.namespaces : {
user_key = name
namespace = ns
email = user.email
quota = user.quota
}
] if user.role == "namespace-owner"
])
}
resource "kubernetes_role_binding" "namespace_owner" {
for_each = { for pair in local.namespace_owner_pairs : "${pair.user_key}-${pair.namespace}" => pair }
metadata {
name = "namespace-owner-${each.value.user_key}"
namespace = each.value.namespace
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "admin" # Built-in ClusterRole with full namespace access
}
subject {
kind = "User"
name = each.value.email
api_group = "rbac.authorization.k8s.io"
}
}
# Read-only cluster-wide access for namespace owners
resource "kubernetes_cluster_role" "namespace_owner_readonly" {
metadata {
name = "oidc-namespace-owner-readonly"
}
rule {
api_groups = [""]
resources = ["namespaces", "nodes"]
verbs = ["get", "list", "watch"]
}
rule {
api_groups = [""]
resources = ["pods", "services", "configmaps", "events"]
verbs = ["get", "list", "watch"]
}
rule {
api_groups = ["apps"]
resources = ["deployments", "statefulsets", "daemonsets"]
verbs = ["get", "list", "watch"]
}
}
resource "kubernetes_cluster_role_binding" "namespace_owner_readonly" {
for_each = { for name, user in var.k8s_users : name => user if user.role == "namespace-owner" }
metadata {
name = "oidc-ns-owner-readonly-${each.key}"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = kubernetes_cluster_role.namespace_owner_readonly.metadata[0].name
}
subject {
kind = "User"
name = each.value.email
api_group = "rbac.authorization.k8s.io"
}
}
# Resource quotas per user namespace
resource "kubernetes_resource_quota" "user_namespace_quota" {
for_each = { for pair in local.namespace_owner_pairs : "${pair.user_key}-${pair.namespace}" => pair }
metadata {
name = "user-quota"
namespace = each.value.namespace
}
spec {
hard = {
"requests.cpu" = each.value.quota.cpu_requests
"requests.memory" = each.value.quota.memory_requests
"limits.cpu" = each.value.quota.cpu_limits
"limits.memory" = each.value.quota.memory_limits
"pods" = each.value.quota.pods
}
}
depends_on = [kubernetes_role_binding.namespace_owner]
}
# ConfigMap with user-role mapping for the self-service portal
resource "kubernetes_config_map" "user_roles" {
metadata {
name = "k8s-user-roles"
namespace = "k8s-portal"
}
data = {
"users.json" = jsonencode({
for name, user in var.k8s_users : user.email => {
role = user.role
namespaces = user.namespaces
}
})
}
}

View file

@ -0,0 +1,104 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
resource "kubernetes_namespace" "redis" {
metadata {
name = "redis"
labels = {
tier = var.tier
}
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.redis.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "redis" {
metadata {
name = "redis"
namespace = kubernetes_namespace.redis.metadata[0].name
labels = {
app = "redis"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
strategy {
type = "Recreate"
}
selector {
match_labels = {
app = "redis"
}
}
template {
metadata {
labels = {
app = "redis"
}
}
spec {
container {
image = "redis/redis-stack:latest"
name = "redis"
port {
container_port = 6379
}
port {
container_port = 8001
}
volume_mount {
name = "data"
mount_path = "/data"
}
}
volume {
name = "data"
nfs {
path = "/mnt/main/redis"
server = "10.0.10.15"
}
}
}
}
}
}
resource "kubernetes_service" "redis" {
metadata {
name = "redis"
namespace = kubernetes_namespace.redis.metadata[0].name
labels = {
app = "redis"
}
}
spec {
selector = {
app = "redis"
}
port {
name = "redis"
port = 6379
}
port {
name = "http"
port = 8001
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.redis.metadata[0].name
name = "redis"
tls_secret_name = var.tls_secret_name
protected = true
port = 8001
}

View file

@ -0,0 +1,163 @@
variable "name" {}
variable "namespace" {
default = "reverse-proxy"
}
variable "external_name" {}
variable "port" {
default = "80"
}
variable "tls_secret_name" {}
variable "backend_protocol" {
default = "HTTP"
}
variable "protected" {
type = bool
default = true
}
variable "ingress_path" {
type = list(string)
default = ["/"]
}
variable "max_body_size" {
type = string
default = "50m"
}
variable "extra_annotations" {
default = {}
}
variable "rybbit_site_id" {
default = null
type = string
}
variable "custom_content_security_policy" {
default = null
type = string
}
variable "strip_auth_headers" {
type = bool
default = false
}
variable "extra_middlewares" {
type = list(string)
default = []
}
resource "kubernetes_service" "proxied-service" {
metadata {
name = var.name
namespace = var.namespace
labels = {
"app" = var.name
}
}
spec {
type = "ExternalName"
external_name = var.external_name
port {
name = var.backend_protocol == "HTTPS" ? "https-${var.name}" : "${var.name}-web"
port = var.port
protocol = "TCP"
target_port = var.port
}
}
}
resource "kubernetes_ingress_v1" "proxied-ingress" {
metadata {
name = var.name
namespace = var.namespace
annotations = merge({
"traefik.ingress.kubernetes.io/router.middlewares" = join(",", compact(concat([
"traefik-rate-limit@kubernetescrd",
var.custom_content_security_policy == null ? "traefik-csp-headers@kubernetescrd" : null,
"traefik-crowdsec@kubernetescrd",
var.protected ? "traefik-authentik-forward-auth@kubernetescrd" : null,
var.strip_auth_headers ? "traefik-strip-auth-headers@kubernetescrd" : null,
var.rybbit_site_id != null ? "traefik-strip-accept-encoding@kubernetescrd" : null,
var.rybbit_site_id != null ? "${var.namespace}-rybbit-analytics-${var.name}@kubernetescrd" : null,
var.custom_content_security_policy != null ? "${var.namespace}-custom-csp-${var.name}@kubernetescrd" : null,
], var.extra_middlewares)))
"traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
"traefik.ingress.kubernetes.io/service.serversscheme" = var.backend_protocol == "HTTPS" ? "https" : null
"traefik.ingress.kubernetes.io/service.serverstransport" = var.backend_protocol == "HTTPS" ? "traefik-insecure-skip-verify@kubernetescrd" : null
}, var.extra_annotations)
}
spec {
ingress_class_name = "traefik"
tls {
hosts = ["${var.name}.viktorbarzin.me"]
secret_name = var.tls_secret_name
}
rule {
host = "${var.name}.viktorbarzin.me"
http {
dynamic "path" {
for_each = var.ingress_path
content {
path = path.value
backend {
service {
name = var.name
port {
number = var.port
}
}
}
}
}
}
}
}
}
# Rybbit analytics middleware (rewrite-body plugin with content-type filtering) - created per service when rybbit_site_id is set
resource "kubernetes_manifest" "rybbit_analytics" {
count = var.rybbit_site_id != null ? 1 : 0
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "rybbit-analytics-${var.name}"
namespace = var.namespace
}
spec = {
plugin = {
rewrite-body = {
rewrites = [{
regex = "</head>"
replacement = "<script src=\"https://rybbit.viktorbarzin.me/api/script.js\" data-site-id=\"${var.rybbit_site_id}\" defer></script></head>"
}]
monitoring = {
types = ["text/html"]
}
}
}
}
}
}
# Custom CSP headers middleware - created per service when custom_content_security_policy is set
resource "kubernetes_manifest" "custom_csp" {
count = var.custom_content_security_policy != null ? 1 : 0
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "custom-csp-${var.name}"
namespace = var.namespace
}
spec = {
headers = {
contentSecurityPolicy = var.custom_content_security_policy
}
}
}
}

View file

@ -0,0 +1,295 @@
# Reverse proxy for things in my infra that are
# outside of K8S but would be nice to use the Nginx-ingress
variable "tls_secret_name" {}
variable "truenas_homepage_token" {}
variable "pfsense_homepage_token" {}
resource "kubernetes_namespace" "reverse-proxy" {
metadata {
name = "reverse-proxy"
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = "reverse-proxy"
tls_secret_name = var.tls_secret_name
depends_on = [kubernetes_namespace.reverse-proxy]
}
# https://pfsense.viktorbarzin.me/
module "pfsense" {
source = "./factory"
name = "pfsense"
external_name = "pfsense.viktorbarzin.lan"
tls_secret_name = var.tls_secret_name
port = 443
backend_protocol = "HTTPS"
extra_annotations = {
"gethomepage.dev/enabled" : "true"
"gethomepage.dev/description" : "Cluster Firewall"
# gethomepage.dev/group: Media
"gethomepage.dev/icon" : "pfsense.png"
"gethomepage.dev/name" : "pFsense"
"gethomepage.dev/widget.type" : "pfsense"
"gethomepage.dev/widget.version" : "2"
"gethomepage.dev/widget.url" : "https://10.0.20.1"
# "gethomepage.dev/widget.token" = var.homepage_token
"gethomepage.dev/widget.username" : "admin"
"gethomepage.dev/widget.password" : var.pfsense_homepage_token
"gethomepage.dev/widget.fields" = "[\"load\", \"memory\", \"wanStatus\", \"disk\"]"
"gethomepage.dev/widget.wan" = "vmx0"
# "gethomepage.dev/pod-selector" : ""
}
depends_on = [kubernetes_namespace.reverse-proxy]
rybbit_site_id = "b029580e5a7c"
}
# https://nas.viktorbarzin.me/
module "nas" {
source = "./factory"
name = "nas"
external_name = "nas.viktorbarzin.lan"
port = 5001
tls_secret_name = var.tls_secret_name
backend_protocol = "HTTPS"
max_body_size = "0m"
depends_on = [kubernetes_namespace.reverse-proxy]
rybbit_site_id = "1e11f8449f7d"
}
# https://files.viktorbarzin.me/
module "nas-files" {
source = "./factory"
name = "files"
external_name = "nas.viktorbarzin.lan"
port = 5001
tls_secret_name = var.tls_secret_name
backend_protocol = "HTTPS"
protected = false # allow anyone to download files
ingress_path = ["/sharing", "/scripts", "/webman", "/wfmlogindialog.js", "/fsdownload"]
max_body_size = "0m"
depends_on = [kubernetes_namespace.reverse-proxy]
}
# https://idrac.viktorbarzin.me/
module "idrac" {
source = "./factory"
name = "idrac"
external_name = "idrac.viktorbarzin.lan"
port = 443
tls_secret_name = var.tls_secret_name
backend_protocol = "HTTPS"
strip_auth_headers = true
extra_annotations = {}
depends_on = [kubernetes_namespace.reverse-proxy]
}
# Can either listen on https or http; can't do both :/
# TODO: Not working yet
module "tp-link-gateway" {
source = "./factory"
name = "gw"
external_name = "gw.viktorbarzin.lan"
port = 443
tls_secret_name = var.tls_secret_name
backend_protocol = "HTTPS"
depends_on = [kubernetes_namespace.reverse-proxy]
protected = true
strip_auth_headers = true
extra_annotations = {}
}
# https://truenas.viktorbarzin.me/
module "truenas" {
source = "./factory"
name = "truenas"
external_name = "truenas.viktorbarzin.lan"
port = 80
tls_secret_name = var.tls_secret_name
max_body_size = "0m"
extra_annotations = {
"gethomepage.dev/enabled" : "true"
"gethomepage.dev/description" : "TrueNAS"
# gethomepage.dev/group: Media
"gethomepage.dev/icon" : "truenas.png"
"gethomepage.dev/name" : "TrueNAS"
"gethomepage.dev/widget.type" : "truenas"
"gethomepage.dev/widget.url" : "https://truenas.viktorbarzin.lan"
"gethomepage.dev/widget.key" : var.truenas_homepage_token
# "gethomepage.dev/widget.enablePools" : "true"
# "gethomepage.dev/pod-selector" : ""
}
depends_on = [kubernetes_namespace.reverse-proxy]
rybbit_site_id = "b66fbd3cb58a"
}
# https://r730.viktorbarzin.me/
module "r730" {
source = "./factory"
name = "r730"
external_name = "r730.viktorbarzin.lan"
port = 443
tls_secret_name = var.tls_secret_name
backend_protocol = "HTTPS"
depends_on = [kubernetes_namespace.reverse-proxy]
}
# https://proxmox.viktorbarzin.me/
module "proxmox" {
source = "./factory"
name = "proxmox"
external_name = "proxmox.viktorbarzin.lan"
port = 8006
tls_secret_name = var.tls_secret_name
backend_protocol = "HTTPS"
max_body_size = "0" # unlimited
depends_on = [kubernetes_namespace.reverse-proxy]
rybbit_site_id = "190a7ad3e1c7"
}
# https://registry.viktorbarzin.me/
module "docker-registry-ui" {
source = "./factory"
name = "registry"
external_name = "docker-registry.viktorbarzin.lan"
port = 8080
tls_secret_name = var.tls_secret_name
depends_on = [kubernetes_namespace.reverse-proxy]
extra_annotations = {
# Override middleware chain to remove rate-limit; the UI fires many API calls to list repos/tags
"traefik.ingress.kubernetes.io/router.middlewares" = "traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd,traefik-authentik-forward-auth@kubernetescrd"
}
}
# https://valchedrym.viktorbarzin.me/
module "valchedrym" {
source = "./factory"
name = "valchedrym"
external_name = "valchedrym.viktorbarzin.lan"
tls_secret_name = var.tls_secret_name
port = 80
backend_protocol = "HTTP"
depends_on = [kubernetes_namespace.reverse-proxy]
}
# https://ip150.viktorbarzin.me/
# Server has funky behaviour based on headers; works on some browrsers not others...
# module "valchedrym-ip150" {
# source = "./factory"
# name = "ip150"
# # external_name = "valchedrym.ddns.net"
# external_name = "192.168.0.10"
# port = 80
# backend_protocol = "HTTP"
# use_proxy_protocol = false
# tls_secret_name = var.tls_secret_name
# protected = false
# depends_on = [kubernetes_namespace.reverse-proxy]
# }
# https://mladost3.viktorbarzin.me/
module "mladost3" {
source = "./factory"
name = "mladost3"
external_name = "mladost3.ddns.net"
port = 8080
tls_secret_name = var.tls_secret_name
depends_on = [kubernetes_namespace.reverse-proxy]
}
# # https://server-switch.viktorbarzin.me/
# module "server-switch" {
# source = "./factory"
# name = "server-switch"
# external_name = "server-switch.viktorbarzin.lan"
# port = 80
# tls_secret_name = var.tls_secret_name
# depends_on = [kubernetes_namespace.reverse-proxy]
# }
# https://ha-sofia.viktorbarzin.me/
module "ha-sofia" {
source = "./factory"
name = "ha-sofia"
external_name = "ha-sofia.viktorbarzin.lan"
port = 8123
tls_secret_name = var.tls_secret_name
depends_on = [kubernetes_namespace.reverse-proxy]
protected = false
rybbit_site_id = "590fc392690a"
}
# https://ha-london.viktorbarzin.me/
module "ha-london" {
source = "./factory"
name = "ha-london"
external_name = "ha-london.viktorbarzin.lan"
port = 8123
tls_secret_name = var.tls_secret_name
depends_on = [kubernetes_namespace.reverse-proxy]
protected = false
}
# https://london.viktorbarzin.me/
module "london" {
source = "./factory"
name = "london"
external_name = "openwrt-london.viktorbarzin.lan"
port = 443
tls_secret_name = var.tls_secret_name
backend_protocol = "HTTPS"
protected = true
depends_on = [kubernetes_namespace.reverse-proxy]
extra_annotations = {
"gethomepage.dev/enabled" : "false"
"gethomepage.dev/description" : "OpenWRT London"
# gethomepage.dev/group: Media
"gethomepage.dev/icon" : "openwrt.png"
"gethomepage.dev/name" : "OpenWRT London"
"gethomepage.dev/widget.type" : "openwrt"
"gethomepage.dev/widget.url" : "https://100.64.0.14"
# "gethomepage.dev/widget.token" = var.homepage_token
"gethomepage.dev/widget.username" : "homepage"
"gethomepage.dev/widget.password" : "" # add later as Flint2's openwrt is a little odd
"gethomepage.dev/pod-selector" : ""
}
}
module "pi-lights" {
source = "./factory"
name = "pi"
external_name = "ha-london.viktorbarzin.lan"
port = 5000
tls_secret_name = var.tls_secret_name
protected = true
depends_on = [kubernetes_namespace.reverse-proxy]
}
# module "ups" { # .NET app doesn't work well behind host
# source = "./factory"
# name = "ups"
# external_name = "ups.viktorbarzin.lan"
# backend_protocol = "HTTPS"
# port = 443
# tls_secret_name = var.tls_secret_name
# # protected = true
# protected = false
# depends_on = [kubernetes_namespace.reverse-proxy]
# extra_annotations = {
# "nginx.ingress.kubernetes.io/upstream-vhost" : "",
# # "nginx.ingress.kubernetes.io/proxy-set-header" : "Host: <>",
# }
# }
module "mbp14" {
source = "./factory"
name = "mbp14"
external_name = "mbp14.viktorbarzin.lan"
port = 4020
tls_secret_name = var.tls_secret_name
protected = true
depends_on = [kubernetes_namespace.reverse-proxy]
}

View file

@ -0,0 +1,308 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "homepage_token" {}
variable "technitium_db_password" {}
resource "kubernetes_namespace" "technitium" {
metadata {
name = "technitium"
labels = {
tier = var.tier
}
# stale cache error when trying to resolve
# labels = {
# "istio-injection" : "enabled"
# }
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.technitium.metadata[0].name
tls_secret_name = var.tls_secret_name
}
# CoreDNS Corefile - manages cluster DNS resolution
# The viktorbarzin.lan block forwards to Technitium via LoadBalancer.
# A template regex in the viktorbarzin.lan block short-circuits junk queries
# caused by ndots:5 search domain expansion (e.g. www.cloudflare.com.viktorbarzin.lan,
# redis.redis.svc.cluster.local.viktorbarzin.lan) by returning NXDOMAIN for any
# query with 2+ labels before .viktorbarzin.lan. Legitimate single-label queries
# (e.g. idrac.viktorbarzin.lan) fall through to Technitium.
resource "kubernetes_config_map" "coredns" {
metadata {
name = "coredns"
namespace = "kube-system"
}
data = {
Corefile = <<-EOF
.:53 {
#log
errors
health {
lameduck 5s
}
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
prometheus :9153
#forward . 1.1.1.1
forward . 10.0.20.1
#forward . /etc/resolv.conf
cache {
success 10000 300 6
denial 10000 300 60
}
loop
reload
loadbalance
}
viktorbarzin.lan:53 {
#log
errors
template ANY ANY viktorbarzin.lan {
match ".*\..*\.viktorbarzin\.lan\.$"
rcode NXDOMAIN
fallthrough
}
forward . 10.0.20.204 # Technitium LoadBalancer
cache {
success 10000 300 6
denial 10000 300 60
}
}
EOF
}
}
resource "kubernetes_deployment" "technitium" {
# resource "kubernetes_daemonset" "technitium" {
metadata {
name = "technitium"
namespace = kubernetes_namespace.technitium.metadata[0].name
labels = {
app = "technitium"
tier = var.tier
}
}
spec {
strategy {
type = "Recreate"
}
# replicas = 1
selector {
match_labels = {
app = "technitium"
}
}
template {
metadata {
annotations = {
"diun.enable" = "false"
# "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
"diun.include_tags" = "latest"
}
labels = {
app = "technitium"
}
}
spec {
# Prefer nodes running Traefik for network locality
affinity {
pod_affinity {
preferred_during_scheduling_ignored_during_execution {
weight = 100
pod_affinity_term {
label_selector {
match_expressions {
key = "app.kubernetes.io/name"
operator = "In"
values = ["traefik"]
}
}
topology_key = "kubernetes.io/hostname"
}
}
}
}
container {
image = "technitium/dns-server:latest"
name = "technitium"
resources {
# limits = {
# cpu = "1"
# memory = "1Gi"
# }
# requests = {
# cpu = "1"
# memory = "1Gi"
# }
}
port {
container_port = 5380
}
port {
container_port = 53
}
port {
container_port = 80
}
volume_mount {
mount_path = "/etc/dns"
name = "nfs-config"
}
volume_mount {
mount_path = "/etc/tls/"
name = "tls-cert"
}
}
volume {
name = "nfs-config"
nfs {
path = "/mnt/main/technitium"
server = "10.0.10.15"
}
}
volume {
name = "tls-cert"
secret {
secret_name = var.tls_secret_name
}
}
}
}
}
}
resource "kubernetes_service" "technitium-web" {
metadata {
name = "technitium-web"
namespace = kubernetes_namespace.technitium.metadata[0].name
labels = {
"app" = "technitium"
}
# annotations = {
# "metallb.universe.tf/allow-shared-ip" : "shared"
# }
}
spec {
# type = "LoadBalancer"
# external_traffic_policy = "Cluster"
selector = {
app = "technitium"
}
port {
name = "technitium-dns"
port = "5380"
protocol = "TCP"
}
port {
name = "technitium-doh"
port = "80"
protocol = "TCP"
}
}
}
resource "kubernetes_service" "technitium-dns" {
metadata {
name = "technitium-dns"
namespace = kubernetes_namespace.technitium.metadata[0].name
labels = {
"app" = "technitium"
}
}
spec {
type = "LoadBalancer"
port {
name = "technitium-dns"
port = 53
protocol = "UDP"
}
external_traffic_policy = "Local"
selector = {
app = "technitium"
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.technitium.metadata[0].name
name = "technitium"
tls_secret_name = var.tls_secret_name
port = 5380
service_name = "technitium-web"
extra_annotations = {
"gethomepage.dev/enabled" = "true"
"gethomepage.dev/description" = "Internal DNS Server and Recursive Resolver"
# gethomepage.dev/group: Media
"gethomepage.dev/icon" : "technitium.png"
"gethomepage.dev/name" = "Technitium"
"gethomepage.dev/widget.type" = "technitium"
"gethomepage.dev/widget.url" = "http://technitium-web.technitium.svc.cluster.local:5380"
"gethomepage.dev/widget.key" = var.homepage_token
"gethomepage.dev/widget.range" = "LastWeek"
"gethomepage.dev/widget.fields" = "[\"totalQueries\", \"totalCached\", \"totalBlocked\", \"totalRecursive\"]"
"gethomepage.dev/pod-selector" = ""
}
}
module "ingress-doh" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.technitium.metadata[0].name
name = "technitium-doh"
tls_secret_name = var.tls_secret_name
host = "dns"
service_name = "technitium-web"
}
# Grafana datasource for Technitium DNS query logs in MySQL
resource "kubernetes_config_map" "grafana_technitium_datasource" {
metadata {
name = "grafana-technitium-datasource"
namespace = "monitoring"
labels = {
grafana_datasource = "1"
}
}
data = {
"technitium-datasource.yaml" = yamlencode({
apiVersion = 1
datasources = [{
name = "Technitium MySQL"
type = "mysql"
access = "proxy"
url = "mysql.dbaas.svc.cluster.local:3306"
database = "technitium"
user = "technitium"
uid = "technitium-mysql"
secureJsonData = {
password = var.technitium_db_password
}
}]
})
}
}
# Grafana dashboard for Technitium DNS query logs
resource "kubernetes_config_map" "grafana_technitium_dashboard" {
metadata {
name = "grafana-technitium-dashboard"
namespace = "monitoring"
labels = {
grafana_dashboard = "1"
}
}
data = {
"technitium-dns.json" = file("${path.module}/../monitoring/dashboards/technitium-dns.json")
}
}

View file

@ -0,0 +1,244 @@
variable "tier" { type = string }
variable "crowdsec_api_key" { type = string }
variable "tls_secret_name" {}
resource "kubernetes_namespace" "traefik" {
metadata {
name = "traefik"
labels = {
"app.kubernetes.io/name" = "traefik"
"app.kubernetes.io/instance" = "traefik"
tier = var.tier
}
}
}
resource "helm_release" "traefik" {
namespace = kubernetes_namespace.traefik.metadata[0].name
create_namespace = false
name = "traefik"
repository = "https://traefik.github.io/charts"
chart = "traefik"
atomic = true
timeout = 600
values = [yamlencode({
deployment = {
replicas = 3
podAnnotations = {
"diun.enable" = "true"
"diun.include_tags" = "^v\\d+(?:\\.\\d+)?(?:\\.\\d+)?.*$"
}
initContainers = [{
name = "download-plugins"
image = "alpine:3"
command = ["sh", "-c", join("", [
"set -e; ",
"STORAGE=/plugins-storage; ",
"mkdir -p \"$STORAGE/archives/github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin\"; ",
"mkdir -p \"$STORAGE/archives/github.com/packruler/rewrite-body\"; ",
"wget -q -T 30 -O \"$STORAGE/archives/github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin/v1.4.2.zip\" ",
"\"https://github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin/archive/refs/tags/v1.4.2.zip\"; ",
"wget -q -T 30 -O \"$STORAGE/archives/github.com/packruler/rewrite-body/v1.2.0.zip\" ",
"\"https://github.com/packruler/rewrite-body/archive/refs/tags/v1.2.0.zip\"; ",
"printf '{\"github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin\":\"v1.4.2\",\"github.com/packruler/rewrite-body\":\"v1.2.0\"}' ",
"> \"$STORAGE/archives/state.json\"; ",
"echo \"Plugins pre-downloaded successfully\"",
])]
volumeMounts = [{
name = "plugins"
mountPath = "/plugins-storage"
}]
}]
}
updateStrategy = {
type = "RollingUpdate"
rollingUpdate = {
maxUnavailable = 1
maxSurge = 2
}
}
ingressClass = {
enabled = true
isDefaultClass = true
}
providers = {
kubernetesIngress = {
enabled = true
allowExternalNameServices = true
publishedService = { enabled = true }
}
kubernetesCRD = {
enabled = true
allowExternalNameServices = true
allowCrossNamespace = true
}
}
# Enable dashboard API (accessible on port 8080 internally)
api = {
insecure = true
}
# Entrypoints
ports = {
web = {
port = 8000
exposedPort = 80
protocol = "TCP"
http = {
redirections = {
entryPoint = {
to = "websecure"
scheme = "https"
}
}
}
}
websecure = {
port = 8443
exposedPort = 443
protocol = "TCP"
http = {
tls = {
enabled = true
}
}
http3 = {
enabled = true
advertisedPort = 443
}
}
whisper-tcp = {
port = 10300
exposedPort = 10300
protocol = "TCP"
expose = { default = true }
}
piper-tcp = {
port = 10200
exposedPort = 10200
protocol = "TCP"
expose = { default = true }
}
ollama-tcp = {
port = 11434
exposedPort = 11434
protocol = "TCP"
expose = { default = true }
}
}
service = {
type = "LoadBalancer"
annotations = {
"metallb.universe.tf/loadBalancerIPs" = "10.0.20.202"
}
spec = {
externalTrafficPolicy = "Local"
}
}
# Plugins
experimental = {
plugins = {
crowdsec-bouncer = {
moduleName = "github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin"
version = "v1.4.2"
}
rewrite-body = {
moduleName = "github.com/packruler/rewrite-body"
version = "v1.2.0"
}
}
}
# Prometheus metrics
metrics = {
prometheus = {
entryPoint = "metrics"
addEntryPointsLabels = true
addServicesLabels = true
addRoutersLabels = true
}
}
# Access logs
logs = {
access = {
enabled = true
}
}
additionalArguments = [
"--api.insecure=true",
"--global.checknewversion=false",
"--global.sendanonymoususage=false",
# Skip TLS verification for self-signed backend certs (proxmox, idrac, etc.)
"--serversTransport.insecureSkipVerify=true",
# Increase timeouts for services like Immich
"--serversTransport.forwardingTimeouts.dialTimeout=60s",
"--serversTransport.forwardingTimeouts.responseHeaderTimeout=0s",
"--serversTransport.forwardingTimeouts.idleConnTimeout=90s",
# Use forwarded headers from trusted proxies
"--entryPoints.websecure.forwardedHeaders.insecure=true",
"--entryPoints.web.forwardedHeaders.insecure=true",
]
resources = {
requests = {
cpu = "100m"
memory = "128Mi"
}
}
nodeSelector = {
"kubernetes.io/os" = "linux"
}
tolerations = []
})]
}
# Dashboard resources
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.traefik.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_service" "traefik_dashboard" {
metadata {
name = "traefik-dashboard"
namespace = kubernetes_namespace.traefik.metadata[0].name
labels = {
"app" = "traefik-dashboard"
}
}
spec {
selector = {
"app.kubernetes.io/name" = "traefik"
}
port {
name = "http"
port = 8080
target_port = 8080
protocol = "TCP"
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.traefik.metadata[0].name
name = "traefik"
service_name = "traefik-dashboard"
host = "traefik"
port = 8080
tls_secret_name = var.tls_secret_name
protected = true
}

View file

@ -0,0 +1,243 @@
# Shared Traefik Middleware CRDs
# These are referenced by ingress resources via annotations like:
# "traefik.ingress.kubernetes.io/router.middlewares" = "traefik-rate-limit@kubernetescrd"
# Rate limiting middleware
resource "kubernetes_manifest" "middleware_rate_limit" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "rate-limit"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
rateLimit = {
average = 5
burst = 250
}
}
}
depends_on = [helm_release.traefik]
}
# Authentik forward auth middleware
resource "kubernetes_manifest" "middleware_authentik_forward_auth" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "authentik-forward-auth"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
forwardAuth = {
address = "http://ak-outpost-authentik-embedded-outpost.authentik.svc.cluster.local:9000/outpost.goauthentik.io/auth/traefik"
trustForwardHeader = true
authResponseHeaders = [
"X-authentik-username",
"X-authentik-uid",
"X-authentik-email",
"X-authentik-name",
"X-authentik-groups",
"Set-Cookie",
]
}
}
}
depends_on = [helm_release.traefik]
}
# IP allowlist for local-only access
resource "kubernetes_manifest" "middleware_local_only" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "local-only"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
ipAllowList = {
sourceRange = [
"192.168.1.0/24",
"10.0.0.0/8",
"fc00::/7",
"fe80::/10",
]
}
}
}
depends_on = [helm_release.traefik]
}
# HTTPS redirect middleware
resource "kubernetes_manifest" "middleware_redirect_https" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "redirect-https"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
redirectScheme = {
scheme = "https"
permanent = true
}
}
}
depends_on = [helm_release.traefik]
}
# CSP headers middleware (default)
resource "kubernetes_manifest" "middleware_csp_headers" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "csp-headers"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
headers = {
contentSecurityPolicy = "frame-ancestors 'self' *.viktorbarzin.me viktorbarzin.me"
}
}
}
depends_on = [helm_release.traefik]
}
# CrowdSec bouncer plugin middleware
resource "kubernetes_manifest" "middleware_crowdsec" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "crowdsec"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
plugin = {
crowdsec-bouncer = {
crowdsecLapiKey = var.crowdsec_api_key
crowdsecLapiHost = "crowdsec-service.crowdsec.svc.cluster.local:8080"
crowdsecMode = "stream"
}
}
}
}
depends_on = [helm_release.traefik]
}
# TLS option for mTLS (client certificate auth)
resource "kubernetes_manifest" "tls_option_mtls" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "TLSOption"
metadata = {
name = "mtls"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
clientAuth = {
secretNames = ["ca-secret"]
clientAuthType = "RequireAndVerifyClientCert"
}
}
}
depends_on = [helm_release.traefik]
}
# ServersTransport for backends with self-signed certificates
resource "kubernetes_manifest" "servers_transport_insecure" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "ServersTransport"
metadata = {
name = "insecure-skip-verify"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
insecureSkipVerify = true
}
}
depends_on = [helm_release.traefik]
}
# Strip Authentik auth headers/cookies before forwarding to backend
# Useful for backends (iDRAC, TP-Link) that break when receiving extra headers
resource "kubernetes_manifest" "middleware_strip_auth_headers" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "strip-auth-headers"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
headers = {
customRequestHeaders = {
"X-authentik-username" = ""
"X-authentik-uid" = ""
"X-authentik-email" = ""
"X-authentik-name" = ""
"X-authentik-groups" = ""
}
}
}
}
depends_on = [helm_release.traefik]
}
# Immich-specific rate limit (higher limits for photo uploads)
resource "kubernetes_manifest" "middleware_immich_rate_limit" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "immich-rate-limit"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
rateLimit = {
average = 100
burst = 1000
}
}
}
depends_on = [helm_release.traefik]
}
# Strip Accept-Encoding header so backends send uncompressed responses.
# Used alongside rewrite-body plugin (rybbit analytics) which fails to
# decompress certain gzip responses (flate: corrupt input before offset 5).
resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "strip-accept-encoding"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
headers = {
customRequestHeaders = {
"Accept-Encoding" = ""
}
}
}
}
depends_on = [helm_release.traefik]
}

View file

@ -0,0 +1,172 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
resource "kubernetes_namespace" "uptime-kuma" {
metadata {
name = "uptime-kuma"
labels = {
tier = var.tier
}
# labels = {
# "istio-injection" : "enabled"
# }
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "uptime-kuma" {
metadata {
name = "uptime-kuma"
namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
labels = {
app = "uptime-kuma"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
strategy {
type = "Recreate"
}
selector {
match_labels = {
app = "uptime-kuma"
}
}
template {
metadata {
annotations = {
"diun.enable" = "true"
"diun.include_tags" = "latest"
}
labels = {
app = "uptime-kuma"
}
}
spec {
container {
image = "louislam/uptime-kuma:2"
name = "uptime-kuma"
port {
container_port = 3001
}
volume_mount {
name = "data"
mount_path = "/app/data"
}
}
volume {
name = "data"
nfs {
server = "10.0.10.15"
path = "/mnt/main/uptime-kuma"
}
}
}
}
}
}
resource "kubernetes_service" "uptime-kuma" {
metadata {
name = "uptime-kuma"
namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
labels = {
"app" = "uptime-kuma"
}
}
spec {
selector = {
app = "uptime-kuma"
}
port {
port = "80"
target_port = "3001"
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
name = "uptime"
tls_secret_name = var.tls_secret_name
service_name = "uptime-kuma"
extra_annotations = {
"gethomepage.dev/enabled" = "true"
"gethomepage.dev/description" = "Uptime monitor"
# gethomepage.dev/group: Media
"gethomepage.dev/icon" : "uptime-kuma.png"
"gethomepage.dev/name" = "Uptime Kuma"
"gethomepage.dev/widget.type" = "uptimekuma"
"gethomepage.dev/widget.url" = "https://uptime.viktorbarzin.me"
"gethomepage.dev/widget.slug" = "cluster-internal"
"gethomepage.dev/pod-selector" = ""
}
rybbit_site_id = "8fef77b1f7fe"
}
# CronJob for daily SQLite backups # no longer needed as we're using the mysql
# resource "kubernetes_cron_job_v1" "sqlite-backup" {
# metadata {
# name = "backup"
# namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
# }
# spec {
# concurrency_policy = "Replace"
# failed_jobs_history_limit = 5
# schedule = "0 0 * * *"
# # schedule = "* * * * *"
# starting_deadline_seconds = 10
# successful_jobs_history_limit = 3
# job_template {
# metadata {}
# spec {
# active_deadline_seconds = 600 # should finish in 10 minutes
# backoff_limit = 3
# ttl_seconds_after_finished = 10
# template {
# metadata {}
# spec {
# container {
# name = "backup"
# image = "alpine/sqlite:latest"
# command = ["/bin/sh", "-c", <<-EOT
# set -e
# export now=$(date +"%Y_%m_%d_%H_%M")
# echo "Backing up SQLite database to /app/data/backup/backup_$now.sqlite"
# sqlite3 /app/data/kuma.db ".backup /app/data/backup/backup_$now.sqlite"
# echo "Backup completed. Deleting old backups..."
# # Rotate - delete last log file
# cd /app/data/backup
# find . -name "*.sqlite" -type f -mtime +7 -delete # 7 day retention of backups
# echo "Old backups deleted."
# EOT
# ]
# volume_mount {
# name = "data"
# mount_path = "/app/data"
# }
# }
# volume {
# name = "data"
# nfs {
# server = "10.0.10.15"
# path = "/mnt/main/uptime-kuma"
# }
# }
# }
# }
# }
# }
# }
# }

View file

@ -0,0 +1,134 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "smtp_password" {}
resource "kubernetes_namespace" "vaultwarden" {
metadata {
name = "vaultwarden"
labels = {
"istio-injection" : "disabled"
tier = var.tier
}
}
}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.vaultwarden.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "vaultwarden" {
metadata {
name = "vaultwarden"
namespace = kubernetes_namespace.vaultwarden.metadata[0].name
labels = {
app = "vaultwarden"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "vaultwarden"
}
}
template {
metadata {
annotations = {
"diun.enable" = "true"
"diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
}
labels = {
"app" = "vaultwarden"
}
}
spec {
container {
image = "vaultwarden/server:1.35.2"
name = "vaultwarden"
env {
name = "DOMAIN"
value = "https://vaultwarden.viktorbarzin.me"
}
# env {
# name = "ADMIN_TOKEN"
# value = ""
# }
env {
name = "SMTP_HOST"
value = "mail.viktorbarzin.me"
}
env {
name = "SMTP_FROM"
value = "vaultwarden@viktorbarzin.me"
}
env {
name = "SMTP_PORT"
value = "587"
}
env {
name = "SMTP_SECURITY"
value = "starttls"
}
env {
name = "SMTP_USERNAME"
value = "vaultwarden@viktorbarzin.me"
}
env {
name = "SMTP_PASSWORD"
value = var.smtp_password
}
port {
container_port = 80
}
volume_mount {
name = "data"
mount_path = "/data"
}
}
volume {
name = "data"
nfs {
path = "/mnt/main/vaultwarden"
server = "10.0.10.15"
}
}
}
}
}
}
resource "kubernetes_service" "vaultwarden" {
metadata {
name = "vaultwarden"
namespace = kubernetes_namespace.vaultwarden.metadata[0].name
labels = {
"app" = "vaultwarden"
}
}
spec {
selector = {
app = "vaultwarden"
}
port {
name = "http"
port = "80"
protocol = "TCP"
}
}
}
module "ingress" {
source = "../../../../modules/kubernetes/ingress_factory"
namespace = kubernetes_namespace.vaultwarden.metadata[0].name
name = "vaultwarden"
tls_secret_name = var.tls_secret_name
rybbit_site_id = "b8fc85e18683"
}

View file

@ -0,0 +1,4 @@
[Peer]
# friendly_name = anca
PublicKey = fr4DB6FHhxYyzrtnoNbhdT8Fqwvsz7QkhTnZpSQmBCY=
AllowedIPs = 10.3.3.13/32

View file

@ -0,0 +1 @@
# DO NOT MANUALLY EDIT THIS LINE. Last IP: 10.3.3.15/24

View file

@ -0,0 +1,227 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "wg_0_conf" {}
variable "firewall_sh" {}
variable "wg_0_key" {}
module "tls_secret" {
source = "../../../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.wireguard.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_namespace" "wireguard" {
metadata {
name = "wireguard"
labels = {
tier = var.tier
}
}
}
resource "kubernetes_config_map" "wg_0_conf" {
metadata {
name = "wg0-conf"
namespace = kubernetes_namespace.wireguard.metadata[0].name
labels = {
app = "wireguard"
}
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
"setup-firewall.sh" = var.firewall_sh
"wg0.conf" = format("%s%s", var.wg_0_conf, file("${path.module}/extra/clients.conf"))
}
}
resource "kubernetes_secret" "wg_0_key" {
metadata {
name = "wg0-key"
namespace = kubernetes_namespace.wireguard.metadata[0].name
annotations = {
"reloader.stakater.com/match" = "true"
}
}
data = {
"wg0.key" = var.wg_0_key
# If thep rivate key changes the pub key must be updated manually
"wg-ui-config" = format("{\"PrivateKey\": \"%s\",\"PublicKey\": \"%s\",\"Users\": {}}", var.wg_0_key, "3OeDa6Z3Z6vPVxn/WKJujYL7DoDYPPpI5W+2glUYLHU=")
}
type = "generic"
}
resource "kubernetes_deployment" "wireguard" {
metadata {
name = "wireguard"
namespace = kubernetes_namespace.wireguard.metadata[0].name
labels = {
app = "wireguard"
tier = var.tier
}
annotations = {
"reloader.stakater.com/search" = "true"
}
}
spec {
replicas = 1
strategy {
rolling_update {
max_surge = "2"
max_unavailable = "0"
}
}
selector {
match_labels = {
app = "wireguard"
}
}
template {
metadata {
labels = {
app = "wireguard"
}
annotations = {
"prometheus.io/scrape" = "true"
"prometheus.io/port" = "9586"
}
}
spec {
init_container {
name = "sysctl-setup"
image = "busybox"
command = ["/bin/sh", "-c", "echo 1 > /proc/sys/net/ipv4/ip_forward"]
security_context {
privileged = true
}
}
container {
image = "sclevine/wg:latest"
name = "wireguard"
image_pull_policy = "IfNotPresent"
lifecycle {
post_start {
exec {
command = ["wg-quick", "up", "wg0"]
}
}
pre_stop {
exec {
command = ["wg-quick", "down", "wg0"]
}
}
}
command = ["tail", "-f", "/dev/null"]
port {
container_port = 51820
protocol = "UDP"
}
volume_mount {
name = "wg0-key"
mount_path = "/etc/wireguard/wg0.key"
sub_path = "wg0.key"
}
volume_mount {
name = "wg0-conf"
mount_path = "/etc/wireguard/wg0.conf"
sub_path = "wg0.conf"
}
volume_mount {
name = "wg0-conf"
mount_path = "/etc/wireguard/setup-firewall.sh"
sub_path = "setup-firewall.sh"
}
security_context {
capabilities {
add = ["NET_ADMIN", "SYS_MODULE"]
}
}
}
container {
name = "prometheus-exporter"
image = "mindflavor/prometheus-wireguard-exporter"
image_pull_policy = "IfNotPresent"
command = ["prometheus_wireguard_exporter", "-a", "true", "-v", "true", "-n", "/etc/wireguard/wg0.conf"]
volume_mount {
name = "wg0-conf"
mount_path = "/etc/wireguard/wg0.conf"
sub_path = "wg0.conf"
}
security_context {
capabilities {
add = ["NET_ADMIN"]
}
}
port {
container_port = 9586
protocol = "TCP"
}
}
volume {
name = "wg0-key"
secret {
secret_name = "wg0-key"
}
}
volume {
name = "wg0-conf"
config_map {
name = "wg0-conf"
}
}
}
}
}
}
resource "kubernetes_service" "wireguard" {
metadata {
name = "wireguard"
namespace = kubernetes_namespace.wireguard.metadata[0].name
annotations = {
"metallb.universe.tf/allow-shared-ip" = "shared"
}
labels = {
"app" = "wireguard"
}
}
spec {
type = "LoadBalancer"
external_traffic_policy = "Cluster"
selector = {
app = "wireguard"
}
port {
port = "51820"
protocol = "UDP"
}
}
}
resource "kubernetes_service" "wireguard_exporter" {
metadata {
name = "wireguard-exporter"
namespace = kubernetes_namespace.wireguard.metadata[0].name
labels = {
"app" = "wireguard-exporter"
}
}
spec {
selector = {
app = "wireguard"
}
port {
port = "9102"
target_port = "9586"
}
}
}

Some files were not shown because too many files have changed in this diff Show more