[ci skip] Move Terraform modules into stack directories
Move all 88 service modules (66 individual + 22 platform) from modules/kubernetes/<service>/ into their corresponding stack directories: - Service stacks: stacks/<service>/module/ - Platform stack: stacks/platform/modules/<service>/ This collocates module source code with its Terragrunt definition. Only shared utility modules remain in modules/kubernetes/: ingress_factory, setup_tls_secret, dockerhub_secret, oauth-proxy. All cross-references to shared modules updated to use correct relative paths. Verified with terragrunt run --all -- plan: 0 adds, 0 destroys across all 68 stacks.
This commit is contained in:
parent
73cb696f12
commit
e225e81ebf
614 changed files with 12075 additions and 352 deletions
72
stacks/platform/modules/authentik/main.tf
Normal file
72
stacks/platform/modules/authentik/main.tf
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
# Inputs for the authentik platform module.

# Name of the TLS secret; passed through to the shared tls_secret and ingress modules.
variable "tls_secret_name" {}

# Rendered into values.yaml as authentik.secret_key. Marked sensitive so the
# value is redacted from plan/apply output.
variable "secret_key" {
  type      = string
  sensitive = true
}

# PostgreSQL password for the "authentik" user. Rendered into values.yaml,
# pgbouncer.ini and userlist.txt. Marked sensitive so the value is redacted
# from plan/apply output.
variable "postgres_password" {
  type      = string
  sensitive = true
}

# Value of the "tier" label applied to the namespace and the pgbouncer deployment.
variable "tier" { type = string }
|
||||
|
||||
|
||||
# Shared helper module, given the authentik namespace and the TLS secret name.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.authentik.metadata[0].name
}
|
||||
|
||||
# Namespace that every other resource in this module is placed in.
resource "kubernetes_namespace" "authentik" {
  metadata {
    name = "authentik"

    labels = {
      "resource-governance/custom-quota" = "true"
      tier                               = var.tier
    }
  }
}
|
||||
|
||||
# Hard resource limits for the authentik namespace.
resource "kubernetes_resource_quota" "authentik" {
  metadata {
    namespace = kubernetes_namespace.authentik.metadata[0].name
    name      = "authentik-quota"
  }

  spec {
    hard = {
      pods              = "30"
      "requests.cpu"    = "8"
      "limits.cpu"      = "24"
      "requests.memory" = "8Gi"
      "limits.memory"   = "48Gi"
    }
  }
}
|
||||
|
||||
# Installs the authentik Helm chart as release "goauthentik".
# The chart values come from values.yaml in this module, with the PostgreSQL
# password and the authentik secret key substituted in.
resource "helm_release" "authentik" {
  name             = "goauthentik"
  namespace        = kubernetes_namespace.authentik.metadata[0].name
  create_namespace = true

  repository = "https://charts.goauthentik.io/"
  chart      = "authentik"
  # Previously pinned to "2025.8.1".
  version = "2025.10.3"

  atomic  = true
  timeout = 6000

  values = [
    templatefile("${path.module}/values.yaml", {
      postgres_password = var.postgres_password
      secret_key        = var.secret_key
    })
  ]
}
|
||||
|
||||
|
||||
module "ingress" {
|
||||
source = "../../../../modules/kubernetes/ingress_factory"
|
||||
namespace = kubernetes_namespace.authentik.metadata[0].name
|
||||
name = "authentik"
|
||||
service_name = "goauthentik-server"
|
||||
tls_secret_name = var.tls_secret_name
|
||||
}
|
||||
|
||||
module "ingress-outpost" {
|
||||
source = "../../../../modules/kubernetes/ingress_factory"
|
||||
namespace = kubernetes_namespace.authentik.metadata[0].name
|
||||
name = "authentik-outpost"
|
||||
host = "authentik"
|
||||
service_name = "ak-outpost-authentik-embedded-outpost"
|
||||
port = 9000
|
||||
ingress_path = ["/outpost.goauthentik.io"]
|
||||
tls_secret_name = var.tls_secret_name
|
||||
}
|
||||
14
stacks/platform/modules/authentik/pgbouncer.ini
Normal file
14
stacks/platform/modules/authentik/pgbouncer.ini
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
[databases]
|
||||
authentik = host=postgresql.dbaas port=5432 dbname=authentik user=authentik password=${password}
|
||||
|
||||
[pgbouncer]
|
||||
listen_addr = 0.0.0.0
|
||||
listen_port = 6432
|
||||
auth_type = md5
|
||||
auth_file = /etc/pgbouncer/userlist.txt
|
||||
pool_mode = transaction
|
||||
max_client_conn = 200
|
||||
default_pool_size = 20
|
||||
reserve_pool_size = 5
|
||||
reserve_pool_timeout = 5
|
||||
ignore_startup_parameters = extra_float_digits
|
||||
134
stacks/platform/modules/authentik/pgbouncer.tf
Normal file
134
stacks/platform/modules/authentik/pgbouncer.tf
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
# PgBouncer configuration, rendered from pgbouncer.ini in this module.
# NOTE(review): the rendered file contains var.postgres_password in its
# [databases] connection string, so this ConfigMap holds the password in
# plain text — consider moving it to a Secret.
resource "kubernetes_config_map" "pgbouncer_config" {
  metadata {
    name = "pgbouncer-config"
    # Reference the namespace resource instead of the literal "authentik" so
    # Terraform creates the namespace before this object.
    namespace = kubernetes_namespace.authentik.metadata[0].name
  }

  data = {
    "pgbouncer.ini" = templatefile("${path.module}/pgbouncer.ini", { password = var.postgres_password })
  }
}
|
||||
|
||||
# --- 2️⃣ Secret for user credentials ---
|
||||
# PgBouncer user list, rendered from userlist.txt in this module with the
# PostgreSQL password substituted in.
resource "kubernetes_secret" "pgbouncer_auth" {
  metadata {
    name = "pgbouncer-auth"
    # Reference the namespace resource instead of the literal "authentik" so
    # Terraform creates the namespace before this object.
    namespace = kubernetes_namespace.authentik.metadata[0].name
  }

  data = {
    "userlist.txt" = templatefile("${path.module}/userlist.txt", { password = var.postgres_password })
  }

  type = "Opaque"
}
|
||||
|
||||
# --- 3️⃣ Deployment ---
|
||||
# PgBouncer deployment (3 replicas), listening on container port 6432.
# pgbouncer.ini and userlist.txt are mounted as single files under
# /etc/pgbouncer from the ConfigMap and Secret defined in this file.
resource "kubernetes_deployment" "pgbouncer" {
  metadata {
    name = "pgbouncer"
    # Reference the namespace resource instead of the literal "authentik" so
    # Terraform creates the namespace before this object.
    namespace = kubernetes_namespace.authentik.metadata[0].name
    labels = {
      app  = "pgbouncer"
      tier = var.tier
    }
  }

  spec {
    replicas = 3

    selector {
      match_labels = {
        app = "pgbouncer"
      }
    }

    template {
      metadata {
        labels = {
          app = "pgbouncer"
        }
      }

      spec {
        affinity {
          pod_anti_affinity {
            # Hard rule: never schedule onto a node that already runs a pod
            # labelled component=server.
            # NOTE(review): presumably meant to target the authentik server
            # pods — confirm the label those pods actually carry.
            required_during_scheduling_ignored_during_execution {
              label_selector {
                match_expressions {
                  key      = "component"
                  operator = "In"
                  values   = ["server"]
                }
              }
              topology_key = "kubernetes.io/hostname"
            }
          }
        }

        container {
          name = "pgbouncer"
          # NOTE(review): ":latest" with IfNotPresent lets different nodes run
          # different cached image versions — consider pinning a tag.
          image             = "edoburu/pgbouncer:latest"
          image_pull_policy = "IfNotPresent"

          port {
            container_port = 6432
          }

          volume_mount {
            name       = "config"
            mount_path = "/etc/pgbouncer/pgbouncer.ini"
            sub_path   = "pgbouncer.ini"
          }

          volume_mount {
            name       = "auth"
            mount_path = "/etc/pgbouncer/userlist.txt"
            sub_path   = "userlist.txt"
          }

          # NOTE(review): this uses host "postgres", while the mounted
          # pgbouncer.ini uses "postgresql.dbaas" — confirm which one the
          # image honours and whether this variable is still needed. It also
          # puts the password in the pod spec in plain text.
          env {
            name  = "DATABASES_AUTHENTIK"
            value = "host=postgres port=5432 dbname=authentik user=authentik password=${var.postgres_password}"
          }
        }

        volume {
          name = "config"
          config_map {
            name = kubernetes_config_map.pgbouncer_config.metadata[0].name
          }
        }

        volume {
          name = "auth"
          secret {
            secret_name = kubernetes_secret.pgbouncer_auth.metadata[0].name
          }
        }
      }
    }
  }

  depends_on = [kubernetes_secret.pgbouncer_auth]
}
|
||||
|
||||
# --- 4️⃣ Service ---
|
||||
# ClusterIP service exposing the pgbouncer pods on TCP 6432.
resource "kubernetes_service" "pgbouncer" {
  metadata {
    name = "pgbouncer"
    # Reference the namespace resource instead of the literal "authentik" so
    # Terraform creates the namespace before this object.
    namespace = kubernetes_namespace.authentik.metadata[0].name
  }

  spec {
    selector = {
      app = "pgbouncer"
    }

    port {
      port        = 6432
      target_port = 6432
      protocol    = "TCP"
    }

    type = "ClusterIP"
  }
}
|
||||
1
stacks/platform/modules/authentik/userlist.txt
Normal file
1
stacks/platform/modules/authentik/userlist.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
"authentik" "${password}"
|
||||
31
stacks/platform/modules/authentik/values.yaml
Normal file
31
stacks/platform/modules/authentik/values.yaml
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
authentik:
|
||||
log_level: warning
|
||||
# log_level: trace
|
||||
secret_key: "${secret_key}"
|
||||
# This sends anonymous usage-data, stack traces on errors and
|
||||
# performance data to authentik.error-reporting.a7k.io, and is fully opt-in
|
||||
error_reporting:
|
||||
enabled: true
|
||||
postgresql:
|
||||
# host: postgresql.dbaas
|
||||
host: pgbouncer.authentik
|
||||
port: 6432
|
||||
user: authentik
|
||||
password: ${postgres_password}
|
||||
redis:
|
||||
host: redis.redis
|
||||
|
||||
server:
|
||||
replicas: 3
|
||||
ingress:
|
||||
enabled: false
|
||||
# hosts:
|
||||
# - authentik.viktorbarzin.me
|
||||
podAnnotations:
|
||||
diun.enable: true
|
||||
diun.include_tags: "^202[0-9].[0-9]+.*$" # no need to annotate the worker as it uses the same image
|
||||
global:
|
||||
addPrometheusAnnotations: true
|
||||
|
||||
worker:
|
||||
replicas: 3
|
||||
159
stacks/platform/modules/cloudflared/cloudflare.tf
Normal file
159
stacks/platform/modules/cloudflared/cloudflare.tf
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
# Contents for cloudflare account
|
||||
# API key used by the cloudflare provider. Marked sensitive, like the account
# and tunnel IDs below, so the value is redacted from plan/apply output.
variable "cloudflare_api_key" {
  type      = string
  sensitive = true
}

# Account e-mail used by the cloudflare provider together with the API key.
variable "cloudflare_email" {}

# Names that get a proxied CNAME record pointing at the tunnel, plus a tunnel ingress rule.
variable "cloudflare_proxied_names" { type = list(string) }

# Names that get a non-proxied A record pointing at var.public_ip.
variable "cloudflare_non_proxied_names" { type = list(string) }

variable "cloudflare_zone_id" {
  description = "Zone ID for your domain"
  type        = string
}

variable "cloudflare_account_id" {
  type      = string
  sensitive = true
}

variable "cloudflare_tunnel_id" {
  type      = string
  sensitive = true
}

# Address used as the content of the non-proxied A records.
variable "public_ip" {
  type = string
}
|
||||
|
||||
|
||||
terraform {
|
||||
required_providers {
|
||||
cloudflare = {
|
||||
source = "cloudflare/cloudflare"
|
||||
version = "~> 4"
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
provider "cloudflare" {
|
||||
api_key = var.cloudflare_api_key # I gave up on getting the permissions on the token...
|
||||
email = var.cloudflare_email
|
||||
}
|
||||
|
||||
|
||||
locals {
|
||||
cloudflare_proxied_names_map = {
|
||||
for h in var.cloudflare_proxied_names :
|
||||
h => h
|
||||
}
|
||||
cloudflare_non_proxied_names_map = {
|
||||
for h in var.cloudflare_non_proxied_names :
|
||||
h => h
|
||||
}
|
||||
}
|
||||
|
||||
resource "cloudflare_zero_trust_tunnel_cloudflared_config" "sof" {
|
||||
account_id = var.cloudflare_account_id
|
||||
tunnel_id = var.cloudflare_tunnel_id
|
||||
|
||||
config {
|
||||
warp_routing {
|
||||
enabled = true
|
||||
}
|
||||
dynamic "ingress_rule" {
|
||||
for_each = toset(var.cloudflare_proxied_names)
|
||||
content {
|
||||
hostname = ingress_rule.value == "viktorbarzin.me" ? ingress_rule.value : "${ingress_rule.value}.viktorbarzin.me"
|
||||
path = "/"
|
||||
service = "https://10.0.20.202:443"
|
||||
origin_request {
|
||||
no_tls_verify = true
|
||||
}
|
||||
}
|
||||
}
|
||||
ingress_rule {
|
||||
service = "http_status:404"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "cloudflare_record" "dns_record" {
|
||||
# count = length(var.cloudflare_proxied_names)
|
||||
# name = var.cloudflare_proxied_names[count.index]
|
||||
for_each = local.cloudflare_proxied_names_map
|
||||
name = each.key
|
||||
|
||||
content = "${var.cloudflare_tunnel_id}.cfargotunnel.com"
|
||||
proxied = true
|
||||
ttl = 1
|
||||
type = "CNAME"
|
||||
zone_id = var.cloudflare_zone_id
|
||||
}
|
||||
|
||||
resource "cloudflare_record" "non_proxied_dns_record" {
|
||||
# count = length(var.cloudflare_non_proxied_names)
|
||||
# name = var.cloudflare_non_proxied_names[count.index]
|
||||
for_each = local.cloudflare_non_proxied_names_map
|
||||
name = each.key
|
||||
|
||||
# content = var.non_proxied_names[count.index].ip
|
||||
content = var.public_ip
|
||||
proxied = false
|
||||
ttl = 1
|
||||
type = "A"
|
||||
zone_id = var.cloudflare_zone_id
|
||||
}
|
||||
|
||||
|
||||
resource "cloudflare_record" "mail" {
|
||||
content = "mail.viktorbarzin.me"
|
||||
name = "viktorbarzin.me"
|
||||
proxied = false
|
||||
ttl = 1
|
||||
type = "MX"
|
||||
priority = 1
|
||||
zone_id = var.cloudflare_zone_id
|
||||
}
|
||||
|
||||
resource "cloudflare_record" "mail_domainkey" {
|
||||
content = "\"k=rsa; p=MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDIDLB8mhAHNqs1s6GeZMQHOxWweoNKIrqo5tqRM3yFilgfPUX34aTIXNZg9xAmlK+2S/xXO1ymt127ZGMjnoFKOEP8/uZ54iHTCnioHaPZWMfJ7o6TYIXjr+9ShKfoJxZLv7lHJ2wKQK3yOw4lg4cvja5nxQ6fNoGRwo+mQ/mgJQIDAQAB\""
|
||||
name = "s1._domainkey.viktorbarzin.me"
|
||||
proxied = false
|
||||
ttl = 1
|
||||
type = "TXT"
|
||||
priority = 1
|
||||
zone_id = var.cloudflare_zone_id
|
||||
}
|
||||
|
||||
resource "cloudflare_record" "mail_spf" {
|
||||
content = "\"v=spf1 include:mailgun.org ~all\""
|
||||
name = "viktorbarzin.me"
|
||||
proxied = false
|
||||
ttl = 1
|
||||
type = "TXT"
|
||||
priority = 1
|
||||
zone_id = var.cloudflare_zone_id
|
||||
}
|
||||
|
||||
resource "cloudflare_record" "mail_dmarc" {
|
||||
content = "\"v=DMARC1; p=none; pct=100; fo=1; ri=3600; sp=none; adkim=r; aspf=r; rua=mailto:e21c0ff8@dmarc.mailgun.org,mailto:adb84997@inbox.ondmarc.com; ruf=mailto:e21c0ff8@dmarc.mailgun.org,mailto:adb84997@inbox.ondmarc.com,mailto:postmaster@viktorbarzin.me;\""
|
||||
name = "_dmarc.viktorbarzin.me"
|
||||
proxied = false
|
||||
ttl = 1
|
||||
type = "TXT"
|
||||
priority = 1
|
||||
zone_id = var.cloudflare_zone_id
|
||||
}
|
||||
|
||||
resource "cloudflare_record" "keyserver" {
|
||||
content = "130.162.165.220" # Oracle VPS
|
||||
name = "keyserver.viktorbarzin.me"
|
||||
proxied = false
|
||||
ttl = 3600
|
||||
type = "A"
|
||||
priority = 1
|
||||
zone_id = var.cloudflare_zone_id
|
||||
}
|
||||
|
||||
# Enable HTTP/3 (QUIC) for Cloudflare-proxied domains
|
||||
resource "cloudflare_zone_settings_override" "http3" {
|
||||
zone_id = var.cloudflare_zone_id
|
||||
|
||||
settings {
|
||||
http3 = "on"
|
||||
}
|
||||
}
|
||||
90
stacks/platform/modules/cloudflared/main.tf
Normal file
90
stacks/platform/modules/cloudflared/main.tf
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
# Contents for cloudflare tunnel
|
||||
|
||||
# Name of the TLS secret; passed through to the shared tls_secret module.
variable "tls_secret_name" {}

# Tunnel token given to the cloudflared container as TUNNEL_TOKEN. Marked
# sensitive so the value is redacted from plan/apply output.
variable "cloudflare_tunnel_token" {
  type      = string
  sensitive = true
}
|
||||
resource "kubernetes_namespace" "cloudflared" {
|
||||
metadata {
|
||||
name = "cloudflared"
|
||||
labels = {
|
||||
tier = var.tier
|
||||
}
|
||||
}
|
||||
}
|
||||
variable "tier" { type = string }
|
||||
|
||||
module "tls_secret" {
|
||||
source = "../../../../modules/kubernetes/setup_tls_secret"
|
||||
namespace = kubernetes_namespace.cloudflared.metadata[0].name
|
||||
tls_secret_name = var.tls_secret_name
|
||||
}
|
||||
|
||||
resource "kubernetes_deployment" "cloudflared" {
|
||||
metadata {
|
||||
name = "cloudflared"
|
||||
namespace = kubernetes_namespace.cloudflared.metadata[0].name
|
||||
labels = {
|
||||
app = "cloudflared"
|
||||
tier = var.tier
|
||||
}
|
||||
annotations = {
|
||||
"reloader.stakater.com/search" = "true"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
replicas = 3
|
||||
strategy {
|
||||
type = "RollingUpdate"
|
||||
}
|
||||
selector {
|
||||
match_labels = {
|
||||
app = "cloudflared"
|
||||
}
|
||||
}
|
||||
template {
|
||||
metadata {
|
||||
labels = {
|
||||
app = "cloudflared"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
container {
|
||||
# image = "wisdomsky/cloudflared-web:latest"
|
||||
image = "cloudflare/cloudflared"
|
||||
name = "cloudflared"
|
||||
command = ["cloudflared", "tunnel", "run"]
|
||||
env {
|
||||
name = "TUNNEL_TOKEN"
|
||||
value = var.cloudflare_tunnel_token
|
||||
}
|
||||
|
||||
port {
|
||||
container_port = 14333
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_service" "cloudflared" {
|
||||
metadata {
|
||||
name = "cloudflared"
|
||||
namespace = kubernetes_namespace.cloudflared.metadata[0].name
|
||||
labels = {
|
||||
"app" = "cloudflared"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
selector = {
|
||||
app = "cloudflared"
|
||||
}
|
||||
port {
|
||||
name = "http"
|
||||
target_port = 14333
|
||||
port = 80
|
||||
protocol = "TCP"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
controller:
|
||||
extraVolumes:
|
||||
- name: crowdsec-bouncer-plugin
|
||||
emptyDir: {}
|
||||
extraInitContainers:
|
||||
- name: init-clone-crowdsec-bouncer
|
||||
image: crowdsecurity/lua-bouncer-plugin
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: API_URL
|
||||
value: "http://crowdsec-service.crowdsec.svc.cluster.local:8080" # crowdsec lapi service-name
|
||||
- name: API_KEY
|
||||
value: "<API KEY>" # generated with `cscli bouncers add -n <bouncer_name>
|
||||
- name: BOUNCER_CONFIG
|
||||
value: "/crowdsec/crowdsec-bouncer.conf"
|
||||
- name: CAPTCHA_PROVIDER
|
||||
value: "recaptcha" # valid providers are recaptcha, hcaptcha, turnstile
|
||||
- name: SECRET_KEY
|
||||
value: "<your-captcha-secret-key>" # If you want captcha support otherwise remove this ENV VAR
|
||||
- name: SITE_KEY
|
||||
value: "<your-captcha-site-key>" # If you want captcha support otherwise remove this ENV VAR
|
||||
- name: BAN_TEMPLATE_PATH
|
||||
value: /etc/nginx/lua/plugins/crowdsec/templates/ban.html
|
||||
- name: CAPTCHA_TEMPLATE_PATH
|
||||
value: /etc/nginx/lua/plugins/crowdsec/templates/captcha.html
|
||||
command:
|
||||
[
|
||||
"sh",
|
||||
"-c",
|
||||
"sh /docker_start.sh; mkdir -p /lua_plugins/crowdsec/; cp -R /crowdsec/* /lua_plugins/crowdsec/",
|
||||
]
|
||||
volumeMounts:
|
||||
- name: crowdsec-bouncer-plugin
|
||||
mountPath: /lua_plugins
|
||||
extraVolumeMounts:
|
||||
- name: crowdsec-bouncer-plugin
|
||||
mountPath: /etc/nginx/lua/plugins/crowdsec
|
||||
subPath: crowdsec
|
||||
config:
|
||||
plugins: "crowdsec"
|
||||
lua-shared-dicts: "crowdsec_cache: 50m"
|
||||
server-snippet: |
|
||||
lua_ssl_trusted_certificate "/etc/ssl/certs/ca-certificates.crt"; # If you want captcha support otherwise remove this line
|
||||
resolver local=on ipv6=off;
|
||||
353
stacks/platform/modules/crowdsec/main.tf
Normal file
353
stacks/platform/modules/crowdsec/main.tf
Normal file
|
|
@ -0,0 +1,353 @@
|
|||
# Inputs for the crowdsec platform module. Credentials are marked sensitive so
# their values are redacted from plan/apply output.

# Name of the TLS secret; passed through to the shared tls_secret and ingress modules.
variable "tls_secret_name" {}

# Rendered into the Helm values as the homepage widget username.
variable "homepage_username" {}

# Rendered into the Helm values as the homepage widget password.
variable "homepage_password" {
  type      = string
  sensitive = true
}

# Passed to the Helm values template as DB_PASSWORD.
variable "db_password" {
  type      = string
  sensitive = true
}

# Passed to the Helm values template as ENROLL_KEY.
variable "enroll_key" {
  type      = string
  sensitive = true
}

# Used for the web dash (CS_API_KEY).
variable "crowdsec_dash_api_key" {
  type      = string
  sensitive = true
}

# Used for the web dash (CS_MACHINE_ID).
variable "crowdsec_dash_machine_id" { type = string }

# Used for the web dash (CS_MACHINE_PASSWORD).
variable "crowdsec_dash_machine_password" {
  type      = string
  sensitive = true
}

# Value of the "tier" label applied to the resources in this module.
variable "tier" { type = string }

# Passed to the Helm values template as SLACK_WEBHOOK_URL.
variable "slack_webhook_url" {
  type      = string
  sensitive = true
}
|
||||
|
||||
module "tls_secret" {
|
||||
source = "../../../../modules/kubernetes/setup_tls_secret"
|
||||
namespace = kubernetes_namespace.crowdsec.metadata[0].name
|
||||
tls_secret_name = var.tls_secret_name
|
||||
}
|
||||
|
||||
resource "kubernetes_namespace" "crowdsec" {
|
||||
metadata {
|
||||
name = "crowdsec"
|
||||
labels = {
|
||||
tier = var.tier
|
||||
"resource-governance/custom-quota" = "true"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Custom CrowdSec scenarios, one YAML document per ConfigMap key.
# Both are leaky-bucket scenarios grouped by source IP: one counts HTTP 403
# responses, the other HTTP 429 responses.
resource "kubernetes_config_map" "crowdsec_custom_scenarios" {
  metadata {
    name      = "crowdsec-custom-scenarios"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    labels = {
      "app.kubernetes.io/name" = "crowdsec"
    }
  }

  data = {
    "http-403-abuse.yaml" = <<-YAML
      type: leaky
      name: crowdsecurity/http-403-abuse
      description: "Detect IPs triggering too many HTTP 403s in NGINX ingress logs"
      filter: "evt.Meta.log_type == 'http_access-log' && evt.Parsed.status == '403'"
      groupby: "evt.Meta.source_ip"
      leakspeed: "2s"
      capacity: 10
      blackhole: 5m
      labels:
        service: http
        behavior: abusive_403
        remediation: true
    YAML
    # Uses "=" like the entry above; the original mixed "=" and ":" separators.
    "http-429-abuse.yaml" = <<-YAML
      type: leaky
      name: crowdsecurity/http-429-abuse
      description: "Detect IPs repeatedly triggering rate-limit (HTTP 429)"
      filter: "evt.Meta.log_type == 'http_access-log' && evt.Parsed.status == '429'"
      groupby: "evt.Meta.source_ip"
      leakspeed: "10s"
      capacity: 5
      blackhole: 1m
      labels:
        service: http
        behavior: rate_limit_abuse
        remediation: true
    YAML
  }
}
|
||||
|
||||
# Whitelist for trusted IPs that should never be blocked
|
||||
resource "kubernetes_config_map" "crowdsec_whitelist" {
|
||||
metadata {
|
||||
name = "crowdsec-whitelist"
|
||||
namespace = kubernetes_namespace.crowdsec.metadata[0].name
|
||||
labels = {
|
||||
"app.kubernetes.io/name" = "crowdsec"
|
||||
}
|
||||
}
|
||||
|
||||
data = {
|
||||
"whitelist.yaml" = <<-YAML
|
||||
name: crowdsecurity/whitelist-trusted-ips
|
||||
description: "Whitelist for trusted IPs that should never be blocked"
|
||||
whitelist:
|
||||
reason: "Trusted IP - never block"
|
||||
ip:
|
||||
- "176.12.22.76"
|
||||
YAML
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
resource "helm_release" "crowdsec" {
|
||||
namespace = kubernetes_namespace.crowdsec.metadata[0].name
|
||||
create_namespace = true
|
||||
name = "crowdsec"
|
||||
atomic = true
|
||||
version = "0.21.0"
|
||||
|
||||
repository = "https://crowdsecurity.github.io/helm-charts"
|
||||
chart = "crowdsec"
|
||||
|
||||
values = [templatefile("${path.module}/values.yaml", { homepage_username = var.homepage_username, homepage_password = var.homepage_password, DB_PASSWORD = var.db_password, ENROLL_KEY = var.enroll_key, SLACK_WEBHOOK_URL = var.slack_webhook_url })]
|
||||
timeout = 3600
|
||||
}
|
||||
|
||||
|
||||
# Deployment for my custom dashboard that helps me unblock myself when I blocklist myself
|
||||
resource "kubernetes_deployment" "crowdsec-web" {
|
||||
metadata {
|
||||
name = "crowdsec-web"
|
||||
namespace = kubernetes_namespace.crowdsec.metadata[0].name
|
||||
labels = {
|
||||
app = "crowdsec_web"
|
||||
"kubernetes.io/cluster-service" = "true"
|
||||
tier = var.tier
|
||||
}
|
||||
}
|
||||
spec {
|
||||
replicas = 1
|
||||
strategy {
|
||||
type = "RollingUpdate"
|
||||
}
|
||||
selector {
|
||||
match_labels = {
|
||||
app = "crowdsec_web"
|
||||
}
|
||||
}
|
||||
template {
|
||||
metadata {
|
||||
labels = {
|
||||
app = "crowdsec_web"
|
||||
"kubernetes.io/cluster-service" = "true"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
priority_class_name = "tier-1-cluster"
|
||||
container {
|
||||
name = "crowdsec-web"
|
||||
image = "viktorbarzin/crowdsec_web"
|
||||
env {
|
||||
name = "CS_API_URL"
|
||||
value = "http://crowdsec-service.crowdsec.svc.cluster.local:8080/v1"
|
||||
}
|
||||
env {
|
||||
name = "CS_API_KEY"
|
||||
value = var.crowdsec_dash_api_key
|
||||
}
|
||||
env {
|
||||
name = "CS_MACHINE_ID"
|
||||
value = var.crowdsec_dash_machine_id
|
||||
}
|
||||
env {
|
||||
name = "CS_MACHINE_PASSWORD"
|
||||
value = var.crowdsec_dash_machine_password
|
||||
}
|
||||
port {
|
||||
name = "http"
|
||||
container_port = 8000
|
||||
protocol = "TCP"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_service" "crowdsec-web" {
|
||||
metadata {
|
||||
name = "crowdsec-web"
|
||||
namespace = kubernetes_namespace.crowdsec.metadata[0].name
|
||||
labels = {
|
||||
"app" = "crowdsec_web"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
selector = {
|
||||
app = "crowdsec_web"
|
||||
}
|
||||
port {
|
||||
port = "80"
|
||||
target_port = "8000"
|
||||
}
|
||||
}
|
||||
}
|
||||
module "ingress" {
|
||||
source = "../../../../modules/kubernetes/ingress_factory"
|
||||
namespace = kubernetes_namespace.crowdsec.metadata[0].name
|
||||
name = "crowdsec-web"
|
||||
protected = true
|
||||
tls_secret_name = var.tls_secret_name
|
||||
exclude_crowdsec = true
|
||||
rybbit_site_id = "d09137795ccc"
|
||||
}
|
||||
|
||||
# CronJob to import public blocklists into CrowdSec
|
||||
# https://github.com/wolffcatskyy/crowdsec-blocklist-import
|
||||
# Uses kubectl exec to run in an existing CrowdSec agent pod that's already registered
|
||||
# Daily CronJob (04:00 Europe/London) that downloads the blocklist import
# script, copies it into a CrowdSec agent pod and runs it there in native mode.
# Runs under the blocklist_import service account.
resource "kubernetes_cron_job_v1" "crowdsec_blocklist_import" {
  metadata {
    name      = "crowdsec-blocklist-import"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    labels = {
      app  = "crowdsec-blocklist-import"
      tier = var.tier
    }
  }

  spec {
    # Run daily at 4 AM
    schedule                      = "0 4 * * *"
    timezone                      = "Europe/London"
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 3

    job_template {
      metadata {
        labels = {
          app = "crowdsec-blocklist-import"
        }
      }

      spec {
        backoff_limit = 3
        template {
          metadata {
            labels = {
              app = "crowdsec-blocklist-import"
            }
          }

          spec {
            service_account_name = kubernetes_service_account.blocklist_import.metadata[0].name
            restart_policy       = "OnFailure"

            container {
              name  = "blocklist-import"
              image = "bitnami/kubectl:latest"

              command = ["/bin/bash", "-c"]
              # The pod lookup ends in "|| true": when no agent pod matches,
              # the jsonpath lookup of items[0] makes kubectl exit non-zero,
              # which under "set -e" would abort the script before the
              # explicit "Could not find CrowdSec agent pod" check could run.
              args = [
                <<-EOF
                set -e

                echo "Finding CrowdSec agent pod..."
                AGENT_POD=$(kubectl get pods -n crowdsec -l k8s-app=crowdsec,type=agent -o jsonpath='{.items[0].metadata.name}' || true)

                if [ -z "$AGENT_POD" ]; then
                  echo "ERROR: Could not find CrowdSec agent pod"
                  exit 1
                fi

                echo "Using agent pod: $AGENT_POD"

                # Download the import script
                echo "Downloading blocklist import script..."
                curl -fsSL -o /tmp/import.sh \
                  https://raw.githubusercontent.com/wolffcatskyy/crowdsec-blocklist-import/main/import.sh
                chmod +x /tmp/import.sh

                # Copy script to agent pod and execute
                echo "Copying script to agent pod and executing..."
                kubectl cp /tmp/import.sh crowdsec/$AGENT_POD:/tmp/import.sh

                kubectl exec -n crowdsec "$AGENT_POD" -- /bin/bash -c '
                  set -e

                  # Run with native mode since we are inside the CrowdSec container
                  export MODE=native
                  export DECISION_DURATION=24h
                  export FETCH_TIMEOUT=60
                  export LOG_LEVEL=INFO

                  /tmp/import.sh

                  # Cleanup
                  rm -f /tmp/import.sh
                '

                echo "Blocklist import completed successfully!"
                EOF
              ]
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Service account for the blocklist import job (needs kubectl exec permissions)
|
||||
resource "kubernetes_service_account" "blocklist_import" {
|
||||
metadata {
|
||||
name = "crowdsec-blocklist-import"
|
||||
namespace = kubernetes_namespace.crowdsec.metadata[0].name
|
||||
}
|
||||
}
|
||||
|
||||
# Namespaced role for the blocklist import job: read/list pods and create
# exec sessions into them.
resource "kubernetes_role" "blocklist_import" {
  metadata {
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    name      = "crowdsec-blocklist-import"
  }

  # Look up pods.
  rule {
    verbs      = ["get", "list"]
    resources  = ["pods"]
    api_groups = [""]
  }

  # Open exec sessions in pods.
  rule {
    verbs      = ["create"]
    resources  = ["pods/exec"]
    api_groups = [""]
  }
}
|
||||
|
||||
# Grants the blocklist_import role to the blocklist_import service account
# inside the crowdsec namespace.
resource "kubernetes_role_binding" "blocklist_import" {
  metadata {
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    name      = "crowdsec-blocklist-import"
  }

  subject {
    kind      = "ServiceAccount"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    name      = kubernetes_service_account.blocklist_import.metadata[0].name
  }

  role_ref {
    kind      = "Role"
    name      = kubernetes_role.blocklist_import.metadata[0].name
    api_group = "rbac.authorization.k8s.io"
  }
}
|
||||
|
||||
# Custom ResourceQuota for CrowdSec — needs more than default 1-cluster quota
|
||||
# because it runs DaemonSet agents (1 per worker node) + 3 LAPI replicas + web UI
|
||||
resource "kubernetes_resource_quota" "crowdsec" {
|
||||
metadata {
|
||||
name = "crowdsec-quota"
|
||||
namespace = kubernetes_namespace.crowdsec.metadata[0].name
|
||||
}
|
||||
spec {
|
||||
hard = {
|
||||
"requests.cpu" = "8"
|
||||
"requests.memory" = "8Gi"
|
||||
"limits.cpu" = "16"
|
||||
"limits.memory" = "16Gi"
|
||||
pods = "30"
|
||||
}
|
||||
}
|
||||
}
|
||||
196
stacks/platform/modules/crowdsec/values.yaml
Normal file
196
stacks/platform/modules/crowdsec/values.yaml
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
# values from - https://github.com/crowdsecurity/helm-charts/blob/main/charts/crowdsec/values.yaml
|
||||
container_runtime: containerd
|
||||
|
||||
agent:
|
||||
priorityClassName: "tier-1-cluster"
|
||||
# To specify each pod you want to process it logs (pods present in the node)
|
||||
acquisition:
|
||||
# The namespace where the pod is located
|
||||
- namespace: traefik
|
||||
# The pod name
|
||||
podName: traefik-*
|
||||
# as in crowdsec configuration, we need to specify the program name so the parser will match and parse logs
|
||||
program: traefik
|
||||
# Those are ENV variables
|
||||
env:
|
||||
# As it's a test, we don't want to share signals with CrowdSec so disable the Online API.
|
||||
# - name: DISABLE_ONLINE_API
|
||||
# value: "true"
|
||||
# As we are running Traefik, we want to install the Traefik collection
|
||||
- name: COLLECTIONS
|
||||
value: "crowdsecurity/traefik crowdsecurity/base-http-scenarios crowdsecurity/http-cve"
|
||||
- name: SCENARIOS
|
||||
value: ""
|
||||
# value: "crowdsecurity/http-crawl-aggressive"
|
||||
# Mount custom scenarios into /etc/crowdsec/scenarios
|
||||
extraVolumeMounts:
|
||||
- name: custom-scenarios
|
||||
mountPath: /etc/crowdsec/scenarios/http-403-abuse.yaml
|
||||
subPath: "http-403-abuse.yaml"
|
||||
readonly: true
|
||||
- name: custom-scenarios
|
||||
mountPath: /etc/crowdsec/scenarios/http-429-abuse.yaml
|
||||
subPath: "http-429-abuse.yaml"
|
||||
readonly: true
|
||||
- name: whitelist
|
||||
mountPath: /etc/crowdsec/parsers/s02-enrich/whitelist.yaml
|
||||
subPath: "whitelist.yaml"
|
||||
readonly: true
|
||||
extraVolumes:
|
||||
- name: custom-scenarios
|
||||
configMap:
|
||||
name: crowdsec-custom-scenarios
|
||||
- name: whitelist
|
||||
configMap:
|
||||
name: crowdsec-whitelist
|
||||
lapi:
|
||||
priorityClassName: "tier-1-cluster"
|
||||
replicas: 3
|
||||
extraSecrets:
|
||||
dbPassword: "${DB_PASSWORD}"
|
||||
storeCAPICredentialsInSecret: true
|
||||
persistentVolume:
|
||||
config:
|
||||
enabled: false
|
||||
data:
|
||||
enabled: false
|
||||
env:
|
||||
- name: ENROLL_KEY
|
||||
value: "${ENROLL_KEY}"
|
||||
- name: ENROLL_INSTANCE_NAME
|
||||
value: "k8s-cluster"
|
||||
- name: ENROLL_TAGS
|
||||
value: "k8s linux"
|
||||
- name: DB_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: crowdsec-lapi-secrets
|
||||
key: dbPassword
|
||||
# As it's a test, we don't want to share signals with CrowdSec, so disable the Online API.
|
||||
# - name: DISABLE_ONLINE_API
|
||||
# value: "true"
|
||||
dashboard:
|
||||
enabled: true
|
||||
env:
|
||||
- name: MB_DB_TYPE
|
||||
value: "mysql"
|
||||
- name: MB_DB_DBNAME
|
||||
value: crowdsec-metabase
|
||||
- name: MB_DB_USER
|
||||
value: "crowdsec"
|
||||
- name: MB_DB_PASS
|
||||
value: "${DB_PASSWORD}"
|
||||
- name: MB_DB_HOST
|
||||
value: "mysql.dbaas.svc.cluster.local"
|
||||
|
||||
- name: MB_EMAIL_SMTP_USERNAME
|
||||
value: "info@viktorbarzin.me"
|
||||
- name: MB_EMAIL_FROM_ADDRESS
|
||||
value: "info@viktorbarzin.me"
|
||||
- name: MB_EMAIL_SMTP_HOST
|
||||
value: "mailserver.mailserver.svc.cluster.local"
|
||||
- name: MB_EMAIL_SMTP_PASSWORD
|
||||
value: "" # Ignore for now as it's unclear what notifications we can get
|
||||
- name: MB_EMAIL_SMTP_PORT
|
||||
value: "587"
|
||||
- name: MB_EMAIL_SMTP_SECURITY
|
||||
value: "starttls"
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
|
||||
#nginx.ingress.kubernetes.io/auth-url: "https://oauth2.viktorbarzin.me/oauth2/auth"
|
||||
nginx.ingress.kubernetes.io/auth-url: "http://ak-outpost-authentik-embedded-outpost.authentik.svc.cluster.local:9000/outpost.goauthentik.io/auth/nginx"
|
||||
# nginx.ingress.kubernetes.io/auth-signin: "https://oauth2.viktorbarzin.me/oauth2/start?rd=/redirect/$http_host$escaped_request_uri"
|
||||
nginx.ingress.kubernetes.io/auth-signin: "https://authentik.viktorbarzin.me/outpost.goauthentik.io/start?rd=$scheme%3A%2F%2F$host$escaped_request_uri"
|
||||
nginx.ingress.kubernetes.io/auth-response-headers: "Set-Cookie,X-authentik-username,X-authentik-groups,X-authentik-email,X-authentik-name,X-authentik-uid"
|
||||
nginx.ingress.kubernetes.io/auth-snippet: "proxy_set_header X-Forwarded-Host $http_host;"
|
||||
gethomepage.dev/enabled: "true"
|
||||
gethomepage.dev/description: "Web Application Firewall"
|
||||
gethomepage.dev/icon: "crowdsec.png"
|
||||
gethomepage.dev/name: "CrowdSec"
|
||||
gethomepage.dev/widget.type: "crowdsec"
|
||||
gethomepage.dev/widget.url: "http://crowdsec-service.crowdsec.svc.cluster.local:8080"
|
||||
gethomepage.dev/widget.username: "${homepage_username}"
|
||||
gethomepage.dev/widget.password: "${homepage_password}"
|
||||
gethomepage.dev/pod-selector: ""
|
||||
ingressClassName: "nginx"
|
||||
host: "crowdsec.viktorbarzin.me"
|
||||
tls:
|
||||
- hosts:
|
||||
- crowdsec.viktorbarzin.me
|
||||
secretName: "tls-secret"
|
||||
metrics:
|
||||
enabled: true
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
|
||||
config:
|
||||
# Custom profiles: captcha for rate limiting, ban for attacks
|
||||
profiles.yaml: |
|
||||
# Captcha for rate limiting and 403 abuse (user can unblock themselves)
|
||||
name: captcha_remediation
|
||||
filters:
|
||||
- Alert.Remediation == true && Alert.GetScope() == "Ip" && Alert.GetScenario() in ["crowdsecurity/http-429-abuse", "crowdsecurity/http-403-abuse", "crowdsecurity/http-crawl-non_statics", "crowdsecurity/http-sensitive-files"]
|
||||
decisions:
|
||||
- type: captcha
|
||||
duration: 4h
|
||||
notifications:
|
||||
- slack_alerts
|
||||
on_success: break
|
||||
---
|
||||
# Default: Ban for serious attacks (CVE exploits, scanners, brute force)
|
||||
name: default_ip_remediation
|
||||
filters:
|
||||
- Alert.Remediation == true && Alert.GetScope() == "Ip"
|
||||
decisions:
|
||||
- type: ban
|
||||
duration: 4h
|
||||
notifications:
|
||||
- slack_alerts
|
||||
on_success: break
|
||||
---
|
||||
name: default_range_remediation
|
||||
filters:
|
||||
- Alert.Remediation == true && Alert.GetScope() == "Range"
|
||||
decisions:
|
||||
- type: ban
|
||||
duration: 4h
|
||||
notifications:
|
||||
- slack_alerts
|
||||
on_success: break
|
||||
|
||||
config.yaml.local: |
|
||||
db_config:
|
||||
type: mysql
|
||||
user: crowdsec
|
||||
password: ${DB_PASSWORD}
|
||||
db_name: crowdsec
|
||||
host: mysql.dbaas.svc.cluster.local
|
||||
port: 3306
|
||||
api:
|
||||
server:
|
||||
auto_registration: # Activate if not using TLS for authentication
|
||||
enabled: true
|
||||
token: "$${REGISTRATION_TOKEN}" # /!\ do not change
|
||||
allowed_ranges: # /!\ adapt to the pod IP ranges used by your cluster
|
||||
- "127.0.0.1/32"
|
||||
- "192.168.0.0/16"
|
||||
- "10.0.0.0/8"
|
||||
- "172.16.0.0/12"
|
||||
|
||||
notifications:
|
||||
slack.yaml: |
|
||||
type: slack
|
||||
name: slack_alerts
|
||||
log_level: info
|
||||
format: |
|
||||
:rotating_light: *CrowdSec Alert*
|
||||
{{range .}}
|
||||
*Scenario:* {{.Alert.Scenario}}
|
||||
*Source IP:* {{.Alert.Source.IP}} ({{.Alert.Source.Cn}})
|
||||
*Decisions:*
|
||||
{{range .Alert.Decisions}} - {{.Type}} for {{.Duration}} (scope: {{.Scope}}, value: {{.Value}})
|
||||
{{end}}
|
||||
{{end}}
|
||||
webhook: ${SLACK_WEBHOOK_URL}
|
||||
17
stacks/platform/modules/dbaas/chart_values.tpl
Normal file
17
stacks/platform/modules/dbaas/chart_values.tpl
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Helm values for a MySQL InnoDB cluster release.
# NOTE(review): the root_password placeholder below suggests this file is
# rendered as a Terraform template - confirm against the caller.
tls:
  useSelfSigned: true

credentials:
  root:
    user: root
    password: ${root_password}

serverInstances: 1

podSpec:
  containers:
    - name: mysql
      # Sizing of the mysql container; adapt to your needs.
      resources:
        requests:
          cpu: "1800m"
          memory: "1024Mi"
        limits:
          cpu: "3600m"
          memory: "2048Mi"
|
||||
30
stacks/platform/modules/dbaas/cluster.yaml
Normal file
30
stacks/platform/modules/dbaas/cluster.yaml
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# MysqlCluster custom resource (presslabs MySQL operator API group).
apiVersion: mysql.presslabs.org/v1alpha1
kind: MysqlCluster
metadata:
  name: mysql-cluster
  namespace: dbaas
spec:
  mysqlVersion: "5.7"
  replicas: 1
  secretName: cluster-secret
  # Every tuning option below is disabled; uncomment a line to set it.
  mysqlConf:
    # read_only: 0
    # automatic_sp_privileges: "ON"   # grant EXECUTE and ALTER ROUTINE to the creator of a stored routine
    # auto_generate_certs: "ON"       # generate certificates automatically
    # auto_increment_increment: 1     # step between auto-increment values
    # auto_increment_offset: 1        # auto-increment offset
    # binlog-format: "STATEMENT"      # ROW (slow, safe), STATEMENT (fast, unsafe) or MIXED
    # wait_timeout: 31536000          # seconds of inactivity before a non-interactive connection is closed (28800 noted as the usual value); tweak accordingly if you hit "MySQL server has gone away"
    # interactive_timeout: 28800      # seconds of inactivity before an interactive connection is closed
    # max_allowed_packet: "512M"      # maximum size of a network protocol packet the server creates or reads
    # max-binlog-size: 1073741824     # size limit of a single binary log file
    # log_output: "TABLE"             # destination format for log output
    # master-info-repository: "TABLE" # where the master info is stored
    # relay_log_info_repository: "TABLE" # where the relay log info is stored
  volumeSpec:
    persistentVolumeClaim:
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: 10Gi
|
||||
916
stacks/platform/modules/dbaas/main.tf
Normal file
916
stacks/platform/modules/dbaas/main.tf
Normal file
|
|
@ -0,0 +1,916 @@
|
|||
# DB as a service: MySQL and PostgreSQL servers, their admin UIs and nightly dump jobs.

# Name of the TLS secret handed to the tls_secret and ingress modules.
variable "tls_secret_name" {
}

# Value of the "tier" label put on the namespace and the deployments.
variable "tier" {
  type = string
}

# Root password of the MySQL server; also stored in the cluster-secret Secret.
variable "dbaas_root_password" {
}

# Name of the Service in front of MySQL; phpMyAdmin uses it as PMA_HOST.
variable "cluster_master_service" {
  default = "mysql"
}

# Password of the PostgreSQL "root" user.
variable "postgresql_root_password" {
}

# Default password for the pgAdmin login.
variable "pgadmin_password" {
}

# NOTE(review): not referenced by any resource in this file - confirm it is still needed.
variable "prod" {
  type    = bool
  default = false
}
|
||||
|
||||
# Namespace that every resource in this module is created in.
resource "kubernetes_namespace" "dbaas" {
  metadata {
    name   = "dbaas"
    labels = { tier = var.tier }
  }
}
|
||||
|
||||
# Shared helper module; receives the dbaas namespace and the TLS secret name.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.dbaas.metadata[0].name
}
|
||||
|
||||
|
||||
# my.cnf for the MySQL deployment, mounted at /etc/my.cnf via sub_path.
resource "kubernetes_config_map" "mycnf" {
  metadata {
    annotations = {
      "reloader.stakater.com/match" = "true"
    }

    namespace = kubernetes_namespace.dbaas.metadata[0].name
    name      = "mycnf"
  }

  # The heredoc body is the literal file content; keep it unchanged.
  data = {
    "my.cnf" = <<-EOT
      # For advice on how to change settings please see
      # http://dev.mysql.com/doc/refman/8.2/en/server-configuration-defaults.html

      [mysqld]
      #
      # Remove leading # and set to the amount of RAM for the most important data
      # cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%.
      # innodb_buffer_pool_size = 128M
      #
      # Remove leading # to turn on a very important data integrity option: logging
      # changes to the binary log between backups.
      # log_bin
      #
      # Remove leading # to set options mainly useful for reporting servers.
      # The server defaults are faster for transactions and fast SELECTs.
      # Adjust sizes as needed, experiment to find the optimal values.
      # join_buffer_size = 128M
      # sort_buffer_size = 2M
      # read_rnd_buffer_size = 2M

      # Remove leading # to revert to previous value for default_authentication_plugin,
      # this will increase compatibility with older clients. For background, see:
      # https://dev.mysql.com/doc/refman/8.2/en/server-system-variables.html#sysvar_default_authentication_plugin
      # default-authentication-plugin=mysql_native_password
      #skip-host-cache
      skip-name-resolve
      datadir=/var/lib/mysql
      socket=/var/run/mysqld/mysqld.sock
      secure-file-priv=/var/lib/mysql-files
      user=mysql
      #innodb_force_recovery = 6
      #log_error_verbosity = 6

      pid-file=/var/run/mysqld/mysqld.pid
      [client]
      socket=/var/run/mysqld/mysqld.sock

      !includedir /etc/mysql/conf.d/
    EOT
  }
}
|
||||
# Service in front of the pods labelled app=mysql; its name comes from
# var.cluster_master_service.
resource "kubernetes_service" "mysql" {
  metadata {
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    name      = var.cluster_master_service
  }

  spec {
    port {
      port = 3306
    }

    selector = {
      app = "mysql"
    }
  }
}
|
||||
|
||||
# Single-replica MySQL server. Data lives on an NFS export and my.cnf comes
# from the mycnf config map.
resource "kubernetes_deployment" "mysql" {
  metadata {
    name      = "mysql"
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    annotations = {
      "reloader.stakater.com/search" = "true"
    }
    labels = {
      tier = var.tier
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "mysql"
      }
    }

    strategy {
      type = "Recreate"
    }

    template {
      metadata {
        labels = {
          app = "mysql"
        }
        annotations = {
          "diun.enable"       = "false"
          "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
        }
      }

      spec {
        container {
          image = "mysql:9.2.0"
          name  = "mysql"

          # Read the root password from the Secret this module already
          # manages (same source phpMyAdmin uses) instead of inlining the
          # value in the pod spec.
          env {
            name = "MYSQL_ROOT_PASSWORD"
            value_from {
              secret_key_ref {
                name = kubernetes_secret.cluster-password.metadata[0].name
                key  = "ROOT_PASSWORD"
              }
            }
          }

          port {
            container_port = 3306
            name           = "mysql"
          }

          volume_mount {
            name       = "mysql-persistent-storage"
            mount_path = "/var/lib/mysql"
          }

          # Mount only the my.cnf key of the config map as /etc/my.cnf.
          volume_mount {
            name       = "mycnf"
            mount_path = "/etc/my.cnf"
            sub_path   = "my.cnf"
          }
        }

        volume {
          name = "mysql-persistent-storage"
          nfs {
            path   = "/mnt/main/mysql"
            server = "10.0.10.15"
          }
        }

        volume {
          name = "mycnf"

          # Reference the resource rather than the literal name so Terraform
          # creates the config map before the deployment.
          config_map {
            name = kubernetes_config_map.mycnf.metadata[0].name
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Nightly mysqldump of all databases to an NFS export; dumps older than
# 14 days are deleted by the same job.
resource "kubernetes_cron_job_v1" "mysql-backup" {
  metadata {
    name      = "mysql-backup"
    namespace = kubernetes_namespace.dbaas.metadata[0].name
  }

  spec {
    concurrency_policy        = "Replace"
    failed_jobs_history_limit = 5
    schedule                  = "0 0 * * *"
    # schedule                    = "* * * * *"
    starting_deadline_seconds     = 10
    successful_jobs_history_limit = 10

    job_template {
      metadata {}

      spec {
        backoff_limit              = 3
        ttl_seconds_after_finished = 10

        template {
          metadata {}

          spec {
            container {
              name = "mysql-backup"
              # Pinned to the same version as the server deployment so the
              # dump client does not drift from the server.
              image = "mysql:9.2.0"

              # The MySQL client tools read the password from MYSQL_PWD.
              # Supplying it through the environment keeps it out of the
              # command line, so it is neither echoed by `set -x` nor
              # re-parsed by the shell. MYSQL_PWD is marked deprecated
              # upstream; switch to an option file if it gets removed.
              env {
                name = "MYSQL_PWD"
                value_from {
                  secret_key_ref {
                    name = kubernetes_secret.cluster-password.metadata[0].name
                    key  = "ROOT_PASSWORD"
                  }
                }
              }

              command = ["/bin/bash", "-c", <<-EOT
                set -euxo pipefail
                export now=$(date +"%Y_%m_%d_%H_%M")
                mysqldump --all-databases -u root --host mysql.dbaas.svc.cluster.local > /backup/dump_$now.sql

                # Rotate - remove dumps that fell out of the retention window
                cd /backup
                find . -name "dump_*.sql" -type f -mtime +14 -delete # 14 day retention of backups
                echo Done
              EOT
              ]
              # To restore (from outside of the cluster):
              # run kubectl port-forward to the pod, e.g.:
              # > kb port-forward mysql-647cfd4969-46rmw --address 0.0.0.0 3307:3306
              # run mysql import (and specify a non-localhost address to avoid using the unix socket): (password is in tfvars)
              # > mysql -u root -p --host 10.0.10.10 --port 3307 < /mnt/nfs/2024_01_06_13_54.sql

              volume_mount {
                name       = "mysql-backup"
                mount_path = "/backup"
              }
            }

            volume {
              name = "mysql-backup"
              nfs {
                path   = "/mnt/main/mysql-backup"
                server = "10.0.10.15"
              }
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
# resource "kubernetes_persistent_volume" "mysql" {
|
||||
# metadata {
|
||||
# name = "mysql-pv"
|
||||
# }
|
||||
# spec {
|
||||
# capacity = {
|
||||
# "storage" = "10Gi"
|
||||
# }
|
||||
# access_modes = ["ReadWriteOnce"]
|
||||
# persistent_volume_source {
|
||||
# iscsi {
|
||||
# target_portal = "iscsi.viktorbarzin.lan:3260"
|
||||
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:dbaas:mysql"
|
||||
# lun = 0
|
||||
# fs_type = "ext4"
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
|
||||
# resource "helm_release" "mysql" {
|
||||
# namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
# create_namespace = false
|
||||
# name = "mysql"
|
||||
|
||||
# repository = "https://presslabs.github.io/charts"
|
||||
# chart = "mysql-operator"
|
||||
# # version = "v0.5.0-rc.3"
|
||||
|
||||
# values = [templatefile("${path.module}/mysql_chart_values.yaml", { secretName = var.tls_secret_name })]
|
||||
# atomic = true
|
||||
|
||||
# depends_on = [kubernetes_namespace.dbaas]
|
||||
# }
|
||||
|
||||
# # resource "helm_release" "mysql" {
|
||||
# # namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
# # create_namespace = false
|
||||
# # name = "mysql-operator"
|
||||
|
||||
# # repository = "https://mysql.github.io/mysql-operator/"
|
||||
# # chart = "mysql-operator"
|
||||
# # atomic = true
|
||||
# # depends_on = [kubernetes_namespace.dbaas]
|
||||
# # }
|
||||
|
||||
# # resource "helm_release" "innodb-cluster" {
|
||||
# # namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
# # create_namespace = false
|
||||
# # name = var.cluster_master_service
|
||||
|
||||
# # repository = "https://mysql.github.io/mysql-operator/"
|
||||
# # chart = "mysql-innodbcluster"
|
||||
# # atomic = true
|
||||
# # depends_on = [kubernetes_namespace.dbaas]
|
||||
# # values = [templatefile("${path.module}/chart_values.tpl", { root_password = var.dbaas_root_password })]
|
||||
# # }
|
||||
|
||||
# resource "kubernetes_persistent_volume" "mysql-operator" {
|
||||
# metadata {
|
||||
# name = "mysql-operator-pv"
|
||||
# }
|
||||
# spec {
|
||||
# capacity = {
|
||||
# "storage" = "1Gi"
|
||||
# }
|
||||
# access_modes = ["ReadWriteOnce"]
|
||||
# persistent_volume_source {
|
||||
# iscsi {
|
||||
# target_portal = "iscsi.viktorbarzin.lan:3260"
|
||||
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:dbaas:operator"
|
||||
# lun = 0
|
||||
# fs_type = "ext4"
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
# Secret holding the MySQL root password under the ROOT_PASSWORD key;
# the phpMyAdmin deployment reads it through a secret_key_ref.
resource "kubernetes_secret" "cluster-password" {
  type = "Opaque"

  metadata {
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    name      = "cluster-secret"
    annotations = {
      "reloader.stakater.com/match" = "true"
    }
  }

  data = {
    "ROOT_PASSWORD" = var.dbaas_root_password
  }
}
|
||||
|
||||
# resource "kubernetes_ingress_v1" "dbaas" {
|
||||
# metadata {
|
||||
# name = "orchestrator-ingress"
|
||||
# namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
# annotations = {
|
||||
# "kubernetes.io/ingress.class" = "nginx"
|
||||
# "nginx.ingress.kubernetes.io/auth-tls-verify-client" = "on"
|
||||
# "nginx.ingress.kubernetes.io/auth-tls-secret" = "default/ca-secret"
|
||||
# }
|
||||
# }
|
||||
|
||||
# spec {
|
||||
# tls {
|
||||
# hosts = ["db.viktorbarzin.me"]
|
||||
# secret_name = var.tls_secret_name
|
||||
# }
|
||||
# rule {
|
||||
# host = "db.viktorbarzin.me"
|
||||
# http {
|
||||
# path {
|
||||
# path = "/"
|
||||
# backend {
|
||||
# service {
|
||||
# name = "mysql-mysql-operator"
|
||||
# port {
|
||||
# number = 80
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
|
||||
# phpMyAdmin web UI pointed at the MySQL service of this module.
resource "kubernetes_deployment" "phpmyadmin" {
  metadata {
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    name      = "phpmyadmin"
    annotations = {
      "reloader.stakater.com/search" = "true"
    }
    labels = {
      "app" = "phpmyadmin"
      tier  = var.tier
    }
  }

  spec {
    replicas = "1"

    selector {
      match_labels = { "app" = "phpmyadmin" }
    }

    template {
      metadata {
        labels = { "app" = "phpmyadmin" }
      }

      spec {
        container {
          image = "phpmyadmin/phpmyadmin:5.2.3"
          name  = "phpmyadmin"

          # Database endpoint: service name and port.
          env {
            name  = "PMA_HOST"
            value = var.cluster_master_service
          }
          env {
            name  = "PMA_PORT"
            value = "3306"
          }

          # Root password comes from the cluster-secret Secret.
          env {
            name = "MYSQL_ROOT_PASSWORD"
            value_from {
              secret_key_ref {
                key  = "ROOT_PASSWORD"
                name = "cluster-secret"
              }
            }
          }

          env {
            name  = "UPLOAD_LIMIT"
            value = "300M"
          }

          port {
            container_port = 80
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Service "pma" exposing port 80 of the pods labelled app=phpmyadmin.
resource "kubernetes_service" "phpmyadmin" {
  metadata {
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    name      = "pma"
  }

  spec {
    port {
      port = 80
      name = "web"
    }

    selector = {
      "app" = "phpmyadmin"
    }
  }
}
|
||||
# Ingress for phpMyAdmin, built by the shared ingress_factory module.
module "ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "pma"
  namespace       = kubernetes_namespace.dbaas.metadata[0].name
  tls_secret_name = var.tls_secret_name
  protected       = true

  rybbit_site_id    = "942c76b8bd4d"
  extra_annotations = {}

  # CSP override; allows inline/eval scripts and the rybbit host.
  custom_content_security_policy = "script-src 'self' 'unsafe-inline' 'unsafe-eval' 'wasm-unsafe-eval' https://rybbit.viktorbarzin.me"
}
|
||||
|
||||
|
||||
# resource "kubectl_manifest" "mysql-cluster" {
|
||||
# yaml_body = <<-YAML
|
||||
# apiVersion: mysql.presslabs.org/v1alpha1
|
||||
# kind: MysqlCluster
|
||||
# metadata:
|
||||
# name: mysql-cluster
|
||||
# namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
# spec:
|
||||
# mysqlVersion: "5.7"
|
||||
# replicas: 1
|
||||
# secretName: cluster-secret
|
||||
# mysqlConf:
|
||||
# # read_only: 0 # mysql forms a single transaction for each sql statement, autocommit for each statement
|
||||
# # automatic_sp_privileges: "ON" # automatically grants the EXECUTE and ALTER ROUTINE privileges to the creator of a stored routine
|
||||
# # auto_generate_certs: "ON" # Auto Generation of Certificate
|
||||
# # auto_increment_increment: 1 # Auto Incrementing value from +1
|
||||
# # auto_increment_offset: 1 # Auto Increment Offset
|
||||
# # binlog-format: "STATEMENT" # contains various options such ROW(SLOW,SAFE) STATEMENT(FAST,UNSAFE), MIXED(combination of both)
|
||||
# # wait_timeout: 31536000 # 28800 number of seconds the server waits for activity on a non-interactive connection before closing it, You might encounter MySQL server has gone away error, you then tweak this value accordingly
|
||||
# # interactive_timeout: 28800 # The number of seconds the server waits for activity on an interactive connection before closing it.
|
||||
# # max_allowed_packet: "512M" # Maximum size of MYSQL Network protocol packet that the server can create or read 4MB, 8MB, 16MB, 32MB
|
||||
# # max-binlog-size: 1073741824 # binary logs contains the events that describe database changes, this parameter describe size for the bin_log file.
|
||||
# # log_output: "TABLE" # Format in which the logout will be dumped
|
||||
# # master-info-repository: "TABLE" # Format in which the master info will be dumped
|
||||
# # relay_log_info_repository: "TABLE" # Format in which the relay info will be dumped
|
||||
# volumeSpec:
|
||||
# persistentVolumeClaim:
|
||||
# accessModes:
|
||||
# - ReadWriteOnce
|
||||
# resources:
|
||||
# requests:
|
||||
# storage: 10Gi
|
||||
# YAML
|
||||
# depends_on = [helm_release.mysql]
|
||||
# # manifest = {
|
||||
# # apiVersion = "mysql.presslabs.org/v1alpha1"
|
||||
# # kind = "MysqlCluster"
|
||||
# # metadata = {
|
||||
# # name = "mysql-cluster"
|
||||
# # namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
# # }
|
||||
# # spec = {
|
||||
# # mysqlVersion = "5.7"
|
||||
# # replicas = 1
|
||||
# # secretName = "cluster-secret"
|
||||
# # mysqlConf = {
|
||||
# # read_only = 0
|
||||
# # }
|
||||
# # volumeSpec = {
|
||||
# # persistentVolumeClaim = {
|
||||
# # resources = {
|
||||
# # requests = {
|
||||
# # storage = "10Gi"
|
||||
# # }
|
||||
# # }
|
||||
# # }
|
||||
# # }
|
||||
# # }
|
||||
# # }
|
||||
# }
|
||||
|
||||
|
||||
# For some unknown reason not all CRDs are installed. Add them manually
|
||||
# resource "kubectl_manifest" "mysql-user" {
|
||||
# yaml_body = <<-EOF
|
||||
# apiVersion: apiextensions.k8s.io/v1
|
||||
# kind: CustomResourceDefinition
|
||||
# metadata:
|
||||
# annotations:
|
||||
# controller-gen.kubebuilder.io/version: v0.5.0
|
||||
# helm.sh/hook: crd-install
|
||||
# name: mysqlusers.mysql.presslabs.org
|
||||
# labels:
|
||||
# app: mysql-operator
|
||||
# spec:
|
||||
# group: mysql.presslabs.org
|
||||
# names:
|
||||
# kind: MysqlUser
|
||||
# listKind: MysqlUserList
|
||||
# plural: mysqlusers
|
||||
# singular: mysqluser
|
||||
# scope:namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
# versions:
|
||||
# - additionalPrinterColumns:
|
||||
# - description: The user status
|
||||
# jsonPath: .status.conditions[?(@.type == 'Ready')].status
|
||||
# name: Ready
|
||||
# type: string
|
||||
# - jsonPath: .spec.clusterRef.name
|
||||
# name: Cluster
|
||||
# type: string
|
||||
# - jsonPath: .spec.user
|
||||
# name: UserName
|
||||
# type: string
|
||||
# - jsonPath: .metadata.creationTimestamp
|
||||
# name: Age
|
||||
# type: date
|
||||
# name: v1alpha1
|
||||
# schema:
|
||||
# openAPIV3Schema:
|
||||
# description: MysqlUser is the Schema for the MySQL User API
|
||||
# properties:
|
||||
# apiVersion:
|
||||
# description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
# type: string
|
||||
# kind:
|
||||
# description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
# type: string
|
||||
# metadata:
|
||||
# type: object
|
||||
# spec:
|
||||
# description: MysqlUserSpec defines the desired state of MysqlUserSpec
|
||||
# properties:
|
||||
# allowedHosts:
|
||||
# description: AllowedHosts is the allowed host to connect from.
|
||||
# items:
|
||||
# type: string
|
||||
# type: array
|
||||
# clusterRef:
|
||||
# description: ClusterRef represents a reference to the MySQL cluster. This field should be immutable.
|
||||
# properties:
|
||||
# name:
|
||||
# description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
# type: string
|
||||
# namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
# description:namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
# type: string
|
||||
# type: object
|
||||
# password:
|
||||
# description: Password is the password for the user.
|
||||
# properties:
|
||||
# key:
|
||||
# description: The key of the secret to select from. Must be a valid secret key.
|
||||
# type: string
|
||||
# name:
|
||||
# description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
# type: string
|
||||
# optional:
|
||||
# description: Specify whether the Secret or its key must be defined
|
||||
# type: boolean
|
||||
# required:
|
||||
# - key
|
||||
# type: object
|
||||
# permissions:
|
||||
# description: Permissions is the list of roles that user has in the specified database.
|
||||
# items:
|
||||
# description: MysqlPermission defines a MySQL schema permission
|
||||
# properties:
|
||||
# permissions:
|
||||
# description: Permissions represents the permissions granted on the schema/tables
|
||||
# items:
|
||||
# type: string
|
||||
# type: array
|
||||
# schema:
|
||||
# description: Schema represents the schema to which the permission applies
|
||||
# type: string
|
||||
# tables:
|
||||
# description: Tables represents the tables inside the schema to which the permission applies
|
||||
# items:
|
||||
# type: string
|
||||
# type: array
|
||||
# required:
|
||||
# - permissions
|
||||
# - schema
|
||||
# - tables
|
||||
# type: object
|
||||
# type: array
|
||||
# resourceLimits:
|
||||
# additionalProperties:
|
||||
# anyOf:
|
||||
# - type: integer
|
||||
# - type: string
|
||||
# pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
# x-kubernetes-int-or-string: true
|
||||
# description: 'ResourceLimits allow settings limit per mysql user as defined here: https://dev.mysql.com/doc/refman/5.7/en/user-resources.html'
|
||||
# type: object
|
||||
# user:
|
||||
# description: User is the name of the user that will be created with will access the specified database. This field should be immutable.
|
||||
# type: string
|
||||
# required:
|
||||
# - allowedHosts
|
||||
# - clusterRef
|
||||
# - password
|
||||
# - user
|
||||
# type: object
|
||||
# status:
|
||||
# description: MysqlUserStatus defines the observed state of MysqlUser
|
||||
# properties:
|
||||
# allowedHosts:
|
||||
# description: AllowedHosts contains the list of hosts that the user is allowed to connect from.
|
||||
# items:
|
||||
# type: string
|
||||
# type: array
|
||||
# conditions:
|
||||
# description: Conditions represents the MysqlUser resource conditions list.
|
||||
# items:
|
||||
# description: MySQLUserCondition defines the condition struct for a MysqlUser resource
|
||||
# properties:
|
||||
# lastTransitionTime:
|
||||
# description: Last time the condition transitioned from one status to another.
|
||||
# format: date-time
|
||||
# type: string
|
||||
# lastUpdateTime:
|
||||
# description: The last time this condition was updated.
|
||||
# format: date-time
|
||||
# type: string
|
||||
# message:
|
||||
# description: A human readable message indicating details about the transition.
|
||||
# type: string
|
||||
# reason:
|
||||
# description: The reason for the condition's last transition.
|
||||
# type: string
|
||||
# status:
|
||||
# description: Status of the condition, one of True, False, Unknown.
|
||||
# type: string
|
||||
# type:
|
||||
# description: Type of MysqlUser condition.
|
||||
# type: string
|
||||
# required:
|
||||
# - lastTransitionTime
|
||||
# - message
|
||||
# - reason
|
||||
# - status
|
||||
# - type
|
||||
# type: object
|
||||
# type: array
|
||||
# type: object
|
||||
# type: object
|
||||
# served: true
|
||||
# storage: true
|
||||
# subresources:
|
||||
# status: {}
|
||||
# EOF
|
||||
# }
|
||||
|
||||
# PostgreSQL server deployment; data directory lives on an NFS export.
resource "kubernetes_deployment" "postgres" {
  metadata {
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    name      = "postgresql"
    labels = {
      tier = var.tier
    }
    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = { app = "postgresql" }
    }

    template {
      metadata {
        labels = { app = "postgresql" }
        annotations = {
          "diun.enable"       = "false"
          "diun.include_tags" = "^\\d+(?:\\.\\d+)?-bullseye$"
        }
      }

      spec {
        volume {
          name = "postgresql-persistent-storage"
          nfs {
            server = "10.0.10.15"
            path   = "/mnt/main/postgresql/data"
          }
        }

        container {
          name = "postgresql"

          # Custom image: mix of postgis + pgvector.
          # Alternatives that were tried:
          #   postgis/postgis:16-master
          #   postgres:17.2-bullseye (needs pg_upgrade to data dir)
          image = "viktorbarzin/postgres:16-master"

          env {
            name  = "POSTGRES_PASSWORD"
            value = var.postgresql_root_password
          }
          env {
            name  = "POSTGRES_USER"
            value = "root"
          }

          port {
            name           = "postgresql"
            protocol       = "TCP"
            container_port = 5432
          }

          volume_mount {
            name       = "postgresql-persistent-storage"
            mount_path = "/var/lib/postgresql/data"
          }
        }
      }
    }
  }
}
|
||||
# Service "postgresql" forwarding port 5432 to the pods labelled app=postgresql.
resource "kubernetes_service" "postgresql" {
  metadata {
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    name      = "postgresql"
  }

  spec {
    port {
      name        = "postgresql"
      target_port = 5432
      port        = 5432
    }

    selector = {
      "app" = "postgresql"
    }
  }
}
|
||||
|
||||
#### pgAdmin

# pgAdmin web UI; its state directory is kept on an NFS export.
resource "kubernetes_deployment" "pgadmin" {
  metadata {
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    name      = "pgadmin"
    labels = {
      tier = var.tier
    }
    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    selector {
      match_labels = { app = "pgadmin" }
    }

    template {
      metadata {
        labels = { app = "pgadmin" }
      }

      spec {
        volume {
          name = "pgadmin"
          nfs {
            server = "10.0.10.15"
            path   = "/mnt/main/postgresql/pgadmin"
          }
        }

        container {
          name  = "pgadmin"
          image = "dpage/pgadmin4"

          env {
            name  = "PGADMIN_DEFAULT_EMAIL"
            value = "me@viktorbarzin.me"
          }
          env {
            name = "PGADMIN_DEFAULT_PASSWORD"
            # Original author's note: changed at startup.
            value = var.pgadmin_password
          }

          port {
            name           = "web"
            container_port = 80
          }

          volume_mount {
            name       = "pgadmin"
            mount_path = "/var/lib/pgadmin/"
          }
        }
      }
    }
  }
}
|
||||
# Service "pgadmin" exposing port 80 of the pods labelled app=pgadmin.
resource "kubernetes_service" "pgadmin" {
  metadata {
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    name      = "pgadmin"
  }

  spec {
    port {
      port = 80
      name = "pgadmin"
    }

    selector = {
      "app" = "pgadmin"
    }
  }
}
|
||||
# Ingress for pgAdmin, built by the shared ingress_factory module.
module "ingress-pgadmin" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "pgadmin"
  namespace       = kubernetes_namespace.dbaas.metadata[0].name
  tls_secret_name = var.tls_secret_name
  protected       = true
  rybbit_site_id  = "7cef78e30485"
}
|
||||
|
||||
|
||||
# Nightly logical backup of every PostgreSQL database.
#
# Runs pg_dumpall against the in-cluster `postgresql.dbaas` Service at 00:00,
# writes a timestamped dump to the NFS-backed /backup volume and then removes
# dumps older than 7 days.
resource "kubernetes_cron_job_v1" "postgresql-backup" {
  metadata {
    name      = "postgresql-backup"
    namespace = kubernetes_namespace.dbaas.metadata[0].name
  }

  spec {
    concurrency_policy        = "Replace"
    failed_jobs_history_limit = 5
    schedule                  = "0 0 * * *"
    # schedule = "* * * * *"
    starting_deadline_seconds     = 10
    successful_jobs_history_limit = 10

    job_template {
      metadata {}

      spec {
        backoff_limit              = 3
        ttl_seconds_after_finished = 10

        template {
          metadata {}

          spec {
            container {
              name  = "postgresql-backup"
              image = "postgres:16.4-bullseye"

              # The password is passed via the PGPASSWORD environment variable
              # instead of being interpolated into the script. Inline
              # interpolation echoed it into the job log through `set -x`,
              # exposed it in the process arguments, and broke (or allowed
              # shell injection) when it contained shell metacharacters.
              env {
                name  = "PGPASSWORD"
                value = var.postgresql_root_password
              }

              command = ["/bin/bash", "-c", <<-EOT
                set -euxo pipefail
                export now=$(date +"%Y_%m_%d_%H_%M")
                pg_dumpall -h postgresql.dbaas -U root > /backup/dump_$now.sql

                # Rotate - delete dumps older than the retention window
                cd /backup
                find . -name "dump_*.sql" -type f -mtime +7 -delete # 7 day retention of backups
                echo Done
              EOT
              ]

              volume_mount {
                name       = "postgresql-backup"
                mount_path = "/backup"
              }
            }

            volume {
              name = "postgresql-backup"

              nfs {
                path   = "/mnt/main/postgresql-backup"
                server = "10.0.10.15"
              }
            }
          }
        }
      }
    }
  }
}
|
||||
14
stacks/platform/modules/dbaas/mysql_chart_values.yaml
Normal file
14
stacks/platform/modules/dbaas/mysql_chart_values.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
---
# Helm values template for the MySQL chart. `${secretName}` is a template
# placeholder filled in by the caller.
orchestrator:
  # persistence:
  #   enabled: false
  ingress:
    # NOTE(review): the key is spelled `enable`; confirm the chart does not
    # expect `enabled`.
    enable: false
    hosts:
      - host: db.viktorbarzin.me
        paths:
          - path: /
    tls:
      - secretName: ${secretName}
        hosts:
          - db.viktorbarzin.me
|
||||
30
stacks/platform/modules/dbaas/postgres/postgres_Dockerfile
Normal file
30
stacks/platform/modules/dbaas/postgres/postgres_Dockerfile
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# Custom PostgreSQL 16 image: PostGIS base plus vector-search extensions.

# Helper stage that only serves as the source of the prebuilt vectors .deb.
# NOTE(review): the file name looks like a pgvecto.rs binary release; confirm
# that it is actually shipped in this pgvector image.
FROM pgvector/pgvector:0.8.0-pg16 AS binary

# Use the PostGIS image as the base
FROM postgis/postgis:16-master
COPY --from=binary /pgvecto-rs-binary-release.deb /tmp/vectors.deb
RUN apt-get install -y /tmp/vectors.deb && rm -f /tmp/vectors.deb

# Install necessary packages, build pgvector v0.8.0 from source, then purge
# the build toolchain again within the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    build-essential \
    libpq-dev \
    wget \
    git \
    postgresql-server-dev-16 \
    postgresql-16-pgvector \
    # Clean up to reduce layer size
    && rm -rf /var/lib/apt/lists/* \
    && cd /tmp \
    && git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git \
    && cd pgvector \
    && make \
    && make install \
    # Clean up unnecessary files
    && cd - \
    && apt-get purge -y --auto-remove build-essential postgresql-server-dev-16 libpq-dev wget git \
    && rm -rf /tmp/pgvector

# Copy initialization scripts
#COPY ./docker-entrypoint-initdb.d/ /docker-entrypoint-initdb.d/

# Exec form: "$user" is passed to postgres literally, not expanded by a shell.
CMD ["postgres", "-c" ,"shared_preload_libraries=vectors.so", "-c", "search_path=\"$user\", public, vectors", "-c", "logging_collector=on"]
|
||||
9
stacks/platform/modules/dbaas/versions.tf
Normal file
9
stacks/platform/modules/dbaas/versions.tf
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# terraform {
|
||||
# required_providers {
|
||||
# kubectl = {
|
||||
# source = "gavinbunney/kubectl"
|
||||
# version = ">= 1.10.0"
|
||||
# }
|
||||
# }
|
||||
# required_version = ">= 0.13"
|
||||
# }
|
||||
254
stacks/platform/modules/headscale/main.tf
Normal file
254
stacks/platform/modules/headscale/main.tf
Normal file
|
|
@ -0,0 +1,254 @@
|
|||
|
||||
# Name of the TLS secret, forwarded to the tls_secret and ingress modules.
variable "tls_secret_name" {
}

# Value for the `tier` label on the namespace and the Deployment.
variable "tier" {
  type = string
}

# Stored as config.yaml in the headscale-config ConfigMap.
variable "headscale_config" {
}

# Stored as acl.yaml in the headscale-config ConfigMap.
variable "headscale_acl" {
}
|
||||
|
||||
# Dedicated namespace for headscale, labelled with the caller-supplied tier.
resource "kubernetes_namespace" "headscale" {
  metadata {
    name   = "headscale"
    labels = { tier = var.tier }
  }
}
|
||||
|
||||
# Sets up the TLS secret in the headscale namespace via the shared module.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.headscale.metadata[0].name
}
|
||||
|
||||
# Secrets for the headscale-ui sidecar. They used to be literals inside the
# Deployment; they are now inputs so that callers can override them. The
# defaults equal the previous literals, so existing callers plan with no diff.
variable "headscale_ui_cookie_secret" {
  description = "COOKIE_SECRET passed to the headscale-ui container."
  type        = string
  default     = "kekekekke"
  sensitive   = true
}

variable "headscale_ui_root_api_key" {
  description = "ROOT_API_KEY passed to the headscale-ui container."
  type        = string
  default     = "kekekekeke"
  sensitive   = true
}

# Single-replica headscale server plus a headscale-ui sidecar.
#
# - Strategy is Recreate, so the old pod is removed before a new one starts.
# - /etc/headscale is populated from the headscale-config ConfigMap
#   (config.yaml and acl.yaml).
# - /mnt is an NFS export.
resource "kubernetes_deployment" "headscale" {
  metadata {
    name      = "headscale"
    namespace = kubernetes_namespace.headscale.metadata[0].name

    labels = {
      app  = "headscale"
      tier = var.tier
      # scare to try but probably non-http will fail
      # "istio-injection" : "enabled"
    }

    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    replicas = 1

    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = {
        app = "headscale"
      }
    }

    template {
      metadata {
        labels = {
          app = "headscale"
        }

        annotations = {
          # "diun.enable" = "true"
          "diun.enable"       = "false"
          "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
        }
      }

      spec {
        container {
          image = "headscale/headscale:0.23.0"
          # image = "headscale/headscale:0.23.0-debug" # -debug is for debug images
          name    = "headscale"
          command = ["headscale", "serve"]

          port {
            container_port = 8080
          }
          port {
            container_port = 9090
          }
          port {
            container_port = 41641
          }

          volume_mount {
            name       = "config-volume"
            mount_path = "/etc/headscale"
          }

          volume_mount {
            mount_path = "/mnt"
            name       = "nfs-config"
          }
        }

        volume {
          name = "config-volume"

          config_map {
            name = "headscale-config"

            items {
              key  = "config.yaml"
              path = "config.yaml"
            }
            items {
              key  = "acl.yaml"
              path = "acl.yaml"
            }
          }
        }

        volume {
          name = "nfs-config"

          nfs {
            path   = "/mnt/main/headscale"
            server = "10.0.10.15"
          }
        }

        # container {
        #   image = "simcu/headscale-ui:0.1.4"
        #   name  = "headscale-ui"
        #   port {
        #     container_port = 80
        #   }
        # }

        # Web UI sidecar; reaches headscale over localhost inside the pod.
        # NOTE(review): the image uses the mutable `latest` tag; consider
        # pinning a version.
        container {
          image = "ghcr.io/gurucomputing/headscale-ui:latest"
          # image = "ghcr.io/tale/headplane:0.3.2"
          name = "headscale-ui"

          port {
            container_port = 8081
            # container_port = 3000
          }

          env {
            name  = "HTTP_PORT"
            value = "8081"
          }
          # env {
          #   name  = "HTTPS_PORT"
          #   value = "8082"
          # }
          env {
            name  = "HEADSCALE_URL"
            value = "http://localhost:8080"
          }
          env {
            name  = "COOKIE_SECRET"
            value = var.headscale_ui_cookie_secret
          }
          env {
            name  = "ROOT_API_KEY"
            value = var.headscale_ui_root_api_key
          }
        }
      }
    }
  }
}
|
||||
# Internal Service for the headscale pod. Publishes the API (8080), the web UI
# (80 -> container port 8081) and metrics (9090), and carries Prometheus
# scrape annotations for port 9090.
resource "kubernetes_service" "headscale" {
  metadata {
    namespace = kubernetes_namespace.headscale.metadata[0].name
    name      = "headscale"

    labels = {
      app = "headscale"
    }

    annotations = {
      "prometheus.io/port"   = "9090"
      "prometheus.io/scrape" = "true"
    }
    # annotations = {
    #   "metallb.universe.tf/allow-shared-ip" : "shared"
    # }
  }

  spec {
    # type                    = "LoadBalancer"
    # external_traffic_policy = "Cluster"
    selector = {
      app = "headscale"
    }

    port {
      name     = "headscale"
      protocol = "TCP"
      port     = "8080"
    }

    port {
      name        = "headscale-ui"
      protocol    = "TCP"
      port        = "80"
      target_port = 8081
      # target_port = 3000
    }

    port {
      name     = "metrics"
      protocol = "TCP"
      port     = "9090"
    }
  }
}
|
||||
|
||||
# Ingress for the headscale API on port 8080, via the shared ingress_factory.
module "ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "headscale"
  namespace       = kubernetes_namespace.headscale.metadata[0].name
  tls_secret_name = var.tls_secret_name
  port            = 8080
}
|
||||
|
||||
# Ingress for the web UI: serves the /web path on the "headscale" host and
# points at the "headscale" Service.
# NOTE(review): the "headscale" Service publishes the UI on port 80
# (target_port 8081), not on 8081; confirm which port ingress_factory expects.
module "ingress-ui" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "headscale-ui"
  namespace       = kubernetes_namespace.headscale.metadata[0].name
  tls_secret_name = var.tls_secret_name

  host         = "headscale"
  ingress_path = ["/web"]
  service_name = "headscale"
  port         = 8081
}
|
||||
|
||||
# LoadBalancer Service publishing UDP 41641 for the headscale pod. The MetalLB
# annotation sets the allow-shared-ip key to "shared".
resource "kubernetes_service" "headscale-server" {
  metadata {
    namespace = kubernetes_namespace.headscale.metadata[0].name
    name      = "headscale-server"

    labels = {
      app = "headscale"
    }

    annotations = {
      "metallb.universe.tf/allow-shared-ip" = "shared"
    }
  }

  spec {
    type                    = "LoadBalancer"
    external_traffic_policy = "Cluster"

    selector = {
      app = "headscale"
    }

    # port {
    #   name     = "headscale-tcp"
    #   port     = "41641"
    #   protocol = "TCP"
    # }

    port {
      name     = "headscale-udp"
      protocol = "UDP"
      port     = "41641"
    }
  }
}
|
||||
|
||||
# ConfigMap mounted at /etc/headscale by the headscale Deployment. It carries
# the Stakater Reloader "match" annotation.
resource "kubernetes_config_map" "headscale-config" {
  metadata {
    namespace = kubernetes_namespace.headscale.metadata[0].name
    name      = "headscale-config"

    annotations = {
      "reloader.stakater.com/match" = "true"
    }
  }

  data = {
    "acl.yaml"    = var.headscale_acl
    "config.yaml" = var.headscale_config
  }
}
|
||||
212
stacks/platform/modules/infra-maintenance/main.tf
Normal file
212
stacks/platform/modules/infra-maintenance/main.tf
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
# Module to run some infra-specific things like updating the public ip
|
||||
# Module inputs. In this file they are referenced only by the commented-out
# update-public-ip CronJob.
variable "git_user" {
}

variable "git_token" {
}

variable "technitium_username" {
}

variable "technitium_password" {
}
|
||||
|
||||
|
||||
# DISABLED WHILST USING CLOUDFLARE NS
|
||||
# resource "kubernetes_cron_job_v1" "update-public-ip" {
|
||||
# metadata {
|
||||
# name = "update-public-ip"
|
||||
# namespace = "default"
|
||||
# }
|
||||
# spec {
|
||||
# schedule = "*/5 * * * *"
|
||||
# successful_jobs_history_limit = 1
|
||||
# failed_jobs_history_limit = 1
|
||||
# concurrency_policy = "Forbid"
|
||||
# job_template {
|
||||
# metadata {
|
||||
# name = "update-public-ip"
|
||||
# }
|
||||
# spec {
|
||||
# template {
|
||||
# metadata {
|
||||
# name = "update-public-ip"
|
||||
# }
|
||||
# spec {
|
||||
# priority_class_name = "system-cluster-critical"
|
||||
# container {
|
||||
# name = "update-public-ip"
|
||||
# image = "viktorbarzin/infra"
|
||||
# command = ["./infra_cli"]
|
||||
# args = ["-use-case", "update-public-ip"]
|
||||
|
||||
# env {
|
||||
# name = "GIT_USER"
|
||||
# value = var.git_user
|
||||
# }
|
||||
# env {
|
||||
# name = "GIT_TOKEN"
|
||||
# value = var.git_token
|
||||
# }
|
||||
# env {
|
||||
# name = "TECHNITIUM_USERNAME"
|
||||
# value = var.technitium_username
|
||||
# }
|
||||
# env {
|
||||
# name = "TECHNITIUM_PASSWORD"
|
||||
# value = var.technitium_password
|
||||
# }
|
||||
# }
|
||||
# restart_policy = "Never"
|
||||
# # service_account_name = "descheduler-sa"
|
||||
# # volume {
|
||||
# # name = "policy-volume"
|
||||
# # config_map {
|
||||
# # name = "policy-configmap"
|
||||
# # }
|
||||
# # }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
# # backup etcd
|
||||
# Daily etcd snapshot (00:00), pinned to the k8s-master node.
#
# The pod uses the host network and mounts the host's etcd PKI directory
# read-only. One container saves a timestamped snapshot to the NFS-backed
# /backup volume; a second container deletes *.db files older than 30 days
# from the same volume.
resource "kubernetes_cron_job_v1" "backup-etcd" {
  metadata {
    namespace = "default"
    name      = "backup-etcd"
  }

  spec {
    schedule                      = "0 0 * * *"
    concurrency_policy            = "Forbid"
    failed_jobs_history_limit     = 1
    successful_jobs_history_limit = 1

    job_template {
      metadata {
        name = "backup-etcd"
      }

      spec {
        template {
          metadata {
            name = "backup-etcd"
          }

          spec {
            node_name           = "k8s-master"
            host_network        = true
            priority_class_name = "system-cluster-critical"
            restart_policy      = "Never"

            # Snapshot writer.
            container {
              name    = "backup-etcd"
              image   = "k8s.gcr.io/etcd-amd64:3.3.15"
              command = ["/bin/sh"]
              args    = ["-c", "etcdctl --endpoints=https://127.0.0.1:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key snapshot save /backup/etcd-snapshot-$(date +%Y_%m_%d_%H:%M:%S_%Z).db"]

              env {
                name  = "ETCDCTL_API"
                value = "3"
              }

              volume_mount {
                name       = "backup"
                mount_path = "/backup"
              }

              volume_mount {
                name       = "etcd-certs"
                mount_path = "/etc/kubernetes/pki/etcd"
                read_only  = true
              }
            }

            # Retention: removes snapshots older than 30 days.
            container {
              name    = "backup-purge"
              image   = "busybox:1.31.1"
              command = ["/bin/sh"]
              args    = ["-c", "find /backup -type f -mtime +30 -name '*.db' -exec rm -- '{}' \\;"]

              volume_mount {
                name       = "backup"
                mount_path = "/backup"
              }
            }

            volume {
              name = "backup"

              nfs {
                server = "10.0.10.15"
                path   = "/mnt/main/etcd-backup"
              }
            }

            volume {
              name = "etcd-certs"

              host_path {
                type = "DirectoryOrCreate"
                path = "/etc/kubernetes/pki/etcd"
              }
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Clean up evicted/failed pods cluster-wide daily
|
||||
# Daily job (02:00) that deletes every pod in phase Failed across all
# namespaces, running under the failed-pod-cleanup service account.
resource "kubernetes_cron_job_v1" "cleanup-failed-pods" {
  metadata {
    namespace = "default"
    name      = "cleanup-failed-pods"
  }

  spec {
    schedule                      = "0 2 * * *"
    concurrency_policy            = "Forbid"
    failed_jobs_history_limit     = 1
    successful_jobs_history_limit = 1

    job_template {
      metadata {
        name = "cleanup-failed-pods"
      }

      spec {
        template {
          metadata {
            name = "cleanup-failed-pods"
          }

          spec {
            restart_policy       = "Never"
            service_account_name = kubernetes_service_account.cleanup_sa.metadata[0].name

            container {
              name    = "cleanup"
              image   = "bitnami/kubectl:latest"
              command = ["/bin/sh", "-c", "kubectl delete pods -A --field-selector=status.phase=Failed --ignore-not-found"]
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Identity used by the cleanup-failed-pods CronJob.
resource "kubernetes_service_account" "cleanup_sa" {
  metadata {
    namespace = "default"
    name      = "failed-pod-cleanup"
  }
}
|
||||
|
||||
# Cluster-wide permission to list and delete pods (core API group).
resource "kubernetes_cluster_role" "cleanup_role" {
  metadata {
    name = "failed-pod-cleanup"
  }

  rule {
    verbs      = ["list", "delete"]
    resources  = ["pods"]
    api_groups = [""]
  }
}
|
||||
|
||||
# Grants the failed-pod-cleanup ClusterRole to the cleanup service account in
# the default namespace.
resource "kubernetes_cluster_role_binding" "cleanup_binding" {
  metadata {
    name = "failed-pod-cleanup"
  }

  subject {
    kind      = "ServiceAccount"
    namespace = "default"
    name      = kubernetes_service_account.cleanup_sa.metadata[0].name
  }

  role_ref {
    kind      = "ClusterRole"
    api_group = "rbac.authorization.k8s.io"
    name      = kubernetes_cluster_role.cleanup_role.metadata[0].name
  }
}
|
||||
23
stacks/platform/modules/k8s-portal/files/.gitignore
vendored
Normal file
23
stacks/platform/modules/k8s-portal/files/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
node_modules
|
||||
|
||||
# Output
|
||||
.output
|
||||
.vercel
|
||||
.netlify
|
||||
.wrangler
|
||||
/.svelte-kit
|
||||
/build
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Env
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
!.env.test
|
||||
|
||||
# Vite
|
||||
vite.config.js.timestamp-*
|
||||
vite.config.ts.timestamp-*
|
||||
1
stacks/platform/modules/k8s-portal/files/.npmrc
Normal file
1
stacks/platform/modules/k8s-portal/files/.npmrc
Normal file
|
|
@ -0,0 +1 @@
|
|||
engine-strict=true
|
||||
15
stacks/platform/modules/k8s-portal/files/Dockerfile
Normal file
15
stacks/platform/modules/k8s-portal/files/Dockerfile
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# Build stage: install dependencies from the lockfile and build the app.
FROM node:22-alpine AS build
WORKDIR /app
# Copy the manifests first so the dependency layer is cached independently of
# source changes.
COPY package*.json ./
RUN npm ci
COPY . .
RUN npm run build

# Runtime stage: only the build output, package.json and node_modules.
FROM node:22-alpine
WORKDIR /app
COPY --from=build /app/build ./build
COPY --from=build /app/package.json ./
# node_modules comes straight from the build stage's `npm ci`, so it includes
# devDependencies.
COPY --from=build /app/node_modules ./node_modules
ENV PORT=3000
EXPOSE 3000
CMD ["node", "build"]
|
||||
42
stacks/platform/modules/k8s-portal/files/README.md
Normal file
42
stacks/platform/modules/k8s-portal/files/README.md
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# sv
|
||||
|
||||
Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli).
|
||||
|
||||
## Creating a project
|
||||
|
||||
If you're seeing this, you've probably already done this step. Congrats!
|
||||
|
||||
```sh
|
||||
# create a new project
|
||||
npx sv create my-app
|
||||
```
|
||||
|
||||
To recreate this project with the same configuration:
|
||||
|
||||
```sh
|
||||
# recreate this project
|
||||
npx sv create --template minimal --types ts --install npm .
|
||||
```
|
||||
|
||||
## Developing
|
||||
|
||||
Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
|
||||
|
||||
```sh
|
||||
npm run dev
|
||||
|
||||
# or start the server and open the app in a new browser tab
|
||||
npm run dev -- --open
|
||||
```
|
||||
|
||||
## Building
|
||||
|
||||
To create a production version of your app:
|
||||
|
||||
```sh
|
||||
npm run build
|
||||
```
|
||||
|
||||
You can preview the production build with `npm run preview`.
|
||||
|
||||
> To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment.
|
||||
1844
stacks/platform/modules/k8s-portal/files/package-lock.json
generated
Normal file
1844
stacks/platform/modules/k8s-portal/files/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load diff
24
stacks/platform/modules/k8s-portal/files/package.json
Normal file
24
stacks/platform/modules/k8s-portal/files/package.json
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"name": "files",
|
||||
"private": true,
|
||||
"version": "0.0.1",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite dev",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview",
|
||||
"prepare": "svelte-kit sync || echo ''",
|
||||
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
|
||||
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@sveltejs/adapter-auto": "^7.0.0",
|
||||
"@sveltejs/adapter-node": "^5.5.3",
|
||||
"@sveltejs/kit": "^2.50.2",
|
||||
"@sveltejs/vite-plugin-svelte": "^6.2.4",
|
||||
"svelte": "^5.49.2",
|
||||
"svelte-check": "^4.3.6",
|
||||
"typescript": "^5.9.3",
|
||||
"vite": "^7.3.1"
|
||||
}
|
||||
}
|
||||
13
stacks/platform/modules/k8s-portal/files/src/app.d.ts
vendored
Normal file
13
stacks/platform/modules/k8s-portal/files/src/app.d.ts
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
// See https://svelte.dev/docs/kit/types#app.d.ts
|
||||
// for information about these interfaces
|
||||
declare global {
|
||||
namespace App {
|
||||
// interface Error {}
|
||||
// interface Locals {}
|
||||
// interface PageData {}
|
||||
// interface PageState {}
|
||||
// interface Platform {}
|
||||
}
|
||||
}
|
||||
|
||||
export {};
|
||||
11
stacks/platform/modules/k8s-portal/files/src/app.html
Normal file
11
stacks/platform/modules/k8s-portal/files/src/app.html
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
%sveltekit.head%
|
||||
</head>
|
||||
<body data-sveltekit-preload-data="hover">
|
||||
<div style="display: contents">%sveltekit.body%</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="107" height="128" viewBox="0 0 107 128"><title>svelte-logo</title><path d="M94.157 22.819c-10.4-14.885-30.94-19.297-45.792-9.835L22.282 29.608A29.92 29.92 0 0 0 8.764 49.65a31.5 31.5 0 0 0 3.108 20.231 30 30 0 0 0-4.477 11.183 31.9 31.9 0 0 0 5.448 24.116c10.402 14.887 30.942 19.297 45.791 9.835l26.083-16.624A29.92 29.92 0 0 0 98.235 78.35a31.53 31.53 0 0 0-3.105-20.232 30 30 0 0 0 4.474-11.182 31.88 31.88 0 0 0-5.447-24.116" style="fill:#ff3e00"/><path d="M45.817 106.582a20.72 20.72 0 0 1-22.237-8.243 19.17 19.17 0 0 1-3.277-14.503 18 18 0 0 1 .624-2.435l.49-1.498 1.337.981a33.6 33.6 0 0 0 10.203 5.098l.97.294-.09.968a5.85 5.85 0 0 0 1.052 3.878 6.24 6.24 0 0 0 6.695 2.485 5.8 5.8 0 0 0 1.603-.704L69.27 76.28a5.43 5.43 0 0 0 2.45-3.631 5.8 5.8 0 0 0-.987-4.371 6.24 6.24 0 0 0-6.698-2.487 5.7 5.7 0 0 0-1.6.704l-9.953 6.345a19 19 0 0 1-5.296 2.326 20.72 20.72 0 0 1-22.237-8.243 19.17 19.17 0 0 1-3.277-14.502 17.99 17.99 0 0 1 8.13-12.052l26.081-16.623a19 19 0 0 1 5.3-2.329 20.72 20.72 0 0 1 22.237 8.243 19.17 19.17 0 0 1 3.277 14.503 18 18 0 0 1-.624 2.435l-.49 1.498-1.337-.98a33.6 33.6 0 0 0-10.203-5.1l-.97-.294.09-.968a5.86 5.86 0 0 0-1.052-3.878 6.24 6.24 0 0 0-6.696-2.485 5.8 5.8 0 0 0-1.602.704L37.73 51.72a5.42 5.42 0 0 0-2.449 3.63 5.79 5.79 0 0 0 .986 4.372 6.24 6.24 0 0 0 6.698 2.486 5.8 5.8 0 0 0 1.602-.704l9.952-6.342a19 19 0 0 1 5.295-2.328 20.72 20.72 0 0 1 22.237 8.242 19.17 19.17 0 0 1 3.277 14.503 18 18 0 0 1-8.13 12.053l-26.081 16.622a19 19 0 0 1-5.3 2.328" style="fill:#fff"/></svg>
|
||||
|
After Width: | Height: | Size: 1.5 KiB |
|
|
@ -0,0 +1 @@
|
|||
// place files you want to import through the `$lib` alias in this folder.
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
<script lang="ts">
|
||||
import favicon from '$lib/assets/favicon.svg';
|
||||
|
||||
let { children } = $props();
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<link rel="icon" href={favicon} />
|
||||
</svelte:head>
|
||||
|
||||
{@render children()}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
import type { PageServerLoad } from './$types';
|
||||
import { readFileSync } from 'fs';
|
||||
|
||||
interface UserRole {
|
||||
role: string;
|
||||
namespaces: string[];
|
||||
}
|
||||
|
||||
/**
 * Server-side loader for the portal landing page.
 *
 * Reads the caller's identity from the `x-authentik-*` request headers and
 * looks the e-mail address up in the ConfigMap-mounted `/config/users.json`
 * to find the user's role and namespaces.
 *
 * Falls back to role `unknown` with no namespaces when the file is missing or
 * unparsable, when the e-mail has no entry, or when an entry's fields do not
 * have the expected shape.
 *
 * @returns email, username, groups (header value split on `|`), role, namespaces
 */
export const load: PageServerLoad = async ({ request }) => {
	const email = request.headers.get('x-authentik-email') || 'unknown';
	const username = request.headers.get('x-authentik-username') || 'unknown';
	const groups = request.headers.get('x-authentik-groups') || '';

	// Read user roles from ConfigMap-mounted file
	let userRole: UserRole = { role: 'unknown', namespaces: [] };
	try {
		const users = JSON.parse(readFileSync('/config/users.json', 'utf-8'));
		// Own-property check: a plain `users[email]` lookup would also resolve
		// inherited keys such as "constructor" or "toString".
		if (
			users !== null &&
			typeof users === 'object' &&
			Object.prototype.hasOwnProperty.call(users, email)
		) {
			const entry = users[email];
			// Validate the shape so the page never receives an undefined
			// `namespaces` (it reads `namespaces.length`).
			userRole = {
				role: typeof entry?.role === 'string' ? entry.role : 'unknown',
				namespaces: Array.isArray(entry?.namespaces) ? entry.namespaces : []
			};
		}
	} catch {
		// ConfigMap not mounted or parse error
	}

	return {
		email,
		username,
		groups: groups.split('|').filter(Boolean),
		role: userRole.role,
		namespaces: userRole.namespaces
	};
};
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
<script lang="ts">
|
||||
let { data } = $props();
|
||||
</script>
|
||||
|
||||
<main>
|
||||
<h1>Kubernetes Access Portal</h1>
|
||||
|
||||
<section>
|
||||
<h2>Your Identity</h2>
|
||||
<p><strong>Username:</strong> {data.username}</p>
|
||||
<p><strong>Email:</strong> {data.email}</p>
|
||||
<p><strong>Role:</strong> {data.role}</p>
|
||||
{#if data.namespaces.length > 0}
|
||||
<p><strong>Namespaces:</strong> {data.namespaces.join(', ')}</p>
|
||||
{/if}
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Get Started</h2>
|
||||
<ol>
|
||||
<li><a href="/setup">Install kubectl and kubelogin</a></li>
|
||||
<li><a href="/download">Download your kubeconfig</a></li>
|
||||
<li>Run <code>kubectl get pods</code> to verify access</li>
|
||||
</ol>
|
||||
</section>
|
||||
</main>
|
||||
|
||||
<style>
|
||||
main {
|
||||
max-width: 640px;
|
||||
margin: 2rem auto;
|
||||
font-family: system-ui;
|
||||
}
|
||||
code {
|
||||
background: #f0f0f0;
|
||||
padding: 2px 6px;
|
||||
border-radius: 3px;
|
||||
}
|
||||
section {
|
||||
margin: 2rem 0;
|
||||
}
|
||||
</style>
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
import type { RequestHandler } from './$types';
|
||||
import { readFileSync } from 'fs';
|
||||
|
||||
const CLUSTER_SERVER = 'https://10.0.20.100:6443';
|
||||
const OIDC_ISSUER = 'https://authentik.viktorbarzin.me/application/o/kubernetes/';
|
||||
const OIDC_CLIENT_ID = 'kubernetes';
|
||||
|
||||
/**
 * Serves a kubeconfig for the home cluster as a file download.
 *
 * The user entry is named `oidc-<email>`, where the e-mail comes from the
 * `x-authentik-email` header (default `user`) with every character outside
 * `[a-zA-Z0-9@._-]` removed. The cluster CA is read from `/config/ca.crt`;
 * if that file cannot be read, the CA field is emitted empty.
 */
export const GET: RequestHandler = async ({ request }) => {
	const headerEmail = request.headers.get('x-authentik-email') || 'user';
	const userEntry = `oidc-${headerEmail.replace(/[^a-zA-Z0-9@._-]/g, '')}`;

	// Read CA cert from mounted ConfigMap
	let caPem = '';
	try {
		caPem = readFileSync('/config/ca.crt', 'utf-8');
	} catch {
		// CA cert not available
	}
	const caData = Buffer.from(caPem).toString('base64');

	// One array element per line of the generated YAML document.
	const yamlLines = [
		'apiVersion: v1',
		'kind: Config',
		'clusters:',
		'- cluster:',
		`    server: ${CLUSTER_SERVER}`,
		`    certificate-authority-data: ${caData}`,
		'  name: home-cluster',
		'contexts:',
		'- context:',
		'    cluster: home-cluster',
		`    user: ${userEntry}`,
		'  name: home-cluster',
		'current-context: home-cluster',
		'users:',
		`- name: ${userEntry}`,
		'  user:',
		'    exec:',
		'      apiVersion: client.authentication.k8s.io/v1beta1',
		'      command: kubectl',
		'      args:',
		'      - oidc-login',
		'      - get-token',
		`      - --oidc-issuer-url=${OIDC_ISSUER}`,
		`      - --oidc-client-id=${OIDC_CLIENT_ID}`,
		'      - --oidc-extra-scope=email',
		'      - --oidc-extra-scope=profile',
		'      - --oidc-extra-scope=groups',
		'      interactiveMode: IfAvailable'
	];
	const kubeconfig = yamlLines.join('\n') + '\n';

	const headers = {
		'Content-Type': 'application/yaml',
		'Content-Disposition': `attachment; filename="kubeconfig-home-cluster.yaml"`
	};
	return new Response(kubeconfig, { headers });
};
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
<main>
|
||||
<h1>Setup Instructions</h1>
|
||||
|
||||
<section>
|
||||
<h2>Quick Setup (one command)</h2>
|
||||
<p>Run this in your terminal to install everything and configure kubectl automatically:</p>
|
||||
<h3>macOS</h3>
|
||||
<pre>bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=mac)</pre>
|
||||
<h3>Linux</h3>
|
||||
<pre>bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)</pre>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Manual Setup</h2>
|
||||
|
||||
<h3>1. Install kubectl</h3>
|
||||
<h4>macOS</h4>
|
||||
<pre>brew install kubectl</pre>
|
||||
<h4>Linux</h4>
|
||||
<pre>curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
|
||||
chmod +x kubectl && sudo mv kubectl /usr/local/bin/</pre>
|
||||
|
||||
<h3>2. Install kubelogin (OIDC plugin)</h3>
|
||||
<h4>macOS</h4>
|
||||
<pre>brew install int128/kubelogin/kubelogin</pre>
|
||||
<h4>Linux</h4>
|
||||
<pre>curl -LO https://github.com/int128/kubelogin/releases/latest/download/kubelogin_linux_amd64.zip
|
||||
unzip kubelogin_linux_amd64.zip && sudo mv kubelogin /usr/local/bin/kubectl-oidc_login
|
||||
rm kubelogin_linux_amd64.zip</pre>
|
||||
|
||||
<h3>3. Download and use your kubeconfig</h3>
|
||||
<pre>
|
||||
mkdir -p ~/.kube
|
||||
|
||||
# Download from the portal (requires auth cookie from browser)
|
||||
# Or use the download button on the portal homepage
|
||||
|
||||
# Set the KUBECONFIG environment variable
|
||||
export KUBECONFIG=~/.kube/config-home
|
||||
|
||||
# Test access (opens browser for login)
|
||||
kubectl get namespaces
|
||||
</pre>
|
||||
</section>
|
||||
|
||||
<p><a href="/">← Back to portal</a></p>
|
||||
</main>
|
||||
|
||||
<style>
|
||||
main {
|
||||
max-width: 640px;
|
||||
margin: 2rem auto;
|
||||
font-family: system-ui;
|
||||
}
|
||||
pre {
|
||||
background: #1e1e1e;
|
||||
color: #d4d4d4;
|
||||
padding: 1rem;
|
||||
border-radius: 6px;
|
||||
overflow-x: auto;
|
||||
}
|
||||
section {
|
||||
margin: 2rem 0;
|
||||
}
|
||||
h4 {
|
||||
margin: 0.5rem 0 0.25rem;
|
||||
color: #666;
|
||||
}
|
||||
</style>
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
import type { RequestHandler } from './$types';
|
||||
import { readFileSync } from 'fs';
|
||||
|
||||
const CLUSTER_SERVER = 'https://10.0.20.100:6443';
|
||||
const OIDC_ISSUER = 'https://authentik.viktorbarzin.me/application/o/kubernetes/';
|
||||
const OIDC_CLIENT_ID = 'kubernetes';
|
||||
const PORTAL_URL = 'https://k8s-portal.viktorbarzin.me';
|
||||
|
||||
/**
 * GET handler that returns a bash bootstrap script as plain text.
 *
 * Query parameter `os`: `linux` selects the curl-based installer; any other
 * value (default `mac`) selects the Homebrew-based installer.
 *
 * The script installs kubectl and kubelogin (as `kubectl-oidc_login`), writes
 * an OIDC kubeconfig to ~/.kube/config-home and exports KUBECONFIG from the
 * user's shell profile.
 */
export const GET: RequestHandler = async ({ url }) => {
	const os = url.searchParams.get('os') || 'mac';

	// Best effort: without the CA file the kubeconfig is still generated, just
	// with empty certificate-authority-data.
	let caCert = '';
	try {
		caCert = readFileSync('/config/ca.crt', 'utf-8');
	} catch (err) {
		console.warn('Could not read /config/ca.crt; kubeconfig will carry no CA data', err);
	}
	const caCertBase64 = Buffer.from(caCert).toString('base64');

	const kubeconfigContent = `apiVersion: v1
kind: Config
clusters:
- cluster:
    server: ${CLUSTER_SERVER}
    certificate-authority-data: ${caCertBase64}
  name: home-cluster
contexts:
- context:
    cluster: home-cluster
    user: oidc-user
  name: home-cluster
current-context: home-cluster
users:
- name: oidc-user
  user:
    exec:
      apiVersion: client.authentication.k8s.io/v1beta1
      command: kubectl
      args:
      - oidc-login
      - get-token
      - --oidc-issuer-url=${OIDC_ISSUER}
      - --oidc-client-id=${OIDC_CLIENT_ID}
      - --oidc-extra-scope=email
      - --oidc-extra-scope=profile
      - --oidc-extra-scope=groups
      interactiveMode: IfAvailable`;

	const header = `#!/bin/bash
set -e

echo "=== Kubernetes Cluster Setup ==="
echo ""`;

	// The heredoc delimiter is quoted, so the shell copies the body verbatim:
	// the kubeconfig must be embedded as-is, without any quote escaping.
	const writeKubeconfig = `# Write kubeconfig
mkdir -p ~/.kube
cat > ~/.kube/config-home << 'KUBECONFIG_EOF'
${kubeconfigContent}
KUBECONFIG_EOF
echo "[OK] Kubeconfig written to ~/.kube/config-home"`;

	// Shared tail; only the shell-profile selection differs per platform.
	const finish = (selectShellRc: string) => `# Add KUBECONFIG to shell profile
${selectShellRc}
if ! grep -q 'config-home' "\$SHELL_RC" 2>/dev/null; then
  echo 'export KUBECONFIG=~/.kube/config-home' >> "\$SHELL_RC"
  echo "[OK] Added KUBECONFIG to \$SHELL_RC"
fi
export KUBECONFIG=~/.kube/config-home

echo ""
echo "=== Setup complete! ==="
echo ""
echo "Run 'kubectl get namespaces' to test (opens browser for login)."
echo "You may need to restart your shell or run: export KUBECONFIG=~/.kube/config-home"
`;

	let sections: string[];

	if (os === 'linux') {
		const installTools = `# Use sudo if available, otherwise install directly (e.g. in containers running as root)
SUDO=""
if [ "\$(id -u)" -ne 0 ] && command -v sudo &>/dev/null; then
  SUDO="sudo"
fi

# Determine install directory
INSTALL_DIR="/usr/local/bin"
if [ ! -w "\$INSTALL_DIR" ] && [ -z "\$SUDO" ]; then
  INSTALL_DIR="\$HOME/.local/bin"
  mkdir -p "\$INSTALL_DIR"
  export PATH="\$INSTALL_DIR:\$PATH"
fi

# Map the machine architecture to the release artifact name (unknown -> amd64)
case "\$(uname -m)" in
  aarch64|arm64) ARCH="arm64" ;;
  *) ARCH="amd64" ;;
esac

# Download into a private scratch directory that is removed on exit
TMP_DIR="\$(mktemp -d)"
trap 'rm -rf "\$TMP_DIR"' EXIT

# Install kubectl
if command -v kubectl &>/dev/null; then
  echo "[OK] kubectl already installed"
else
  echo "[..] Installing kubectl..."
  KUBECTL_VERSION=\$(curl -fsSL https://dl.k8s.io/release/stable.txt)
  curl -fsSLo "\$TMP_DIR/kubectl" "https://dl.k8s.io/release/\${KUBECTL_VERSION}/bin/linux/\${ARCH}/kubectl"
  chmod +x "\$TMP_DIR/kubectl" && \$SUDO mv "\$TMP_DIR/kubectl" "\$INSTALL_DIR/"
  echo "[OK] kubectl installed"
fi

# Install kubelogin
if command -v kubectl-oidc_login &>/dev/null; then
  echo "[OK] kubelogin already installed"
else
  echo "[..] Installing kubelogin..."
  KUBELOGIN_VERSION=\$(curl -fsSL -o /dev/null -w "%{url_effective}" https://github.com/int128/kubelogin/releases/latest | grep -o '[^/]*\$')
  curl -fsSLo "\$TMP_DIR/kubelogin.zip" "https://github.com/int128/kubelogin/releases/download/\${KUBELOGIN_VERSION}/kubelogin_linux_\${ARCH}.zip"
  unzip -o "\$TMP_DIR/kubelogin.zip" kubelogin -d "\$TMP_DIR"
  \$SUDO mv "\$TMP_DIR/kubelogin" "\$INSTALL_DIR/kubectl-oidc_login"
  echo "[OK] kubelogin installed"
fi`;

		const selectShellRc = `SHELL_RC=~/.bashrc
[ -f ~/.zshrc ] && SHELL_RC=~/.zshrc`;

		sections = [header, installTools, writeKubeconfig, finish(selectShellRc)];
	} else {
		const installTools = `# Check for Homebrew
if ! command -v brew &>/dev/null; then
  echo "[!!] Homebrew not found. Install it first:"
  echo '  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
  exit 1
fi

# Install kubectl
if command -v kubectl &>/dev/null; then
  echo "[OK] kubectl already installed ($(kubectl version --client -o json 2>/dev/null | grep -o '"gitVersion":"[^"]*"' | cut -d'"' -f4))"
else
  echo "[..] Installing kubectl..."
  brew install kubectl
  echo "[OK] kubectl installed"
fi

# Install kubelogin
if command -v kubectl-oidc_login &>/dev/null; then
  echo "[OK] kubelogin already installed"
else
  echo "[..] Installing kubelogin..."
  brew install int128/kubelogin/kubelogin
  echo "[OK] kubelogin installed"
fi`;

		const selectShellRc = `SHELL_RC=~/.zshrc
[ ! -f ~/.zshrc ] && SHELL_RC=~/.bashrc`;

		sections = [header, installTools, writeKubeconfig, finish(selectShellRc)];
	}

	const script = sections.join('\n\n');

	return new Response(script, {
		headers: {
			'Content-Type': 'text/plain; charset=utf-8'
		}
	});
};
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# allow crawling everything by default
|
||||
User-agent: *
|
||||
Disallow:
|
||||
10
stacks/platform/modules/k8s-portal/files/svelte.config.js
Normal file
10
stacks/platform/modules/k8s-portal/files/svelte.config.js
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
import adapter from '@sveltejs/adapter-node';
|
||||
|
||||
/** @type {import('@sveltejs/kit').Config} */
const config = {
	kit: {
		// Uses the adapter imported above from '@sveltejs/adapter-node'.
		adapter: adapter()
	}
};

// SvelteKit configuration object, exported as the module default.
export default config;
|
||||
20
stacks/platform/modules/k8s-portal/files/tsconfig.json
Normal file
20
stacks/platform/modules/k8s-portal/files/tsconfig.json
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"extends": "./.svelte-kit/tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"rewriteRelativeImportExtensions": true,
|
||||
"allowJs": true,
|
||||
"checkJs": true,
|
||||
"esModuleInterop": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true,
|
||||
"skipLibCheck": true,
|
||||
"sourceMap": true,
|
||||
"strict": true,
|
||||
"moduleResolution": "bundler"
|
||||
}
|
||||
// Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias
|
||||
// except $lib which is handled by https://svelte.dev/docs/kit/configuration#files
|
||||
//
|
||||
// To make changes to top-level options such as include and exclude, we recommend extending
|
||||
// the generated config; see https://svelte.dev/docs/kit/configuration#typescript
|
||||
}
|
||||
6
stacks/platform/modules/k8s-portal/files/vite.config.ts
Normal file
6
stacks/platform/modules/k8s-portal/files/vite.config.ts
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
import { sveltekit } from '@sveltejs/kit/vite';
|
||||
import { defineConfig } from 'vite';
|
||||
|
||||
// Vite configuration: the only plugin registered is the SvelteKit plugin.
export default defineConfig({
	plugins: [sveltekit()]
});
|
||||
117
stacks/platform/modules/k8s-portal/main.tf
Normal file
117
stacks/platform/modules/k8s-portal/main.tf
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
# Name of the TLS secret; forwarded to the tls_secret and ingress modules in this file.
variable "tls_secret_name" {}
# Value of the `tier` label set on the namespace and on the deployment.
variable "tier" { type = string }
|
||||
|
||||
# Namespace that holds every k8s-portal object; labelled with the caller-supplied tier.
resource "kubernetes_namespace" "k8s_portal" {
  metadata {
    name   = "k8s-portal"
    labels = { tier = var.tier }
  }
}
|
||||
|
||||
# Instantiates the shared setup_tls_secret module for the portal namespace.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.k8s_portal.metadata[0].name
}
|
||||
|
||||
# ConfigMap mounted by the portal deployment at /config.
resource "kubernetes_config_map" "k8s_portal_config" {
  metadata {
    namespace = kubernetes_namespace.k8s_portal.metadata[0].name
    name      = "k8s-portal-config"
  }

  # Placeholder for the cluster CA certificate (extracted from the kubeconfig);
  # it is empty here and is meant to be populated with the cluster CA cert.
  data = { "ca.crt" = "" }
}
|
||||
|
||||
# Single-replica deployment of the portal container with the config ConfigMap
# mounted read-only at /config.
resource "kubernetes_deployment" "k8s_portal" {
  metadata {
    namespace = kubernetes_namespace.k8s_portal.metadata[0].name
    name      = "k8s-portal"
    labels = {
      tier = var.tier
      app  = "k8s-portal"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = { app = "k8s-portal" }
    }

    template {
      metadata {
        labels = { app = "k8s-portal" }
      }

      spec {
        # Backed by the ConfigMap declared in this module.
        volume {
          name = "config"
          config_map {
            name = kubernetes_config_map.k8s_portal_config.metadata[0].name
          }
        }

        container {
          image = "viktorbarzin/k8s-portal:latest"
          name  = "portal"

          port {
            container_port = 3000
          }

          volume_mount {
            mount_path = "/config"
            name       = "config"
            read_only  = true
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Service exposing the portal pods: port 80 forwards to container port 3000.
resource "kubernetes_service" "k8s_portal" {
  metadata {
    namespace = kubernetes_namespace.k8s_portal.metadata[0].name
    name      = "k8s-portal"
  }

  spec {
    selector = { app = "k8s-portal" }

    port {
      target_port = 3000
      port        = 80
    }
  }
}
|
||||
|
||||
# Main portal ingress, built by the shared ingress_factory module.
module "ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "k8s-portal"
  namespace       = kubernetes_namespace.k8s_portal.metadata[0].name
  tls_secret_name = var.tls_secret_name

  # Require Authentik login
  protected = true
}
|
||||
|
||||
# Second ingress for the same host and service, limited to /setup/script and
# left unprotected so the setup script can be fetched with curl without auth.
module "ingress_setup_script" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "k8s-portal-setup"
  namespace       = kubernetes_namespace.k8s_portal.metadata[0].name
  tls_secret_name = var.tls_secret_name

  host         = "k8s-portal"
  service_name = "k8s-portal"
  ingress_path = ["/setup/script"]

  protected = false
}
|
||||
120
stacks/platform/modules/kyverno/main.tf
Normal file
120
stacks/platform/modules/kyverno/main.tf
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
|
||||
# Namespace for the Kyverno release; labelled to disable Istio injection.
resource "kubernetes_namespace" "kyverno" {
  metadata {
    name   = "kyverno"
    labels = { "istio-injection" = "disabled" }
  }
}
|
||||
|
||||
# Installs the Kyverno chart into the namespace managed above.
resource "helm_release" "kyverno" {
  name       = "kyverno"
  chart      = "kyverno"
  repository = "https://kyverno.github.io/kyverno/"

  # The namespace is created by kubernetes_namespace.kyverno, not by Helm.
  namespace        = kubernetes_namespace.kyverno.metadata[0].name
  create_namespace = false

  atomic = true

  # NOTE(review): disabled values line references a Grafana values file and
  # variable; it looks copy-pasted from another module - confirm before reuse.
  # values = [templatefile("${path.module}/grafana_chart_values.yaml", { db_password = var.grafana_db_password })]
}
|
||||
|
||||
# ClusterPolicy that copies the namespace's `tier` label onto Deployments,
# StatefulSets and DaemonSets (value 'default' when the namespace has none).
#
# To unlabel all:
# kubectl label deployment,statefulset,daemonset --all-namespaces -l tier tier-
resource "kubernetes_manifest" "mutate_tier_from_namespace" {
  # No attribute below references the Helm release, so state the ordering
  # explicitly: the policy is created after Kyverno is installed and removed
  # before the release is uninstalled.
  depends_on = [helm_release.kyverno]

  manifest = {
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "sync-tier-label-from-namespace"
    }
    spec = {
      rules = [
        {
          name = "lookup-and-add-tier"
          match = {
            any = [
              {
                resources = {
                  kinds = ["Deployment", "StatefulSet", "DaemonSet"]
                }
              }
            ]
          }
          exclude = {
            any = [
              {
                resources = {
                  namespaces = ["kube-system", "metallb-system", "n8n"]
                }
              }
            ]
          }
          # Context allows us to perform an API call to get Namespace metadata
          context = [
            {
              name = "namespaceLabel"
              apiCall = {
                urlPath  = "/api/v1/namespaces/{{request.namespace}}"
                jmesPath = "metadata.labels.tier || 'default'"
              }
            }
          ]
          mutate = {
            patchStrategicMerge = {
              metadata = {
                labels = {
                  # Injects the variable discovered in the context above
                  "+(tier)" = "{{namespaceLabel}}"
                }
              }
            }
          }
        }
      ]
    }
  }
}
|
||||
|
||||
# resource "kubernetes_manifest" "enforce_pod_tier_label" {
|
||||
# manifest = {
|
||||
# apiVersion = "kyverno.io/v1"
|
||||
# kind = "ClusterPolicy"
|
||||
# metadata = {
|
||||
# name = "enforce-pod-tier-label"
|
||||
# annotations = {
|
||||
# "policies.kyverno.io/description" = "Rejects any pod that does not have a tier label."
|
||||
# }
|
||||
# }
|
||||
# spec = {
|
||||
# # 'Enforce' blocks the creation. 'Audit' just reports it.
|
||||
# validationFailureAction = "Enforce"
|
||||
# background = true
|
||||
# rules = [
|
||||
# {
|
||||
# name = "check-for-tier-label"
|
||||
# match = {
|
||||
# any = [
|
||||
# {
|
||||
# resources = {
|
||||
# kinds = ["Pod"]
|
||||
# }
|
||||
# }
|
||||
# ]
|
||||
# }
|
||||
# validate = {
|
||||
# message = "The label 'tier' is required for all pods in this cluster."
|
||||
# pattern = {
|
||||
# metadata = {
|
||||
# labels = {
|
||||
# "tier" = "?*" # The "?*" syntax means the value must not be empty
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# ]
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
809
stacks/platform/modules/kyverno/resource-governance.tf
Normal file
809
stacks/platform/modules/kyverno/resource-governance.tf
Normal file
|
|
@ -0,0 +1,809 @@
|
|||
|
||||
# =============================================================================
|
||||
# Tier-Based Resource Governance
|
||||
# =============================================================================
|
||||
# Four layers of protection against noisy neighbor issues:
|
||||
# 1. PriorityClasses - critical services survive resource pressure
|
||||
# 2. LimitRange defaults (Kyverno generate) - auto-inject defaults for containers without resources
|
||||
# 3. ResourceQuotas (Kyverno generate) - hard ceiling on namespace resource consumption
|
||||
# 4. Priority injection (Kyverno mutate) - set priorityClassName based on namespace tier label
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Layer 1: PriorityClasses
|
||||
# -----------------------------------------------------------------------------
|
||||
# Values stay well below system-cluster-critical (2,000,000,000)
|
||||
|
||||
# Highest of the five tier priorities (1,000,000); may preempt lower-priority pods.
resource "kubernetes_priority_class" "tier_0_core" {
  metadata {
    name = "tier-0-core"
  }

  description       = "Critical infrastructure: ingress, DNS, VPN, auth, monitoring"
  value             = 1000000
  preemption_policy = "PreemptLowerPriority"
  global_default    = false
}
|
||||
|
||||
# Second tier (800,000); may preempt lower-priority pods.
resource "kubernetes_priority_class" "tier_1_cluster" {
  metadata {
    name = "tier-1-cluster"
  }

  description       = "Cluster services: Redis, metrics, security"
  value             = 800000
  preemption_policy = "PreemptLowerPriority"
  global_default    = false
}
|
||||
|
||||
# Third tier (600,000); may preempt lower-priority pods.
resource "kubernetes_priority_class" "tier_2_gpu" {
  metadata {
    name = "tier-2-gpu"
  }

  description       = "GPU workloads: Immich, Ollama, Frigate"
  value             = 600000
  preemption_policy = "PreemptLowerPriority"
  global_default    = false
}
|
||||
|
||||
# Fourth tier (400,000); may preempt lower-priority pods.
resource "kubernetes_priority_class" "tier_3_edge" {
  metadata {
    name = "tier-3-edge"
  }

  description       = "User-facing services: mail, file sync, dashboards"
  value             = 400000
  preemption_policy = "PreemptLowerPriority"
  global_default    = false
}
|
||||
|
||||
# Lowest tier (200,000); the only tier whose preemption policy is "Never".
resource "kubernetes_priority_class" "tier_4_aux" {
  metadata {
    name = "tier-4-aux"
  }

  description       = "Optional services: blogs, tools, experiments. Will not preempt other aux services."
  value             = 200000
  preemption_policy = "Never"
  global_default    = false
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Layer 2: LimitRange Defaults (Kyverno Generate)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Creates a LimitRange in each namespace based on its tier label.
|
||||
# Only affects containers WITHOUT explicit resource requests/limits.
|
||||
|
||||
locals {
  # Container defaults per tier label value. Each entry is copied verbatim
  # into the generated LimitRange: `default` = default limits,
  # `defaultRequest` = default requests, `max` = per-container ceiling.
  limitrange_by_tier = {
    "0-core" = {
      default        = { cpu = "2", memory = "4Gi" }
      defaultRequest = { cpu = "100m", memory = "128Mi" }
      max            = { cpu = "8", memory = "16Gi" }
    }
    "1-cluster" = {
      default        = { cpu = "2", memory = "4Gi" }
      defaultRequest = { cpu = "100m", memory = "128Mi" }
      max            = { cpu = "4", memory = "8Gi" }
    }
    "2-gpu" = {
      default        = { cpu = "4", memory = "8Gi" }
      defaultRequest = { cpu = "100m", memory = "256Mi" }
      max            = { cpu = "8", memory = "16Gi" }
    }
    "3-edge" = {
      default        = { cpu = "1", memory = "2Gi" }
      defaultRequest = { cpu = "50m", memory = "128Mi" }
      max            = { cpu = "4", memory = "8Gi" }
    }
    "4-aux" = {
      default        = { cpu = "500m", memory = "1Gi" }
      defaultRequest = { cpu = "25m", memory = "64Mi" }
      max            = { cpu = "2", memory = "4Gi" }
    }
  }

  # Kyverno `generate` body per tier: a LimitRange named "tier-defaults"
  # created in the namespace that triggered the rule.
  limitrange_generate_by_tier = {
    for tier, limits in local.limitrange_by_tier : tier => {
      synchronize = true
      apiVersion  = "v1"
      kind        = "LimitRange"
      name        = "tier-defaults"
      namespace   = "{{request.object.metadata.name}}"
      data = {
        spec = {
          limits = [
            {
              type           = "Container"
              default        = limits.default
              defaultRequest = limits.defaultRequest
              max            = limits.max
            }
          ]
        }
      }
    }
  }
}

# ClusterPolicy with one generate rule per tier plus a fallback rule for
# namespaces that carry no tier label.
resource "kubernetes_manifest" "generate_limitrange_by_tier" {
  # Explicit ordering on the Kyverno Helm release; nothing in the manifest
  # references it, so Terraform would otherwise not order the two.
  depends_on = [helm_release.kyverno]

  manifest = {
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "generate-limitrange-by-tier"
      annotations = {
        "policies.kyverno.io/title"       = "Generate LimitRange by Tier"
        "policies.kyverno.io/description" = "Creates tier-appropriate LimitRange defaults in namespaces based on their tier label. Only affects containers without explicit resource specifications."
      }
    }
    spec = {
      generateExisting = true
      # Tier rules come first, in key order (0-core ... 4-aux), followed by
      # the fallback. Both concat() arguments are tuples, so the fallback rule
      # keeps its extra `exclude` attribute without affecting the tier rules.
      rules = concat(
        [
          for tier, generate in local.limitrange_generate_by_tier : {
            name = "limitrange-tier-${tier}"
            match = {
              any = [
                {
                  resources = {
                    kinds = ["Namespace"]
                    selector = {
                      matchLabels = {
                        tier = tier
                      }
                    }
                  }
                }
              ]
            }
            generate = generate
          }
        ],
        [
          # Fallback: namespaces without a tier label get aux-level defaults
          {
            name = "limitrange-no-tier-fallback"
            match = {
              any = [
                {
                  resources = {
                    kinds = ["Namespace"]
                  }
                }
              ]
            }
            exclude = {
              any = [
                {
                  resources = {
                    selector = {
                      matchExpressions = [
                        {
                          key      = "tier"
                          operator = "Exists"
                        }
                      ]
                    }
                  }
                },
                {
                  resources = {
                    namespaces = ["kube-system", "metallb-system", "kyverno", "calico-system", "calico-apiserver"]
                  }
                }
              ]
            }
            generate = local.limitrange_generate_by_tier["4-aux"]
          }
        ]
      )
    }
  }
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Layer 3: ResourceQuotas (Kyverno Generate)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Creates a ResourceQuota in each namespace based on its tier label.
|
||||
# Sets hard ceiling on total namespace resource consumption.
|
||||
# Namespaces with label resource-governance/custom-quota=true are excluded.
|
||||
#
|
||||
# IMPORTANT: LimitRange (Layer 2) must exist before ResourceQuota takes effect,
|
||||
# because ResourceQuota requires all pods to have resource requests set.
|
||||
|
||||
locals {
  # `spec.hard` of the generated ResourceQuota, keyed by tier label value.
  resourcequota_hard_by_tier = {
    "0-core" = {
      "requests.cpu"    = "8"
      "requests.memory" = "8Gi"
      "limits.cpu"      = "32"
      "limits.memory"   = "64Gi"
      pods              = "100"
    }
    "1-cluster" = {
      "requests.cpu"    = "4"
      "requests.memory" = "4Gi"
      "limits.cpu"      = "16"
      "limits.memory"   = "32Gi"
      pods              = "30"
    }
    "2-gpu" = {
      "requests.cpu"    = "8"
      "requests.memory" = "8Gi"
      "limits.cpu"      = "48"
      "limits.memory"   = "96Gi"
      pods              = "40"
    }
    "3-edge" = {
      "requests.cpu"    = "4"
      "requests.memory" = "4Gi"
      "limits.cpu"      = "16"
      "limits.memory"   = "32Gi"
      pods              = "30"
    }
    "4-aux" = {
      "requests.cpu"    = "2"
      "requests.memory" = "2Gi"
      "limits.cpu"      = "8"
      "limits.memory"   = "16Gi"
      pods              = "20"
    }
  }
}

# ClusterPolicy with one generate rule per tier. Each rule creates a
# ResourceQuota named "tier-quota" in matching namespaces and skips namespaces
# labelled resource-governance/custom-quota=true.
resource "kubernetes_manifest" "generate_resourcequota_by_tier" {
  depends_on = [kubernetes_manifest.generate_limitrange_by_tier]

  manifest = {
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "generate-resourcequota-by-tier"
      annotations = {
        "policies.kyverno.io/title"       = "Generate ResourceQuota by Tier"
        "policies.kyverno.io/description" = "Creates tier-appropriate ResourceQuota in namespaces based on their tier label. Excludes namespaces with resource-governance/custom-quota label."
      }
    }
    spec = {
      generateExisting = true
      # One rule per tier, emitted in key order (0-core ... 4-aux).
      rules = [
        for tier, hard in local.resourcequota_hard_by_tier : {
          name = "quota-tier-${tier}"
          match = {
            any = [
              {
                resources = {
                  kinds = ["Namespace"]
                  selector = {
                    matchLabels = {
                      tier = tier
                    }
                  }
                }
              }
            ]
          }
          exclude = {
            any = [
              {
                resources = {
                  selector = {
                    matchLabels = {
                      "resource-governance/custom-quota" = "true"
                    }
                  }
                }
              }
            ]
          }
          generate = {
            synchronize = true
            apiVersion  = "v1"
            kind        = "ResourceQuota"
            name        = "tier-quota"
            namespace   = "{{request.object.metadata.name}}"
            data = {
              spec = {
                hard = hard
              }
            }
          }
        }
      ]
    }
  }
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Layer 4: PriorityClassName Injection (Kyverno Mutate)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Automatically sets priorityClassName on Pods based on their namespace's tier label.
|
||||
# Skips pods that already have a priorityClassName set.
|
||||
|
||||
# ClusterPolicy that sets spec.priorityClassName to "tier-<namespace tier label>"
# on Pods that have no priorityClassName and whose namespace has a tier label.
resource "kubernetes_manifest" "mutate_priority_from_tier" {
  # The policy injects the names of the PriorityClasses below, so they must
  # exist before it becomes active; Kyverno itself must be installed too.
  depends_on = [
    helm_release.kyverno,
    kubernetes_priority_class.tier_0_core,
    kubernetes_priority_class.tier_1_cluster,
    kubernetes_priority_class.tier_2_gpu,
    kubernetes_priority_class.tier_3_edge,
    kubernetes_priority_class.tier_4_aux,
  ]

  manifest = {
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "inject-priority-class-from-tier"
      annotations = {
        "policies.kyverno.io/title"       = "Inject PriorityClass from Tier"
        "policies.kyverno.io/description" = "Sets priorityClassName on Pods based on the namespace tier label. Skips pods that already have a priorityClassName."
      }
    }
    spec = {
      rules = [
        {
          name = "inject-priority-class"
          match = {
            any = [
              {
                resources = {
                  kinds = ["Pod"]
                }
              }
            ]
          }
          exclude = {
            any = [
              {
                resources = {
                  namespaces = ["kube-system", "metallb-system", "kyverno", "calico-system", "calico-apiserver"]
                }
              }
            ]
          }
          # Look up the namespace's tier label; empty string when absent.
          context = [
            {
              name = "tierLabel"
              apiCall = {
                urlPath  = "/api/v1/namespaces/{{request.namespace}}"
                jmesPath = "metadata.labels.tier || ''"
              }
            }
          ]
          # Apply only when the pod has no priorityClassName and a tier was found.
          # NOTE(review): a tier value other than the five defined classes would
          # yield a priorityClassName with no matching PriorityClass - confirm
          # namespaces only ever use the known tier values.
          preconditions = {
            all = [
              {
                key      = "{{request.object.spec.priorityClassName || ''}}"
                operator = "Equals"
                value    = ""
              },
              {
                key      = "{{tierLabel}}"
                operator = "NotEquals"
                value    = ""
              }
            ]
          }
          mutate = {
            # Drops spec.priority and spec.preemptionPolicy, then sets the
            # class name derived from the tier.
            patchesJson6902 = yamlencode([
              {
                op   = "remove"
                path = "/spec/priority"
              },
              {
                op   = "remove"
                path = "/spec/preemptionPolicy"
              },
              {
                op    = "add"
                path  = "/spec/priorityClassName"
                value = "tier-{{tierLabel}}"
              }
            ])
          }
        }
      ]
    }
  }
}
|
||||
|
||||
# --- ndots:2 injection ---
|
||||
# Kubernetes defaults to ndots:5, which causes 4 wasted NxDomain queries per
|
||||
# external DNS lookup (search domain expansion). This policy injects ndots:2
|
||||
# on all pods to reduce NxDomain flood while still allowing short-name service
|
||||
# resolution (e.g. "redis.redis" has 1 dot, so it still expands).
|
||||
# ClusterPolicy that adds the dnsConfig option ndots=2 to Pods that do not
# already define an `ndots` option.
resource "kubernetes_manifest" "mutate_ndots" {
  # Explicit ordering on the Kyverno Helm release; nothing in the manifest
  # references it, so Terraform would otherwise not order the two.
  depends_on = [helm_release.kyverno]

  manifest = {
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "inject-ndots"
      annotations = {
        "policies.kyverno.io/title"       = "Inject ndots:2 DNS Config"
        "policies.kyverno.io/description" = "Sets ndots:2 on all Pods to reduce NxDomain query flood from search domain expansion. Skips pods that already have ndots configured."
      }
    }
    spec = {
      rules = [
        {
          name = "inject-ndots-2"
          match = {
            any = [
              {
                resources = {
                  kinds = ["Pod"]
                }
              }
            ]
          }
          exclude = {
            any = [
              {
                resources = {
                  namespaces = ["kube-system", "metallb-system", "kyverno", "calico-system", "calico-apiserver"]
                }
              }
            ]
          }
          # Counts existing dnsConfig options named "ndots"; the rule applies
          # only when that count is zero.
          preconditions = {
            all = [
              {
                key      = "{{ request.object.spec.dnsConfig.options || `[]` | [?name == 'ndots'] | length(@) }}"
                operator = "Equals"
                value    = "0"
              }
            ]
          }
          mutate = {
            # NOTE(review): if a pod already sets other dnsConfig options (but
            # not ndots), verify whether this merge keeps them or replaces the
            # whole options list.
            patchStrategicMerge = {
              spec = {
                dnsConfig = {
                  options = [
                    {
                      name  = "ndots"
                      value = "2"
                    }
                  ]
                }
              }
            }
          }
        }
      ]
    }
  }
}
|
||||
5
stacks/platform/modules/mailserver/extra/aliases.txt
Normal file
5
stacks/platform/modules/mailserver/extra/aliases.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
firmly-gerardo-generated@viktorbarzin.me me@viktorbarzin.me
|
||||
closely-keith-generated@viktorbarzin.me vbarzin@gmail.com
|
||||
literally-paolo-generated@viktorbarzin.me viktorbarzin@fb.com
|
||||
hastily-stefanie-generated@viktorbarzin.me elliestamenova@gmail.com
|
||||
vaultwarden@viktorbarzin.me me@viktorbarzin.me
|
||||
444
stacks/platform/modules/mailserver/main.tf
Normal file
444
stacks/platform/modules/mailserver/main.tf
Normal file
|
|
@ -0,0 +1,444 @@
|
|||
# Name of the TLS secret; forwarded to the tls_secret module in this file.
variable "tls_secret_name" {}
# Value of the `tier` label set on the mailserver namespace.
variable "tier" { type = string }
# Iterated as user => password pairs to build postfix-accounts.cf (passwords are bcrypt-hashed).
variable "mailserver_accounts" {}
# Text concatenated in front of extra/aliases.txt to form postfix-virtual.cf.
variable "postfix_account_aliases" {}
# Presumably the OpenDKIM signing key - usage is not visible here; TODO confirm.
variable "opendkim_key" {}
# NOTE(review): the comment says SendGrid, but DEFAULT_RELAY_HOST below is set to Mailgun - confirm which relay this is for.
variable "sasl_passwd" {} # For sendgrid i.e relayhost
|
||||
|
||||
# Namespace for the mail server, labelled with the caller-supplied tier.
resource "kubernetes_namespace" "mailserver" {
  metadata {
    name   = "mailserver"
    labels = { tier = var.tier }

    # Istio injection label kept disabled:
    # connecting via localhost does not seem to work?
    # labels = {
    #   "istio-injection" : "enabled"
    # }
  }
}
|
||||
|
||||
# Instantiates the shared setup_tls_secret module for the mailserver namespace.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.mailserver.metadata[0].name
}
|
||||
|
||||
resource "kubernetes_config_map" "mailserver_env_config" {
|
||||
metadata {
|
||||
name = "mailserver.env.config"
|
||||
namespace = kubernetes_namespace.mailserver.metadata[0].name
|
||||
labels = {
|
||||
app = "mailserver"
|
||||
}
|
||||
annotations = {
|
||||
"reloader.stakater.com/match" = "true"
|
||||
}
|
||||
}
|
||||
|
||||
data = {
|
||||
DMS_DEBUG = "0"
|
||||
# LOG_LEVEL = "debug"
|
||||
ENABLE_CLAMAV = "0"
|
||||
ENABLE_AMAVIS = "0"
|
||||
ENABLE_FAIL2BAN = "0"
|
||||
ENABLE_FETCHMAIL = "0"
|
||||
ENABLE_POSTGREY = "0"
|
||||
ENABLE_SASLAUTHD = "0"
|
||||
ENABLE_SPAMASSASSIN = "0"
|
||||
ENABLE_SRS = "1"
|
||||
FETCHMAIL_POLL = "120"
|
||||
ONE_DIR = "1"
|
||||
OVERRIDE_HOSTNAME = "mail.viktorbarzin.me"
|
||||
POSTFIX_MESSAGE_SIZE_LIMIT = 1024 * 1024 * 200 # 200 MB
|
||||
POSTFIX_REJECT_UNKNOWN_CLIENT_HOSTNAME = "1"
|
||||
# TLS_LEVEL = "intermediate"
|
||||
# DEFAULT_RELAY_HOST = "[smtp.sendgrid.net]:587"
|
||||
DEFAULT_RELAY_HOST = "[smtp.eu.mailgun.org]:587"
|
||||
SPOOF_PROTECTION = "1"
|
||||
SSL_TYPE = "manual"
|
||||
SSL_CERT_PATH = "/tmp/ssl/tls.crt"
|
||||
SSL_KEY_PATH = "/tmp/ssl/tls.key"
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_config_map" "mailserver_config" {
|
||||
metadata {
|
||||
name = "mailserver.config"
|
||||
namespace = kubernetes_namespace.mailserver.metadata[0].name
|
||||
|
||||
labels = {
|
||||
app = "mailserver"
|
||||
}
|
||||
annotations = {
|
||||
"reloader.stakater.com/match" = "true"
|
||||
}
|
||||
}
|
||||
|
||||
data = {
|
||||
# Actual mail settings
|
||||
"postfix-accounts.cf" = join("\n", [for user, pass in var.mailserver_accounts : "${user}|${bcrypt(pass, 6)}"])
|
||||
"postfix-main.cf" = var.postfix_cf
|
||||
"postfix-virtual.cf" = format("%s%s", var.postfix_account_aliases, file("${path.module}/extra/aliases.txt"))
|
||||
|
||||
KeyTable = "mail._domainkey.viktorbarzin.me viktorbarzin.me:mail:/etc/opendkim/keys/viktorbarzin.me-mail.key\n"
|
||||
SigningTable = "*@viktorbarzin.me mail._domainkey.viktorbarzin.me\n"
|
||||
TrustedHosts = "127.0.0.1\nlocalhost\n"
|
||||
"sasl_passwd" = var.sasl_passwd
|
||||
fail2ban_conf = <<-EOF
|
||||
[DEFAULT]
|
||||
|
||||
#logtarget = /var/log/fail2ban.log
|
||||
logtarget = SYSOUT
|
||||
EOF
|
||||
}
|
||||
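# bcrypt() produces a different hash on every run, so the rendered accounts file would change on each plan; ignore that key to avoid rewriting the ConfigMap constantly.
|
||||
# Side effect: account changes are ignored too. To fix, either 1. create consistent hashes or 2. find a way to ignore_changes on a per-password basis.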
|
||||
lifecycle {
|
||||
ignore_changes = [data["postfix-accounts.cf"]]
|
||||
}
|
||||
}
|
||||
|
||||
# resource "kubernetes_config_map" "user_patches" {
|
||||
# metadata {
|
||||
# name = "user-patches"
|
||||
# namespace = kubernetes_namespace.mailserver.metadata[0].name
|
||||
# labels = {
|
||||
# "app" = "mailserver"
|
||||
# }
|
||||
# }
|
||||
|
||||
# data = {
|
||||
# user_patches = <<EOF
|
||||
# #!/bin/bash
|
||||
# cp -f /tmp/dovecot.key /etc/dovecot/ssl/dovecot.key
|
||||
# cp -f /tmp/dovecot.crt /etc/dovecot/ssl/dovecot.pem
|
||||
# EOF
|
||||
# }
|
||||
# }
|
||||
|
||||
resource "kubernetes_secret" "opendkim_key" {
|
||||
metadata {
|
||||
name = "mailserver.opendkim.key"
|
||||
namespace = kubernetes_namespace.mailserver.metadata[0].name
|
||||
labels = {
|
||||
"app" = "mailserver"
|
||||
}
|
||||
}
|
||||
type = "Opaque"
|
||||
data = {
|
||||
"viktorbarzin.me-mail.key" = var.opendkim_key
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
resource "kubernetes_deployment" "mailserver" {
|
||||
metadata {
|
||||
name = "mailserver"
|
||||
namespace = kubernetes_namespace.mailserver.metadata[0].name
|
||||
labels = {
|
||||
"app" = "mailserver"
|
||||
tier = var.tier
|
||||
}
|
||||
annotations = {
|
||||
"reloader.stakater.com/search" = "true"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
replicas = "1"
|
||||
strategy {
|
||||
type = "Recreate"
|
||||
}
|
||||
selector {
|
||||
match_labels = {
|
||||
"app" = "mailserver"
|
||||
}
|
||||
}
|
||||
template {
|
||||
metadata {
|
||||
annotations = {
|
||||
# "diun.enable" = "true"
|
||||
}
|
||||
labels = {
|
||||
"app" = "mailserver"
|
||||
"role" = "mail"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
container {
|
||||
name = "docker-mailserver"
|
||||
image = "docker.io/mailserver/docker-mailserver:15.0.0"
|
||||
image_pull_policy = "IfNotPresent"
|
||||
security_context {
|
||||
capabilities {
|
||||
add = ["NET_ADMIN"]
|
||||
}
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
post_start {
|
||||
exec {
|
||||
command = [
|
||||
"postmap",
|
||||
"/etc/postfix/sasl/passwd"
|
||||
# "/bin/sh",
|
||||
# "-c",
|
||||
# "cp -f /tmp/user-patches.sh /tmp/docker-mailserver/user-patches.sh && chown root:root /var/log/mail && chmod 755 /var/log/mail",
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
volume_mount {
|
||||
name = "config-tls"
|
||||
mount_path = "/tmp/ssl/tls.key"
|
||||
sub_path = "tls.key"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "config-tls"
|
||||
mount_path = "/tmp/ssl/tls.crt"
|
||||
sub_path = "tls.crt"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/tmp/docker-mailserver/postfix-accounts.cf"
|
||||
sub_path = "postfix-accounts.cf"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/tmp/docker-mailserver/postfix-main.cf"
|
||||
sub_path = "postfix-main.cf"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/tmp/docker-mailserver/postfix-virtual.cf"
|
||||
sub_path = "postfix-virtual.cf"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/tmp/docker-mailserver/fetchmail.cf"
|
||||
sub_path = "fetchmail.cf"
|
||||
read_only = true
|
||||
}
|
||||
# volume_mount {
|
||||
# name = "config"
|
||||
# mount_path = "/tmp/docker-mailserver/dovecot.cf"
|
||||
# sub_path = "dovecot.cf"
|
||||
# read_only = true
|
||||
# }
|
||||
# volume_mount {
|
||||
# name = "user-patches"
|
||||
# mount_path = "/tmp/user-patches.sh"
|
||||
# sub_path = "user-patches.sh"
|
||||
# read_only = true
|
||||
# }
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/tmp/docker-mailserver/opendkim/SigningTable"
|
||||
sub_path = "SigningTable"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/tmp/docker-mailserver/opendkim/KeyTable"
|
||||
sub_path = "KeyTable"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/tmp/docker-mailserver/opendkim/TrustedHosts"
|
||||
sub_path = "TrustedHosts"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "opendkim-key"
|
||||
mount_path = "/tmp/docker-mailserver/opendkim/keys"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "data"
|
||||
mount_path = "/var/mail"
|
||||
sub_path = "data"
|
||||
}
|
||||
volume_mount {
|
||||
name = "data"
|
||||
mount_path = "/var/mail-state"
|
||||
sub_path = "state"
|
||||
}
|
||||
volume_mount {
|
||||
name = "data"
|
||||
mount_path = "/var/log/mail"
|
||||
sub_path = "log"
|
||||
}
|
||||
volume_mount {
|
||||
name = "var-run-dovecot"
|
||||
mount_path = "/var/run/dovecot"
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/etc/postfix/sasl/passwd"
|
||||
sub_path = "sasl_passwd"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/etc/fail2ban/fail2ban.local"
|
||||
sub_path = "fail2ban_conf"
|
||||
read_only = true
|
||||
}
|
||||
port {
|
||||
name = "smtp"
|
||||
container_port = 25
|
||||
protocol = "TCP"
|
||||
}
|
||||
port {
|
||||
name = "smtp-secure"
|
||||
container_port = 465
|
||||
protocol = "TCP"
|
||||
}
|
||||
port {
|
||||
name = "smtp-auth"
|
||||
container_port = 587
|
||||
protocol = "TCP"
|
||||
}
|
||||
port {
|
||||
name = "imap-secure"
|
||||
container_port = 993
|
||||
protocol = "TCP"
|
||||
}
|
||||
env_from {
|
||||
config_map_ref {
|
||||
name = "mailserver.env.config"
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
container {
|
||||
name = "dovecot-exporter"
|
||||
image = "viktorbarzin/dovecot_exporter:latest"
|
||||
command = [
|
||||
"/dovecot_exporter/exporter",
|
||||
"--dovecot.socket-path=/var/run/dovecot/stats-reader"
|
||||
]
|
||||
image_pull_policy = "IfNotPresent"
|
||||
port {
|
||||
name = "dovecotexporter"
|
||||
container_port = 9166
|
||||
protocol = "TCP"
|
||||
}
|
||||
volume_mount {
|
||||
name = "var-run-dovecot"
|
||||
mount_path = "/var/run/dovecot"
|
||||
}
|
||||
}
|
||||
|
||||
volume {
|
||||
name = "config"
|
||||
config_map {
|
||||
name = "mailserver.config"
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "config-tls"
|
||||
secret {
|
||||
secret_name = var.tls_secret_name
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "opendkim-key"
|
||||
secret {
|
||||
secret_name = "mailserver.opendkim.key"
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "data"
|
||||
nfs {
|
||||
path = "/mnt/main/mailserver"
|
||||
server = "10.0.10.15"
|
||||
}
|
||||
# iscsi {
|
||||
# target_portal = "iscsi.viktorbarzin.lan:3260"
|
||||
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:mailserver"
|
||||
# lun = 0
|
||||
# fs_type = "ext4"
|
||||
# }
|
||||
}
|
||||
# volume {
|
||||
# name = "user-patches"
|
||||
# config_map {
|
||||
# name = "user-patches"
|
||||
# }
|
||||
# }
|
||||
volume {
|
||||
name = "var-run-dovecot"
|
||||
empty_dir {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_service" "mailserver" {
|
||||
metadata {
|
||||
name = "mailserver"
|
||||
namespace = kubernetes_namespace.mailserver.metadata[0].name
|
||||
|
||||
labels = {
|
||||
app = "mailserver"
|
||||
}
|
||||
|
||||
annotations = {
|
||||
"metallb.universe.tf/allow-shared-ip" = "shared"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
type = "LoadBalancer"
|
||||
# external_traffic_policy = "Cluster"
|
||||
external_traffic_policy = "Local"
|
||||
selector = {
|
||||
app = "mailserver"
|
||||
}
|
||||
|
||||
port {
|
||||
name = "smtp"
|
||||
protocol = "TCP"
|
||||
port = 25
|
||||
target_port = "smtp"
|
||||
}
|
||||
|
||||
port {
|
||||
name = "smtp-secure"
|
||||
protocol = "TCP"
|
||||
port = 465
|
||||
target_port = "smtp-secure"
|
||||
}
|
||||
|
||||
port {
|
||||
name = "smtp-auth"
|
||||
protocol = "TCP"
|
||||
port = 587
|
||||
target_port = "smtp-auth"
|
||||
}
|
||||
|
||||
port {
|
||||
name = "imap-secure"
|
||||
protocol = "TCP"
|
||||
port = 993
|
||||
target_port = "imap-secure"
|
||||
}
|
||||
|
||||
port {
|
||||
name = "roundcube"
|
||||
protocol = "TCP"
|
||||
port = 80
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
196
stacks/platform/modules/mailserver/roundcubemail.tf
Normal file
196
stacks/platform/modules/mailserver/roundcubemail.tf
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
variable "roundcube_db_password" { type = string }
|
||||
|
||||
# If you want to override settings mount this in /var/roundcube/config
|
||||
# more info in https://github.com/roundcube/roundcubemail-docker?tab=readme-ov-file
|
||||
# resource "kubernetes_config_map" "roundcubemail_config" {
|
||||
# metadata {
|
||||
# name = "roundcubemail.config"
|
||||
# namespace = "mailserver"
|
||||
|
||||
# labels = {
|
||||
# app = "mailserver"
|
||||
# }
|
||||
# annotations = {
|
||||
# "reloader.stakater.com/match" = "true"
|
||||
# }
|
||||
# }
|
||||
|
||||
# data = {
|
||||
# # if you want to override things see https://github.com/roundcube/roundcubemail/blob/master/config/defaults.inc.php
|
||||
# "imap.php" = <<-EOF
|
||||
# <?php
|
||||
# $config['imap_host'] = 'ssl://mail.viktorbarzin.me:993';
|
||||
# ?>
|
||||
# EOF
|
||||
# }
|
||||
# }
|
||||
|
||||
|
||||
# Roundcube webmail front-end, deployed into the mailserver namespace.
# It talks to the mail server over IMAPS (993) and SMTP submission (587) and
# keeps its own state in the MySQL instance at mysql.dbaas.
resource "kubernetes_deployment" "roundcubemail" {
  metadata {
    name = "roundcubemail"
    # Reference the namespace resource (instead of the literal "mailserver") so
    # Terraform orders this deployment after the namespace is created.
    namespace = kubernetes_namespace.mailserver.metadata[0].name
    labels = {
      "app" = "roundcubemail"
      tier  = var.tier
    }
    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }
  spec {
    replicas = "1"
    strategy {
      type = "RollingUpdate"
    }
    selector {
      match_labels = {
        "app" = "roundcubemail"
      }
    }
    template {
      metadata {
        labels = {
          "app" = "roundcubemail"
        }
      }
      spec {
        container {
          name = "roundcube"
          # NOTE(review): ":latest" is a floating tag; consider pinning a version.
          image = "roundcube/roundcubemail:latest"
          # Uncomment me to mount additional settings
          # volume_mount {
          #   name       = "imap-config"
          #   mount_path = "/var/roundcube/config/imap.php"
          #   sub_path   = "imap.php"
          # }

          # IMAP / SMTP endpoints
          env {
            name  = "ROUNDCUBEMAIL_DEFAULT_HOST"
            value = "ssl://mail.viktorbarzin.me" # tls cert must be valid!
          }
          env {
            name  = "ROUNDCUBEMAIL_DEFAULT_PORT"
            value = "993"
          }
          env {
            name  = "ROUNDCUBEMAIL_SMTP_SERVER"
            value = "tls://mail.viktorbarzin.me" # tls cert must be valid!
          }
          env {
            name = "ROUNDCUBEMAIL_SMTP_PORT"
            # Env values are strings; quoted for consistency with the other ports.
            value = "587"
          }

          # DB Settings
          env {
            name  = "ROUNDCUBEMAIL_DB_TYPE"
            value = "mysql"
          }
          env {
            name  = "ROUNDCUBEMAIL_DB_HOST"
            value = "mysql.dbaas"
          }
          env {
            name  = "ROUNDCUBEMAIL_DB_USER"
            value = "roundcubemail"
          }
          env {
            name = "ROUNDCUBEMAIL_DB_PASSWORD"
            # NOTE(review): passed as a plain env value, so it is visible in the
            # pod spec; consider sourcing it from a Secret.
            value = var.roundcube_db_password
          }

          # Plugins
          env {
            name  = "ROUNDCUBEMAIL_COMPOSER_PLUGINS"
            value = "mmvi/twofactor_webauthn,texxasrulez/persistent_login,dsoares/rcguard"
          }
          env {
            name  = "ROUNDCUBEMAIL_PLUGINS"
            value = "attachment_reminder,database_attachments,enigma,twofactor_webauthn,persistent_login,rcguard"
          }

          # Debugging / logging
          env {
            name  = "ROUNDCUBEMAIL_SMTP_DEBUG"
            value = "true"
          }
          env {
            name  = "ROUNDCUBEMAIL_DEBUG_LEVEL"
            value = "6"
          }
          env {
            name = "ROUNDCUBEMAIL_LOG_DRIVER"
            # value = "file"
            value = "syslog"
          }

          port {
            name           = "web"
            container_port = 80
            protocol       = "TCP"
          }

          volume_mount {
            name       = "html"
            mount_path = "/var/www/html"
          }
          volume_mount {
            name       = "enigma"
            mount_path = "/var/roundcube/enigma"
          }
        }

        # volume {
        #   name = "imap-config"
        #   config_map {
        #     name = "roundcubemail.config"
        #   }
        # }

        # Both volumes live on the same NFS server.
        volume {
          name = "html"
          nfs {
            path   = "/mnt/main/roundcubemail/html"
            server = "10.0.10.15"
          }
        }
        volume {
          name = "enigma"
          nfs {
            path   = "/mnt/main/roundcubemail/enigma"
            server = "10.0.10.15"
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Service exposing the Roundcube pods (selector app=roundcubemail) on port 80.
resource "kubernetes_service" "roundcubemail" {
  metadata {
    name = "roundcubemail"
    # Reference the namespace resource (instead of the literal "mailserver") so
    # Terraform orders this service after the namespace is created.
    namespace = kubernetes_namespace.mailserver.metadata[0].name

    labels = {
      app = "roundcubemail"
    }
  }

  spec {
    selector = {
      app = "roundcubemail"
    }

    port {
      name     = "roundcube"
      protocol = "TCP"
      port     = 80
    }
  }
}
|
||||
|
||||
# Ingress named "mail" in front of the Roundcube service, built by the shared
# ingress_factory module.
module "ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"
  # Resource references (rather than string literals) give Terraform the
  # dependency edges to the namespace and the backing Service; the resolved
  # values are still "mailserver" and "roundcubemail".
  namespace       = kubernetes_namespace.mailserver.metadata[0].name
  name            = "mail"
  service_name    = kubernetes_service.roundcubemail.metadata[0].name
  tls_secret_name = var.tls_secret_name
  rybbit_site_id  = "082f164faa7d"
}
|
||||
158
stacks/platform/modules/mailserver/variables.tf
Normal file
158
stacks/platform/modules/mailserver/variables.tf
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
# this is appended and merged to the main postfix.cf
|
||||
# see defaults - https://github.com/docker-mailserver/docker-mailserver/blob/master/target/postfix/main.cf
|
||||
variable "postfix_cf" {
|
||||
default = <<EOT
|
||||
#relayhost = [smtp.sendgrid.net]:587
|
||||
relayhost = [smtp.eu.mailgun.org]:587
|
||||
smtp_sasl_auth_enable = yes
|
||||
smtp_sasl_password_maps = hash:/etc/postfix/sasl/passwd
|
||||
smtp_sasl_security_options = noanonymous
|
||||
smtp_sasl_tls_security_options = noanonymous
|
||||
smtp_tls_security_level = encrypt
|
||||
smtpd_tls_cert_file=/tmp/ssl/tls.crt
|
||||
smtpd_tls_key_file=/tmp/ssl/tls.key
|
||||
smtpd_use_tls=yes
|
||||
header_size_limit = 4096000
|
||||
|
||||
# Debug mail tls
|
||||
smtpd_tls_loglevel = 1
|
||||
#smtpd_tls_ciphers = TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:DHE-RSA-AES128-SHA256:DHE-RSA-AES256-SHA256:!aNULL:!SEED:!CAMELLIA:!RSA+AES:!SHA1
|
||||
#tls_medium_cipherlist = ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:DHE-RSA-AES128-SHA256:DHE-RSA-AES256-SHA256:!aNULL:!SEED:!CAMELLIA:!RSA+AES:!SHA1
|
||||
EOT
|
||||
}
|
||||
|
||||
variable "postfix_cf_reference_DO_NOT_USE" {
|
||||
default = <<EOT
|
||||
# See /usr/share/postfix/main.cf.dist for a commented, more complete version
|
||||
|
||||
smtpd_banner = $myhostname ESMTP $mail_name (Debian)
|
||||
biff = no
|
||||
append_dot_mydomain = no
|
||||
readme_directory = no
|
||||
|
||||
# Basic configuration
|
||||
# myhostname =
|
||||
alias_maps = hash:/etc/aliases
|
||||
alias_database = hash:/etc/aliases
|
||||
mydestination = $myhostname, localhost.$mydomain, localhost
|
||||
mynetworks = 127.0.0.0/8 [::1]/128 [fe80::]/64
|
||||
mailbox_size_limit = 0
|
||||
recipient_delimiter = +
|
||||
inet_interfaces = all
|
||||
inet_protocols = ipv4
|
||||
|
||||
# TLS parameters
|
||||
smtpd_tls_cert_file=/tmp/ssl/tls.crt
|
||||
smtpd_tls_key_file=/tmp/ssl/tls.key
|
||||
#smtpd_tls_CAfile=
|
||||
#smtp_tls_CAfile=
|
||||
smtpd_tls_security_level = may
|
||||
smtpd_use_tls=yes
|
||||
smtpd_tls_loglevel = 1
|
||||
smtp_tls_loglevel = 1
|
||||
tls_ssl_options = NO_COMPRESSION
|
||||
tls_high_cipherlist = ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA:ECDHE-RSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-RSA-AES256-SHA256:DHE-RSA-AES256-SHA:ECDHE-ECDSA-DES-CBC3-SHA:ECDHE-RSA-DES-CBC3-SHA:EDH-RSA-DES-CBC3-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:DES-CBC3-SHA:!DSS
|
||||
tls_preempt_cipherlist = yes
|
||||
smtpd_tls_protocols = !SSLv2,!SSLv3
|
||||
smtp_tls_protocols = !SSLv2,!SSLv3
|
||||
smtpd_tls_mandatory_ciphers = high
|
||||
smtpd_tls_mandatory_protocols = !SSLv2,!SSLv3
|
||||
smtpd_tls_exclude_ciphers = aNULL, LOW, EXP, MEDIUM, ADH, AECDH, MD5, DSS, ECDSA, CAMELLIA128, 3DES, CAMELLIA256, RSA+AES, eNULL
|
||||
smtpd_tls_dh1024_param_file = /etc/postfix/dhparams.pem
|
||||
smtpd_tls_CApath = /etc/ssl/certs
|
||||
smtp_tls_CApath = /etc/ssl/certs
|
||||
|
||||
# Settings to prevent SPAM early
|
||||
smtpd_helo_required = yes
|
||||
smtpd_delay_reject = yes
|
||||
smtpd_helo_restrictions = permit_mynetworks, reject_invalid_helo_hostname, permit
|
||||
#smtpd_relay_restrictions = permit_mynetworks permit_sasl_authenticated defer_unauth_destination
|
||||
#smtpd_relay_restrictions = reject_sender_login_mismatch permit_sasl_authenticated permit_mynetworks defer_unauth_destination
|
||||
smtpd_relay_restrictions = reject_sender_login_mismatch permit_sasl_authenticated permit_mynetworks defer_unauth_destination
|
||||
smtpd_recipient_restrictions = permit_sasl_authenticated, reject_unauth_destination, reject_unauth_pipelining, reject_invalid_helo_hostname, reject_non_fqdn_helo_hostname, reject_unknown_recipient_domain, reject_rbl_client bl.spamcop.net, permit_mynetworks
|
||||
smtpd_client_restrictions = permit_mynetworks, permit_sasl_authenticated, reject_unauth_destination, reject_unauth_pipelining
|
||||
#smtpd_sender_restrictions = reject_sender_login_mismatch, permit_sasl_authenticated, permit_mynetworks, reject_unknown_sender_domain
|
||||
smtpd_sender_restrictions = reject_sender_login_mismatch, reject_authenticated_sender_login_mismatch, reject_unknown_sender_domain, permit_sasl_authenticated, permit_mynetworks
|
||||
disable_vrfy_command = yes
|
||||
|
||||
# Postscreen settings to drop zombies/open relays/spam early
|
||||
#postscreen_dnsbl_action = enforce
|
||||
postscreen_dnsbl_action = ignore
|
||||
postscreen_dnsbl_sites = zen.spamhaus.org*2
|
||||
bl.mailspike.net
|
||||
b.barracudacentral.org*2
|
||||
bl.spameatingmonkey.net
|
||||
bl.spamcop.net
|
||||
dnsbl.sorbs.net
|
||||
psbl.surriel.com
|
||||
list.dnswl.org=127.0.[0..255].0*-2
|
||||
list.dnswl.org=127.0.[0..255].1*-3
|
||||
list.dnswl.org=127.0.[0..255].[2..3]*-4
|
||||
postscreen_dnsbl_threshold = 3
|
||||
postscreen_dnsbl_whitelist_threshold = -1
|
||||
postscreen_greet_action = enforce
|
||||
postscreen_bare_newline_action = enforce
|
||||
|
||||
# SASL
|
||||
smtpd_sasl_auth_enable = no
|
||||
#smtpd_sasl_auth_enable = yes
|
||||
##smtpd_sasl_path = /var/spool/postfix/private/auth
|
||||
#smtpd_sasl_path = /var/spool/postfix/private/smtpd
|
||||
##smtpd_sasl_type = dovecot
|
||||
#smtpd_sasl_type = dovecot
|
||||
##smtpd_sasl_security_options = noanonymous
|
||||
#smtpd_sasl_security_options = noanonymous
|
||||
##smtpd_sasl_local_domain = $mydomain
|
||||
##broken_sasl_auth_clients = yes
|
||||
#broken_sasl_auth_clients = yes
|
||||
|
||||
# SMTP configuration
|
||||
smtp_sasl_auth_enable = yes
|
||||
smtp_sasl_password_maps = hash:/etc/postfix/sasl/passwd
|
||||
smtp_sasl_security_options = noanonymous
|
||||
smtp_sasl_tls_security_options = noanonymous
|
||||
smtp_tls_security_level = encrypt
|
||||
header_size_limit = 4096000
|
||||
relayhost = [smtp.sendgrid.net]:587
|
||||
|
||||
# Mail directory
|
||||
virtual_transport = lmtp:unix:/var/run/dovecot/lmtp
|
||||
virtual_mailbox_domains = /etc/postfix/vhost
|
||||
virtual_mailbox_maps = texthash:/etc/postfix/vmailbox
|
||||
virtual_alias_maps = texthash:/etc/postfix/virtual
|
||||
|
||||
# Additional option for filtering
|
||||
content_filter = smtp-amavis:[127.0.0.1]:10024
|
||||
|
||||
# Milters used by DKIM
|
||||
milter_protocol = 6
|
||||
milter_default_action = accept
|
||||
dkim_milter = inet:localhost:8891
|
||||
dmarc_milter = inet:localhost:8893
|
||||
smtpd_milters = $dkim_milter,$dmarc_milter
|
||||
non_smtpd_milters = $dkim_milter
|
||||
|
||||
# SPF policy settings
|
||||
policyd-spf_time_limit = 3600
|
||||
|
||||
# Header checks for content inspection on receiving
|
||||
header_checks = pcre:/etc/postfix/maps/header_checks.pcre
|
||||
|
||||
# Remove unwanted headers that reveail our privacy
|
||||
smtp_header_checks = pcre:/etc/postfix/maps/sender_header_filter.pcre
|
||||
myhostname = mail.viktorbarzin.me
|
||||
mydomain = viktorbarzin.me
|
||||
smtputf8_enable = no
|
||||
message_size_limit = 20480000
|
||||
sender_canonical_maps = tcp:localhost:10001
|
||||
sender_canonical_classes = envelope_sender
|
||||
recipient_canonical_maps = tcp:localhost:10002
|
||||
recipient_canonical_classes = envelope_recipient,header_recipient
|
||||
compatibility_level = 2
|
||||
# enable_original_recipient = no # b4 uncommenting see https://serverfault.com/questions/661615/how-to-drop-orig-to-using-postfix-virtual-domains
|
||||
always_add_missing_headers = yes
|
||||
|
||||
anvil_status_update_time = 5s
|
||||
EOT
|
||||
}
|
||||
|
||||
40
stacks/platform/modules/metallb/main.tf
Normal file
40
stacks/platform/modules/metallb/main.tf
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# Creates the namespace and everything else needed
|
||||
# Do not use until https://github.com/colinwilson/terraform-kubernetes-metallb/issues/5 is solved
|
||||
# module "metallb" {
|
||||
# source = "colinwilson/metallb/kubernetes"
|
||||
# version = "0.1.7"
|
||||
# }
|
||||
variable "tier" { type = string }
|
||||
|
||||
resource "kubernetes_namespace" "metallb" {
|
||||
metadata {
|
||||
name = "metallb-system"
|
||||
labels = {
|
||||
app = "metallb"
|
||||
# "istio-injection" : "disabled"
|
||||
# tier = var.tier
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module "metallb" {
|
||||
source = "ViktorBarzin/metallb/kubernetes"
|
||||
version = "0.1.5"
|
||||
depends_on = [kubernetes_namespace.metallb]
|
||||
}
|
||||
|
||||
resource "kubernetes_config_map" "config" {
|
||||
metadata {
|
||||
name = "config"
|
||||
namespace = kubernetes_namespace.metallb.metadata[0].name
|
||||
}
|
||||
data = {
|
||||
config = <<EOT
|
||||
address-pools:
|
||||
- name: default
|
||||
protocol: layer2
|
||||
addresses:
|
||||
- 10.0.20.200-10.0.20.220
|
||||
EOT
|
||||
}
|
||||
}
|
||||
29
stacks/platform/modules/metrics-server/main.tf
Normal file
29
stacks/platform/modules/metrics-server/main.tf
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
variable "tls_secret_name" {}
|
||||
variable "tier" { type = string }
|
||||
|
||||
resource "kubernetes_namespace" "metrics-server" {
|
||||
metadata {
|
||||
name = "metrics-server"
|
||||
labels = {
|
||||
tier = var.tier
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module "tls_secret" {
|
||||
source = "../../../../modules/kubernetes/setup_tls_secret"
|
||||
namespace = kubernetes_namespace.metrics-server.metadata[0].name
|
||||
tls_secret_name = var.tls_secret_name
|
||||
}
|
||||
|
||||
resource "helm_release" "metrics-server" {
|
||||
namespace = kubernetes_namespace.metrics-server.metadata[0].name
|
||||
create_namespace = false
|
||||
name = "metrics-server"
|
||||
atomic = true
|
||||
|
||||
repository = "https://kubernetes-sigs.github.io/metrics-server/"
|
||||
chart = "metrics-server"
|
||||
|
||||
values = [templatefile("${path.module}/values.yaml", {})]
|
||||
}
|
||||
2
stacks/platform/modules/metrics-server/values.yaml
Normal file
2
stacks/platform/modules/metrics-server/values.yaml
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
args:
|
||||
- "--kubelet-insecure-tls"
|
||||
27
stacks/platform/modules/monitoring/Dockerfile
Normal file
27
stacks/platform/modules/monitoring/Dockerfile
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# dockerhub: viktorbarzin/redfish-exporter
# repo: https://pkg.go.dev/github.com/jenningsloy318/redfish_exporter#section-readme

# --- Build stage: clone the exporter and compile it with the upstream Makefile ---
FROM golang:rc-bullseye AS builder

ARG ARCH=amd64

# Use the key=value form; the space-separated `ENV key value` form is legacy syntax.
ENV GOROOT=/usr/local/go
ENV GOPATH=/go
ENV PATH="$GOROOT/bin:$GOPATH/bin:$PATH"
# NOTE(review): GO_VERSION does not select the toolchain (the base image tag does);
# kept in case the upstream Makefile reads it - confirm or drop.
ENV GO_VERSION=1.15.2
ENV GO111MODULE=on

# Build dependencies
RUN mkdir -p /go/src/github.com/ && \
    git clone https://github.com/jenningsloy318/redfish_exporter /go/src/github.com/jenningsloy318/redfish_exporter && \
    cd /go/src/github.com/jenningsloy318/redfish_exporter && \
    make build

# --- Runtime stage: only the compiled binary is carried over ---
# NOTE(review): the full golang image is a heavy runtime base; a slimmer base
# would likely do if the binary's link requirements allow it - confirm.
FROM golang:rc-bullseye

# Labels set in the builder stage are not inherited by the final image,
# so the maintainer label is declared here.
LABEL maintainer="Viktor Barzin <me@viktorbarzin.me>"

COPY --from=builder /go/src/github.com/jenningsloy318/redfish_exporter/build/redfish_exporter /usr/local/bin/redfish_exporter
RUN mkdir -p /etc/prometheus
# Config file is mounted at runtime
CMD ["/usr/local/bin/redfish_exporter", "--config.file", "/etc/prometheus/redfish_exporter.yml"]
|
||||
131
stacks/platform/modules/monitoring/alloy.yaml
Normal file
131
stacks/platform/modules/monitoring/alloy.yaml
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
alloy:
|
||||
configMap:
|
||||
content: |-
|
||||
// Write your Alloy config here:
|
||||
logging {
|
||||
level = "info"
|
||||
format = "logfmt"
|
||||
}
|
||||
loki.write "default" {
|
||||
endpoint {
|
||||
url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push"
|
||||
}
|
||||
}
|
||||
|
||||
// discovery.kubernetes allows you to find scrape targets from Kubernetes resources.
|
||||
// It watches cluster state and ensures targets are continually synced with what is currently running in your cluster.
|
||||
discovery.kubernetes "pod" {
|
||||
role = "pod"
|
||||
}
|
||||
|
||||
// discovery.relabel rewrites the label set of the input targets by applying one or more relabeling rules.
|
||||
// If no rules are defined, then the input targets are exported as-is.
|
||||
discovery.relabel "pod_logs" {
|
||||
targets = discovery.kubernetes.pod.targets
|
||||
|
||||
// Label creation - "namespace" field from "__meta_kubernetes_namespace"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_namespace"]
|
||||
action = "replace"
|
||||
target_label = "namespace"
|
||||
}
|
||||
|
||||
// Label creation - "pod" field from "__meta_kubernetes_pod_name"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_name"]
|
||||
action = "replace"
|
||||
target_label = "pod"
|
||||
}
|
||||
|
||||
// Label creation - "container" field from "__meta_kubernetes_pod_container_name"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_container_name"]
|
||||
action = "replace"
|
||||
target_label = "container"
|
||||
}
|
||||
|
||||
// Label creation - "app" field from "__meta_kubernetes_pod_label_app_kubernetes_io_name"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
|
||||
action = "replace"
|
||||
target_label = "app"
|
||||
}
|
||||
|
||||
// Label creation - "job" field from "__meta_kubernetes_namespace" and "__meta_kubernetes_pod_container_name"
|
||||
// Concatenate values __meta_kubernetes_namespace/__meta_kubernetes_pod_container_name
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"]
|
||||
action = "replace"
|
||||
target_label = "job"
|
||||
separator = "/"
|
||||
replacement = "$1"
|
||||
}
|
||||
|
||||
// Label creation - "__path__" field from "__meta_kubernetes_pod_uid" and "__meta_kubernetes_pod_container_name"
|
||||
// Concatenate values __meta_kubernetes_pod_uid/__meta_kubernetes_pod_container_name.log
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
|
||||
action = "replace"
|
||||
target_label = "__path__"
|
||||
separator = "/"
|
||||
replacement = "/var/log/pods/*$1/*.log"
|
||||
}
|
||||
|
||||
// Label creation - "container_runtime" field from "__meta_kubernetes_pod_container_id"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_container_id"]
|
||||
action = "replace"
|
||||
target_label = "container_runtime"
|
||||
regex = "^(\\S+):\\/\\/.+$"
|
||||
replacement = "$1"
|
||||
}
|
||||
}
|
||||
|
||||
// loki.source.kubernetes tails logs from Kubernetes containers using the Kubernetes API.
|
||||
loki.source.kubernetes "pod_logs" {
|
||||
targets = discovery.relabel.pod_logs.output
|
||||
forward_to = [loki.process.pod_logs.receiver]
|
||||
}
|
||||
|
||||
// loki.process receives log entries from other Loki components, applies one or more processing stages,
|
||||
// and forwards the results to the list of receivers in the component's arguments.
|
||||
loki.process "pod_logs" {
|
||||
stage.static_labels {
|
||||
values = {
|
||||
cluster = "default",
|
||||
}
|
||||
}
|
||||
|
||||
forward_to = [loki.write.default.receiver]
|
||||
}
|
||||
|
||||
// Kubernetes audit log collection from /var/log/kubernetes/audit.log
|
||||
// Requires alloy.mounts.varlog=true to mount /var/log from the host
|
||||
local.file_match "audit_logs" {
|
||||
path_targets = [{
|
||||
__path__ = "/var/log/kubernetes/audit.log",
|
||||
job = "kubernetes-audit",
|
||||
node = env("HOSTNAME"),
|
||||
}]
|
||||
}
|
||||
|
||||
loki.source.file "audit_logs" {
|
||||
targets = local.file_match.audit_logs.targets
|
||||
forward_to = [loki.write.default.receiver]
|
||||
}
|
||||
|
||||
# Mount /var/log from the host for file-based log collection (audit logs)
|
||||
mounts:
|
||||
varlog: true
|
||||
|
||||
# Resource limits for DaemonSet pods
|
||||
# Alloy tails logs from all containers on the node via K8s API and batches
|
||||
# them to Loki. Memory scales with number of active log streams (~30-50 per node).
|
||||
# 128Mi was OOMKilled; steady-state usage is ~400-450Mi per pod.
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 768Mi
|
||||
1350
stacks/platform/modules/monitoring/dashboards/api_server.json
Normal file
1350
stacks/platform/modules/monitoring/dashboards/api_server.json
Normal file
File diff suppressed because it is too large
Load diff
5000
stacks/platform/modules/monitoring/dashboards/cluster_health.json
Normal file
5000
stacks/platform/modules/monitoring/dashboards/cluster_health.json
Normal file
File diff suppressed because it is too large
Load diff
2343
stacks/platform/modules/monitoring/dashboards/core_dns.json
Normal file
2343
stacks/platform/modules/monitoring/dashboards/core_dns.json
Normal file
File diff suppressed because it is too large
Load diff
2880
stacks/platform/modules/monitoring/dashboards/idrac.json
Normal file
2880
stacks/platform/modules/monitoring/dashboards/idrac.json
Normal file
File diff suppressed because it is too large
Load diff
204
stacks/platform/modules/monitoring/dashboards/k8s-audit.json
Normal file
204
stacks/platform/modules/monitoring/dashboards/k8s-audit.json
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": { "type": "datasource", "uid": "grafana" },
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Kubernetes API server audit logs from Loki",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
||||
"id": 100,
|
||||
"panels": [],
|
||||
"title": "Recent Activity",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
|
||||
"description": "Recent Kubernetes API actions from audit logs",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"cellOptions": { "type": "auto" },
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 12, "w": 24, "x": 0, "y": 1 },
|
||||
"id": 1,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false },
|
||||
"showHeader": true,
|
||||
"sortBy": [{ "desc": true, "displayName": "Time" }]
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
|
||||
"editorMode": "code",
|
||||
"expr": "{job=\"kubernetes-audit\"} | json | line_format \"{{.user.username}} {{.verb}} {{.objectRef.resource}} {{.objectRef.namespace}}\"",
|
||||
"legendFormat": "",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Recent Actions",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 },
|
||||
"id": 101,
|
||||
"panels": [],
|
||||
"title": "Request Rates",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
|
||||
"description": "API request count by user over time",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 14 },
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": { "calcs": ["sum", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
|
||||
"editorMode": "code",
|
||||
"expr": "sum by (user_username) (count_over_time({job=\"kubernetes-audit\"} | json [5m]))",
|
||||
"legendFormat": "{{user_username}}",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Request Count by User",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 24 },
|
||||
"id": 102,
|
||||
"panels": [],
|
||||
"title": "Denied Requests",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
|
||||
"description": "API requests denied with HTTP 403+ status codes",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"cellOptions": { "type": "auto" },
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "red", "value": 403 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 12, "w": 24, "x": 0, "y": 25 },
|
||||
"id": 3,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false },
|
||||
"showHeader": true,
|
||||
"sortBy": [{ "desc": true, "displayName": "Time" }]
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "P8E80F9AEF21F6940" },
|
||||
"editorMode": "code",
|
||||
"expr": "{job=\"kubernetes-audit\"} | json | responseStatus_code >= 403",
|
||||
"legendFormat": "",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Denied Requests (403+)",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"preload": false,
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 42,
|
||||
"tags": ["kubernetes", "audit", "security"],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Kubernetes Audit Logs",
|
||||
"uid": "k8s-audit",
|
||||
"version": 1
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
288
stacks/platform/modules/monitoring/dashboards/loki.json
Normal file
288
stacks/platform/modules/monitoring/dashboards/loki.json
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Logs collected from Kubernetes, stored in Loki",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 15141,
|
||||
"graphTooltip": 0,
|
||||
"id": 25,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "P8E80F9AEF21F6940"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "P8E80F9AEF21F6940"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(count_over_time({namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\" [$__interval]))",
|
||||
"instant": false,
|
||||
"legendFormat": "Log count",
|
||||
"queryType": "range",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "P8E80F9AEF21F6940"
|
||||
},
|
||||
"description": "Logs from services running in Kubernetes",
|
||||
"gridPos": {
|
||||
"h": 25,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 4
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": false,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "P8E80F9AEF21F6940"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "{namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\"",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "5s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"description": "String to search for",
|
||||
"hide": 0,
|
||||
"label": "Search Query",
|
||||
"name": "query",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "textbox"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"dbaas"
|
||||
],
|
||||
"value": [
|
||||
"dbaas"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "P8E80F9AEF21F6940"
|
||||
},
|
||||
"definition": "label_values(namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [],
|
||||
"query": "label_values(namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "P8E80F9AEF21F6940"
|
||||
},
|
||||
"definition": "label_values(stream)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "stream",
|
||||
"options": [],
|
||||
"query": "label_values(stream)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "P8E80F9AEF21F6940"
|
||||
},
|
||||
"definition": "label_values(container)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "container",
|
||||
"options": [],
|
||||
"query": "label_values(container)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Loki Kubernetes Logs",
|
||||
"uid": "o6-BGgnnk",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
1976
stacks/platform/modules/monitoring/dashboards/nginx_ingress.json
Normal file
1976
stacks/platform/modules/monitoring/dashboards/nginx_ingress.json
Normal file
File diff suppressed because it is too large
Load diff
23872
stacks/platform/modules/monitoring/dashboards/node_exporter_full.json
Normal file
23872
stacks/platform/modules/monitoring/dashboards/node_exporter_full.json
Normal file
File diff suppressed because it is too large
Load diff
3927
stacks/platform/modules/monitoring/dashboards/nodes.json
Normal file
3927
stacks/platform/modules/monitoring/dashboards/nodes.json
Normal file
File diff suppressed because it is too large
Load diff
816
stacks/platform/modules/monitoring/dashboards/nvidia.json
Normal file
816
stacks/platform/modules/monitoring/dashboards/nvidia.json
Normal file
|
|
@ -0,0 +1,816 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"$$hashKey": "object:192",
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "This dashboard is to display the metrics from DCGM Exporter on a Kubernetes (1.13+) cluster",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "celsius"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 18,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 12,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP",
|
||||
"instant": false,
|
||||
"interval": "",
|
||||
"legendFormat": "GPU 0",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GPU Temperature",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"max": 100,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": 70
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "celsius"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"id": 14,
|
||||
"options": {
|
||||
"minVizHeight": 75,
|
||||
"minVizWidth": 75,
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showThresholdLabels": false,
|
||||
"showThresholdMarkers": true,
|
||||
"sizing": "auto"
|
||||
},
|
||||
"pluginVersion": "12.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GPU Current Temp",
|
||||
"type": "gauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "watt"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 18,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_POWER_USAGE",
|
||||
"interval": "",
|
||||
"legendFormat": "GPU {{gpu}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GPU Power Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"max": 2400,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": 1800
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 2200
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "watt"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 8
|
||||
},
|
||||
"id": 16,
|
||||
"options": {
|
||||
"minVizHeight": 75,
|
||||
"minVizWidth": 75,
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showThresholdLabels": false,
|
||||
"showThresholdMarkers": true,
|
||||
"sizing": "auto"
|
||||
},
|
||||
"pluginVersion": "12.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(nvidia_tesla_t4_DCGM_FI_DEV_POWER_USAGE)",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": false,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GPU Power Total",
|
||||
"type": "gauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"max": 100,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_UTIL",
|
||||
"interval": "",
|
||||
"legendFormat": "GPU {{gpu}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GPU Utilization",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decmbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 18,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_FB_USED",
|
||||
"interval": "",
|
||||
"legendFormat": "GPU {{gpu}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GPU Framebuffer Mem Used",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "hertz"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "nvidia_tesla_t4_DCGM_FI_DEV_SM_CLOCK* 1000000",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GPU {{gpu}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GPU SM Clocks",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 24
|
||||
},
|
||||
"id": 19,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by (namespace) (gpu_pod_memory_used_bytes)",
|
||||
"instant": false,
|
||||
"legendFormat": "{{namespace}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GPU Memory per Application",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"preload": false,
|
||||
"refresh": "auto",
|
||||
"schemaVersion": 42,
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-12h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "NVIDIA DCGM Exporter Dashboard",
|
||||
"uid": "Oxed_c6Wz",
|
||||
"version": 9
|
||||
}
|
||||
2658
stacks/platform/modules/monitoring/dashboards/pods.json
Normal file
2658
stacks/platform/modules/monitoring/dashboards/pods.json
Normal file
File diff suppressed because it is too large
Load diff
15758
stacks/platform/modules/monitoring/dashboards/proxmox_node_exporter.json
Normal file
15758
stacks/platform/modules/monitoring/dashboards/proxmox_node_exporter.json
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,976 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": { "type": "datasource", "uid": "grafana" },
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
||||
"id": 100,
|
||||
"panels": [],
|
||||
"title": "Scraping Overview",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Total listings discovered during scrape runs",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 },
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"showPercentChange": false,
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "scrape_listings_found_total{job=\"realestate-crawler-celery\"}",
|
||||
"legendFormat": "Found",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Listings Found",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Total listings successfully processed",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 },
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"showPercentChange": false,
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "scrape_listings_processed_total{job=\"realestate-crawler-celery\"}",
|
||||
"legendFormat": "Processed",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Listings Processed",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Total listings that failed processing",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 },
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"showPercentChange": false,
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "scrape_listings_failed_total{job=\"realestate-crawler-celery\"}",
|
||||
"legendFormat": "Failed",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Listings Failed",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Total API pages fetched during scraping",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "purple", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 },
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"showPercentChange": false,
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "scrape_pages_fetched_total{job=\"realestate-crawler-celery\"}",
|
||||
"legendFormat": "Pages",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Pages Fetched",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Total subqueries executed after query splitting",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "orange", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 },
|
||||
"id": 5,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"showPercentChange": false,
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "scrape_subqueries_total{job=\"realestate-crawler-celery\"}",
|
||||
"legendFormat": "Subqueries",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Subqueries",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Circuit breaker state: 0=closed (healthy), 1=half-open, 2=open (tripped)",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [
|
||||
{ "options": { "0": { "color": "green", "text": "Closed" } }, "type": "value" },
|
||||
{ "options": { "1": { "color": "yellow", "text": "Half-Open" } }, "type": "value" },
|
||||
{ "options": { "2": { "color": "red", "text": "Open" } }, "type": "value" }
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 2 }
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 },
|
||||
"id": 6,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"showPercentChange": false,
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "circuit_breaker_state{job=\"realestate-crawler-celery\"}",
|
||||
"legendFormat": "State",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Circuit Breaker",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
|
||||
"id": 101,
|
||||
"panels": [],
|
||||
"title": "Scraping Activity",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Rate of listings found, processed, and failed",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "Failed" },
|
||||
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
|
||||
"id": 10,
|
||||
"options": {
|
||||
"legend": { "calcs": ["sum", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "increase(scrape_listings_found_total{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "Found",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "increase(scrape_listings_processed_total{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "Processed",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "increase(scrape_listings_failed_total{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "Failed",
|
||||
"range": true,
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Listing Activity (5m increase)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Duration of full scrape runs",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
|
||||
"id": 11,
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "rate(scrape_duration_seconds_sum{job=\"realestate-crawler-celery\"}[5m]) / rate(scrape_duration_seconds_count{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "Avg Duration",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.95, rate(scrape_duration_seconds_bucket{job=\"realestate-crawler-celery\"}[30m]))",
|
||||
"legendFormat": "p95 Duration",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Scrape Duration",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
|
||||
"id": 102,
|
||||
"panels": [],
|
||||
"title": "Throttling & Errors",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Throttle events by type: rate_limit, service_unavailable, ip_blocked, slow_response, empty_response, invalid_response",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 80,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "normal" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "rate_limit" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "ip_blocked" }, "properties": [{ "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "service_unavailable" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "slow_response" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }
|
||||
]
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 },
|
||||
"id": 20,
|
||||
"options": {
|
||||
"legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "increase(throttle_events_total{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "{{ type }}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Throttle Events (5m increase)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Circuit breaker state over time: 0=closed (healthy), 1=half-open, 2=open (tripped)",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "stepAfter",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": {
|
||||
"mode": "line+area",
|
||||
"thresholds": [
|
||||
{ "color": "red", "value": 2 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"mappings": [
|
||||
{ "options": { "0": { "text": "Closed" } }, "type": "value" },
|
||||
{ "options": { "1": { "text": "Half-Open" } }, "type": "value" },
|
||||
{ "options": { "2": { "text": "Open" } }, "type": "value" }
|
||||
],
|
||||
"max": 2,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 2 }
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 },
|
||||
"id": 21,
|
||||
"options": {
|
||||
"legend": { "calcs": ["lastNotNull"], "displayMode": "list", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "single", "sort": "none" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "circuit_breaker_state{job=\"realestate-crawler-celery\"}",
|
||||
"legendFormat": "Circuit Breaker",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Circuit Breaker State",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 },
|
||||
"id": 103,
|
||||
"panels": [],
|
||||
"title": "Cache & OCR",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "GeoJSON cache hit ratio",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "yellow", "value": 0.5 },
|
||||
{ "color": "green", "value": 0.8 }
|
||||
]
|
||||
},
|
||||
"max": 1,
|
||||
"min": 0,
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 6, "w": 4, "x": 0, "y": 24 },
|
||||
"id": 30,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"showPercentChange": false,
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "rate(geojson_cache_operations_total{job=\"realestate-crawler-api\",result=\"hit\"}[15m]) / (rate(geojson_cache_operations_total{job=\"realestate-crawler-api\",result=\"hit\"}[15m]) + rate(geojson_cache_operations_total{job=\"realestate-crawler-api\",result=\"miss\"}[15m]))",
|
||||
"legendFormat": "Hit Rate",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GeoJSON Cache Hit Rate",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "GeoJSON cache hits and misses over time",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "miss" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "hit" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }
|
||||
]
|
||||
},
|
||||
"gridPos": { "h": 6, "w": 8, "x": 4, "y": 24 },
|
||||
"id": 31,
|
||||
"options": {
|
||||
"legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "rate(geojson_cache_operations_total{job=\"realestate-crawler-api\"}[5m])",
|
||||
"legendFormat": "{{ result }}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GeoJSON Cache Operations",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "OCR detection attempts and successes for floorplan square meter extraction",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 24 },
|
||||
"id": 32,
|
||||
"options": {
|
||||
"legend": { "calcs": ["sum", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "increase(ocr_attempts_total{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "Attempts",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "increase(ocr_successes_total{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "Successes",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "OCR Floorplan Detection (5m increase)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 },
|
||||
"id": 104,
|
||||
"panels": [],
|
||||
"title": "Celery Tasks",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Celery task completions by task name and status",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 80,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "normal" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 31 },
|
||||
"id": 40,
|
||||
"options": {
|
||||
"legend": { "calcs": ["sum"], "displayMode": "table", "placement": "right", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "increase(celery_tasks_total{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "{{ task_name }} ({{ status }})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Celery Tasks by Name & Status (5m increase)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Average and p95 task durations by task name",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 31 },
|
||||
"id": 41,
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "rate(celery_task_duration_seconds_sum{job=\"realestate-crawler-celery\"}[5m]) / rate(celery_task_duration_seconds_count{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "{{ task_name }} avg",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.95, rate(celery_task_duration_seconds_bucket{job=\"realestate-crawler-celery\"}[30m]))",
|
||||
"legendFormat": "{{ task_name }} p95",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Celery Task Duration",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Currently active (in-flight) Celery tasks",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "stepAfter",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "normal" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 39 },
|
||||
"id": 42,
|
||||
"options": {
|
||||
"legend": { "calcs": ["max", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "celery_tasks_active{job=\"realestate-crawler-celery\"}",
|
||||
"legendFormat": "{{ task_name }}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Active Celery Tasks",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"description": "Pages fetched and subqueries executed during scraping",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 39 },
|
||||
"id": 43,
|
||||
"options": {
|
||||
"legend": { "calcs": ["sum", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "increase(scrape_pages_fetched_total{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "Pages Fetched",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"editorMode": "code",
|
||||
"expr": "increase(scrape_subqueries_total{job=\"realestate-crawler-celery\"}[5m])",
|
||||
"legendFormat": "Subqueries",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Scraping Pagination (5m increase)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"preload": false,
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 42,
|
||||
"tags": ["realestate-crawler", "celery", "scraping"],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Real Estate Crawler",
|
||||
"uid": "realestate-crawler",
|
||||
"version": 1
|
||||
}
|
||||
1764
stacks/platform/modules/monitoring/dashboards/registry.json
Normal file
1764
stacks/platform/modules/monitoring/dashboards/registry.json
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,488 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": { "type": "datasource", "uid": "grafana" },
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Technitium DNS query logs from MySQL",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"title": "Total Queries",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"textMode": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT COUNT(*) as total_queries FROM dns_logs WHERE $__timeFilter(timestamp)",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Cached %",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"unit": "percentunit",
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "yellow", "value": 0.3 },
|
||||
{ "color": "green", "value": 0.5 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"textMode": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) / COUNT(*) as cached_pct FROM dns_logs WHERE $__timeFilter(timestamp)",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Blocked %",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"unit": "percentunit",
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 0.1 },
|
||||
{ "color": "red", "value": 0.3 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"textMode": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) / COUNT(*) as blocked_pct FROM dns_logs WHERE $__timeFilter(timestamp)",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "NxDomain %",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"unit": "percentunit",
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 0.2 },
|
||||
{ "color": "red", "value": 0.5 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"textMode": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT SUM(CASE WHEN rcode = 3 THEN 1 ELSE 0 END) / COUNT(*) as nxdomain_pct FROM dns_logs WHERE $__timeFilter(timestamp)",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Avg Response Time",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"unit": "ms",
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 50 },
|
||||
{ "color": "red", "value": 200 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"textMode": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT AVG(response_rtt) as avg_rtt_ms FROM dns_logs WHERE $__timeFilter(timestamp) AND response_rtt IS NOT NULL",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Queries by Protocol",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" }
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"textMode": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT SUM(CASE WHEN protocol = 0 THEN 1 ELSE 0 END) as UDP, SUM(CASE WHEN protocol = 1 THEN 1 ELSE 0 END) as TCP, SUM(CASE WHEN protocol = 3 THEN 1 ELSE 0 END) as DoH, SUM(CASE WHEN protocol = 4 THEN 1 ELSE 0 END) as DoT FROM dns_logs WHERE $__timeFilter(timestamp)",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Queries Over Time",
|
||||
"type": "timeseries",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 4 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 50,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "normal" }
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT $__timeGroup(timestamp, $__interval) as time, SUM(CASE WHEN response_type = 1 THEN 1 ELSE 0 END) as Authoritative, SUM(CASE WHEN response_type = 2 THEN 1 ELSE 0 END) as Recursive, SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) as Cached, SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) as Blocked, SUM(CASE WHEN response_type = 5 THEN 1 ELSE 0 END) as Dropped FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY time ORDER BY time",
|
||||
"format": "time_series",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Response Codes",
|
||||
"type": "piechart",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" }
|
||||
},
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "NOERROR" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "NXDOMAIN" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "SERVFAIL" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "REFUSED" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] }
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] },
|
||||
"pieType": "donut",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true },
|
||||
"tooltip": { "mode": "single" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT SUM(CASE WHEN rcode = 0 THEN 1 ELSE 0 END) as NOERROR, SUM(CASE WHEN rcode = 2 THEN 1 ELSE 0 END) as SERVFAIL, SUM(CASE WHEN rcode = 3 THEN 1 ELSE 0 END) as NXDOMAIN, SUM(CASE WHEN rcode = 5 THEN 1 ELSE 0 END) as REFUSED, SUM(CASE WHEN rcode NOT IN (0,2,3,5) THEN 1 ELSE 0 END) as Other FROM dns_logs WHERE $__timeFilter(timestamp)",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Response Types",
|
||||
"type": "piechart",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" }
|
||||
},
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "Cached" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "Blocked" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "Recursive" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "Authoritative" }, "properties": [{ "id": "color", "value": { "fixedColor": "purple", "mode": "fixed" } }] }
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] },
|
||||
"pieType": "donut",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true },
|
||||
"tooltip": { "mode": "single" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT SUM(CASE WHEN response_type = 1 THEN 1 ELSE 0 END) as Authoritative, SUM(CASE WHEN response_type = 2 THEN 1 ELSE 0 END) as Recursive, SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) as Cached, SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) as Blocked, SUM(CASE WHEN response_type = 5 THEN 1 ELSE 0 END) as Dropped FROM dns_logs WHERE $__timeFilter(timestamp)",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Query Types",
|
||||
"type": "piechart",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" }
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] },
|
||||
"pieType": "donut",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true },
|
||||
"tooltip": { "mode": "single" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT SUM(CASE WHEN qtype = 1 THEN 1 ELSE 0 END) as A, SUM(CASE WHEN qtype = 28 THEN 1 ELSE 0 END) as AAAA, SUM(CASE WHEN qtype = 5 THEN 1 ELSE 0 END) as CNAME, SUM(CASE WHEN qtype = 15 THEN 1 ELSE 0 END) as MX, SUM(CASE WHEN qtype = 16 THEN 1 ELSE 0 END) as TXT, SUM(CASE WHEN qtype = 33 THEN 1 ELSE 0 END) as SRV, SUM(CASE WHEN qtype = 12 THEN 1 ELSE 0 END) as PTR, SUM(CASE WHEN qtype = 6 THEN 1 ELSE 0 END) as SOA, SUM(CASE WHEN qtype = 2 THEN 1 ELSE 0 END) as NS, SUM(CASE WHEN qtype = 65 THEN 1 ELSE 0 END) as HTTPS, SUM(CASE WHEN qtype NOT IN (1,2,5,6,12,15,16,28,33,65) THEN 1 ELSE 0 END) as Other FROM dns_logs WHERE $__timeFilter(timestamp)",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Top 20 Queried Domains",
|
||||
"type": "table",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": { "filterable": true }
|
||||
},
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"sortBy": [{ "desc": true, "displayName": "count" }]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY qname ORDER BY count DESC LIMIT 20",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Top 20 Clients",
|
||||
"type": "table",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": { "filterable": true }
|
||||
},
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"sortBy": [{ "desc": true, "displayName": "count" }]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT client_ip, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY client_ip ORDER BY count DESC LIMIT 20",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Average Response Time Over Time",
|
||||
"type": "timeseries",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 30 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisLabel": "Response Time (ms)",
|
||||
"axisPlacement": "auto",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT $__timeGroup(timestamp, $__interval) as time, AVG(response_rtt) as avg_rtt, MAX(response_rtt) as max_rtt FROM dns_logs WHERE $__timeFilter(timestamp) AND response_rtt IS NOT NULL GROUP BY time ORDER BY time",
|
||||
"format": "time_series",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Top 20 NxDomain Domains",
|
||||
"type": "table",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 38 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": { "filterable": true }
|
||||
},
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"sortBy": [{ "desc": true, "displayName": "count" }]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) AND rcode = 3 GROUP BY qname ORDER BY count DESC LIMIT 20",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Top 20 Blocked Domains",
|
||||
"type": "table",
|
||||
"datasource": { "type": "mysql", "uid": "technitium-mysql" },
|
||||
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 38 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": { "filterable": true }
|
||||
},
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"sortBy": [{ "desc": true, "displayName": "count" }]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) AND response_type = 4 GROUP BY qname ORDER BY count DESC LIMIT 20",
|
||||
"format": "table",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "5m",
|
||||
"schemaVersion": 39,
|
||||
"tags": ["dns", "technitium", "mysql"],
|
||||
"templating": { "list": [] },
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Technitium DNS",
|
||||
"uid": "technitium-dns",
|
||||
"version": 1
|
||||
}
|
||||
|
|
@ -0,0 +1,303 @@
|
|||
# HELP snmpEnableAuthenTraps Indicates whether the SNMP entity is permitted to generate authenticationFailure traps - 1.3.6.1.2.1.11.30
|
||||
# TYPE snmpEnableAuthenTraps gauge
|
||||
snmpEnableAuthenTraps 2
|
||||
# HELP snmpInASNParseErrs The total number of ASN.1 or BER errors encountered by the SNMP entity when decoding received SNMP messages. - 1.3.6.1.2.1.11.6
|
||||
# TYPE snmpInASNParseErrs counter
|
||||
snmpInASNParseErrs 0
|
||||
# HELP snmpInBadCommunityNames The total number of community-based SNMP messages (for example, SNMPv1) delivered to the SNMP entity which used an SNMP community name not known to said entity - 1.3.6.1.2.1.11.4
|
||||
# TYPE snmpInBadCommunityNames counter
|
||||
snmpInBadCommunityNames 184
|
||||
# HELP snmpInBadCommunityUses The total number of community-based SNMP messages (for example, SNMPv1) delivered to the SNMP entity which represented an SNMP operation that was not allowed for the SNMP community named in the message - 1.3.6.1.2.1.11.5
|
||||
# TYPE snmpInBadCommunityUses counter
|
||||
snmpInBadCommunityUses 0
|
||||
# HELP snmpInBadValues The total number of SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `badValue'. - 1.3.6.1.2.1.11.10
|
||||
# TYPE snmpInBadValues counter
|
||||
snmpInBadValues 0
|
||||
# HELP snmpInBadVersions The total number of SNMP messages which were delivered to the SNMP entity and were for an unsupported SNMP version. - 1.3.6.1.2.1.11.3
|
||||
# TYPE snmpInBadVersions counter
|
||||
snmpInBadVersions 0
|
||||
# HELP snmpInGenErrs The total number of SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `genErr'. - 1.3.6.1.2.1.11.12
|
||||
# TYPE snmpInGenErrs counter
|
||||
snmpInGenErrs 0
|
||||
# HELP snmpInGetNexts The total number of SNMP Get-Next PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.16
|
||||
# TYPE snmpInGetNexts counter
|
||||
snmpInGetNexts 2940
|
||||
# HELP snmpInGetRequests The total number of SNMP Get-Request PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.15
|
||||
# TYPE snmpInGetRequests counter
|
||||
snmpInGetRequests 9
|
||||
# HELP snmpInGetResponses The total number of SNMP Get-Response PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.18
|
||||
# TYPE snmpInGetResponses counter
|
||||
snmpInGetResponses 0
|
||||
# HELP snmpInNoSuchNames The total number of SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `noSuchName'. - 1.3.6.1.2.1.11.9
|
||||
# TYPE snmpInNoSuchNames counter
|
||||
snmpInNoSuchNames 0
|
||||
# HELP snmpInPkts The total number of messages delivered to the SNMP entity from the transport service. - 1.3.6.1.2.1.11.1
|
||||
# TYPE snmpInPkts counter
|
||||
snmpInPkts 5928
|
||||
# HELP snmpInReadOnlys The total number valid SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `readOnly' - 1.3.6.1.2.1.11.11
|
||||
# TYPE snmpInReadOnlys counter
|
||||
snmpInReadOnlys 0
|
||||
# HELP snmpInSetRequests The total number of SNMP Set-Request PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.17
|
||||
# TYPE snmpInSetRequests counter
|
||||
snmpInSetRequests 0
|
||||
# HELP snmpInTooBigs The total number of SNMP PDUs which were delivered to the SNMP protocol entity and for which the value of the error-status field was `tooBig'. - 1.3.6.1.2.1.11.8
|
||||
# TYPE snmpInTooBigs counter
|
||||
snmpInTooBigs 0
|
||||
# HELP snmpInTotalReqVars The total number of MIB objects which have been retrieved successfully by the SNMP protocol entity as the result of receiving valid SNMP Get-Request and Get-Next PDUs. - 1.3.6.1.2.1.11.13
|
||||
# TYPE snmpInTotalReqVars counter
|
||||
snmpInTotalReqVars 72699
|
||||
# HELP snmpInTotalSetVars The total number of MIB objects which have been altered successfully by the SNMP protocol entity as the result of receiving valid SNMP Set-Request PDUs. - 1.3.6.1.2.1.11.14
|
||||
# TYPE snmpInTotalSetVars counter
|
||||
snmpInTotalSetVars 0
|
||||
# HELP snmpInTraps The total number of SNMP Trap PDUs which have been accepted and processed by the SNMP protocol entity. - 1.3.6.1.2.1.11.19
|
||||
# TYPE snmpInTraps counter
|
||||
snmpInTraps 0
|
||||
# HELP snmpOutBadValues The total number of SNMP PDUs which were generated by the SNMP protocol entity and for which the value of the error-status field was `badValue'. - 1.3.6.1.2.1.11.22
|
||||
# TYPE snmpOutBadValues counter
|
||||
snmpOutBadValues 0
|
||||
# HELP snmpOutGenErrs The total number of SNMP PDUs which were generated by the SNMP protocol entity and for which the value of the error-status field was `genErr'. - 1.3.6.1.2.1.11.24
|
||||
# TYPE snmpOutGenErrs counter
|
||||
snmpOutGenErrs 0
|
||||
# HELP snmpOutGetNexts The total number of SNMP Get-Next PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.26
|
||||
# TYPE snmpOutGetNexts counter
|
||||
snmpOutGetNexts 0
|
||||
# HELP snmpOutGetRequests The total number of SNMP Get-Request PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.25
|
||||
# TYPE snmpOutGetRequests counter
|
||||
snmpOutGetRequests 0
|
||||
# HELP snmpOutGetResponses The total number of SNMP Get-Response PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.28
|
||||
# TYPE snmpOutGetResponses counter
|
||||
snmpOutGetResponses 5740
|
||||
# HELP snmpOutNoSuchNames The total number of SNMP PDUs which were generated by the SNMP protocol entity and for which the value of the error-status was `noSuchName'. - 1.3.6.1.2.1.11.21
|
||||
# TYPE snmpOutNoSuchNames counter
|
||||
snmpOutNoSuchNames 0
|
||||
# HELP snmpOutPkts The total number of SNMP Messages which were passed from the SNMP protocol entity to the transport service. - 1.3.6.1.2.1.11.2
|
||||
# TYPE snmpOutPkts counter
|
||||
snmpOutPkts 5739
|
||||
# HELP snmpOutSetRequests The total number of SNMP Set-Request PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.27
|
||||
# TYPE snmpOutSetRequests counter
|
||||
snmpOutSetRequests 0
|
||||
# HELP snmpOutTooBigs The total number of SNMP PDUs which were generated by the SNMP protocol entity and for which the value of the error-status field was `tooBig.' - 1.3.6.1.2.1.11.20
|
||||
# TYPE snmpOutTooBigs counter
|
||||
snmpOutTooBigs 0
|
||||
# HELP snmpOutTraps The total number of SNMP Trap PDUs which have been generated by the SNMP protocol entity. - 1.3.6.1.2.1.11.29
|
||||
# TYPE snmpOutTraps counter
|
||||
snmpOutTraps 0
|
||||
# HELP snmpProxyDrops The total number of Confirmed Class PDUs (such as GetRequest-PDUs, GetNextRequest-PDUs, GetBulkRequest-PDUs, SetRequest-PDUs, and InformRequest-PDUs) delivered to the SNMP entity which were silently dropped because the transmission of the (possibly translated) message to a proxy target failed in a manner (other than a time-out) such that no Response Class PDU (such as a Response-PDU) could be returned. - 1.3.6.1.2.1.11.32
|
||||
# TYPE snmpProxyDrops counter
|
||||
snmpProxyDrops 0
|
||||
# HELP snmpSilentDrops The total number of Confirmed Class PDUs (such as GetRequest-PDUs, GetNextRequest-PDUs, GetBulkRequest-PDUs, SetRequest-PDUs, and InformRequest-PDUs) delivered to the SNMP entity which were silently dropped because the size of a reply containing an alternate Response Class PDU (such as a Response-PDU) with an empty variable-bindings field was greater than either a local constraint or the maximum message size associated with the originator of the request. - 1.3.6.1.2.1.11.31
|
||||
# TYPE snmpSilentDrops counter
|
||||
snmpSilentDrops 0
|
||||
# HELP snmp_scrape_duration_seconds Total SNMP time scrape took (walk and processing).
|
||||
# TYPE snmp_scrape_duration_seconds gauge
|
||||
snmp_scrape_duration_seconds{module="huawei"} 0.39253882
|
||||
# HELP snmp_scrape_packets_retried Packets retried for get, bulkget, and walk.
|
||||
# TYPE snmp_scrape_packets_retried gauge
|
||||
snmp_scrape_packets_retried{module="huawei"} 0
|
||||
# HELP snmp_scrape_packets_sent Packets sent for get, bulkget, and walk; including retries.
|
||||
# TYPE snmp_scrape_packets_sent gauge
|
||||
snmp_scrape_packets_sent{module="huawei"} 6
|
||||
# HELP snmp_scrape_pdus_returned PDUs returned from get, bulkget, and walk.
|
||||
# TYPE snmp_scrape_pdus_returned gauge
|
||||
snmp_scrape_pdus_returned{module="huawei"} 104
|
||||
# HELP snmp_scrape_walk_duration_seconds Time SNMP walk/bulkwalk took.
|
||||
# TYPE snmp_scrape_walk_duration_seconds gauge
|
||||
snmp_scrape_walk_duration_seconds{module="huawei"} 0.391760524
|
||||
# HELP sysContact The textual identification of the contact person for this managed node, together with information on how to contact this person - 1.3.6.1.2.1.1.4
|
||||
# TYPE sysContact gauge
|
||||
sysContact{sysContact="Not Configure System Contact"} 1
|
||||
# HELP sysDescr A textual description of the entity - 1.3.6.1.2.1.1.1
|
||||
# TYPE sysDescr gauge
|
||||
sysDescr{sysDescr="Linux GSE200M 2.6.27-SPEAr310 #80 Fri Jan 13 11:22:09 CST 2017 armv5tejl"} 1
|
||||
# HELP sysLocation The physical location of this node (e.g., 'telephone closet, 3rd floor') - 1.3.6.1.2.1.1.6
|
||||
# TYPE sysLocation gauge
|
||||
sysLocation{sysLocation="Garage G03"} 1
|
||||
# HELP sysName An administratively-assigned name for this managed node - 1.3.6.1.2.1.1.5
|
||||
# TYPE sysName gauge
|
||||
sysName{sysName="ups2000"} 1
|
||||
# HELP sysORDescr A textual description of the capabilities identified by the corresponding instance of sysORID. - 1.3.6.1.2.1.1.9.1.3
|
||||
# TYPE sysORDescr gauge
|
||||
sysORDescr{sysORDescr="The MIB for Message Processing and Dispatching.",sysORIndex="3"} 1
|
||||
sysORDescr{sysORDescr="The MIB module for SNMPv2 entities",sysORIndex="1"} 1
|
||||
sysORDescr{sysORDescr="The SNMP Management Architecture MIB.",sysORIndex="5"} 1
|
||||
sysORDescr{sysORDescr="The management information definitions for the SNMP User-based Security Model.",sysORIndex="4"} 1
|
||||
sysORDescr{sysORDescr="View-based Access Control Model for SNMP.",sysORIndex="2"} 1
|
||||
# HELP sysORID An authoritative identification of a capabilities statement with respect to various MIB modules supported by the local SNMP application acting as a command responder. - 1.3.6.1.2.1.1.9.1.2
|
||||
# TYPE sysORID gauge
|
||||
sysORID{sysORID="1.3.6.1.6.3.1",sysORIndex="1"} 1
|
||||
sysORID{sysORID="1.3.6.1.6.3.10.3.1.1",sysORIndex="5"} 1
|
||||
sysORID{sysORID="1.3.6.1.6.3.11.3.1.1",sysORIndex="3"} 1
|
||||
sysORID{sysORID="1.3.6.1.6.3.15.2.1.1",sysORIndex="4"} 1
|
||||
sysORID{sysORID="1.3.6.1.6.3.16.2.2.1",sysORIndex="2"} 1
|
||||
# HELP sysORLastChange The value of sysUpTime at the time of the most recent change in state or value of any instance of sysORID. - 1.3.6.1.2.1.1.8
|
||||
# TYPE sysORLastChange gauge
|
||||
sysORLastChange 8
|
||||
# HELP sysORUpTime The value of sysUpTime at the time this conceptual row was last instantiated. - 1.3.6.1.2.1.1.9.1.4
|
||||
# TYPE sysORUpTime gauge
|
||||
sysORUpTime{sysORIndex="1"} 7
|
||||
sysORUpTime{sysORIndex="2"} 8
|
||||
sysORUpTime{sysORIndex="3"} 8
|
||||
sysORUpTime{sysORIndex="4"} 8
|
||||
sysORUpTime{sysORIndex="5"} 8
|
||||
# HELP sysObjectID The vendor's authoritative identification of the network management subsystem contained in the entity - 1.3.6.1.2.1.1.2
|
||||
# TYPE sysObjectID gauge
|
||||
sysObjectID{sysObjectID="1.3.6.1.4.1.8072.3.2.10"} 1
|
||||
# HELP sysUpTime The time (in hundredths of a second) since the network management portion of the system was last re-initialized. - 1.3.6.1.2.1.1.3
|
||||
# TYPE sysUpTime gauge
|
||||
sysUpTime 5.3264032e+07
|
||||
# HELP upsAlarmsPresent The present number of active alarm conditions. - 1.3.6.1.2.1.33.1.6.1
|
||||
# TYPE upsAlarmsPresent gauge
|
||||
upsAlarmsPresent 0
|
||||
# HELP upsAutoRestart Setting this object to 'on' will cause the UPS system to restart after a shutdown if the shutdown occurred during a power loss as a result of either a upsShutdownAfterDelay or an internal battery depleted condition - 1.3.6.1.2.1.33.1.8.5
|
||||
# TYPE upsAutoRestart gauge
|
||||
upsAutoRestart 0
|
||||
# HELP upsBatteryCurrent The present battery current. - 1.3.6.1.2.1.33.1.2.6
|
||||
# TYPE upsBatteryCurrent gauge
|
||||
upsBatteryCurrent 2.147483647e+09
|
||||
# HELP upsBatteryStatus The indication of the capacity remaining in the UPS system's batteries - 1.3.6.1.2.1.33.1.2.1
|
||||
# TYPE upsBatteryStatus gauge
|
||||
upsBatteryStatus 2
|
||||
# HELP upsBatteryTemperature The ambient temperature at or near the UPS Battery casing. - 1.3.6.1.2.1.33.1.2.7
|
||||
# TYPE upsBatteryTemperature gauge
|
||||
upsBatteryTemperature 2.147483647e+09
|
||||
# HELP upsBatteryVoltage The magnitude of the present battery voltage. - 1.3.6.1.2.1.33.1.2.5
|
||||
# TYPE upsBatteryVoltage gauge
|
||||
upsBatteryVoltage 821
|
||||
# HELP upsBypassFrequency The present bypass frequency. - 1.3.6.1.2.1.33.1.5.1
|
||||
# TYPE upsBypassFrequency gauge
|
||||
upsBypassFrequency 500
|
||||
# HELP upsBypassLineIndex The bypass line identifier. - 1.3.6.1.2.1.33.1.5.3.1.1
|
||||
# TYPE upsBypassLineIndex gauge
|
||||
upsBypassLineIndex{upsBypassLineIndex="1"} 1
|
||||
# HELP upsBypassNumLines The number of bypass lines utilized in this device - 1.3.6.1.2.1.33.1.5.2
|
||||
# TYPE upsBypassNumLines gauge
|
||||
upsBypassNumLines 1
|
||||
# HELP upsBypassVoltage The present bypass voltage. - 1.3.6.1.2.1.33.1.5.3.1.2
|
||||
# TYPE upsBypassVoltage gauge
|
||||
upsBypassVoltage{upsBypassLineIndex="1"} 220
|
||||
# HELP upsConfigAudibleStatus The requested state of the audible alarm - 1.3.6.1.2.1.33.1.9.8
|
||||
# TYPE upsConfigAudibleStatus gauge
|
||||
upsConfigAudibleStatus 0
|
||||
# HELP upsConfigHighVoltageTransferPoint The maximum line voltage allowed before the UPS system transfers to battery backup. - 1.3.6.1.2.1.33.1.9.10
|
||||
# TYPE upsConfigHighVoltageTransferPoint gauge
|
||||
upsConfigHighVoltageTransferPoint 0
|
||||
# HELP upsConfigInputFreq The nominal input frequency - 1.3.6.1.2.1.33.1.9.2
|
||||
# TYPE upsConfigInputFreq gauge
|
||||
upsConfigInputFreq 0
|
||||
# HELP upsConfigInputVoltage The magnitude of the nominal input voltage - 1.3.6.1.2.1.33.1.9.1
|
||||
# TYPE upsConfigInputVoltage gauge
|
||||
upsConfigInputVoltage 0
|
||||
# HELP upsConfigLowBattTime The value of upsEstimatedMinutesRemaining at which a lowBattery condition is declared - 1.3.6.1.2.1.33.1.9.7
|
||||
# TYPE upsConfigLowBattTime gauge
|
||||
upsConfigLowBattTime 0
|
||||
# HELP upsConfigLowVoltageTransferPoint The minimum input line voltage allowed before the UPS system transfers to battery backup. - 1.3.6.1.2.1.33.1.9.9
|
||||
# TYPE upsConfigLowVoltageTransferPoint gauge
|
||||
upsConfigLowVoltageTransferPoint 0
|
||||
# HELP upsConfigOutputFreq The nominal output frequency - 1.3.6.1.2.1.33.1.9.4
|
||||
# TYPE upsConfigOutputFreq gauge
|
||||
upsConfigOutputFreq 0
|
||||
# HELP upsConfigOutputPower The magnitude of the nominal true power rating. - 1.3.6.1.2.1.33.1.9.6
|
||||
# TYPE upsConfigOutputPower gauge
|
||||
upsConfigOutputPower 0
|
||||
# HELP upsConfigOutputVA The magnitude of the nominal Volt-Amp rating. - 1.3.6.1.2.1.33.1.9.5
|
||||
# TYPE upsConfigOutputVA gauge
|
||||
upsConfigOutputVA 0
|
||||
# HELP upsConfigOutputVoltage The magnitude of the nominal output voltage - 1.3.6.1.2.1.33.1.9.3
|
||||
# TYPE upsConfigOutputVoltage gauge
|
||||
upsConfigOutputVoltage 0
|
||||
# HELP upsEstimatedChargeRemaining An estimate of the battery charge remaining expressed as a percent of full charge. - 1.3.6.1.2.1.33.1.2.4
|
||||
# TYPE upsEstimatedChargeRemaining gauge
|
||||
upsEstimatedChargeRemaining 91
|
||||
# HELP upsEstimatedMinutesRemaining An estimate of the time to battery charge depletion under the present load conditions if the utility power is off and remains off, or if it were to be lost and remain off. - 1.3.6.1.2.1.33.1.2.3
|
||||
# TYPE upsEstimatedMinutesRemaining gauge
|
||||
upsEstimatedMinutesRemaining 34
|
||||
# HELP upsIdentAgentSoftwareVersion The UPS agent software version - 1.3.6.1.2.1.33.1.1.4
|
||||
# TYPE upsIdentAgentSoftwareVersion gauge
|
||||
upsIdentAgentSoftwareVersion{upsIdentAgentSoftwareVersion="V200R001C31B016"} 1
|
||||
# HELP upsIdentAttachedDevices A string identifying the devices attached to the output(s) of the UPS - 1.3.6.1.2.1.33.1.1.6
|
||||
# TYPE upsIdentAttachedDevices gauge
|
||||
upsIdentAttachedDevices{upsIdentAttachedDevices="None"} 1
|
||||
# HELP upsIdentManufacturer The name of the UPS manufacturer. - 1.3.6.1.2.1.33.1.1.1
|
||||
# TYPE upsIdentManufacturer gauge
|
||||
upsIdentManufacturer{upsIdentManufacturer="HUAWEI"} 1
|
||||
# HELP upsIdentModel The UPS Model designation. - 1.3.6.1.2.1.33.1.1.2
|
||||
# TYPE upsIdentModel gauge
|
||||
upsIdentModel{upsIdentModel="UPS2000 2kVA"} 1
|
||||
# HELP upsIdentName A string identifying the UPS - 1.3.6.1.2.1.33.1.1.5
|
||||
# TYPE upsIdentName gauge
|
||||
upsIdentName{upsIdentName="ups2000"} 1
|
||||
# HELP upsIdentUPSSoftwareVersion The UPS firmware/software version(s) - 1.3.6.1.2.1.33.1.1.3
|
||||
# TYPE upsIdentUPSSoftwareVersion gauge
|
||||
upsIdentUPSSoftwareVersion{upsIdentUPSSoftwareVersion="V2R1C1SPC40"} 1
|
||||
# HELP upsInputFrequency The present input frequency. - 1.3.6.1.2.1.33.1.3.3.1.2
|
||||
# TYPE upsInputFrequency gauge
|
||||
upsInputFrequency{upsInputLineIndex="1"} 500
|
||||
# HELP upsInputLineBads A count of the number of times the input entered an out-of-tolerance condition as defined by the manufacturer - 1.3.6.1.2.1.33.1.3.1
|
||||
# TYPE upsInputLineBads counter
|
||||
upsInputLineBads 0
|
||||
# HELP upsInputLineIndex The input line identifier. - 1.3.6.1.2.1.33.1.3.3.1.1
|
||||
# TYPE upsInputLineIndex gauge
|
||||
upsInputLineIndex{upsInputLineIndex="1"} 1
|
||||
# HELP upsInputNumLines The number of input lines utilized in this device - 1.3.6.1.2.1.33.1.3.2
|
||||
# TYPE upsInputNumLines gauge
|
||||
upsInputNumLines 1
|
||||
# HELP upsInputVoltage The magnitude of the present input voltage. - 1.3.6.1.2.1.33.1.3.3.1.3
|
||||
# TYPE upsInputVoltage gauge
|
||||
upsInputVoltage{upsInputLineIndex="1"} 218
|
||||
# HELP upsOutputCurrent The present output current. - 1.3.6.1.2.1.33.1.4.4.1.3
|
||||
# TYPE upsOutputCurrent gauge
|
||||
upsOutputCurrent{upsOutputLineIndex="1"} 56
|
||||
# HELP upsOutputFrequency The present output frequency. - 1.3.6.1.2.1.33.1.4.2
|
||||
# TYPE upsOutputFrequency gauge
|
||||
upsOutputFrequency 500
|
||||
# HELP upsOutputLineIndex The output line identifier. - 1.3.6.1.2.1.33.1.4.4.1.1
|
||||
# TYPE upsOutputLineIndex gauge
|
||||
upsOutputLineIndex{upsOutputLineIndex="1"} 1
|
||||
# HELP upsOutputNumLines The number of output lines utilized in this device - 1.3.6.1.2.1.33.1.4.3
|
||||
# TYPE upsOutputNumLines gauge
|
||||
upsOutputNumLines 1
|
||||
# HELP upsOutputPercentLoad The percentage of the UPS power capacity presently being used on this output line, i.e., the greater of the percent load of true power capacity and the percent load of VA. - 1.3.6.1.2.1.33.1.4.4.1.5
|
||||
# TYPE upsOutputPercentLoad gauge
|
||||
upsOutputPercentLoad{upsOutputLineIndex="1"} 66
|
||||
# HELP upsOutputPower The present output true power. - 1.3.6.1.2.1.33.1.4.4.1.4
|
||||
# TYPE upsOutputPower gauge
|
||||
upsOutputPower{upsOutputLineIndex="1"} 1
|
||||
# HELP upsOutputSource The present source of output power - 1.3.6.1.2.1.33.1.4.1
|
||||
# TYPE upsOutputSource gauge
|
||||
upsOutputSource 3
|
||||
# HELP upsOutputVoltage The present output voltage. - 1.3.6.1.2.1.33.1.4.4.1.2
|
||||
# TYPE upsOutputVoltage gauge
|
||||
upsOutputVoltage{upsOutputLineIndex="1"} 230
|
||||
# HELP upsRebootWithDuration Setting this object will immediately shutdown (i.e., turn off) either the UPS output or the UPS system (as determined by the value of upsShutdownType at the time of shutdown) for a period equal to the indicated number of seconds, after which time the output will be started, including starting the UPS, if necessary - 1.3.6.1.2.1.33.1.8.4
|
||||
# TYPE upsRebootWithDuration gauge
|
||||
upsRebootWithDuration 0
|
||||
# HELP upsSecondsOnBattery If the unit is on battery power, the elapsed time since the UPS last switched to battery power, or the time since the network management subsystem was last restarted, whichever is less - 1.3.6.1.2.1.33.1.2.2
|
||||
# TYPE upsSecondsOnBattery gauge
|
||||
upsSecondsOnBattery 0
|
||||
# HELP upsShutdownAfterDelay Setting this object will shutdown (i.e., turn off) either the UPS output or the UPS system (as determined by the value of upsShutdownType at the time of shutdown) after the indicated number of seconds, or less if the UPS batteries become depleted - 1.3.6.1.2.1.33.1.8.2
|
||||
# TYPE upsShutdownAfterDelay gauge
|
||||
upsShutdownAfterDelay 0
|
||||
# HELP upsShutdownType This object determines the nature of the action to be taken at the time when the countdown of the upsShutdownAfterDelay and upsRebootWithDuration objects reaches zero - 1.3.6.1.2.1.33.1.8.1
|
||||
# TYPE upsShutdownType gauge
|
||||
upsShutdownType 0
|
||||
# HELP upsStartupAfterDelay Setting this object will start the output after the indicated number of seconds, including starting the UPS, if necessary - 1.3.6.1.2.1.33.1.8.3
|
||||
# TYPE upsStartupAfterDelay gauge
|
||||
upsStartupAfterDelay 0
|
||||
# HELP upsTestElapsedTime The amount of time, in TimeTicks, since the test in progress was initiated, or, if no test is in progress, the previous test took to complete - 1.3.6.1.2.1.33.1.7.6
|
||||
# TYPE upsTestElapsedTime gauge
|
||||
upsTestElapsedTime 0
|
||||
# HELP upsTestId The test is named by an OBJECT IDENTIFIER which allows a standard mechanism for the initiation of tests, including the well known tests identified in this document as well as those introduced by a particular implementation, i.e., as documented in the private enterprise MIB definition for the device - 1.3.6.1.2.1.33.1.7.1
|
||||
# TYPE upsTestId gauge
|
||||
upsTestId{upsTestId="0"} 1
|
||||
# HELP upsTestResultsDetail Additional information about upsTestResultsSummary - 1.3.6.1.2.1.33.1.7.4
|
||||
# TYPE upsTestResultsDetail gauge
|
||||
upsTestResultsDetail{upsTestResultsDetail="0"} 1
|
||||
# HELP upsTestResultsSummary The results of the current or last UPS diagnostics test performed - 1.3.6.1.2.1.33.1.7.3
|
||||
# TYPE upsTestResultsSummary gauge
|
||||
upsTestResultsSummary 0
|
||||
# HELP upsTestSpinLock A spin lock on the test subsystem - 1.3.6.1.2.1.33.1.7.2
|
||||
# TYPE upsTestSpinLock gauge
|
||||
upsTestSpinLock 0
|
||||
# HELP upsTestStartTime The value of sysUpTime at the time the test in progress was initiated, or, if no test is in progress, the time the previous test was initiated - 1.3.6.1.2.1.33.1.7.5
|
||||
# TYPE upsTestStartTime gauge
|
||||
upsTestStartTime 0
|
||||
2022
stacks/platform/modules/monitoring/dashboards/ups.json
Normal file
2022
stacks/platform/modules/monitoring/dashboards/ups.json
Normal file
File diff suppressed because it is too large
Load diff
69
stacks/platform/modules/monitoring/grafana.tf
Normal file
69
stacks/platform/modules/monitoring/grafana.tf
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
|
||||
# resource "kubernetes_persistent_volume" "prometheus_grafana_pv" {
|
||||
# metadata {
|
||||
# name = "grafana-pv"
|
||||
# }
|
||||
# spec {
|
||||
# capacity = {
|
||||
# "storage" = "2Gi"
|
||||
# }
|
||||
# access_modes = ["ReadWriteOnce"]
|
||||
# persistent_volume_source {
|
||||
# nfs {
|
||||
# path = "/mnt/main/grafana"
|
||||
# server = "10.0.10.15"
|
||||
# }
|
||||
# # iscsi {
|
||||
# # target_portal = "iscsi.viktorbarzin.lan:3260"
|
||||
# # iqn = "iqn.2020-12.lan.viktorbarzin:storage:monitoring:grafana"
|
||||
# # lun = 0
|
||||
# # fs_type = "ext4"
|
||||
# # }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
# Cluster-scoped PersistentVolume "alertmanager-pv": 2Gi, ReadWriteOnce,
# backed by the NFS export /mnt/main/alertmanager on 10.0.10.15.
resource "kubernetes_persistent_volume" "alertmanager_pv" {
  metadata {
    name = "alertmanager-pv"
  }

  spec {
    access_modes = ["ReadWriteOnce"]
    capacity     = { storage = "2Gi" }

    persistent_volume_source {
      nfs {
        server = "10.0.10.15"
        path   = "/mnt/main/alertmanager"
      }
    }
  }
}
|
||||
# resource "kubernetes_persistent_volume_claim" "grafana_pvc" {
|
||||
# metadata {
|
||||
# name = "grafana-pvc"
|
||||
# namespace = kubernetes_namespace.monitoring.metadata[0].name
|
||||
# }
|
||||
# spec {
|
||||
# access_modes = ["ReadWriteOnce"]
|
||||
# resources {
|
||||
# requests = {
|
||||
# "storage" = "2Gi"
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
# Installs the "grafana" chart from the Grafana Helm repository into the
# monitoring namespace. Chart values come from grafana_chart_values.yaml,
# rendered with the database and admin passwords.
resource "helm_release" "grafana" {
  name       = "grafana"
  repository = "https://grafana.github.io/helm-charts"
  chart      = "grafana"

  namespace        = kubernetes_namespace.monitoring.metadata[0].name
  create_namespace = true

  # atomic: a failed install/upgrade is rolled back; timeout is in seconds.
  atomic  = true
  timeout = 600

  values = [
    templatefile("${path.module}/grafana_chart_values.yaml", {
      db_password            = var.grafana_db_password
      grafana_admin_password = var.grafana_admin_password
    })
  ]
}
|
||||
78
stacks/platform/modules/monitoring/grafana_chart_values.yaml
Normal file
78
stacks/platform/modules/monitoring/grafana_chart_values.yaml
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
deploymentStrategy:
|
||||
type: RollingUpdate
|
||||
replicas: 3
|
||||
adminPassword: "${grafana_admin_password}"
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
persistence:
|
||||
enabled: false # using external mysql
|
||||
existingClaim: "grafana-pvc"
|
||||
ingress:
|
||||
enabled: "true"
|
||||
ingressClassName: "traefik"
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.middlewares: "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd"
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
|
||||
tls:
|
||||
- secretName: "tls-secret"
|
||||
hosts:
|
||||
- "grafana.viktorbarzin.me"
|
||||
hosts:
|
||||
- "grafana.viktorbarzin.me"
|
||||
sidecar:
|
||||
datasources:
|
||||
enabled: "true"
|
||||
dashboards:
|
||||
enabled: true
|
||||
label: "grafana_dashboard"
|
||||
dashboardProviders:
  dashboardproviders.yaml:
    apiVersion: 1
    # Grafana's dashboard provisioning format requires the provider entries to
    # live in a `providers` list; the organisation key is `orgId`.
    providers:
      - name: default
        orgId: 1
        # folder: ""
        type: "file"
        # disableDeletion: "false"
        # editable: "true"
        options:
          path: "/var/lib/grafana/dashboards/default"
|
||||
env:
|
||||
GF_DATABASE_PASSWORD: "${db_password}"
|
||||
GF_SERVER_ROOT_URL: https://grafana.viktorbarzin.me
|
||||
|
||||
grafana.ini:
|
||||
database:
|
||||
type: mysql
|
||||
host: mysql.dbaas.svc.cluster.local:3306
|
||||
name: grafana
|
||||
user: grafana
|
||||
password: $__env{GF_DATABASE_PASSWORD}
|
||||
ssl_mode: disable
|
||||
auth.anonymous:
|
||||
enabled: true
|
||||
org_role: Viewer
|
||||
# auth.google:
|
||||
# enabled: true
|
||||
analytics:
|
||||
check_for_updates: "true"
|
||||
grafana_net:
|
||||
url: "https://grafana.net"
|
||||
log:
|
||||
mode: "console"
|
||||
paths:
|
||||
data: "/var/lib/grafana/data"
|
||||
logs: "/var/log/grafana"
|
||||
plugins: "/var/lib/grafana/plugins"
|
||||
provisioning: "/etc/grafana/provisioning"
|
||||
security:
|
||||
allow_embedding: true # Allow to be iframed
|
||||
|
||||
# url: https://grafana.com/api/dashboards/11074/revisions/2/download
|
||||
# datasources:
|
||||
# - name: Prometheus
|
||||
# url: http://prometheus-server
|
||||
123
stacks/platform/modules/monitoring/idrac.tf
Normal file
123
stacks/platform/modules/monitoring/idrac.tf
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
|
||||
# ConfigMap "redfish-exporter-config" holding the exporter's config.yml.
# The file lists one host entry built from var.idrac_host / var.idrac_username /
# var.idrac_password plus a "default" entry, and enables all metric groups.
# NOTE(review): the iDRAC password is interpolated into a ConfigMap (not a
# Secret), so it is stored and readable in plain text — confirm this is intended.
resource "kubernetes_config_map" "redfish-config" {
  metadata {
    name      = "redfish-exporter-config"
    namespace = kubernetes_namespace.monitoring.metadata[0].name

    annotations = {
      "reloader.stakater.com/match" = "true"
    }
  }
  data = {
    "config.yml" = <<-EOF
      address: 0.0.0.0
      port: 9610
      hosts:
        ${var.idrac_host}:
          username: ${var.idrac_username}
          password: ${var.idrac_password}
        default:
          username: root
          password: calvin
      metrics:
        all: true
        # system: true
        # sensors: true
        # power: true
        # sel: false # Disable SEL - often slow
        # storage: true # Disable storage - slowest endpoint
        # memory: true
        # network: false # Disable network adapters
        # firmware: false # Don't need this frequently
    EOF
  }
}
|
||||
|
||||
# Single-replica Deployment of the iDRAC Redfish exporter. The container
# listens on 9610 and reads its configuration from the "config.yml" key of the
# redfish-config ConfigMap, mounted as /etc/prometheus/idrac.yml.
resource "kubernetes_deployment" "idrac-redfish" {
  metadata {
    name      = "idrac-redfish-exporter"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    labels = {
      app  = "idrac-redfish-exporter"
      tier = var.tier
    }
    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "idrac-redfish-exporter"
      }
    }

    template {
      metadata {
        labels = {
          app = "idrac-redfish-exporter"
        }
      }

      spec {
        priority_class_name = "tier-1-cluster"

        container {
          name = "redfish-exporter"
          # https://github.com/mrlhansen/idrac_exporter?tab=readme-ov-file
          # NOTE(review): ":latest" is a floating tag; consider pinning a release.
          image = "ghcr.io/mrlhansen/idrac_exporter:latest"

          port {
            container_port = 9610
          }

          volume_mount {
            name       = "redfish-exporter-config"
            mount_path = "/etc/prometheus/idrac.yml"
            sub_path   = "config.yml"
          }
        }

        volume {
          name = "redfish-exporter-config"
          config_map {
            # Reference the resource instead of repeating its name so Terraform
            # orders the ConfigMap before this Deployment and a rename cannot
            # leave a dangling volume. The resolved value is unchanged.
            name = kubernetes_config_map.redfish-config.metadata[0].name
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Service in front of the iDRAC exporter pods (selected by the app label):
# exposes port 9090 and forwards to the container's port 9610.
resource "kubernetes_service" "idrac-redfish-exporter" {
  metadata {
    name      = "idrac-redfish-exporter"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    labels    = { app = "idrac-redfish-exporter" }
    # annotations = {
    #   "prometheus.io/scrape" = "true"
    #   "prometheus.io/path"   = "/metrics"
    #   "prometheus.io/port"   = "9090"
    # }
  }

  spec {
    selector = { app = "idrac-redfish-exporter" }

    port {
      name        = "http"
      port        = 9090
      target_port = "9610"
    }
  }
}
|
||||
|
||||
# Ingress for the iDRAC exporter Service (port 9090) under the
# viktorbarzin.lan root domain, built by the shared ingress_factory module.
module "idrac-redfish-exporter-ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name      = "idrac-redfish-exporter"
  namespace = kubernetes_namespace.monitoring.metadata[0].name
  port      = 9090

  root_domain     = "viktorbarzin.lan"
  tls_secret_name = var.tls_secret_name

  allow_local_access_only = true
  ssl_redirect            = false
}
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
---
|
||||
cluster:
|
||||
name: default
|
||||
|
||||
destinations:
  # Push directly to the Loki service on 3100: the Loki chart values in this
  # module set gateway.enabled=false, so no loki-gateway Service exists.
  - name: loki
    type: loki
    url: http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push
|
||||
|
||||
clusterEvents:
|
||||
enabled: false
|
||||
collector: alloy-logs
|
||||
namespaces:
|
||||
- dbaas
|
||||
- immich
|
||||
- authentik
|
||||
- mailserver
|
||||
- crowdsec
|
||||
- descheduler
|
||||
- calibre
|
||||
- monitoring
|
||||
- ingress-nginx
|
||||
- vaultwarden
|
||||
|
||||
nodeLogs:
|
||||
enabled: false
|
||||
|
||||
podLogs:
|
||||
enabled: true
|
||||
gatherMethod: kubernetesApi
|
||||
collector: alloy-logs
|
||||
labelsToKeep:
|
||||
[
|
||||
"app_kubernetes_io_name",
|
||||
"container",
|
||||
"instance",
|
||||
"job",
|
||||
"level",
|
||||
"namespace",
|
||||
"service_name",
|
||||
"service_namespace",
|
||||
"deployment_environment",
|
||||
"deployment_environment_name",
|
||||
]
|
||||
structuredMetadata:
|
||||
pod: pod # Set structured metadata "pod" from label "pod"
|
||||
namespaces:
|
||||
- dbaas
|
||||
- immich
|
||||
- authentik
|
||||
- mailserver
|
||||
- crowdsec
|
||||
- descheduler
|
||||
- calibre
|
||||
- monitoring
|
||||
- ingress-nginx
|
||||
- vaultwarden
|
||||
# Collectors
|
||||
alloy-singleton:
|
||||
enabled: false
|
||||
|
||||
alloy-metrics:
|
||||
enabled: false
|
||||
|
||||
alloy-logs:
|
||||
enabled: true
|
||||
# Required when using the Kubernetes API to gather pod logs
|
||||
alloy:
|
||||
mounts:
|
||||
varlog: false
|
||||
clustering:
|
||||
enabled: true
|
||||
|
||||
alloy-profiles:
|
||||
enabled: false
|
||||
|
||||
alloy-receiver:
|
||||
enabled: false
|
||||
186
stacks/platform/modules/monitoring/loki.tf
Normal file
186
stacks/platform/modules/monitoring/loki.tf
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
# Installs the "loki" chart from the Grafana Helm repository with the values
# in loki.yaml. Waits for the alert-rules ConfigMap, which loki.yaml mounts
# into the pod as a volume.
resource "helm_release" "loki" {
  name       = "loki"
  repository = "https://grafana.github.io/helm-charts"
  chart      = "loki"

  namespace        = kubernetes_namespace.monitoring.metadata[0].name
  create_namespace = true
  timeout          = 600

  values = [templatefile("${path.module}/loki.yaml", {})]

  depends_on = [kubernetes_config_map.loki_alert_rules]
}
|
||||
|
||||
# PersistentVolume "loki": 15Gi, ReadWriteOnce, NFS export
# /mnt/main/loki/loki on 10.0.10.15. Reclaim policy is Retain.
resource "kubernetes_persistent_volume" "loki" {
  metadata {
    name = "loki"
  }

  spec {
    access_modes                     = ["ReadWriteOnce"]
    capacity                         = { storage = "15Gi" }
    volume_mode                      = "Filesystem"
    persistent_volume_reclaim_policy = "Retain"

    persistent_volume_source {
      nfs {
        server = "10.0.10.15"
        path   = "/mnt/main/loki/loki"
      }
    }
  }
}
|
||||
|
||||
# Grafana Alloy, installed from the Grafana Helm repository with the values in
# alloy.yaml. Applied only after the Loki release.
# https://grafana.com/docs/alloy/latest/configure/kubernetes/
resource "helm_release" "alloy" {
  name       = "alloy"
  repository = "https://grafana.github.io/helm-charts"
  chart      = "alloy"

  namespace        = kubernetes_namespace.monitoring.metadata[0].name
  create_namespace = true
  atomic           = true

  values = [file("${path.module}/alloy.yaml")]

  depends_on = [helm_release.loki]
}
|
||||
|
||||
# DaemonSet that raises the fs.inotify sysctls from a privileged init
# container, then idles in a minimal "pause" container. The single
# `operator = "Exists"` toleration lets it schedule onto tainted nodes too.
resource "kubernetes_daemon_set_v1" "sysctl-inotify" {
  metadata {
    name      = "sysctl-inotify"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    labels    = { app = "sysctl-inotify" }
  }

  spec {
    selector {
      match_labels = { app = "sysctl-inotify" }
    }

    template {
      metadata {
        labels = { app = "sysctl-inotify" }
      }

      spec {
        host_pid = true

        # One-shot: write the three inotify limits, then exit.
        init_container {
          name  = "sysctl"
          image = "busybox:1.37"
          command = [
            "sh", "-c",
            "sysctl -w fs.inotify.max_user_watches=1048576 && sysctl -w fs.inotify.max_user_instances=8192 && sysctl -w fs.inotify.max_queued_events=1048576"
          ]

          security_context {
            privileged = true
          }
        }

        # Keeps the pod running after the init container has finished.
        container {
          name  = "pause"
          image = "registry.k8s.io/pause:3.10"

          resources {
            requests = { cpu = "1m", memory = "4Mi" }
            limits   = { cpu = "1m", memory = "4Mi" }
          }
        }

        toleration {
          operator = "Exists"
        }
      }
    }
  }
}
|
||||
|
||||
# resource "helm_release" "k8s-monitoring" {
|
||||
# namespace = kubernetes_namespace.monitoring.metadata[0].name
|
||||
# create_namespace = true
|
||||
# name = "k8s-monitoring"
|
||||
|
||||
# repository = "https://grafana.github.io/helm-charts"
|
||||
# chart = "k8s-monitoring"
|
||||
|
||||
# values = [templatefile("${path.module}/k8s-monitoring-values.yaml", {})]
|
||||
# atomic = true
|
||||
# }
|
||||
|
||||
# ConfigMap "loki-alert-rules" with a single rules.yaml key containing one rule
# group ("log-alerts") of three LogQL alerts: HighErrorRate,
# PodCrashLoopBackOff and OOMKilled.
resource "kubernetes_config_map" "loki_alert_rules" {
  metadata {
    name      = "loki-alert-rules"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
  }

  data = {
    "rules.yaml" = yamlencode({
      groups = [{
        name = "log-alerts"
        rules = [
          {
            # More than 10 lines/s containing "error" in a namespace, for 5 minutes.
            alert       = "HighErrorRate"
            expr        = "sum(rate({namespace=~\".+\"} |= \"error\" [5m])) by (namespace) > 10"
            for         = "5m"
            labels      = { severity = "warning" }
            annotations = { summary = "High error rate in {{ $labels.namespace }}" }
          },
          {
            # Any log line mentioning CrashLoopBackOff within the last 5 minutes.
            alert       = "PodCrashLoopBackOff"
            expr        = "count_over_time({namespace=~\".+\"} |= \"CrashLoopBackOff\" [5m]) > 0"
            for         = "1m"
            labels      = { severity = "critical" }
            annotations = { summary = "CrashLoopBackOff detected in {{ $labels.namespace }}" }
          },
          {
            # Any log line mentioning OOMKilled within the last 5 minutes.
            alert       = "OOMKilled"
            expr        = "count_over_time({namespace=~\".+\"} |= \"OOMKilled\" [5m]) > 0"
            for         = "1m"
            labels      = { severity = "critical" }
            annotations = { summary = "OOMKilled detected in {{ $labels.namespace }}" }
          }
        ]
      }]
    })
  }
}
|
||||
|
||||
# ConfigMap carrying a Grafana datasource provisioning file for Loki
# (proxy access, not the default datasource). It is labelled
# grafana_datasource=1.
resource "kubernetes_config_map" "grafana_loki_datasource" {
  metadata {
    name      = "grafana-loki-datasource"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    labels    = { grafana_datasource = "1" }
  }

  data = {
    "loki-datasource.yaml" = yamlencode({
      apiVersion = 1
      datasources = [
        {
          name      = "Loki"
          type      = "loki"
          access    = "proxy"
          url       = "http://loki.monitoring.svc.cluster.local:3100"
          isDefault = false
        }
      ]
    })
  }
}
|
||||
110
stacks/platform/modules/monitoring/loki.yaml
Normal file
110
stacks/platform/modules/monitoring/loki.yaml
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
loki:
|
||||
commonConfig:
|
||||
replication_factor: 1
|
||||
schemaConfig:
|
||||
configs:
|
||||
- from: "2025-04-01"
|
||||
store: tsdb
|
||||
object_store: filesystem
|
||||
schema: v13
|
||||
index:
|
||||
prefix: loki_index_
|
||||
period: 24h
|
||||
ingester:
|
||||
chunk_idle_period: 12h
|
||||
max_chunk_age: 24h
|
||||
chunk_retain_period: 1m
|
||||
chunk_target_size: 1572864
|
||||
wal:
|
||||
dir: /loki-wal
|
||||
pattern_ingester:
|
||||
enabled: true
|
||||
limits_config:
|
||||
allow_structured_metadata: true
|
||||
volume_enabled: true
|
||||
retention_period: 168h
|
||||
compactor:
|
||||
retention_enabled: true
|
||||
working_directory: /var/loki/compactor
|
||||
compaction_interval: 1h
|
||||
delete_request_store: filesystem
|
||||
ruler:
|
||||
enable_api: true
|
||||
storage:
|
||||
type: local
|
||||
local:
|
||||
directory: /loki/rules
|
||||
alertmanager_url: http://prometheus-alertmanager.monitoring.svc.cluster.local:9093
|
||||
ring:
|
||||
kvstore:
|
||||
store: inmemory
|
||||
rule_path: /var/loki/scratch
|
||||
storage:
|
||||
type: "filesystem"
|
||||
auth_enabled: false
|
||||
|
||||
minio:
|
||||
enabled: false
|
||||
|
||||
deploymentMode: SingleBinary
|
||||
|
||||
singleBinary:
|
||||
replicas: 1
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 15Gi
|
||||
storageClass: ""
|
||||
extraVolumes:
|
||||
- name: wal
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 2Gi
|
||||
- name: rules
|
||||
configMap:
|
||||
name: loki-alert-rules
|
||||
extraVolumeMounts:
|
||||
- name: wal
|
||||
mountPath: /loki-wal
|
||||
- name: rules
|
||||
mountPath: /loki/rules/fake
|
||||
resources:
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 4Gi
|
||||
limits:
|
||||
cpu: "1"
|
||||
memory: 6Gi
|
||||
|
||||
# Zero out replica counts of other deployment modes
|
||||
backend:
|
||||
replicas: 0
|
||||
read:
|
||||
replicas: 0
|
||||
write:
|
||||
replicas: 0
|
||||
ingester:
|
||||
replicas: 0
|
||||
querier:
|
||||
replicas: 0
|
||||
queryFrontend:
|
||||
replicas: 0
|
||||
queryScheduler:
|
||||
replicas: 0
|
||||
distributor:
|
||||
replicas: 0
|
||||
compactor:
|
||||
replicas: 0
|
||||
indexGateway:
|
||||
replicas: 0
|
||||
bloomCompactor:
|
||||
replicas: 0
|
||||
bloomGateway:
|
||||
replicas: 0
|
||||
|
||||
# Disable optional components for single binary mode
|
||||
gateway:
|
||||
enabled: false
|
||||
chunksCache:
|
||||
enabled: false
|
||||
resultsCache:
|
||||
enabled: false
|
||||
202
stacks/platform/modules/monitoring/main.tf
Normal file
202
stacks/platform/modules/monitoring/main.tf
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
# Module inputs. Secret-bearing inputs are marked `sensitive` so Terraform
# redacts them in plan/apply output; names and defaults are unchanged.

variable "tls_secret_name" {
  description = "Name of the TLS secret used by this module's ingresses."
  type        = string
}

variable "alertmanager_account_password" {
  description = "Passed to the Prometheus chart values template as alertmanager_mail_pass."
  type        = string
  sensitive   = true
}

variable "idrac_host" {
  description = "iDRAC host written into the Redfish exporter config."
  type        = string
  default     = "192.168.1.4"
}

variable "idrac_username" {
  description = "iDRAC username written into the Redfish exporter config."
  type        = string
  default     = "root"
}

# NOTE(review): a credential default is committed in source; prefer supplying
# the value from the stack's secret inputs.
variable "idrac_password" {
  description = "iDRAC password written into the Redfish exporter config."
  type        = string
  default     = "calvin"
  sensitive   = true
}

variable "alertmanager_slack_api_url" {
  description = "Passed to the Prometheus chart values template as alertmanager_slack_api_url."
  type        = string
  sensitive   = true
}

variable "tiny_tuya_service_secret" {
  description = "Passed to the Prometheus chart values template as tuya_api_key."
  type        = string
  sensitive   = true
}

variable "haos_api_token" {
  description = "Passed to the Prometheus chart values template as haos_api_token."
  type        = string
  sensitive   = true
}

variable "pve_password" {
  type      = string
  sensitive = true
}

variable "grafana_db_password" {
  description = "Passed to the Grafana chart values template as db_password."
  type        = string
  sensitive   = true
}

variable "grafana_admin_password" {
  description = "Passed to the Grafana chart values template as grafana_admin_password."
  type        = string
  sensitive   = true
}

variable "tier" {
  description = "Value of the `tier` label applied to the namespace and workloads."
  type        = string
}
|
||||
|
||||
# The "monitoring" namespace that the other resources in this module attach to.
# It carries the istio-injection, tier and custom-quota labels.
resource "kubernetes_namespace" "monitoring" {
  metadata {
    name = "monitoring"

    labels = {
      tier                               = var.tier
      "istio-injection"                  = "disabled"
      "resource-governance/custom-quota" = "true"
    }
  }
}
|
||||
|
||||
# Runs the shared setup_tls_secret module for the monitoring namespace.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.monitoring.metadata[0].name
}
|
||||
# Terraform gets angry with the 30k-line values file :/ Use Ansible until this is solved.
|
||||
# resource "helm_release" "ups_prometheus_snmp_exporter" {
|
||||
# namespace = kubernetes_namespace.monitoring.metadata[0].name
|
||||
# create_namespace = true
|
||||
# name = "ups_prometheus_exporter"
|
||||
|
||||
# repository = "https://prometheus-community.github.io/helm-charts"
|
||||
# chart = "prometheus-snmp-exporter"
|
||||
|
||||
# values = [file("${path.module}/ups_snmp_values.yaml")]
|
||||
# }
|
||||
|
||||
|
||||
|
||||
# Watchdog CronJob: every 30 minutes an Alpine container installs curl and
# probes the prometheus-server Service; if that chain fails it posts
# "Prometheus is down!" to the webhook.
# NOTE(review): metadata sets no namespace, so this job is not created in the
# monitoring namespace — confirm that is intentional.
resource "kubernetes_cron_job_v1" "monitor_prom" {
  metadata {
    name = "monitor-prometheus"
  }

  spec {
    schedule                  = "*/30 * * * *"
    concurrency_policy        = "Replace"
    failed_jobs_history_limit = 5

    job_template {
      metadata {}

      spec {
        template {
          metadata {}

          spec {
            container {
              name  = "monitor-prometheus"
              image = "alpine"
              command = [
                "/bin/sh",
                "-c",
                "apk add --update curl && curl --connect-timeout 2 prometheus-server.monitoring.svc.cluster.local || curl https://webhook.viktorbarzin.me/fb/message-viktor -d 'Prometheus is down!'",
              ]
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Traefik Middleware "status-redirect": permanently redirects every request
# (regex ".*") to the HetrixTools status page.
resource "kubernetes_manifest" "status_redirect_middleware" {
  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"

    metadata = {
      name      = "status-redirect"
      namespace = kubernetes_namespace.monitoring.metadata[0].name
    }

    spec = {
      redirectRegex = {
        regex       = ".*"
        replacement = "https://hetrixtools.com/r/38981b548b5d38b052aca8d01285a3f3/"
        permanent   = true
      }
    }
  }
}
|
||||
|
||||
# Ingress for status.viktorbarzin.me on Traefik's websecure entrypoint. The
# status-redirect middleware answers every request, so the backend Service is
# only a placeholder.
resource "kubernetes_ingress_v1" "status" {
  metadata {
    name      = "hetrix-redirect-ingress"
    namespace = kubernetes_namespace.monitoring.metadata[0].name

    annotations = {
      "traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
      "traefik.ingress.kubernetes.io/router.middlewares" = "monitoring-status-redirect@kubernetescrd"
    }
  }

  spec {
    ingress_class_name = "traefik"

    tls {
      secret_name = var.tls_secret_name
      hosts       = ["status.viktorbarzin.me"]
    }

    rule {
      host = "status.viktorbarzin.me"

      http {
        path {
          path = "/"

          backend {
            service {
              name = "not-used"
              port {
                # redirected by middleware
                number = 80
              }
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Traefik Middleware "yotovski-redirect": permanently redirects every request
# (regex ".*") to a second HetrixTools status page.
resource "kubernetes_manifest" "yotovski_redirect_middleware" {
  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"

    metadata = {
      name      = "yotovski-redirect"
      namespace = kubernetes_namespace.monitoring.metadata[0].name
    }

    spec = {
      redirectRegex = {
        regex       = ".*"
        replacement = "https://hetrixtools.com/r/2ba9d7a5e017794db0fd91f0115a8b3b/"
        permanent   = true
      }
    }
  }
}
|
||||
|
||||
# Ingress for yotovski-status.viktorbarzin.me on Traefik's websecure
# entrypoint. The yotovski-redirect middleware answers every request, so the
# backend Service is only a placeholder.
resource "kubernetes_ingress_v1" "status_yotovski" {
  metadata {
    name      = "hetrix-yotovski-redirect-ingress"
    namespace = kubernetes_namespace.monitoring.metadata[0].name

    annotations = {
      "traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
      "traefik.ingress.kubernetes.io/router.middlewares" = "monitoring-yotovski-redirect@kubernetescrd"
    }
  }

  spec {
    ingress_class_name = "traefik"

    tls {
      secret_name = var.tls_secret_name
      hosts       = ["yotovski-status.viktorbarzin.me"]
    }

    rule {
      host = "yotovski-status.viktorbarzin.me"

      http {
        path {
          path = "/"

          backend {
            service {
              # redirected by middleware
              name = "not-used"
              port {
                number = 80
              }
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Custom ResourceQuota for the monitoring namespace. It is larger than the
# default 1-cluster tier quota because monitoring runs 29+ pods
# (Prometheus, Grafana, Loki, Alloy, exporters, etc.).
resource "kubernetes_resource_quota" "monitoring" {
  metadata {
    name      = "monitoring-quota"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
  }

  spec {
    hard = {
      pods              = "100"
      "requests.cpu"    = "16"
      "requests.memory" = "16Gi"
      "limits.cpu"      = "80"
      "limits.memory"   = "160Gi"
    }
  }
}
|
||||
58
stacks/platform/modules/monitoring/prometheus.tf
Normal file
58
stacks/platform/modules/monitoring/prometheus.tf
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
|
||||
# Claim that binds to the statically provisioned Prometheus volume. The Helm
# values reference it by name (server.persistentVolume.existingClaim).
# The "iscsi" in the name is historical: the bound PV is currently NFS-backed.
resource "kubernetes_persistent_volume_claim" "prometheus_server_pvc" {
  metadata {
    name      = "prometheus-iscsi-pvc"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
  }

  spec {
    access_modes = ["ReadWriteOnce"]

    resources {
      requests = {
        # Matches the capacity declared on the PV.
        storage = "15Gi"
      }
    }

    # storage_class_name = "standard"

    # Reference the PV resource instead of repeating its name as a literal.
    # The value is identical ("prometheus-iscsi-pv"), but the reference gives
    # Terraform a dependency edge so the PV is created before this claim and
    # the two names cannot drift apart.
    volume_name = kubernetes_persistent_volume.prometheus_server_pvc.metadata[0].name
  }
}
|
||||
|
||||
# Statically provisioned volume for Prometheus data. Despite the "iscsi" in the
# name, the active source is NFS; the earlier iSCSI source is kept below as a
# commented-out reference.
resource "kubernetes_persistent_volume" "prometheus_server_pvc" {
  metadata {
    name = "prometheus-iscsi-pv"
  }

  spec {
    access_modes                     = ["ReadWriteOnce"]
    volume_mode                      = "Filesystem"
    persistent_volume_reclaim_policy = "Retain"

    capacity = {
      storage = "15Gi"
    }

    persistent_volume_source {
      nfs {
        server = "10.0.10.15"
        path   = "/mnt/main/prometheus"
      }

      # iscsi {
      #   fs_type       = "ext4"
      #   iqn           = "iqn.2020-12.lan.viktorbarzin:storage:monitoring:prometheus"
      #   lun           = 0
      #   target_portal = "iscsi.viktorbarzin.me:3260"
      # }
    }
  }
}
|
||||
|
||||
# Installs the prometheus-community "prometheus" chart into the monitoring
# namespace. Chart values are rendered from prometheus_chart_values.tpl, which
# receives the secrets listed in the templatefile() map below.
resource "helm_release" "prometheus" {
  name             = "prometheus"
  namespace        = kubernetes_namespace.monitoring.metadata[0].name
  create_namespace = true

  repository = "https://prometheus-community.github.io/helm-charts"
  chart      = "prometheus"

  # Previously pinned to 15.0.2.
  version = "25.8.2"

  values = [
    templatefile("${path.module}/prometheus_chart_values.tpl", {
      alertmanager_mail_pass     = var.alertmanager_account_password
      alertmanager_slack_api_url = var.alertmanager_slack_api_url
      tuya_api_key               = var.tiny_tuya_service_secret
      haos_api_token             = var.haos_api_token
    })
  ]
}
|
||||
815
stacks/platform/modules/monitoring/prometheus_chart_values.tpl
Executable file
815
stacks/platform/modules/monitoring/prometheus_chart_values.tpl
Executable file
|
|
@ -0,0 +1,815 @@
|
|||
# Helm values
|
||||
# all values - https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus/values.yaml
|
||||
alertmanager:
|
||||
persistentVolume:
|
||||
enabled: true
|
||||
existingClaim: alertmanager-pvc
|
||||
#existingClaim: alertmanager-iscsi-pvc
|
||||
# storageClass: rook-cephfs
|
||||
strategy:
|
||||
type: Recreate
|
||||
baseURL: "https://alertmanager.viktorbarzin.me"
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: "traefik"
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.middlewares: "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd,traefik-authentik-forward-auth@kubernetescrd"
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
|
||||
tls:
|
||||
- secretName: "tls-secret"
|
||||
hosts:
|
||||
- "alertmanager.viktorbarzin.me"
|
||||
hosts:
|
||||
# - alertmanager.viktorbarzin.me
|
||||
- host: alertmanager.viktorbarzin.me
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
serviceName: prometheus-server
|
||||
servicePort: 80
|
||||
config:
|
||||
enabled: true
|
||||
global:
|
||||
smtp_from: "alertmanager@viktorbarzin.me"
|
||||
# smtp_smarthost: "smtp.viktorbarzin.me:587"
|
||||
smtp_smarthost: "mailserver.mailserver.svc.cluster.local:587"
|
||||
smtp_auth_username: "alertmanager@viktorbarzin.me"
|
||||
smtp_auth_password: "${alertmanager_mail_pass}"
|
||||
smtp_require_tls: true
|
||||
slack_api_url: "${alertmanager_slack_api_url}"
|
||||
# templates:
|
||||
# - "/etc/alertmanager/template/*.tmpl"
|
||||
route:
|
||||
group_by: ["alertname"]
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 4h
|
||||
receiver: slack-warning
|
||||
routes:
|
||||
- receiver: slack-critical
|
||||
group_wait: 10s
|
||||
group_interval: 1m
|
||||
repeat_interval: 1h
|
||||
matchers:
|
||||
- severity = critical
|
||||
continue: false
|
||||
- receiver: slack-info
|
||||
group_wait: 5m
|
||||
group_interval: 30m
|
||||
repeat_interval: 12h
|
||||
matchers:
|
||||
- severity = info
|
||||
continue: false
|
||||
inhibit_rules:
|
||||
# Node down makes node-condition alerts redundant
|
||||
- source_matchers:
|
||||
- alertname = NodeDown
|
||||
target_matchers:
|
||||
- alertname =~ "NodeNotReady|NodeConditionBad"
|
||||
# Traefik down makes service-level alerts noise
|
||||
- source_matchers:
|
||||
- alertname = TraefikDown
|
||||
target_matchers:
|
||||
- alertname =~ "HighServiceErrorRate|HighService4xxRate|HighServiceLatency|TraefikHighOpenConnections"
|
||||
# Power outage makes on-battery alert redundant
|
||||
- source_matchers:
|
||||
- alertname = PowerOutage
|
||||
target_matchers:
|
||||
- alertname = OnBattery
|
||||
receivers:
|
||||
- name: slack-critical
|
||||
slack_configs:
|
||||
- send_resolved: true
|
||||
channel: "#alerts"
|
||||
color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}'
|
||||
title: '{{ if eq .Status "firing" }}[CRITICAL]{{ else }}[RESOLVED]{{ end }} {{ .GroupLabels.alertname }} ({{ .Alerts | len }})'
|
||||
text: '{{ range .Alerts }}• {{ .Annotations.summary }}{{ "\n" }}{{ end }}'
|
||||
- name: slack-warning
|
||||
slack_configs:
|
||||
- send_resolved: true
|
||||
channel: "#alerts"
|
||||
color: '{{ if eq .Status "firing" }}warning{{ else }}good{{ end }}'
|
||||
title: '{{ if eq .Status "firing" }}[WARNING]{{ else }}[RESOLVED]{{ end }} {{ .GroupLabels.alertname }} ({{ .Alerts | len }})'
|
||||
text: '{{ range .Alerts }}• {{ .Annotations.summary }}{{ "\n" }}{{ end }}'
|
||||
- name: slack-info
|
||||
slack_configs:
|
||||
- send_resolved: true
|
||||
channel: "#alerts"
|
||||
color: '{{ if eq .Status "firing" }}#439FE0{{ else }}good{{ end }}'
|
||||
title: '[INFO] {{ .GroupLabels.alertname }}'
|
||||
text: '{{ range .Alerts }}• {{ .Annotations.summary }}{{ "\n" }}{{ end }}'
|
||||
# web.external-url appears to be hard-coded; the deployment was edited manually instead
|
||||
# extraArgs:
|
||||
# web.external-url: "https://prometheus.viktorbarzin.me"
|
||||
# prometheus-node-exporter:
|
||||
# enabled: true
|
||||
server:
|
||||
# Uncomment web.enable-admin-api below to be able to delete metrics
|
||||
extraFlags:
|
||||
# - "web.enable-admin-api"
|
||||
- "web.enable-lifecycle"
|
||||
- "storage.tsdb.allow-overlapping-blocks"
|
||||
- "storage.tsdb.retention.size=45GB"
|
||||
- "storage.tsdb.wal-compression"
|
||||
persistentVolume:
|
||||
# enabled: false
|
||||
existingClaim: prometheus-iscsi-pvc
|
||||
# storageClass: rook-cephfs
|
||||
retention: "52w"
|
||||
strategy:
|
||||
type: Recreate
|
||||
baseURL: "https://prometheus.viktorbarzin.me"
|
||||
extraVolumes:
|
||||
- name: prometheus-wal-tmpfs
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 2Gi
|
||||
# Mount the tmpfs volume declared above over the WAL directory
|
||||
extraVolumeMounts:
|
||||
- name: prometheus-wal-tmpfs
|
||||
mountPath: /data/wal # Standard path for the chart
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: "traefik"
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.middlewares: "traefik-rate-limit@kubernetescrd,traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd,traefik-authentik-forward-auth@kubernetescrd"
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
|
||||
|
||||
gethomepage.dev/enabled: "true"
|
||||
gethomepage.dev/description: "Prometheus"
|
||||
gethomepage.dev/icon: "prometheus.png"
|
||||
gethomepage.dev/name: "Prometheus"
|
||||
gethomepage.dev/widget.type: "prometheus"
|
||||
gethomepage.dev/widget.url: "http://prometheus-server.monitoring.svc.cluster.local:80"
|
||||
gethomepage.dev/pod-selector: ""
|
||||
tls:
|
||||
- secretName: "tls-secret"
|
||||
hosts:
|
||||
- "prometheus.viktorbarzin.me"
|
||||
hosts:
|
||||
- "prometheus.viktorbarzin.me"
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
- "prometheus-alertmanager.monitoring.svc.cluster.local:9093"
|
||||
# - "alertmanager.viktorbarzin.me"
|
||||
tls_config:
|
||||
insecure_skip_verify: true
|
||||
|
||||
serverFiles:
|
||||
# prometheus.yml:
|
||||
# storage:
|
||||
# tsdb:
|
||||
# # no_lockfile: true
|
||||
# # max_blocks_in_cache: 100000
|
||||
# # max_lookback_duration: 0s
|
||||
# # min_block_duration: 2h
|
||||
# # retention: 15d
|
||||
# # chunk_encoding: 1
|
||||
# # chunk_range: 1h
|
||||
# # max_chunks_to_persist: 4800
|
||||
# # chunks_to_persist: 4800
|
||||
# cache:
|
||||
# entries: 5000
|
||||
# head:
|
||||
# chunk_bytes: 1048576
|
||||
# # wal:
|
||||
# # compressions: 1
|
||||
# # flush_after_seconds: 30
|
||||
# # segment_size: 1073741824
|
||||
# series_file:
|
||||
# # no_sync: true
|
||||
# # max_concurrent_writes: 256
|
||||
# # block_size: 262144
|
||||
# cache:
|
||||
# max_size: 1073741824
|
||||
|
||||
# alertingaaa:
|
||||
# alertmanagers:
|
||||
# - static_configs:
|
||||
# targets: "alertmanager.viktorbarzin.lan"
|
||||
alerting_rules.yml:
|
||||
groups:
|
||||
- name: R730 Host
|
||||
rules:
|
||||
- alert: HighCPUTemperature
|
||||
expr: node_hwmon_temp_celsius{instance="pve-node-r730"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance="pve-node-r730"} > 75
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "CPU temp: {{ $value | printf \"%.0f\" }}°C (threshold: 75°C)"
|
||||
- alert: SSDHighWriteRate
|
||||
expr: rate(node_disk_written_bytes_total{job="proxmox-host", device="sdb"}[2m]) / 1024 / 1024 > 2 # sdb is SSD; value in MB
|
||||
for: 10m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "SSD write rate: {{ $value | printf \"%.1f\" }} MB/s (threshold: 2 MB/s)"
|
||||
- alert: HDDHighWriteRate
|
||||
expr: rate(node_disk_written_bytes_total{job="proxmox-host", device="sdc"}[2m]) / 1024 / 1024 > 10 # sdc is 11TB HDD; value in MB
|
||||
for: 20m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "HDD write rate: {{ $value | printf \"%.1f\" }} MB/s (threshold: 10 MB/s)"
|
||||
- alert: NoiDRACData
|
||||
expr: (max(r730_idrac_idrac_system_health + 1) or on() vector(0)) == 0
|
||||
for: 30m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "No iDRAC data for 30m - check Prometheus scraping"
|
||||
- alert: HighSystemLoad
|
||||
expr: scalar(node_load1{instance="pve-node-r730"}) * 100 / count(count(node_cpu_seconds_total{instance="pve-node-r730"}) by (cpu)) > 50
|
||||
for: 30m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "System load: {{ $value | printf \"%.0f\" }}% (threshold: 50%)"
|
||||
- alert: FanFailure
|
||||
expr: r730_idrac_redfish_chassis_fan_health != 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Fan unhealthy on R730 - check iDRAC"
|
||||
- name: Nvidia Tesla T4 GPU
|
||||
rules:
|
||||
- alert: HighGPUTemp
|
||||
expr: nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP > 65
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "GPU temp: {{ $value | printf \"%.0f\" }}°C (threshold: 65°C)"
|
||||
- alert: HighPowerUsage
|
||||
expr: nvidia_tesla_t4_DCGM_FI_DEV_POWER_USAGE > 50
|
||||
for: 30m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "GPU power: {{ $value | printf \"%.0f\" }}W (threshold: 50W)"
|
||||
- alert: HighUtilization
|
||||
expr: nvidia_tesla_t4_DCGM_FI_DEV_GPU_UTIL > 50
|
||||
for: 30m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "GPU util: {{ $value | printf \"%.0f\" }}% (threshold: 50%)"
|
||||
- alert: HighMemoryUsage
|
||||
expr: nvidia_tesla_t4_DCGM_FI_DEV_FB_USED / 1024 > 12
|
||||
for: 5m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "VRAM used: {{ $value | printf \"%.1f\" }} GB (threshold: 12 GB)"
|
||||
- name: Power
|
||||
rules:
|
||||
- alert: OnBattery
|
||||
expr: ups_upsSecondsOnBattery > 0
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "UPS on battery: {{ $value | printf \"%.0f\" }}s"
|
||||
- alert: LowUPSBattery
|
||||
expr: ups_upsEstimatedMinutesRemaining < 25 and on(instance) ups_upsInputVoltage < 150
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "UPS battery low: {{ $value | printf \"%.0f\" }} min remaining (threshold: 25 min)"
|
||||
- alert: PowerOutage
|
||||
expr: ups_upsInputVoltage < 150
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Power outage - input voltage: {{ $value | printf \"%.0f\" }}V (threshold: <150V)"
|
||||
- alert: HighPowerUsage
|
||||
expr: r730_idrac_idrac_power_control_consumed_watts > 200
|
||||
for: 60m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "Server power: {{ $value | printf \"%.0f\" }}W (threshold: 200W)"
|
||||
- alert: UsingInverterEnergyForTooLong
|
||||
expr: automatic_transfer_switch_power_mode > 0 # 1 = Inverter; 0 = Grid
|
||||
for: 24h
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "On inverter for >24h - check grid switchover"
|
||||
- name: Storage
|
||||
rules:
|
||||
- alert: NodeFilesystemFull
|
||||
expr: (node_filesystem_avail_bytes{fstype!~"tmpfs|fuse.*"} / node_filesystem_size_bytes) * 100 < 10
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Disk {{ $labels.mountpoint }} on {{ $labels.instance }}: {{ $value | printf \"%.1f\" }}% free (threshold: 10%)"
|
||||
- alert: PVFillingUp
|
||||
expr: (kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes) * 100 > 85
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "PV {{ $labels.persistentvolumeclaim }} in {{ $labels.namespace }}: {{ $value | printf \"%.0f\" }}% used (threshold: 85%)"
|
||||
- name: K8s Health
|
||||
rules:
|
||||
- alert: PodCrashLooping
|
||||
expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $labels.namespace }}/{{ $labels.pod }}: {{ $value | printf \"%.0f\" }} restarts in 1h"
|
||||
- alert: ContainerOOMKilled
|
||||
expr: increase(container_oom_events_total{container!=""}[15m]) > 0
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $labels.namespace }}/{{ $labels.pod }}/{{ $labels.container }}: OOM killed"
|
||||
- alert: NodeNotReady
|
||||
expr: kube_node_status_condition{condition="Ready",status="true"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Node {{ $labels.node }} is NotReady"
|
||||
- alert: NodeConditionBad
|
||||
expr: kube_node_status_condition{condition=~"MemoryPressure|DiskPressure|PIDPressure",status="true"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Node {{ $labels.node }}: {{ $labels.condition }}"
|
||||
- alert: JobFailed
|
||||
expr: kube_job_status_failed > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Job {{ $labels.namespace }}/{{ $labels.job_name }}: {{ $value | printf \"%.0f\" }} failure(s)"
|
||||
- name: Infrastructure Health
|
||||
rules:
|
||||
- alert: HomeAssistantDown
|
||||
expr: up{job="haos"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Home Assistant down: {{ $labels.instance }}"
|
||||
- alert: CoreDNSErrors
|
||||
expr: rate(coredns_dns_responses_total{rcode="SERVFAIL"}[5m]) > 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "CoreDNS SERVFAIL rate: {{ $value | printf \"%.1f\" }}/s (threshold: 1/s)"
|
||||
- alert: ScrapeTargetDown
|
||||
expr: up{job!~"istiod|envoy-stats|openwrt"} == 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Scrape target down: {{ $labels.job }}/{{ $labels.instance }}"
|
||||
- alert: PrometheusStorageFull
|
||||
expr: (prometheus_tsdb_storage_blocks_bytes / (1024*1024*1024)) > 50
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Prometheus TSDB: {{ $value | printf \"%.0f\" }} GiB (threshold: 50 GiB)"
|
||||
- alert: PrometheusNotificationsFailing
|
||||
expr: rate(prometheus_notifications_errors_total[5m]) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Prometheus notification errors: {{ $value | printf \"%.2f\" }}/s"
|
||||
- name: Cluster
|
||||
rules:
|
||||
- alert: NodeDown
|
||||
expr: (up{job="kubernetes-nodes"} or on() vector(0)) == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Node down: {{ $labels.instance }}"
|
||||
- alert: DockerRegistryDown
|
||||
expr: (registry_process_start_time_seconds or on() vector(0)) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Docker registry down for 10m"
|
||||
- alert: RegistryLowCacheHitRate
|
||||
expr: (sum by (job) (rate(registry_registry_storage_cache_total{type="Hit"}[15m]))) / (sum by (job) (rate(registry_registry_storage_cache_total{type="Request"}[15m]))) * 100 < 25
|
||||
for: 12h
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "Registry cache hit rate: {{ $value | printf \"%.0f\" }}% (threshold: 25%)"
|
||||
- alert: NodeHighCPUUsage
|
||||
expr: pve_cpu_usage_ratio * 100 > 30
|
||||
for: 6h
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "CPU usage on {{ $labels.node }}: {{ $value | printf \"%.0f\" }}% (threshold: 30%)"
|
||||
- alert: NodeLowFreeMemory
|
||||
expr: ((1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) or on() vector(1)) * 100 > 95
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Memory usage on {{ $labels.node }}: {{ $value | printf \"%.0f\" }}% (threshold: 95%)"
|
||||
# - name: PodStuckNotReady
|
||||
# rules:
|
||||
# - alert: PodStuckNotReady
|
||||
# expr: kube_pod_status_ready{condition="true"} == 0
|
||||
# for: 5m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# summary: Pod stuck not ready.
|
||||
- alert: DeploymentReplicasMismatch
|
||||
expr: |
|
||||
(
|
||||
kube_deployment_spec_replicas
|
||||
- on(namespace, deployment) kube_deployment_status_replicas_available
|
||||
) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $labels.namespace }}/{{ $labels.deployment }}: {{ $value | printf \"%.0f\" }} replica(s) unavailable"
|
||||
- alert: StatefulSetReplicasMismatch
|
||||
expr: |
|
||||
(
|
||||
kube_statefulset_replicas
|
||||
- on(namespace, statefulset) kube_statefulset_status_replicas_ready
|
||||
) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $labels.namespace }}/{{ $labels.statefulset }}: {{ $value | printf \"%.0f\" }} replica(s) unavailable"
|
||||
- alert: DaemonSetMissingPods
|
||||
expr: |
|
||||
(
|
||||
kube_daemonset_status_desired_number_scheduled
|
||||
- on(namespace, daemonset) kube_daemonset_status_number_ready
|
||||
) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $labels.namespace }}/{{ $labels.daemonset }}: {{ $value | printf \"%.0f\" }} pod(s) missing"
|
||||
- alert: NoNodeLoadData
|
||||
expr: (node_load1 OR on() vector(0)) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "No node load data for 10m - check Prometheus scraping"
|
||||
- name: "Traefik Ingress"
|
||||
rules:
|
||||
- alert: TraefikDown
|
||||
expr: up{job="traefik"} == 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Traefik pod {{ $labels.instance }} is down"
|
||||
- alert: HighServiceErrorRate
|
||||
expr: |
|
||||
(
|
||||
sum(rate(traefik_service_requests_total{code=~"5.."}[5m])) by (service)
|
||||
/ sum(rate(traefik_service_requests_total[5m])) by (service)
|
||||
* 100
|
||||
) > 10
|
||||
and sum(rate(traefik_service_requests_total[5m])) by (service) > 0.1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "5xx rate on {{ $labels.service }}: {{ $value | printf \"%.1f\" }}% (threshold: 10%)"
|
||||
- alert: HighService4xxRate
|
||||
expr: |
|
||||
(
|
||||
sum(rate(traefik_service_requests_total{code=~"4..", service!~".*nextcloud.*|.*grafana.*"}[5m])) by (service)
|
||||
/ sum(rate(traefik_service_requests_total{service!~".*nextcloud.*|.*grafana.*"}[5m])) by (service)
|
||||
* 100
|
||||
) > 30
|
||||
and sum(rate(traefik_service_requests_total{service!~".*nextcloud.*|.*grafana.*"}[5m])) by (service) > 0.1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "4xx rate on {{ $labels.service }}: {{ $value | printf \"%.1f\" }}% (threshold: 30%)"
|
||||
- alert: HighServiceLatency
|
||||
expr: |
|
||||
histogram_quantile(0.99,
|
||||
sum(rate(traefik_service_request_duration_seconds_bucket[5m])) by (service, le)
|
||||
) > 10
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "p99 latency on {{ $labels.service }}: {{ $value | printf \"%.1f\" }}s (threshold: 10s)"
|
||||
- alert: TLSCertExpiringSoon
|
||||
expr: (traefik_tls_certs_not_after - time()) / 86400 < 7
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "TLS cert {{ $labels.cn }} expires in {{ $value | printf \"%.0f\" }} days"
|
||||
- alert: TraefikHighOpenConnections
|
||||
expr: sum(traefik_service_open_connections) by (service) > 500
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $labels.service }} has {{ $value | printf \"%.0f\" }} open connections (threshold: 500)"
|
||||
# - alert: OpenWRT High Memory Usage
|
||||
# expr: 100 - ((openwrt_node_memory_MemAvailable_bytes * 100) / openwrt_node_memory_MemTotal_bytes) > 90
|
||||
# for: 10m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# summary: OpenWRT high memory usage. Can cause services getting stuck.
|
||||
# - alert: Mail server has no replicas available
|
||||
# expr: (kube_deployment_status_replicas_available{namespace="mailserver"} or on() vector(0)) < 1
|
||||
# for: 10m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# summary: Mail server has no available replicas. This means mail may not be received.
|
||||
# - alert: Hackmd has no replicas available
|
||||
# expr: (kube_deployment_status_replicas_available{namespace="hackmd"} or on() vector(0)) < 1
|
||||
# for: 1m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# summary: Hackmd has no available replicas.
|
||||
# - alert: Privatebin has no replicas available
|
||||
# expr: (kube_deployment_status_replicas_available{namespace="privatebin"} or on() vector(0)) < 1
|
||||
# for: 10m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# summary: Privatebin has no available replicas.
|
||||
# - name: London OpenWRT Down
|
||||
# rules:
|
||||
# - alert: OpenWRT client unreachable
|
||||
# expr: (openwrt_node_openwrt_info or on() vector(0)) == 0
|
||||
# for: 10m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# summary: London OpenWRT router unreachable through VPN
|
||||
# - alert: OpenWRT high system load
|
||||
# expr: openwrt_node_load1 > 0.9
|
||||
# for: 15m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# summary: High system load on OpenWRT
|
||||
# - alert: Finance app webhook exceptions
|
||||
# expr: changes(webhook_failure_total[5m]) >= 1
|
||||
# for: 1m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# summary: Finance app webhook exceptions
|
||||
# - alert: Finance app unhandled exceptions
|
||||
# expr: changes(flask_http_request_exceptions_total[5m]) >= 1
|
||||
# for: 1m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# summary: Finance app unhandled exceptions
|
||||
- alert: New Tailscale client
|
||||
expr: irate(headscale_machine_registrations_total{action="reauth"}[5m]) > 0
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "New Tailscale client registered"
|
||||
|
||||
extraScrapeConfigs: |
|
||||
- job_name: 'proxmox-host'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "192.168.1.127:9100"
|
||||
labels:
|
||||
node: 'pve-node-r730'
|
||||
metrics_path: '/metrics'
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: instance
|
||||
replacement: 'pve-node-r730' # Giving it a friendly name
|
||||
- job_name: 'istiod'
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
namespaces:
|
||||
names:
|
||||
- istio-system
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
||||
action: keep
|
||||
regex: istiod;http-monitoring
|
||||
- job_name: 'envoy-stats'
|
||||
metrics_path: /stats/prometheus
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_container_port_name]
|
||||
action: keep
|
||||
regex: '.*-envoy-prom'
|
||||
|
||||
- job_name: 'crowdsec'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "crowdsec-service.crowdsec.svc.cluster.local:6060"
|
||||
metrics_path: '/metrics'
|
||||
- job_name: 'snmp-idrac'
|
||||
scrape_interval: 1m
|
||||
scrape_timeout: 45s
|
||||
static_configs:
|
||||
- targets:
|
||||
- "idrac.viktorbarzin.lan.:161"
|
||||
metrics_path: '/snmp'
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 'snmp-exporter.monitoring.svc.cluster.local:9116'
|
||||
metric_relabel_configs:
|
||||
- source_labels: [ __name__ ]
|
||||
target_label: '__name__'
|
||||
action: replace
|
||||
regex: '(.*)'
|
||||
replacement: 'r730_idrac_$${1}'
|
||||
- job_name: 'redfish-idrac'
|
||||
scrape_interval: 3m
|
||||
scrape_timeout: 45s
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets:
|
||||
- 192.168.1.4
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: idrac-redfish-exporter.monitoring.svc.cluster.local:9090
|
||||
metric_relabel_configs:
|
||||
- source_labels: [ __name__ ]
|
||||
target_label: '__name__'
|
||||
action: replace
|
||||
regex: '(.*)'
|
||||
replacement: 'r730_idrac_$${1}'
|
||||
- job_name: 'openwrt'
|
||||
static_configs:
|
||||
- targets:
|
||||
#- "home.viktorbarzin.lan:9100"
|
||||
#- "10.0.20.100:9100"
|
||||
- "192.168.2.1:9100"
|
||||
metrics_path: '/metrics'
|
||||
#relabel_configs:
|
||||
# - source_labels: [__address__]
|
||||
# target_label: __param_target
|
||||
# - source_labels: [__param_target]
|
||||
# target_label: instance
|
||||
# - target_label: __address__
|
||||
# #replacement: 'home.viktorbarzin.lan:9100'
|
||||
# #replacement: '10.0.20.100:9100'
|
||||
metric_relabel_configs:
|
||||
- source_labels: [ __name__ ]
|
||||
target_label: '__name__'
|
||||
action: replace
|
||||
regex: '(.*)'
|
||||
replacement: 'openwrt_$${1}'
|
||||
- job_name: 'snmp-ups'
|
||||
params:
|
||||
module: [huawei]
|
||||
static_configs:
|
||||
- targets:
|
||||
- "ups.viktorbarzin.lan.:161"
|
||||
metrics_path: '/snmp'
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 'snmp-exporter.monitoring.svc.cluster.local:9116'
|
||||
metric_relabel_configs:
|
||||
- source_labels: [ __name__ ]
|
||||
target_label: '__name__'
|
||||
action: replace
|
||||
regex: '(.*)'
|
||||
replacement: 'ups_$${1}'
|
||||
- job_name: 'registry'
|
||||
static_configs:
|
||||
- targets:
|
||||
#- "192.168.1.10:5001" # rpi
|
||||
#- "10.0.10.10:5001" # devvm
|
||||
- "10.0.20.10:5001" # registry-vm
|
||||
metrics_path: '/metrics'
|
||||
metric_relabel_configs:
|
||||
- source_labels: [ __name__ ]
|
||||
target_label: '__name__'
|
||||
action: replace
|
||||
regex: '(.*)'
|
||||
replacement: 'registry_$${1}'
|
||||
- job_name: 'automatic-transfer-switch'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "tuya-bridge.tuya-bridge.svc.cluster.local:80"
|
||||
metrics_path: '/metrics/bfe98afa941d5a1e2def8s'
|
||||
params:
|
||||
api-key: ['${tuya_api_key}']
|
||||
metric_relabel_configs:
|
||||
- source_labels: [ __name__ ]
|
||||
target_label: '__name__'
|
||||
action: replace
|
||||
regex: '(.*)'
|
||||
replacement: 'automatic_transfer_switch_$${1}'
|
||||
- job_name: 'fuse-garage'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "tuya-bridge.tuya-bridge.svc.cluster.local:80"
|
||||
metrics_path: '/metrics/bf62301ef04e38d881ugcu'
|
||||
params:
|
||||
api-key: ['${tuya_api_key}']
|
||||
metric_relabel_configs:
|
||||
- source_labels: [ __name__ ]
|
||||
target_label: '__name__'
|
||||
action: replace
|
||||
regex: '(.*)'
|
||||
replacement: 'fuse_garage_$${1}'
|
||||
- job_name: 'fuse-main'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "tuya-bridge.tuya-bridge.svc.cluster.local:80"
|
||||
metrics_path: '/metrics/bf1a684e80ae942e4dji6b'
|
||||
params:
|
||||
api-key: ['${tuya_api_key}']
|
||||
metric_relabel_configs:
|
||||
- source_labels: [ __name__ ]
|
||||
target_label: '__name__'
|
||||
action: replace
|
||||
regex: '(.*)'
|
||||
replacement: 'fuse_main_$${1}'
|
||||
- job_name: 'haos'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "ha-sofia.viktorbarzin.lan.:8123"
|
||||
metrics_path: '/api/prometheus'
|
||||
bearer_token: "${haos_api_token}"
|
||||
- job_name: 'nvidia'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "nvidia-exporter.nvidia.svc.cluster.local"
|
||||
metrics_path: '/metrics'
|
||||
metric_relabel_configs:
|
||||
- source_labels: [ __name__ ]
|
||||
target_label: '__name__'
|
||||
action: replace
|
||||
regex: '(.*)'
|
||||
replacement: 'nvidia_tesla_t4_$${1}'
|
||||
- job_name: 'gpu-pod-memory'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "gpu-pod-exporter.nvidia.svc.cluster.local"
|
||||
metrics_path: '/metrics'
|
||||
- job_name: 'traefik'
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
namespaces:
|
||||
names:
|
||||
- traefik
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_container_port_name]
|
||||
action: keep
|
||||
regex: metrics
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
target_label: instance
|
||||
- job_name: 'realestate-crawler-api'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "realestate-crawler-api.realestate-crawler.svc.cluster.local:80"
|
||||
metrics_path: '/metrics'
|
||||
- job_name: 'realestate-crawler-celery'
|
||||
static_configs:
|
||||
- targets:
|
||||
- "realestate-crawler-celery-metrics.realestate-crawler.svc.cluster.local:9090"
|
||||
metrics_path: '/metrics'
|
||||
|
||||
80939
stacks/platform/modules/monitoring/prometheus_snmp_chart_values.yaml
Normal file
80939
stacks/platform/modules/monitoring/prometheus_snmp_chart_values.yaml
Normal file
File diff suppressed because it is too large
Load diff
106
stacks/platform/modules/monitoring/pve_exporter.tf
Normal file
106
stacks/platform/modules/monitoring/pve_exporter.tf
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
|
||||
# Secret holding the pve.yml configuration file for the Proxmox exporter.
# It is mounted into the exporter pod by kubernetes_deployment.pve_exporter.
resource "kubernetes_secret" "pve_exporter_config" {
  metadata {
    name      = "pve-exporter-config"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
  }

  data = {
    # jsonencode() emits the password as a double-quoted, escaped string, which
    # is also a valid YAML scalar. Interpolating it raw would produce invalid
    # or mistyped YAML for passwords containing characters such as ':' or '#',
    # or for values that look like numbers or booleans.
    "pve.yml" = <<-EOF
      default:
        user: "root@pam"
        password: ${jsonencode(var.pve_password)}
        verify_ssl: false
        timeout: 30
    EOF
  }
}
|
||||
|
||||
# Single-replica Deployment of the Proxmox VE Prometheus exporter. The exporter
# listens on container port 9221 and receives its configuration from the
# pve-exporter-config secret.
resource "kubernetes_deployment" "pve_exporter" {
  metadata {
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    name      = "proxmox-exporter"

    labels = {
      tier = var.tier
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "proxmox-exporter"
      }
    }

    template {
      metadata {
        # Must match spec.selector.match_labels above.
        labels = {
          app = "proxmox-exporter"
        }
      }

      spec {
        container {
          image = "prompve/prometheus-pve-exporter:latest"
          name  = "proxmox-exporter"

          port {
            container_port = 9221
          }

          # Expose the secret-backed config file inside the container.
          volume_mount {
            mount_path = "/etc/prometheus"
            name       = "config-volume"
            read_only  = true
          }
        }

        volume {
          name = "config-volume"

          secret {
            secret_name = kubernetes_secret.pve_exporter_config.metadata[0].name

            # Project only the pve.yml key; combined with the mount path this
            # yields /etc/prometheus/pve.yml.
            items {
              path = "pve.yml"
              key  = "pve.yml"
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
resource "kubernetes_service" "proxmox-exporter" {
|
||||
metadata {
|
||||
name = "proxmox-exporter"
|
||||
namespace = kubernetes_namespace.monitoring.metadata[0].name
|
||||
labels = {
|
||||
"app" = "proxmox-exporter"
|
||||
}
|
||||
annotations = {
|
||||
"prometheus.io/scrape" = "true"
|
||||
"prometheus.io/port" = 9221
|
||||
"prometheus.io/path" = "/pve"
|
||||
"prometheus.io/param_target" = "192.168.1.127"
|
||||
"prometheus.io/param_node" = "1"
|
||||
"prometheus.io/param_cluster" = "1"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
selector = {
|
||||
"app" = "proxmox-exporter"
|
||||
}
|
||||
port {
|
||||
name = "http"
|
||||
port = 9221
|
||||
target_port = 9221
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# To monitor the PVE node itself, use the node exporter and the playbook in this repo. From the repository root, run:
|
||||
# ansible-playbook -i ./playbooks/inventory.ini ./playbooks/deploy_node_exporter.yaml
|
||||
# This installs the exporter binary
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
|
||||
import aiohttp
|
||||
|
||||
iDRAC_HOST = 'idrac'
|
||||
iDRAC_USER_ENV_VAR = 'idrac_user'
|
||||
iDRAC_PASSWORD_ENV_VAR = 'idrac_password'
|
||||
SHOULD_RUN = True
|
||||
|
||||
|
||||
def signal_handler(sig, frame):
    """Request a graceful shutdown when a signal is delivered.

    The handler only clears the module-level ``SHOULD_RUN`` flag and returns.
    The monitoring loop observes the flag and finishes on its own, after which
    the process exits normally with status 0.

    The handler must not block: it runs on the main thread, so sleeping here
    would freeze the very loop that is supposed to wind down.

    Args:
        sig: Number of the received signal.
        frame: Current stack frame (unused).
    """
    global SHOULD_RUN
    logging.warning(f'signal {sig} received. shutting down gracefully...')
    SHOULD_RUN = False
|
||||
|
||||
|
||||
async def main() -> None:
    """Entry point: install the SIGINT handler, load credentials, start monitoring.

    The iDRAC user and password are read from the environment variables named
    by ``iDRAC_USER_ENV_VAR`` and ``iDRAC_PASSWORD_ENV_VAR``. If either one is
    missing, a critical message is logged and the coroutine returns without
    starting the monitor.
    """
    # define signal handlers
    signal.signal(signal.SIGINT, signal_handler)

    required = (
        ('user', iDRAC_USER_ENV_VAR),
        ('password', iDRAC_PASSWORD_ENV_VAR),
    )
    credentials = {}
    for label, env_name in required:
        value = os.environ.get(env_name)
        if value is None:
            logging.critical(f'missing environment variable for idrac {label}'
                             f' please set {env_name}')
            return
        credentials[label] = value

    logging.info('service initiated with credentials')
    return await monitor(credentials['user'], credentials['password'])
|
||||
|
||||
|
||||
async def monitor(user: str, password: str, poll_interval: float = 1.0) -> None:
    """Run until the module-level ``SHOULD_RUN`` flag is cleared.

    Args:
        user: iDRAC user name (currently unused by the loop body).
        password: iDRAC password (currently unused by the loop body).
        poll_interval: Seconds to wait between checks of ``SHOULD_RUN``.
    """
    while SHOULD_RUN:
        # Yield to the event loop instead of busy-spinning: a bare ``pass``
        # loop pins a CPU core and starves every other task on the loop.
        await asyncio.sleep(poll_interval)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# abandoned bc server cannot start itself when it's off :/
|
||||
asyncio.run(main())
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
#!/bin/sh
# Power-outage handler for a server managed through its iDRAC Redfish API.
#
# Flow:
#   1. If a previous run shut the server down (state file present), poll the
#      iDRAC every minute until it answers again, power the server on, exit.
#   2. Otherwise check the input voltage of PSU slot 2. If power is present,
#      exit immediately.
#   3. If power is missing, re-check once a minute for $to_wait minutes.
#   4. If power never returns, request a graceful shutdown and record that in
#      the state file so the next run performs step 1.
#
# Written for plain POSIX sh: no [[ ]] and no other bashisms.

tag=server-power-cycle-script
lock_file=/tmp/server-power-cycle-lock
state_file=/root/server-power-cycle/state.off
# NOTE(review): credentials are hard-coded; consider moving them out of the script.
idrac_auth=root:calvin
psu_url=https://192.168.1.4/redfish/v1/Chassis/System.Embedded.1/Power/PowerSupplies/PSU.Slot.2
reset_url=https://192.168.1.4/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset
to_wait=30

# Write a message to syslog under this script's tag.
log() {
    logger -t "$tag" "$@"
}

# Log the end-of-run marker with a timestamp.
log_end() {
    log end "$(date '+%F-%R')"
}

# Succeed when PSU slot 2 reports an input voltage greater than zero.
# The comparison is done inside jq so that fractional, missing or null
# readings are handled; any curl/jq failure counts as "no power".
psu_has_input() {
    curl -s -k -u "$idrac_auth" -H"Content-type: application/json" -X GET "$psu_url" \
        | jq -e '(.LineInputVoltage // 0) > 0' >/dev/null 2>&1
}

# Send a ComputerSystem.Reset action; $1 is the Redfish ResetType.
send_reset() {
    curl -s -k -u "$idrac_auth" -X POST \
        -d "{\"Action\": \"Reset\", \"ResetType\": \"$1\"}" \
        -H"Content-type: application/json" "$reset_url"
}

log start "$(date '+%F-%R')"

# Take the lock atomically: with noclobber (set -C) the redirection fails if
# the file already exists, so two concurrent runs cannot both pass the check.
if ! ( set -C; : > "$lock_file" ) 2>/dev/null; then
    log 'Script already running. exiting'
    exit 0
fi
# Release the lock on every exit path, including termination by a signal.
trap 'rm -f "$lock_file"' EXIT
trap 'exit 1' HUP INT TERM

if [ -f "$state_file" ]; then
    log 'Server state set to off'
    while true; do
        sleep 60 # sleep 1 minute
        log 'Trying to connect to idrac system...'
        # Only the connection result matters here; discard the response body.
        if curl --connect-timeout 5 -s -k -o /dev/null -u "$idrac_auth" \
                -H"Content-type: application/json" -X GET "$psu_url"; then
            log "Connected to idrac, assuming power is back on"
            log "Power supply restored, sending power on command"
            send_reset On
            rm -f "$state_file"

            log_end
            exit 0
        fi
    done
fi

# check input voltage on the power supply connected to the outer system
if psu_has_input; then
    log "power supply is on. exiting"
    log_end
    exit 0
fi

echo "Continuously checking power supply for the next $to_wait minutes"

i=1
while [ "$i" -le "$to_wait" ]; do
    log "Sleeping a minute..Minute $i"
    sleep 60

    # check input voltage on the power supply connected to the outer system
    if psu_has_input; then
        log "power supply is on. exiting"

        log_end
        exit 0
    fi

    i=$((i + 1))
done

log "Power supply did not come back, sending graceful shutdown signal"
send_reset GracefulShutdown

touch "$state_file"
log_end
|
||||
113
stacks/platform/modules/monitoring/snmp_exporter.tf
Normal file
113
stacks/platform/modules/monitoring/snmp_exporter.tf
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
|
||||
/**
|
||||
1. clone snmp exporter
|
||||
2. update generator.yaml to include only interesting modules
|
||||
3. make generate
|
||||
4. cp snmp.yml to wherever it is used (this module loads it from ups_snmp_values.yaml)
|
||||
5. scrape service with curl 'http://snmp-exporter.monitoring.svc.cluster.local:9116/snmp?auth=public_v2&module=huawei&target=192.168.1.5%3A161'
|
||||
|
||||
generate reference - https://github.com/prometheus/snmp_exporter/tree/main/generator
|
||||
https://sbcode.net/prometheus/snmp-generate-huawei/
|
||||
*/
|
||||
resource "kubernetes_config_map" "snmp-exporter-yaml" {
|
||||
metadata {
|
||||
name = "snmp-exporter-yaml"
|
||||
namespace = kubernetes_namespace.monitoring.metadata[0].name
|
||||
|
||||
annotations = {
|
||||
"reloader.stakater.com/match" = "true"
|
||||
}
|
||||
}
|
||||
data = {
|
||||
"snmp.yml" = file("${path.module}/ups_snmp_values.yaml")
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
# Single-replica deployment of the Prometheus SNMP exporter. The exporter
# configuration is mounted from the snmp-exporter-yaml ConfigMap.
resource "kubernetes_deployment" "snmp-exporter" {
  metadata {
    name      = "snmp-exporter"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    labels = {
      app  = "snmp-exporter"
      tier = var.tier
    }
    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "snmp-exporter"
      }
    }

    template {
      metadata {
        labels = {
          app = "snmp-exporter"
        }
      }

      spec {
        container {
          # NOTE(review): image is untagged; pin a version for reproducible rollouts.
          image = "prom/snmp-exporter"
          name  = "snmp-exporter"

          port {
            container_port = 9116
          }

          volume_mount {
            name       = "config-volume"
            mount_path = "/etc/snmp_exporter/"
          }
        }

        volume {
          name = "config-volume"

          config_map {
            # Reference the resource instead of repeating its name so that
            # Terraform orders the ConfigMap before this deployment and a
            # rename cannot silently break the mount.
            name = kubernetes_config_map.snmp-exporter-yaml.metadata[0].name
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Service in front of the snmp-exporter pods, exposing port 9116 as "http".
resource "kubernetes_service" "snmp-exporter" {
  metadata {
    name      = "snmp-exporter"
    namespace = kubernetes_namespace.monitoring.metadata[0].name

    labels = {
      app = "snmp-exporter"
    }

    # Annotation-based scraping is disabled; kept for reference.
    # annotations = {
    #   "prometheus.io/scrape" = "true"
    #   "prometheus.io/path"   = "/snmp?auth=Public0&target=tcp%3A%2F%2F192.%3A161"
    #   "prometheus.io/port"   = "9116"
    # }
  }

  spec {
    selector = {
      app = "snmp-exporter"
    }

    port {
      name        = "http"
      port        = 9116
      target_port = 9116
    }
  }
}
|
||||
|
||||
module "snmp-exporter-ingress" {
|
||||
source = "../../../../modules/kubernetes/ingress_factory"
|
||||
namespace = kubernetes_namespace.monitoring.metadata[0].name
|
||||
name = "snmp-exporter"
|
||||
root_domain = "viktorbarzin.lan"
|
||||
tls_secret_name = var.tls_secret_name
|
||||
allow_local_access_only = true
|
||||
ssl_redirect = false
|
||||
port = 9116
|
||||
}
|
||||
1996
stacks/platform/modules/monitoring/ups_snmp_values.yaml
Executable file
1996
stacks/platform/modules/monitoring/ups_snmp_values.yaml
Executable file
File diff suppressed because it is too large
Load diff
27
stacks/platform/modules/nvidia/Dockerfile
Normal file
27
stacks/platform/modules/nvidia/Dockerfile
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# GPU container: Ubuntu image with Python, ffmpeg, espeak-ng and audiblez
# (installed in a virtualenv under /app/audiblez).

# NOTE(review): the base image is untagged; pin a release tag for reproducible builds.
FROM ubuntu

ENV DEBIAN_FRONTEND=noninteractive

# Install Python and the audio dependencies in the same layer as
# `apt-get update`, so a cached, stale package index can never be paired with
# a later install step. Drop the index afterwards to keep the layer small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        python3-venv && \
    apt-get install -y ffmpeg espeak-ng && \
    rm -rf /var/lib/apt/lists/*

# Set a working directory
WORKDIR /app

# Create the virtualenv and install audiblez into it
RUN python3 -m venv audiblez && ./audiblez/bin/pip install audiblez

# Default command: keep the container alive for a day
CMD ["/usr/bin/sleep", "86400"]
|
||||
654
stacks/platform/modules/nvidia/main.tf
Normal file
654
stacks/platform/modules/nvidia/main.tf
Normal file
|
|
@ -0,0 +1,654 @@
|
|||
variable "tls_secret_name" {}
|
||||
variable "tier" { type = string }
|
||||
|
||||
module "tls_secret" {
|
||||
source = "../../../../modules/kubernetes/setup_tls_secret"
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
tls_secret_name = var.tls_secret_name
|
||||
}
|
||||
|
||||
resource "kubernetes_namespace" "nvidia" {
|
||||
metadata {
|
||||
name = "nvidia"
|
||||
labels = {
|
||||
"istio-injection" : "disabled"
|
||||
tier = var.tier
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Apply GPU taint to ensure only GPU workloads run on GPU node
|
||||
resource "null_resource" "gpu_node_taint" {
|
||||
provisioner "local-exec" {
|
||||
command = "kubectl taint nodes k8s-node1 nvidia.com/gpu=true:NoSchedule --overwrite"
|
||||
}
|
||||
|
||||
# Re-run if namespace changes (proxy for cluster changes)
|
||||
triggers = {
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
}
|
||||
}
|
||||
|
||||
# [not needed anymore; part of the chart values] Apply to operator with:
|
||||
# kubectl patch clusterpolicies.nvidia.com/cluster-policy -n gpu-operator --type merge -p '{"spec": {"devicePlugin": {"config": {"name": "time-slicing-config", "default": "any"}}}}'
|
||||
|
||||
resource "kubernetes_config_map" "time_slicing_config" {
|
||||
metadata {
|
||||
name = "time-slicing-config"
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
}
|
||||
|
||||
data = {
|
||||
any = <<-EOF
|
||||
flags:
|
||||
migStrategy: none
|
||||
sharing:
|
||||
timeSlicing:
|
||||
renameByDefault: false
|
||||
failRequestsGreaterThanOne: false
|
||||
resources:
|
||||
- name: nvidia.com/gpu
|
||||
replicas: 100
|
||||
EOF
|
||||
}
|
||||
depends_on = [kubernetes_namespace.nvidia]
|
||||
}
|
||||
|
||||
resource "helm_release" "nvidia-gpu-operator" {
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
name = "nvidia-gpu-operator"
|
||||
|
||||
repository = "https://helm.ngc.nvidia.com/nvidia"
|
||||
chart = "gpu-operator"
|
||||
atomic = true
|
||||
# version = "0.9.3"
|
||||
timeout = 6000
|
||||
|
||||
values = [templatefile("${path.module}/values.yaml", {})]
|
||||
depends_on = [kubernetes_config_map.time_slicing_config]
|
||||
}
|
||||
|
||||
resource "kubernetes_deployment" "nvidia-exporter" {
|
||||
metadata {
|
||||
name = "nvidia-exporter"
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
labels = {
|
||||
app = "nvidia-exporter"
|
||||
tier = var.tier
|
||||
}
|
||||
}
|
||||
spec {
|
||||
replicas = 1
|
||||
selector {
|
||||
match_labels = {
|
||||
app = "nvidia-exporter"
|
||||
}
|
||||
}
|
||||
template {
|
||||
metadata {
|
||||
labels = {
|
||||
app = "nvidia-exporter"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
node_selector = {
|
||||
"gpu" : "true"
|
||||
}
|
||||
toleration {
|
||||
key = "nvidia.com/gpu"
|
||||
operator = "Equal"
|
||||
value = "true"
|
||||
effect = "NoSchedule"
|
||||
}
|
||||
container {
|
||||
image = "nvidia/dcgm-exporter:latest"
|
||||
name = "nvidia-exporter"
|
||||
port {
|
||||
container_port = 9400
|
||||
}
|
||||
security_context {
|
||||
privileged = true
|
||||
capabilities {
|
||||
add = ["SYS_ADMIN"]
|
||||
}
|
||||
}
|
||||
resources {
|
||||
limits = {
|
||||
"nvidia.com/gpu" = "1"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
depends_on = [helm_release.nvidia-gpu-operator]
|
||||
}
|
||||
|
||||
resource "kubernetes_service" "nvidia-exporter" {
|
||||
metadata {
|
||||
name = "nvidia-exporter"
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
labels = {
|
||||
"app" = "nvidia-exporter"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
selector = {
|
||||
app = "nvidia-exporter"
|
||||
}
|
||||
port {
|
||||
name = "http"
|
||||
port = 80
|
||||
target_port = 9400
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
module "ingress" {
|
||||
source = "../../../../modules/kubernetes/ingress_factory"
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
name = "nvidia-exporter"
|
||||
root_domain = "viktorbarzin.lan"
|
||||
tls_secret_name = var.tls_secret_name
|
||||
allow_local_access_only = true
|
||||
ssl_redirect = false
|
||||
}
|
||||
|
||||
# resource "kubernetes_ingress_v1" "nvidia-exporter" {
|
||||
# metadata {
|
||||
# name = "nvidia-exporter"
|
||||
# namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
# annotations = {
|
||||
# "kubernetes.io/ingress.class" = "nginx"
|
||||
# "nginx.ingress.kubernetes.io/whitelist-source-range" : "192.168.1.0/24, 10.0.0.0/8"
|
||||
# "nginx.ingress.kubernetes.io/ssl-redirect" : "false" # used only in LAN
|
||||
|
||||
# }
|
||||
# }
|
||||
# spec {
|
||||
# tls {
|
||||
# hosts = ["nvidia-exporter.viktorbarzin.lan"]
|
||||
# secret_name = var.tls_secret_name
|
||||
# }
|
||||
# rule {
|
||||
# host = "nvidia-exporter.viktorbarzin.lan"
|
||||
# http {
|
||||
# path {
|
||||
# backend {
|
||||
# service {
|
||||
# name = "nvidia-exporter"
|
||||
# port {
|
||||
# number = 80
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
|
||||
# resource "kubernetes_deployment" "gpu-container" {
|
||||
# metadata {
|
||||
# name = "gpu-container"
|
||||
# namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
# labels = {
|
||||
# app = "gpu-container"
|
||||
# }
|
||||
# }
|
||||
# spec {
|
||||
# replicas = 1
|
||||
# selector {
|
||||
# match_labels = {
|
||||
# app = "gpu-container"
|
||||
# }
|
||||
# }
|
||||
# template {
|
||||
# metadata {
|
||||
# labels = {
|
||||
# app = "gpu-container"
|
||||
# }
|
||||
# }
|
||||
# spec {
|
||||
# node_selector = {
|
||||
# "gpu" : "true"
|
||||
# }
|
||||
# container {
|
||||
# image = "ubuntu"
|
||||
# name = "gpu-container"
|
||||
# command = ["/usr/bin/sleep", "3600"]
|
||||
# # security_context {
|
||||
# # privileged = true
|
||||
# # capabilities {
|
||||
# # add = ["SYS_ADMIN"]
|
||||
# # }
|
||||
# # }
|
||||
# resources {
|
||||
# limits = {
|
||||
# "nvidia.com/gpu" = "1"
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# depends_on = [helm_release.nvidia-gpu-operator]
|
||||
# }
|
||||
|
||||
# GPU Pod Memory Exporter - exposes per-pod GPU memory usage as Prometheus metrics
|
||||
resource "kubernetes_config_map" "gpu_pod_exporter_script" {
|
||||
metadata {
|
||||
name = "gpu-pod-exporter-script"
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
}
|
||||
|
||||
data = {
|
||||
"exporter.py" = <<-EOF
|
||||
#!/usr/bin/env python3
|
||||
"""GPU Pod Memory Exporter - Collects per-pod GPU memory usage."""
|
||||
|
||||
import subprocess
|
||||
import time
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import urllib.request
|
||||
import ssl
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
|
||||
METRICS_PORT = 9401
|
||||
SCRAPE_INTERVAL = 15
|
||||
|
||||
# Kubernetes API configuration
|
||||
K8S_API = "https://kubernetes.default.svc"
|
||||
TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"
|
||||
CA_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
|
||||
|
||||
# Cache for container ID to pod info mapping
|
||||
container_cache = {}
|
||||
cache_refresh_time = 0
|
||||
CACHE_TTL = 60 # Refresh cache every 60 seconds
|
||||
|
||||
def get_k8s_token():
    """Read the Kubernetes service account token.

    Returns:
        The token with surrounding whitespace stripped, or ``None`` when the
        file at ``TOKEN_PATH`` cannot be opened or read.
    """
    try:
        with open(TOKEN_PATH, 'r') as token_file:
            return token_file.read().strip()
    except OSError as exc:
        # Catch only I/O failures: a bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit and hide programming errors.
        print(f"Unable to read service account token: {exc}")
        return None
|
||||
|
||||
def _build_container_cache(pod_list):
    """Map short (12-character) container IDs to pod/namespace/container names."""
    cache = {}
    for pod in pod_list.get('items', []):
        pod_name = pod['metadata']['name']
        namespace = pod['metadata']['namespace']

        for status in pod.get('status', {}).get('containerStatuses', []):
            container_id = status.get('containerID', '')
            # Extract the ID part (e.g., "containerd://abc123..." -> "abc123")
            if '://' in container_id:
                container_id = container_id.split('://')[-1]
            if container_id:
                cache[container_id[:12]] = {
                    'pod': pod_name,
                    'namespace': namespace,
                    'container': status.get('name', 'unknown')
                }
    return cache


def refresh_container_cache():
    """Refresh the container ID to pod mapping from the Kubernetes API.

    Lists the pods whose ``spec.nodeName`` equals the ``NODE_NAME`` environment
    variable and replaces the module-level ``container_cache`` on success.
    On any failure the previous cache is kept and the error is printed.
    """
    global container_cache, cache_refresh_time

    # Record the attempt before doing any work. Otherwise a failing API (or a
    # missing token) leaves the timestamp stale, and every single lookup
    # triggers another request with a 10 second timeout.
    cache_refresh_time = time.time()

    token = get_k8s_token()
    if not token:
        return

    try:
        # Create SSL context with K8s CA
        ctx = ssl.create_default_context()
        if os.path.exists(CA_PATH):
            ctx.load_verify_locations(CA_PATH)

        # Get all pods on this node
        node_name = os.environ.get('NODE_NAME', '')
        url = f"{K8S_API}/api/v1/pods?fieldSelector=spec.nodeName={node_name}"

        req = urllib.request.Request(url, headers={
            'Authorization': f'Bearer {token}',
            'Accept': 'application/json'
        })

        with urllib.request.urlopen(req, context=ctx, timeout=10) as resp:
            data = json.loads(resp.read().decode())

        container_cache = _build_container_cache(data)
        print(f"Refreshed container cache: {len(container_cache)} containers")

    except Exception as e:
        print(f"Error refreshing container cache: {e}")
|
||||
|
||||
def get_pod_info(container_id):
    """Return the pod/namespace/container names recorded for a container ID.

    The cache is refreshed first when it is older than ``CACHE_TTL`` seconds.
    IDs that are not in the cache resolve to a record whose three fields are
    all ``'unknown'``.
    """
    cache_age = time.time() - cache_refresh_time
    if cache_age > CACHE_TTL:
        # Refresh cache if stale
        refresh_container_cache()

    if container_id in container_cache:
        return container_cache[container_id]
    return {
        'pod': 'unknown',
        'namespace': 'unknown',
        'container': 'unknown'
    }
|
||||
|
||||
def get_gpu_processes():
    """Run nvidia-smi and return one record per GPU compute process.

    Returns:
        A list of dicts with keys ``pid`` (str), ``memory_bytes`` (int) and
        ``process_name`` (str). The list is empty when nvidia-smi cannot be
        run or exits with a non-zero status.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-compute-apps=pid,used_memory,process_name", "--format=csv,noheader,nounits"],
            capture_output=True, text=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"Error running nvidia-smi: {e}")
        return []

    if result.returncode != 0:
        print(f"nvidia-smi error: {result.stderr}")
        return []

    processes = []
    for line in result.stdout.splitlines():
        if not line.strip():
            continue
        # Split at most twice so a comma inside the process name is preserved.
        parts = [p.strip() for p in line.split(',', 2)]
        if len(parts) < 3:
            continue
        pid, memory_mib, process_name = parts
        try:
            memory_bytes = int(memory_mib) * 1024 * 1024
        except ValueError:
            # Skip only this row when the memory cell is not an integer
            # (e.g. "[N/A]"), rather than discarding every process.
            print(f"Skipping nvidia-smi row with non-numeric memory: {line!r}")
            continue
        processes.append({
            'pid': pid,
            'memory_bytes': memory_bytes,
            'process_name': process_name
        })
    return processes
|
||||
|
||||
def get_container_id(pid):
    """Map a host PID to a short container ID via its cgroup file.

    Reads ``/host_proc/<pid>/cgroup`` and returns the first 12 characters of
    the first 64-hex-digit container ID found. Returns ``"host"`` when the
    file cannot be read or contains no container ID.
    """
    cgroup_path = f"/host_proc/{pid}/cgroup"
    # A container ID is 64 hex digits preceded either by a path separator
    # (cgroupfs driver) or by a runtime prefix in a systemd scope name
    # (cri-containerd-<id>.scope, crio-<id>.scope, docker-<id>.scope).
    pattern = r'(?:[:/]|cri-containerd-|crio-|docker-)([a-f0-9]{64})'
    try:
        with open(cgroup_path, 'r') as f:
            for line in f:
                match = re.search(pattern, line)
                if match:
                    return match.group(1)[:12]
    except OSError:
        # Covers a missing file, denied access, and a process that exits
        # while its cgroup file is being read.
        pass
    return "host"
|
||||
|
||||
# Global metrics storage
|
||||
current_metrics = []
|
||||
|
||||
def collect_metrics():
    """Rebuild ``current_metrics`` from the GPU processes reported right now.

    Each GPU process is resolved to a container ID and then to its pod
    information. The finished list replaces the module-level
    ``current_metrics`` in a single assignment.
    """
    global current_metrics

    snapshot = []
    for gpu_proc in get_gpu_processes():
        cid = get_container_id(gpu_proc['pid'])
        owner = get_pod_info(cid)
        record = {
            'container_id': cid,
            'pid': gpu_proc['pid'],
            'process_name': gpu_proc['process_name'],
            'memory_bytes': gpu_proc['memory_bytes'],
        }
        for field in ('pod', 'namespace', 'container'):
            record[field] = owner[field]
        snapshot.append(record)

    current_metrics = snapshot
|
||||
|
||||
def format_metrics():
    """Render ``current_metrics`` in the Prometheus text exposition format.

    Returns:
        The HELP/TYPE header followed by one ``gpu_pod_memory_used_bytes``
        sample per entry, terminated by a newline.
    """
    def escape(value):
        # The exposition format requires backslash, double quote and newline
        # to be escaped inside label values. An unescaped quote in a process
        # name would otherwise make the whole scrape unparsable.
        return str(value).replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')

    lines = [
        "# HELP gpu_pod_memory_used_bytes GPU memory used by pod",
        "# TYPE gpu_pod_memory_used_bytes gauge"
    ]

    for m in current_metrics:
        labels = ','.join(
            f'{key}="{escape(m[key])}"'
            for key in ('namespace', 'pod', 'container', 'process_name', 'pid')
        )
        lines.append(f'gpu_pod_memory_used_bytes{{{labels}}} {m["memory_bytes"]}')

    return '\n'.join(lines) + '\n'
|
||||
|
||||
class MetricsHandler(BaseHTTPRequestHandler):
    """HTTP handler serving ``/metrics`` and ``/health``; other paths get 404."""

    def do_GET(self):
        """Answer a GET request according to the requested path."""
        if self.path == '/metrics':
            payload = format_metrics().encode()
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain; charset=utf-8')
            self.end_headers()
            self.wfile.write(payload)
            return

        if self.path == '/health':
            self.send_response(200)
            self.end_headers()
            self.wfile.write(b'ok')
            return

        self.send_response(404)
        self.end_headers()

    def log_message(self, format, *args):
        """Suppress request logging."""
        return None
|
||||
|
||||
def background_collector():
    """Start a daemon thread that collects metrics every ``SCRAPE_INTERVAL`` seconds."""
    import threading

    def run():
        while True:
            try:
                collect_metrics()
            except Exception as exc:
                # Keep the loop alive: an uncaught error would end the thread
                # silently and the exporter would serve stale metrics forever.
                print(f"Error collecting metrics: {exc}")
            time.sleep(SCRAPE_INTERVAL)

    thread = threading.Thread(target=run, daemon=True)
    thread.start()
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(f"Starting GPU Pod Memory Exporter on port {METRICS_PORT}")
|
||||
refresh_container_cache() # Initial cache load
|
||||
collect_metrics() # Initial collection
|
||||
background_collector()
|
||||
|
||||
server = HTTPServer(('', METRICS_PORT), MetricsHandler)
|
||||
server.serve_forever()
|
||||
EOF
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_service_account" "gpu_pod_exporter" {
|
||||
metadata {
|
||||
name = "gpu-pod-exporter"
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_cluster_role" "gpu_pod_exporter" {
|
||||
metadata {
|
||||
name = "gpu-pod-exporter"
|
||||
}
|
||||
|
||||
rule {
|
||||
api_groups = [""]
|
||||
resources = ["pods"]
|
||||
verbs = ["list"]
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_cluster_role_binding" "gpu_pod_exporter" {
|
||||
metadata {
|
||||
name = "gpu-pod-exporter"
|
||||
}
|
||||
|
||||
role_ref {
|
||||
api_group = "rbac.authorization.k8s.io"
|
||||
kind = "ClusterRole"
|
||||
name = kubernetes_cluster_role.gpu_pod_exporter.metadata[0].name
|
||||
}
|
||||
|
||||
subject {
|
||||
kind = "ServiceAccount"
|
||||
name = kubernetes_service_account.gpu_pod_exporter.metadata[0].name
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_daemonset" "gpu_pod_exporter" {
|
||||
metadata {
|
||||
name = "gpu-pod-exporter"
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
labels = {
|
||||
app = "gpu-pod-exporter"
|
||||
tier = var.tier
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
selector {
|
||||
match_labels = {
|
||||
app = "gpu-pod-exporter"
|
||||
}
|
||||
}
|
||||
|
||||
template {
|
||||
metadata {
|
||||
labels = {
|
||||
app = "gpu-pod-exporter"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
host_pid = true
|
||||
service_account_name = kubernetes_service_account.gpu_pod_exporter.metadata[0].name
|
||||
|
||||
node_selector = {
|
||||
"gpu" : "true"
|
||||
}
|
||||
|
||||
toleration {
|
||||
key = "nvidia.com/gpu"
|
||||
operator = "Equal"
|
||||
value = "true"
|
||||
effect = "NoSchedule"
|
||||
}
|
||||
|
||||
container {
|
||||
name = "exporter"
|
||||
image = "python:3.11-slim"
|
||||
|
||||
command = ["/bin/bash", "-c"]
|
||||
args = [
|
||||
"python3 /scripts/exporter.py"
|
||||
]
|
||||
|
||||
env {
|
||||
name = "NODE_NAME"
|
||||
value_from {
|
||||
field_ref {
|
||||
field_path = "spec.nodeName"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
port {
|
||||
container_port = 9401
|
||||
name = "metrics"
|
||||
}
|
||||
|
||||
volume_mount {
|
||||
name = "scripts"
|
||||
mount_path = "/scripts"
|
||||
read_only = true
|
||||
}
|
||||
|
||||
volume_mount {
|
||||
name = "host-proc"
|
||||
mount_path = "/host_proc"
|
||||
read_only = true
|
||||
}
|
||||
|
||||
resources {
|
||||
requests = {
|
||||
cpu = "50m"
|
||||
memory = "128Mi"
|
||||
}
|
||||
limits = {
|
||||
cpu = "200m"
|
||||
memory = "256Mi"
|
||||
"nvidia.com/gpu" = "1"
|
||||
}
|
||||
}
|
||||
|
||||
liveness_probe {
|
||||
http_get {
|
||||
path = "/health"
|
||||
port = 9401
|
||||
}
|
||||
initial_delay_seconds = 30
|
||||
period_seconds = 30
|
||||
timeout_seconds = 5
|
||||
}
|
||||
}
|
||||
|
||||
volume {
|
||||
name = "scripts"
|
||||
config_map {
|
||||
name = kubernetes_config_map.gpu_pod_exporter_script.metadata[0].name
|
||||
default_mode = "0755"
|
||||
}
|
||||
}
|
||||
|
||||
volume {
|
||||
name = "host-proc"
|
||||
host_path {
|
||||
path = "/proc"
|
||||
type = "Directory"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
depends_on = [helm_release.nvidia-gpu-operator]
|
||||
}
|
||||
|
||||
resource "kubernetes_service" "gpu_pod_exporter" {
|
||||
metadata {
|
||||
name = "gpu-pod-exporter"
|
||||
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||
labels = {
|
||||
app = "gpu-pod-exporter"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
selector = {
|
||||
app = "gpu-pod-exporter"
|
||||
}
|
||||
|
||||
port {
|
||||
name = "metrics"
|
||||
port = 80
|
||||
target_port = 9401
|
||||
}
|
||||
}
|
||||
}
|
||||
27
stacks/platform/modules/nvidia/values.yaml
Normal file
27
stacks/platform/modules/nvidia/values.yaml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
driver:
|
||||
enabled: true
|
||||
# repository: nvcr.io/nvidia/driver
|
||||
# choose a driver version compatible with your GPU + CUDA 12.x (example)
|
||||
# NVIDIA GPU driver - https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/platform-support.html#known-issue
|
||||
# https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/
|
||||
# 13.x >= 580
|
||||
# 12.x >= 525, <580
|
||||
# 11.x >= 450, <525
|
||||
#
|
||||
# Delete the cluster policy before each change
|
||||
# version: "575.57.08" # CUDA 12.9
|
||||
version: "570.195.03" # CUDA 12.8
|
||||
upgradePolicy:
|
||||
autoUpgrade: false
|
||||
|
||||
devicePlugin:
|
||||
config:
|
||||
name: time-slicing-config
|
||||
|
||||
# Tolerate GPU node taint for all GPU operator components
|
||||
daemonsets:
|
||||
tolerations:
|
||||
- key: "nvidia.com/gpu"
|
||||
operator: "Equal"
|
||||
value: "true"
|
||||
effect: "NoSchedule"
|
||||
55
stacks/platform/modules/rbac/apiserver-oidc.tf
Normal file
55
stacks/platform/modules/rbac/apiserver-oidc.tf
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Configure kube-apiserver for OIDC authentication
|
||||
# This SSHs to k8s-master and adds OIDC flags to the static pod manifest.
|
||||
# Kubelet auto-restarts the API server when the manifest changes.
|
||||
|
||||
variable "k8s_master_host" {
|
||||
type = string
|
||||
default = "10.0.20.100"
|
||||
}
|
||||
|
||||
variable "ssh_private_key" {
|
||||
type = string
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
variable "oidc_issuer_url" {
|
||||
type = string
|
||||
default = "https://authentik.viktorbarzin.me/application/o/kubernetes/"
|
||||
}
|
||||
|
||||
variable "oidc_client_id" {
|
||||
type = string
|
||||
default = "kubernetes"
|
||||
}
|
||||
|
||||
# Adds the OIDC flags to the kube-apiserver static pod manifest on the master
# node over SSH. Re-runs whenever the issuer URL or client ID changes.
#
# NOTE(review): when the flags are already present the script exits early, so
# a changed issuer URL / client ID re-triggers the provisioner but does not
# rewrite the existing flags.
resource "null_resource" "apiserver_oidc_config" {
  connection {
    type        = "ssh"
    user        = "wizard"
    host        = var.k8s_master_host
    private_key = var.ssh_private_key
  }

  provisioner "remote-exec" {
    inline = [
      # Check if OIDC flags already present. Run through sudo like the other
      # commands: the manifest is typically readable by root only, and a
      # failed read would be taken as "not configured" and append duplicates.
      "if sudo grep -q 'oidc-issuer-url' /etc/kubernetes/manifests/kube-apiserver.yaml; then echo 'OIDC flags already configured'; exit 0; fi",

      # Backup the manifest OUTSIDE the manifests directory: kubelet scans
      # every non-hidden file there, so a .bak copy next to the original
      # would be picked up as a second static pod definition.
      "sudo cp /etc/kubernetes/manifests/kube-apiserver.yaml /etc/kubernetes/kube-apiserver.yaml.bak",

      # Add OIDC flags after the --tls-private-key-file flag (safe insertion
      # point). The anchor line's own leading whitespace (\1) is reused for
      # the new list items so the YAML indentation always matches.
      "sudo sed -i -E 's|^([[:space:]]*)- --tls-private-key-file.*$|&\\n\\1- --oidc-issuer-url=${var.oidc_issuer_url}\\n\\1- --oidc-client-id=${var.oidc_client_id}\\n\\1- --oidc-username-claim=email\\n\\1- --oidc-groups-claim=groups|' /etc/kubernetes/manifests/kube-apiserver.yaml",

      # Wait for API server to restart (kubelet watches the manifest)
      "echo 'Waiting for API server to restart...'",
      "sleep 30",
      "sudo kubectl --kubeconfig=/etc/kubernetes/admin.conf get nodes || echo 'API server still restarting, check manually'",
    ]
  }

  triggers = {
    oidc_issuer_url = var.oidc_issuer_url
    oidc_client_id  = var.oidc_client_id
  }
}
|
||||
95
stacks/platform/modules/rbac/audit-policy.tf
Normal file
95
stacks/platform/modules/rbac/audit-policy.tf
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
# Deploy audit policy to k8s-master and configure kube-apiserver to use it.
|
||||
# Audit logs are written to /var/log/kubernetes/audit.log on the master node.
|
||||
# Alloy (log collector DaemonSet) will pick them up and ship to Loki.
|
||||
|
||||
# Uploads the audit policy to var.k8s_master_host and patches the
# kube-apiserver static pod manifest (flags, volumes, volumeMounts) over SSH.
resource "null_resource" "audit_policy" {
  # Changing this value replaces the resource, which re-runs both provisioners.
  # NOTE(review): on a host that already has the audit flags, the guard in the
  # remote-exec step exits before the manifest is touched, so a re-uploaded
  # policy is only on disk; confirm how/when the API server is restarted to
  # load it.
  triggers = {
    policy_version = "v2" # Bump to re-apply
  }

  # The audit flags are inserted after the '--oidc-groups-claim' line, which is
  # added by the OIDC resource. sed's append only acts on matching lines, so
  # without that line no audit flags are written.
  depends_on = [null_resource.apiserver_oidc_config]

  connection {
    type        = "ssh"
    user        = "wizard"
    host        = var.k8s_master_host
    private_key = var.ssh_private_key
  }

  # Upload audit policy file. Rules are listed in priority order (the first
  # matching rule decides the level), so the 'None' and 'Metadata' exceptions
  # must stay above the catch-all rules.
  provisioner "file" {
    content = yamlencode({
      apiVersion = "audit.k8s.io/v1"
      kind       = "Policy"
      rules = [
        {
          # Don't log kube-proxy's requests for endpoints/services (very noisy)
          level = "None"
          resources = [{
            group     = ""
            resources = ["endpoints", "services", "services/status"]
          }]
          users = ["system:kube-proxy"]
        },
        {
          # Don't log watch requests (very noisy)
          level = "None"
          verbs = ["watch"]
        },
        {
          # Don't log health checks
          level           = "None"
          nonResourceURLs = ["/healthz*", "/readyz*", "/livez*"]
        },
        {
          # Secrets, TokenRequests and TokenReviews carry credentials in their
          # request/response bodies. Log them at Metadata level only, so the
          # RequestResponse rule below never writes tokens or secret data to
          # the audit log.
          level = "Metadata"
          resources = [
            {
              group     = ""
              resources = ["secrets", "serviceaccounts/token"]
            },
            {
              group     = "authentication.k8s.io"
              resources = ["tokenreviews"]
            },
          ]
        },
        {
          # Log all other mutating requests at RequestResponse level
          level = "RequestResponse"
          verbs = ["create", "update", "patch", "delete"]
        },
        {
          # Log read requests at Metadata level
          level = "Metadata"
          verbs = ["get", "list"]
        },
      ]
    })
    destination = "/tmp/audit-policy.yaml"
  }

  provisioner "remote-exec" {
    inline = [
      # Move audit policy to proper location
      "sudo mkdir -p /etc/kubernetes/policies",
      "sudo mv /tmp/audit-policy.yaml /etc/kubernetes/policies/audit-policy.yaml",
      "sudo chown root:root /etc/kubernetes/policies/audit-policy.yaml",

      # Create audit log directory
      "sudo mkdir -p /var/log/kubernetes",

      # Idempotency guard: everything below is skipped when the flags exist.
      "if grep -q 'audit-policy-file' /etc/kubernetes/manifests/kube-apiserver.yaml; then echo 'Audit flags already configured'; exit 0; fi",

      # Add audit flags to kube-apiserver manifest.
      # NOTE(review): the whitespace in the three sed insert texts below must
      # match the manifest's YAML indentation - verify before applying.
      "sudo sed -i '/- --oidc-groups-claim/a\\ - --audit-policy-file=/etc/kubernetes/policies/audit-policy.yaml\\n - --audit-log-path=/var/log/kubernetes/audit.log\\n - --audit-log-maxage=7\\n - --audit-log-maxbackup=3\\n - --audit-log-maxsize=100' /etc/kubernetes/manifests/kube-apiserver.yaml",

      # Add hostPath volumes and the matching volumeMounts so the
      # kube-apiserver pod can read the policy file and write the log.
      "sudo sed -i '/volumes:/a\\ - hostPath:\\n path: /etc/kubernetes/policies\\n type: DirectoryOrCreate\\n name: audit-policy\\n - hostPath:\\n path: /var/log/kubernetes\\n type: DirectoryOrCreate\\n name: audit-log' /etc/kubernetes/manifests/kube-apiserver.yaml",

      "sudo sed -i '/volumeMounts:/a\\ - mountPath: /etc/kubernetes/policies\\n name: audit-policy\\n readOnly: true\\n - mountPath: /var/log/kubernetes\\n name: audit-log' /etc/kubernetes/manifests/kube-apiserver.yaml",

      # Wait for API server to restart; the final probe is best-effort.
      "echo 'Waiting for API server to restart with audit logging...'",
      "sleep 30",
      "sudo kubectl --kubeconfig=/etc/kubernetes/admin.conf get nodes || echo 'API server still restarting'",
    ]
  }
}
|
||||
252
stacks/platform/modules/rbac/main.tf
Normal file
252
stacks/platform/modules/rbac/main.tf
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
variable "tls_secret_name" {}
variable "tier" { type = string }

# Users that receive OIDC-based RBAC bindings. The map key is a short user
# identifier that is embedded in the names of the generated bindings.
variable "k8s_users" {
  description = "Map of user key => role, OIDC email, owned namespaces and per-namespace quota."

  type = map(object({
    role       = string                     # "admin", "power-user", "namespace-owner"
    email      = string                     # OIDC email claim
    namespaces = optional(list(string), []) # for namespace-owners
    quota = optional(object({
      cpu_requests    = optional(string, "2")
      memory_requests = optional(string, "4Gi")
      cpu_limits      = optional(string, "4")
      memory_limits   = optional(string, "8Gi")
      pods            = optional(string, "20")
    }), {})
  }))
  default = {}

  # Every binding in this file filters on an exact role string, so a
  # misspelled role would otherwise create no bindings without any error.
  validation {
    condition = alltrue([
      for user in values(var.k8s_users) :
      contains(["admin", "power-user", "namespace-owner"], user.role)
    ])
    error_message = "Each k8s_users entry must set role to one of: admin, power-user, namespace-owner."
  }
}
|
||||
|
||||
# --- Admin role ---
|
||||
# Binds to built-in cluster-admin ClusterRole
|
||||
|
||||
# One ClusterRoleBinding per k8s_users entry whose role is "admin", granting
# the built-in cluster-admin ClusterRole to the user identified by email.
resource "kubernetes_cluster_role_binding" "admin_users" {
  for_each = {
    for user_key, cfg in var.k8s_users : user_key => cfg
    if cfg.role == "admin"
  }

  metadata {
    name = "oidc-admin-${each.key}"
  }

  subject {
    api_group = "rbac.authorization.k8s.io"
    kind      = "User"
    name      = each.value.email
  }

  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "ClusterRole"
    name      = "cluster-admin"
  }
}
|
||||
|
||||
# --- Power-user role ---
|
||||
# Can manage workloads cluster-wide but cannot modify RBAC, nodes, or persistent volumes
|
||||
|
||||
# ClusterRole for "power-user" accounts. The rules are declared as data and
# expanded into `rule` blocks in the listed order.
# NOTE(review): the first two entries grant cluster-wide read AND write on
# secrets - confirm this breadth is intended for this role.
resource "kubernetes_cluster_role" "power_user" {
  metadata {
    name = "oidc-power-user"
  }

  dynamic "rule" {
    for_each = [
      # Core resources: read
      {
        api_groups = [""]
        resources  = ["pods", "pods/log", "pods/exec", "services", "endpoints", "configmaps", "secrets", "persistentvolumeclaims", "events", "namespaces"]
        verbs      = ["get", "list", "watch"]
      },
      # Core resources: write
      {
        api_groups = [""]
        resources  = ["pods", "services", "configmaps", "secrets", "persistentvolumeclaims"]
        verbs      = ["create", "update", "patch", "delete"]
      },
      # Apps
      {
        api_groups = ["apps"]
        resources  = ["deployments", "statefulsets", "daemonsets", "replicasets"]
        verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
      },
      # Batch
      {
        api_groups = ["batch"]
        resources  = ["jobs", "cronjobs"]
        verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
      },
      # Networking
      {
        api_groups = ["networking.k8s.io"]
        resources  = ["ingresses", "networkpolicies"]
        verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
      },
      # Autoscaling
      {
        api_groups = ["autoscaling"]
        resources  = ["horizontalpodautoscalers"]
        verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
      },
      # Read-only on cluster-level resources
      {
        api_groups = [""]
        resources  = ["nodes"]
        verbs      = ["get", "list", "watch"]
      },
      {
        api_groups = ["storage.k8s.io"]
        resources  = ["storageclasses"]
        verbs      = ["get", "list", "watch"]
      },
      {
        api_groups = ["rbac.authorization.k8s.io"]
        resources  = ["clusterroles", "clusterrolebindings", "roles", "rolebindings"]
        verbs      = ["get", "list", "watch"]
      },
    ]

    content {
      api_groups = rule.value.api_groups
      resources  = rule.value.resources
      verbs      = rule.value.verbs
    }
  }
}
|
||||
|
||||
# One ClusterRoleBinding per k8s_users entry whose role is "power-user",
# attaching the oidc-power-user ClusterRole to the user's email.
resource "kubernetes_cluster_role_binding" "power_users" {
  for_each = {
    for user_key, cfg in var.k8s_users : user_key => cfg
    if cfg.role == "power-user"
  }

  metadata {
    name = "oidc-power-user-${each.key}"
  }

  subject {
    api_group = "rbac.authorization.k8s.io"
    kind      = "User"
    name      = each.value.email
  }

  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "ClusterRole"
    # Referencing the resource (not a literal) orders creation after the role.
    name = kubernetes_cluster_role.power_user.metadata[0].name
  }
}
|
||||
|
||||
# --- Namespace-owner role ---
|
||||
# Full admin within assigned namespaces + read-only cluster-wide
|
||||
|
||||
locals {
  # One object per (namespace-owner user, owned namespace) combination.
  # Users with any other role contribute an empty inner list, which flatten()
  # drops, so only namespace-owners appear in the result.
  namespace_owner_pairs = flatten([
    for user_key, cfg in var.k8s_users : [
      for owned_ns in cfg.namespaces : {
        user_key  = user_key
        namespace = owned_ns
        email     = cfg.email
        quota     = cfg.quota
      }
      if cfg.role == "namespace-owner"
    ]
  ])
}
|
||||
|
||||
# Binds the built-in "admin" ClusterRole to each namespace-owner inside each
# namespace they own. As a RoleBinding, the grant is scoped to that namespace.
resource "kubernetes_role_binding" "namespace_owner" {
  # Keyed by "<user>-<namespace>" so every pair gets its own instance.
  for_each = {
    for p in local.namespace_owner_pairs : "${p.user_key}-${p.namespace}" => p
  }

  metadata {
    namespace = each.value.namespace
    name      = "namespace-owner-${each.value.user_key}"
  }

  subject {
    api_group = "rbac.authorization.k8s.io"
    kind      = "User"
    name      = each.value.email
  }

  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "ClusterRole"
    name      = "admin" # Built-in ClusterRole with full namespace access
  }
}
|
||||
|
||||
# Read-only cluster-wide access for namespace owners
|
||||
# Cluster-wide, read-only ClusterRole for namespace owners. Every rule uses
# the same verbs, so only API group and resources are listed per entry.
resource "kubernetes_cluster_role" "namespace_owner_readonly" {
  metadata {
    name = "oidc-namespace-owner-readonly"
  }

  dynamic "rule" {
    for_each = [
      { api_groups = [""], resources = ["namespaces", "nodes"] },
      { api_groups = [""], resources = ["pods", "services", "configmaps", "events"] },
      { api_groups = ["apps"], resources = ["deployments", "statefulsets", "daemonsets"] },
    ]

    content {
      api_groups = rule.value.api_groups
      resources  = rule.value.resources
      verbs      = ["get", "list", "watch"]
    }
  }
}
|
||||
|
||||
# One ClusterRoleBinding per namespace-owner, attaching the read-only
# ClusterRole to the user's email.
resource "kubernetes_cluster_role_binding" "namespace_owner_readonly" {
  for_each = {
    for user_key, cfg in var.k8s_users : user_key => cfg
    if cfg.role == "namespace-owner"
  }

  metadata {
    name = "oidc-ns-owner-readonly-${each.key}"
  }

  subject {
    api_group = "rbac.authorization.k8s.io"
    kind      = "User"
    name      = each.value.email
  }

  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "ClusterRole"
    name      = kubernetes_cluster_role.namespace_owner_readonly.metadata[0].name
  }
}
|
||||
|
||||
# Resource quotas per user namespace
|
||||
# ResourceQuota for every (namespace-owner, namespace) pair, using the limits
# from that user's `quota` object.
# NOTE(review): the object name is the fixed string "user-quota". If two
# namespace-owners list the same namespace, two instances target the same
# name in the same namespace - confirm namespaces are never shared.
resource "kubernetes_resource_quota" "user_namespace_quota" {
  for_each = {
    for p in local.namespace_owner_pairs : "${p.user_key}-${p.namespace}" => p
  }

  depends_on = [kubernetes_role_binding.namespace_owner]

  metadata {
    namespace = each.value.namespace
    name      = "user-quota"
  }

  spec {
    hard = {
      "pods"            = each.value.quota.pods
      "requests.cpu"    = each.value.quota.cpu_requests
      "requests.memory" = each.value.quota.memory_requests
      "limits.cpu"      = each.value.quota.cpu_limits
      "limits.memory"   = each.value.quota.memory_limits
    }
  }
}
|
||||
|
||||
# ConfigMap with user-role mapping for the self-service portal
|
||||
# Publishes the user -> role/namespaces mapping as JSON for the self-service
# portal. The JSON object is keyed by user email; the map key of k8s_users is
# not included.
resource "kubernetes_config_map" "user_roles" {
  metadata {
    namespace = "k8s-portal"
    name      = "k8s-user-roles"
  }

  data = {
    "users.json" = jsonencode({
      for user_key, cfg in var.k8s_users : cfg.email => {
        role       = cfg.role
        namespaces = cfg.namespaces
      }
    })
  }
}
|
||||
104
stacks/platform/modules/redis/main.tf
Normal file
104
stacks/platform/modules/redis/main.tf
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
variable "tls_secret_name" {}
variable "tier" { type = string }

# Namespace for Redis, labelled with the tier passed in by the caller.
resource "kubernetes_namespace" "redis" {
  metadata {
    name   = "redis"
    labels = { tier = var.tier }
  }
}

# Sets up the TLS secret in the redis namespace via the shared module.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.redis.metadata[0].name
}
|
||||
|
||||
# Single-replica redis-stack Deployment with /data on an NFS export.
resource "kubernetes_deployment" "redis" {
  metadata {
    namespace = kubernetes_namespace.redis.metadata[0].name
    name      = "redis"

    labels = {
      app  = "redis"
      tier = var.tier
    }

    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    replicas = 1

    # Recreate stops the old pod before the new one starts, so two pods never
    # run against the data volume at the same time.
    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = { app = "redis" }
    }

    template {
      metadata {
        labels = { app = "redis" }
      }

      spec {
        volume {
          name = "data"

          nfs {
            server = "10.0.10.15"
            path   = "/mnt/main/redis"
          }
        }

        container {
          name  = "redis"
          image = "redis/redis-stack:latest"

          # 6379 is exposed as "redis" and 8001 as "http" by the Service.
          port {
            container_port = 6379
          }
          port {
            container_port = 8001
          }

          volume_mount {
            name       = "data"
            mount_path = "/data"
          }
        }
      }
    }
  }
}
|
||||
# Service in front of the redis pods, exposing the "redis" (6379) and
# "http" (8001) ports.
resource "kubernetes_service" "redis" {
  metadata {
    namespace = kubernetes_namespace.redis.metadata[0].name
    name      = "redis"
    labels    = { app = "redis" }
  }

  spec {
    selector = { app = "redis" }

    port {
      name = "redis"
      port = 6379
    }

    port {
      name = "http"
      port = 8001
    }
  }
}
|
||||
# Ingress for the redis "http" port (8001), created through the shared
# ingress factory with protected = true.
module "ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "redis"
  namespace       = kubernetes_namespace.redis.metadata[0].name
  port            = 8001
  protected       = true
  tls_secret_name = var.tls_secret_name
}
|
||||
163
stacks/platform/modules/reverse_proxy/factory/main.tf
Normal file
163
stacks/platform/modules/reverse_proxy/factory/main.tf
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
# Inputs of the reverse-proxy factory: one ExternalName Service plus one
# Traefik Ingress per proxied host.

variable "name" {
  description = "Name of the Service and Ingress; also the host prefix in <name>.viktorbarzin.me."
}

variable "namespace" {
  description = "Namespace in which the Service, Ingress and per-service Middlewares are created."
  default     = "reverse-proxy"
}

variable "external_name" {
  description = "DNS name the ExternalName Service points at."
}

variable "port" {
  description = "Backend port, used for the Service port/target_port and the Ingress backend port."
  default     = "80"
}

variable "tls_secret_name" {
  description = "Name of the TLS secret referenced by the Ingress."
}

variable "backend_protocol" {
  description = "Set to \"HTTPS\" to name the Service port https-<name> and add the serversscheme/serverstransport annotations."
  default     = "HTTP"
}

variable "protected" {
  description = "When true, the traefik-authentik-forward-auth middleware is added to the router chain."
  type        = bool
  default     = true
}

variable "ingress_path" {
  description = "Paths for which the Ingress rule routes to the Service."
  type        = list(string)
  default     = ["/"]
}

# NOTE(review): this variable is declared but not referenced anywhere in this
# file, so values passed by callers currently have no effect here - confirm
# whether it should be wired to a middleware/annotation or removed.
variable "max_body_size" {
  description = "Intended maximum request body size (not referenced in this file)."
  type        = string
  default     = "50m"
}

variable "extra_annotations" {
  description = "Annotations merged over the generated ones; a key set here replaces the generated value."
  default     = {}
}

variable "rybbit_site_id" {
  description = "When set, creates the rybbit-analytics middleware for this service and adds it to the router chain."
  default     = null
  type        = string
}

variable "custom_content_security_policy" {
  description = "When set, replaces the shared traefik-csp-headers middleware with a per-service CSP middleware."
  default     = null
  type        = string
}

variable "strip_auth_headers" {
  description = "When true, the traefik-strip-auth-headers middleware is added to the router chain."
  type        = bool
  default     = false
}

variable "extra_middlewares" {
  description = "Additional middleware references appended to the end of the router chain."
  type        = list(string)
  default     = []
}
|
||||
|
||||
|
||||
# ExternalName Service that points at the host being proxied.
resource "kubernetes_service" "proxied-service" {
  metadata {
    namespace = var.namespace
    name      = var.name
    labels    = { "app" = var.name }
  }

  spec {
    type          = "ExternalName"
    external_name = var.external_name

    port {
      # HTTPS backends get an "https-" prefixed port name; everything else is
      # named "<name>-web".
      name        = var.backend_protocol != "HTTPS" ? "${var.name}-web" : "https-${var.name}"
      protocol    = "TCP"
      port        = var.port
      target_port = var.port
    }
  }
}
|
||||
|
||||
# Traefik Ingress for <name>.viktorbarzin.me that routes every path in
# var.ingress_path to the ExternalName Service of the same name.
resource "kubernetes_ingress_v1" "proxied-ingress" {
  metadata {
    namespace = var.namespace
    name      = var.name

    # var.extra_annotations is merged last, so callers can replace any of the
    # generated annotations (including the whole middleware chain).
    annotations = merge(
      {
        "traefik.ingress.kubernetes.io/router.entrypoints"       = "websecure"
        "traefik.ingress.kubernetes.io/service.serversscheme"    = var.backend_protocol == "HTTPS" ? "https" : null
        "traefik.ingress.kubernetes.io/service.serverstransport" = var.backend_protocol == "HTTPS" ? "traefik-insecure-skip-verify@kubernetescrd" : null

        # Middleware chain in order; entries that evaluate to null are
        # removed by compact() before joining.
        "traefik.ingress.kubernetes.io/router.middlewares" = join(",", compact(concat(
          [
            "traefik-rate-limit@kubernetescrd",
            var.custom_content_security_policy == null ? "traefik-csp-headers@kubernetescrd" : null,
            "traefik-crowdsec@kubernetescrd",
            var.protected ? "traefik-authentik-forward-auth@kubernetescrd" : null,
            var.strip_auth_headers ? "traefik-strip-auth-headers@kubernetescrd" : null,
            var.rybbit_site_id != null ? "traefik-strip-accept-encoding@kubernetescrd" : null,
            var.rybbit_site_id != null ? "${var.namespace}-rybbit-analytics-${var.name}@kubernetescrd" : null,
            var.custom_content_security_policy != null ? "${var.namespace}-custom-csp-${var.name}@kubernetescrd" : null,
          ],
          var.extra_middlewares
        )))
      },
      var.extra_annotations
    )
  }

  spec {
    ingress_class_name = "traefik"

    tls {
      secret_name = var.tls_secret_name
      hosts       = ["${var.name}.viktorbarzin.me"]
    }

    rule {
      host = "${var.name}.viktorbarzin.me"

      http {
        # One path entry per element of var.ingress_path, all with the same backend.
        dynamic "path" {
          for_each = var.ingress_path

          content {
            path = path.value

            backend {
              service {
                name = var.name

                port {
                  number = var.port
                }
              }
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Rybbit analytics middleware (rewrite-body plugin with content-type filtering) - created per service when rybbit_site_id is set
|
||||
# Per-service Traefik Middleware that injects the Rybbit script tag before
# </head>. Created only when var.rybbit_site_id is set.
resource "kubernetes_manifest" "rybbit_analytics" {
  count = var.rybbit_site_id == null ? 0 : 1

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"

    metadata = {
      namespace = var.namespace
      name      = "rybbit-analytics-${var.name}"
    }

    spec = {
      plugin = {
        rewrite-body = {
          # Restrict the rewrite to HTML responses.
          monitoring = {
            types = ["text/html"]
          }

          rewrites = [{
            regex       = "</head>"
            replacement = "<script src=\"https://rybbit.viktorbarzin.me/api/script.js\" data-site-id=\"${var.rybbit_site_id}\" defer></script></head>"
          }]
        }
      }
    }
  }
}
|
||||
|
||||
# Custom CSP headers middleware - created per service when custom_content_security_policy is set
|
||||
# Per-service Traefik Middleware that sets the Content-Security-Policy header.
# Created only when var.custom_content_security_policy is set.
resource "kubernetes_manifest" "custom_csp" {
  count = var.custom_content_security_policy == null ? 0 : 1

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"

    metadata = {
      namespace = var.namespace
      name      = "custom-csp-${var.name}"
    }

    spec = {
      headers = {
        contentSecurityPolicy = var.custom_content_security_policy
      }
    }
  }
}
|
||||
295
stacks/platform/modules/reverse_proxy/main.tf
Normal file
295
stacks/platform/modules/reverse_proxy/main.tf
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
# Reverse proxy for things in my infra that are
|
||||
# outside of K8s but would be nice to expose through the cluster ingress (Traefik)
|
||||
|
||||
variable "tls_secret_name" {}
variable "truenas_homepage_token" {}
variable "pfsense_homepage_token" {}

# Namespace that holds every proxied Service/Ingress created in this file.
resource "kubernetes_namespace" "reverse-proxy" {
  metadata {
    name = "reverse-proxy"
  }
}

# TLS secret for the namespace. The namespace is given as a literal, so the
# ordering against the namespace resource is declared with depends_on.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = "reverse-proxy"

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://pfsense.viktorbarzin.me/
|
||||
module "pfsense" {
  source = "./factory"

  # pfsense.viktorbarzin.me -> pfsense.viktorbarzin.lan:443 over HTTPS
  name             = "pfsense"
  external_name    = "pfsense.viktorbarzin.lan"
  port             = 443
  backend_protocol = "HTTPS"
  tls_secret_name  = var.tls_secret_name
  rybbit_site_id   = "b029580e5a7c"

  # Homepage dashboard discovery/widget annotations.
  extra_annotations = {
    "gethomepage.dev/enabled"     = "true"
    "gethomepage.dev/description" = "Cluster Firewall"
    # gethomepage.dev/group: Media
    "gethomepage.dev/icon"           = "pfsense.png"
    "gethomepage.dev/name"           = "pFsense"
    "gethomepage.dev/widget.type"    = "pfsense"
    "gethomepage.dev/widget.version" = "2"
    "gethomepage.dev/widget.url"     = "https://10.0.20.1"
    # "gethomepage.dev/widget.token" = var.homepage_token
    "gethomepage.dev/widget.username" = "admin"
    "gethomepage.dev/widget.password" = var.pfsense_homepage_token
    "gethomepage.dev/widget.fields"   = "[\"load\", \"memory\", \"wanStatus\", \"disk\"]"
    "gethomepage.dev/widget.wan"      = "vmx0"
    # "gethomepage.dev/pod-selector" : ""
  }

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://nas.viktorbarzin.me/
|
||||
module "nas" {
  source = "./factory"

  # nas.viktorbarzin.me -> nas.viktorbarzin.lan:5001 over HTTPS
  name             = "nas"
  external_name    = "nas.viktorbarzin.lan"
  port             = 5001
  backend_protocol = "HTTPS"
  tls_secret_name  = var.tls_secret_name

  max_body_size  = "0m"
  rybbit_site_id = "1e11f8449f7d"

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://files.viktorbarzin.me/
|
||||
module "nas-files" {
  source = "./factory"

  # files.viktorbarzin.me -> nas.viktorbarzin.lan:5001 over HTTPS, limited to
  # the listed paths.
  name             = "files"
  external_name    = "nas.viktorbarzin.lan"
  port             = 5001
  backend_protocol = "HTTPS"
  tls_secret_name  = var.tls_secret_name

  protected     = false # allow anyone to download files
  ingress_path  = ["/sharing", "/scripts", "/webman", "/wfmlogindialog.js", "/fsdownload"]
  max_body_size = "0m"

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://idrac.viktorbarzin.me/
|
||||
module "idrac" {
  source = "./factory"

  # idrac.viktorbarzin.me -> idrac.viktorbarzin.lan:443 over HTTPS
  name             = "idrac"
  external_name    = "idrac.viktorbarzin.lan"
  port             = 443
  backend_protocol = "HTTPS"
  tls_secret_name  = var.tls_secret_name

  strip_auth_headers = true
  extra_annotations  = {}

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# Can either listen on https or http; can't do both :/
|
||||
# TODO: Not working yet
|
||||
module "tp-link-gateway" {
  source = "./factory"

  # gw.viktorbarzin.me -> gw.viktorbarzin.lan:443 over HTTPS
  name             = "gw"
  external_name    = "gw.viktorbarzin.lan"
  port             = 443
  backend_protocol = "HTTPS"
  tls_secret_name  = var.tls_secret_name

  protected          = true
  strip_auth_headers = true
  extra_annotations  = {}

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://truenas.viktorbarzin.me/
|
||||
module "truenas" {
  source = "./factory"

  # truenas.viktorbarzin.me -> truenas.viktorbarzin.lan:80 (factory default
  # backend protocol)
  name            = "truenas"
  external_name   = "truenas.viktorbarzin.lan"
  port            = 80
  tls_secret_name = var.tls_secret_name

  max_body_size  = "0m"
  rybbit_site_id = "b66fbd3cb58a"

  # Homepage dashboard discovery/widget annotations.
  extra_annotations = {
    "gethomepage.dev/enabled"     = "true"
    "gethomepage.dev/description" = "TrueNAS"
    # gethomepage.dev/group: Media
    "gethomepage.dev/icon"        = "truenas.png"
    "gethomepage.dev/name"        = "TrueNAS"
    "gethomepage.dev/widget.type" = "truenas"
    "gethomepage.dev/widget.url"  = "https://truenas.viktorbarzin.lan"
    "gethomepage.dev/widget.key"  = var.truenas_homepage_token
    # "gethomepage.dev/widget.enablePools" : "true"
    # "gethomepage.dev/pod-selector" : ""
  }

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://r730.viktorbarzin.me/
|
||||
module "r730" {
  source = "./factory"

  # r730.viktorbarzin.me -> r730.viktorbarzin.lan:443 over HTTPS
  name             = "r730"
  external_name    = "r730.viktorbarzin.lan"
  port             = 443
  backend_protocol = "HTTPS"
  tls_secret_name  = var.tls_secret_name

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://proxmox.viktorbarzin.me/
|
||||
module "proxmox" {
  source = "./factory"

  # proxmox.viktorbarzin.me -> proxmox.viktorbarzin.lan:8006 over HTTPS
  name             = "proxmox"
  external_name    = "proxmox.viktorbarzin.lan"
  port             = 8006
  backend_protocol = "HTTPS"
  tls_secret_name  = var.tls_secret_name

  max_body_size  = "0" # unlimited
  rybbit_site_id = "190a7ad3e1c7"

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://registry.viktorbarzin.me/
|
||||
module "docker-registry-ui" {
  source = "./factory"

  # registry.viktorbarzin.me -> docker-registry.viktorbarzin.lan:8080
  name            = "registry"
  external_name   = "docker-registry.viktorbarzin.lan"
  port            = 8080
  tls_secret_name = var.tls_secret_name

  extra_annotations = {
    # Override middleware chain to remove rate-limit; the UI fires many API calls to list repos/tags.
    # This literal replaces the chain the factory would otherwise generate.
    "traefik.ingress.kubernetes.io/router.middlewares" = "traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd,traefik-authentik-forward-auth@kubernetescrd"
  }

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://valchedrym.viktorbarzin.me/
|
||||
module "valchedrym" {
  source = "./factory"

  # valchedrym.viktorbarzin.me -> valchedrym.viktorbarzin.lan:80 over HTTP
  name             = "valchedrym"
  external_name    = "valchedrym.viktorbarzin.lan"
  port             = 80
  backend_protocol = "HTTP"
  tls_secret_name  = var.tls_secret_name

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://ip150.viktorbarzin.me/
|
||||
# Server has funky behaviour based on headers; works on some browsers but not others...
|
||||
# module "valchedrym-ip150" {
|
||||
# source = "./factory"
|
||||
# name = "ip150"
|
||||
# # external_name = "valchedrym.ddns.net"
|
||||
# external_name = "192.168.0.10"
|
||||
# port = 80
|
||||
# backend_protocol = "HTTP"
|
||||
# use_proxy_protocol = false
|
||||
# tls_secret_name = var.tls_secret_name
|
||||
# protected = false
|
||||
# depends_on = [kubernetes_namespace.reverse-proxy]
|
||||
# }
|
||||
|
||||
# https://mladost3.viktorbarzin.me/
|
||||
module "mladost3" {
  source = "./factory"

  # mladost3.viktorbarzin.me -> mladost3.ddns.net:8080
  name            = "mladost3"
  external_name   = "mladost3.ddns.net"
  port            = 8080
  tls_secret_name = var.tls_secret_name

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# # https://server-switch.viktorbarzin.me/
|
||||
# module "server-switch" {
|
||||
# source = "./factory"
|
||||
# name = "server-switch"
|
||||
# external_name = "server-switch.viktorbarzin.lan"
|
||||
# port = 80
|
||||
# tls_secret_name = var.tls_secret_name
|
||||
# depends_on = [kubernetes_namespace.reverse-proxy]
|
||||
# }
|
||||
|
||||
# https://ha-sofia.viktorbarzin.me/
|
||||
module "ha-sofia" {
  source = "./factory"

  # ha-sofia.viktorbarzin.me -> ha-sofia.viktorbarzin.lan:8123
  name            = "ha-sofia"
  external_name   = "ha-sofia.viktorbarzin.lan"
  port            = 8123
  tls_secret_name = var.tls_secret_name

  # No forward-auth middleware in front of this host.
  protected      = false
  rybbit_site_id = "590fc392690a"

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://ha-london.viktorbarzin.me/
|
||||
module "ha-london" {
  source = "./factory"

  # ha-london.viktorbarzin.me -> ha-london.viktorbarzin.lan:8123
  name            = "ha-london"
  external_name   = "ha-london.viktorbarzin.lan"
  port            = 8123
  tls_secret_name = var.tls_secret_name

  # No forward-auth middleware in front of this host.
  protected = false

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# https://london.viktorbarzin.me/
|
||||
module "london" {
  source = "./factory"

  # london.viktorbarzin.me -> openwrt-london.viktorbarzin.lan:443 over HTTPS
  name             = "london"
  external_name    = "openwrt-london.viktorbarzin.lan"
  port             = 443
  backend_protocol = "HTTPS"
  tls_secret_name  = var.tls_secret_name
  protected        = true

  # Homepage dashboard annotations; discovery is currently switched off.
  extra_annotations = {
    "gethomepage.dev/enabled"     = "false"
    "gethomepage.dev/description" = "OpenWRT London"
    # gethomepage.dev/group: Media
    "gethomepage.dev/icon"        = "openwrt.png"
    "gethomepage.dev/name"        = "OpenWRT London"
    "gethomepage.dev/widget.type" = "openwrt"
    "gethomepage.dev/widget.url"  = "https://100.64.0.14"
    # "gethomepage.dev/widget.token" = var.homepage_token
    "gethomepage.dev/widget.username" = "homepage"
    "gethomepage.dev/widget.password" = "" # add later as Flint2's openwrt is a little odd
    "gethomepage.dev/pod-selector"    = ""
  }

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
module "pi-lights" {
  source = "./factory"

  # pi.viktorbarzin.me -> ha-london.viktorbarzin.lan:5000
  name            = "pi"
  external_name   = "ha-london.viktorbarzin.lan"
  port            = 5000
  tls_secret_name = var.tls_secret_name
  protected       = true

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
|
||||
# module "ups" { # .NET app doesn't work well behind host
|
||||
# source = "./factory"
|
||||
# name = "ups"
|
||||
# external_name = "ups.viktorbarzin.lan"
|
||||
# backend_protocol = "HTTPS"
|
||||
# port = 443
|
||||
# tls_secret_name = var.tls_secret_name
|
||||
# # protected = true
|
||||
# protected = false
|
||||
# depends_on = [kubernetes_namespace.reverse-proxy]
|
||||
# extra_annotations = {
|
||||
# "nginx.ingress.kubernetes.io/upstream-vhost" : "",
|
||||
# # "nginx.ingress.kubernetes.io/proxy-set-header" : "Host: <>",
|
||||
# }
|
||||
# }
|
||||
|
||||
module "mbp14" {
  source = "./factory"

  # mbp14.viktorbarzin.me -> mbp14.viktorbarzin.lan:4020
  name            = "mbp14"
  external_name   = "mbp14.viktorbarzin.lan"
  port            = 4020
  tls_secret_name = var.tls_secret_name
  protected       = true

  depends_on = [kubernetes_namespace.reverse-proxy]
}
|
||||
308
stacks/platform/modules/technitium/main.tf
Normal file
308
stacks/platform/modules/technitium/main.tf
Normal file
|
|
@ -0,0 +1,308 @@
|
|||
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "homepage_token" {}
variable "technitium_db_password" {}

# Namespace for Technitium, labelled with the tier passed in by the caller.
resource "kubernetes_namespace" "technitium" {
  metadata {
    name   = "technitium"
    labels = { tier = var.tier }

    # stale cache error when trying to resolve
    # labels = {
    #   "istio-injection" : "enabled"
    # }
  }
}

# Sets up the TLS secret in the technitium namespace via the shared module.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.technitium.metadata[0].name
}
|
||||
|
||||
# CoreDNS Corefile - manages cluster DNS resolution
|
||||
# The viktorbarzin.lan block forwards to Technitium via LoadBalancer.
|
||||
# A template regex in the viktorbarzin.lan block short-circuits junk queries
|
||||
# caused by ndots:5 search domain expansion (e.g. www.cloudflare.com.viktorbarzin.lan,
|
||||
# redis.redis.svc.cluster.local.viktorbarzin.lan) by returning NXDOMAIN for any
|
||||
# query with 2+ labels before .viktorbarzin.lan. Legitimate single-label queries
|
||||
# (e.g. idrac.viktorbarzin.lan) fall through to Technitium.
|
||||
# Manages the "coredns" ConfigMap in kube-system. The Corefile has two server
# blocks: the default zone, and viktorbarzin.lan, which answers NXDOMAIN for
# names matching the template regex and forwards the rest to 10.0.20.204.
resource "kubernetes_config_map" "coredns" {
  metadata {
    namespace = "kube-system"
    name      = "coredns"
  }

  data = {
    # The <<- form strips the common leading indentation, so the Corefile
    # lines are stored without it.
    Corefile = <<-COREFILE
      .:53 {
      #log
      errors
      health {
      lameduck 5s
      }
      ready
      kubernetes cluster.local in-addr.arpa ip6.arpa {
      pods insecure
      fallthrough in-addr.arpa ip6.arpa
      ttl 30
      }
      prometheus :9153
      #forward . 1.1.1.1
      forward . 10.0.20.1
      #forward . /etc/resolv.conf
      cache {
      success 10000 300 6
      denial 10000 300 60
      }
      loop
      reload
      loadbalance
      }
      viktorbarzin.lan:53 {
      #log
      errors
      template ANY ANY viktorbarzin.lan {
      match ".*\..*\.viktorbarzin\.lan\.$"
      rcode NXDOMAIN
      fallthrough
      }
      forward . 10.0.20.204 # Technitium LoadBalancer
      cache {
      success 10000 300 6
      denial 10000 300 60
      }
      }
    COREFILE
  }
}
|
||||
|
||||
# Technitium DNS server Deployment. Configuration lives on an NFS export
# mounted at /etc/dns; the TLS secret is mounted at /etc/tls/.
resource "kubernetes_deployment" "technitium" {
  # resource "kubernetes_daemonset" "technitium" {
  metadata {
    namespace = kubernetes_namespace.technitium.metadata[0].name
    name      = "technitium"

    labels = {
      app  = "technitium"
      tier = var.tier
    }
  }

  spec {
    # replicas = 1
    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = { app = "technitium" }
    }

    template {
      metadata {
        labels = { app = "technitium" }

        annotations = {
          "diun.enable" = "false"
          # "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
          "diun.include_tags" = "latest"
        }
      }

      spec {
        # Prefer nodes running Traefik for network locality (soft preference,
        # evaluated per node via the hostname topology key).
        affinity {
          pod_affinity {
            preferred_during_scheduling_ignored_during_execution {
              weight = 100

              pod_affinity_term {
                topology_key = "kubernetes.io/hostname"

                label_selector {
                  match_expressions {
                    key      = "app.kubernetes.io/name"
                    operator = "In"
                    values   = ["traefik"]
                  }
                }
              }
            }
          }
        }

        volume {
          name = "nfs-config"

          nfs {
            server = "10.0.10.15"
            path   = "/mnt/main/technitium"
          }
        }

        volume {
          name = "tls-cert"

          secret {
            secret_name = var.tls_secret_name
          }
        }

        container {
          name  = "technitium"
          image = "technitium/dns-server:latest"

          # No requests/limits are currently set; earlier values kept for reference.
          resources {
            # limits = {
            #   cpu    = "1"
            #   memory = "1Gi"
            # }
            # requests = {
            #   cpu    = "1"
            #   memory = "1Gi"
            # }
          }

          port {
            container_port = 5380
          }
          port {
            container_port = 53
          }
          port {
            container_port = 80
          }

          volume_mount {
            name       = "nfs-config"
            mount_path = "/etc/dns"
          }

          volume_mount {
            name       = "tls-cert"
            mount_path = "/etc/tls/"
          }
        }
      }
    }
  }
}
|
||||
|
||||
|
||||
# In-cluster service for Technitium's TCP ports 5380 and 80.
# No `type` is set, so the provider default applies.
resource "kubernetes_service" "technitium-web" {
  metadata {
    name      = "technitium-web"
    namespace = kubernetes_namespace.technitium.metadata[0].name
    labels = {
      "app" = "technitium"
    }
  }

  spec {
    selector = {
      app = "technitium"
    }

    # NOTE(review): this port is named "technitium-dns" but carries 5380,
    # which the homepage widget URL uses over HTTP - confirm the name is intended.
    port {
      name     = "technitium-dns"
      protocol = "TCP"
      port     = "5380"
    }

    port {
      name     = "technitium-doh"
      protocol = "TCP"
      port     = "80"
    }
  }
}
|
||||
|
||||
# LoadBalancer service exposing Technitium on UDP/53.
# NOTE(review): no load-balancer IP is requested here, while the CoreDNS
# Corefile forwards viktorbarzin.lan to 10.0.20.204 - confirm how that
# address is kept stable.
resource "kubernetes_service" "technitium-dns" {
  metadata {
    name      = "technitium-dns"
    namespace = kubernetes_namespace.technitium.metadata[0].name
    labels = {
      "app" = "technitium"
    }
  }

  spec {
    type                    = "LoadBalancer"
    external_traffic_policy = "Local"

    selector = {
      app = "technitium"
    }

    port {
      name     = "technitium-dns"
      protocol = "UDP"
      port     = 53
    }
  }
}
|
||||
# Ingress for the Technitium web console (service port 5380), annotated so
# that gethomepage.dev renders a Technitium widget.
module "ingress" {
  source          = "../../../../modules/kubernetes/ingress_factory"
  namespace       = kubernetes_namespace.technitium.metadata[0].name
  name            = "technitium"
  tls_secret_name = var.tls_secret_name
  port            = 5380

  # Reference the Service resource instead of repeating its name as a
  # literal: the value is the same ("technitium-web"), but Terraform now
  # knows the ingress depends on the service and a rename cannot drift.
  service_name = kubernetes_service.technitium-web.metadata[0].name

  extra_annotations = {
    "gethomepage.dev/enabled"       = "true"
    "gethomepage.dev/description"   = "Internal DNS Server and Recursive Resolver"
    "gethomepage.dev/icon"          = "technitium.png"
    "gethomepage.dev/name"          = "Technitium"
    "gethomepage.dev/widget.type"   = "technitium"
    "gethomepage.dev/widget.url"    = "http://technitium-web.technitium.svc.cluster.local:5380"
    "gethomepage.dev/widget.key"    = var.homepage_token
    "gethomepage.dev/widget.range"  = "LastWeek"
    "gethomepage.dev/widget.fields" = "[\"totalQueries\", \"totalCached\", \"totalBlocked\", \"totalRecursive\"]"
    "gethomepage.dev/pod-selector"  = ""
  }
}
|
||||
|
||||
# Second ingress for the same service, published under host "dns".
module "ingress-doh" {
  source          = "../../../../modules/kubernetes/ingress_factory"
  namespace       = kubernetes_namespace.technitium.metadata[0].name
  name            = "technitium-doh"
  host            = "dns"
  tls_secret_name = var.tls_secret_name

  # Same value as the former "technitium-web" literal, expressed as a
  # reference so Terraform tracks the dependency on the Service.
  service_name = kubernetes_service.technitium-web.metadata[0].name
}
|
||||
|
||||
# Grafana datasource definition (MySQL) for the Technitium query-log database.
# The ConfigMap carries the label grafana_datasource = "1".
# NOTE(review): the database password is embedded in a ConfigMap rather than
# a Secret - confirm this is acceptable for the monitoring namespace.
resource "kubernetes_config_map" "grafana_technitium_datasource" {
  metadata {
    name      = "grafana-technitium-datasource"
    namespace = "monitoring"
    labels = {
      grafana_datasource = "1"
    }
  }

  data = {
    "technitium-datasource.yaml" = yamlencode({
      apiVersion = 1
      datasources = [
        {
          uid      = "technitium-mysql"
          name     = "Technitium MySQL"
          type     = "mysql"
          access   = "proxy"
          url      = "mysql.dbaas.svc.cluster.local:3306"
          database = "technitium"
          user     = "technitium"
          secureJsonData = {
            password = var.technitium_db_password
          }
        },
      ]
    })
  }
}
|
||||
|
||||
# Grafana dashboard for the Technitium DNS query logs. The JSON is read from
# the sibling monitoring module's dashboards directory.
resource "kubernetes_config_map" "grafana_technitium_dashboard" {
  metadata {
    namespace = "monitoring"
    name      = "grafana-technitium-dashboard"
    labels = {
      grafana_dashboard = "1"
    }
  }

  data = {
    "technitium-dns.json" = file("${path.module}/../monitoring/dashboards/technitium-dns.json")
  }
}
|
||||
|
||||
244
stacks/platform/modules/traefik/main.tf
Normal file
244
stacks/platform/modules/traefik/main.tf
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
# Value of the `tier` label applied to the traefik namespace.
variable "tier" {
  type = string
}

# CrowdSec LAPI key handed to the crowdsec-bouncer plugin middleware.
variable "crowdsec_api_key" {
  type = string
}

# Name of the TLS secret passed to the tls_secret and ingress modules.
variable "tls_secret_name" {}
|
||||
|
||||
# Namespace that holds the Traefik release and its shared CRD objects.
resource "kubernetes_namespace" "traefik" {
  metadata {
    name = "traefik"

    labels = {
      tier                         = var.tier
      "app.kubernetes.io/instance" = "traefik"
      "app.kubernetes.io/name"     = "traefik"
    }
  }
}
|
||||
|
||||
# Traefik ingress controller, installed from the upstream chart into the
# namespace managed above.
# NOTE(review): no chart `version` is set, so the chart revision is whatever
# the repository serves at apply time - confirm this is intended.
resource "helm_release" "traefik" {
  name             = "traefik"
  repository       = "https://traefik.github.io/charts"
  chart            = "traefik"
  namespace        = kubernetes_namespace.traefik.metadata[0].name
  create_namespace = false
  atomic           = true
  timeout          = 600

  values = [yamlencode({
    deployment = {
      replicas = 3

      podAnnotations = {
        "diun.enable"       = "true"
        "diun.include_tags" = "^v\\d+(?:\\.\\d+)?(?:\\.\\d+)?.*$"
      }

      # Init container that downloads the two plugin archives and writes the
      # state.json index into the shared "plugins" volume before Traefik starts.
      # The versions here must match `experimental.plugins` below.
      initContainers = [
        {
          name  = "download-plugins"
          image = "alpine:3"
          command = ["sh", "-c", join("", [
            "set -e; ",
            "STORAGE=/plugins-storage; ",
            "mkdir -p \"$STORAGE/archives/github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin\"; ",
            "mkdir -p \"$STORAGE/archives/github.com/packruler/rewrite-body\"; ",
            "wget -q -T 30 -O \"$STORAGE/archives/github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin/v1.4.2.zip\" ",
            "\"https://github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin/archive/refs/tags/v1.4.2.zip\"; ",
            "wget -q -T 30 -O \"$STORAGE/archives/github.com/packruler/rewrite-body/v1.2.0.zip\" ",
            "\"https://github.com/packruler/rewrite-body/archive/refs/tags/v1.2.0.zip\"; ",
            "printf '{\"github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin\":\"v1.4.2\",\"github.com/packruler/rewrite-body\":\"v1.2.0\"}' ",
            "> \"$STORAGE/archives/state.json\"; ",
            "echo \"Plugins pre-downloaded successfully\"",
          ])]
          volumeMounts = [
            {
              name      = "plugins"
              mountPath = "/plugins-storage"
            },
          ]
        },
      ]
    }

    updateStrategy = {
      type = "RollingUpdate"
      rollingUpdate = {
        maxSurge       = 2
        maxUnavailable = 1
      }
    }

    ingressClass = {
      enabled        = true
      isDefaultClass = true
    }

    providers = {
      kubernetesCRD = {
        enabled                   = true
        allowCrossNamespace       = true
        allowExternalNameServices = true
      }
      kubernetesIngress = {
        enabled                   = true
        allowExternalNameServices = true
        publishedService = {
          enabled = true
        }
      }
    }

    # Dashboard/API in insecure mode (served on internal port 8080).
    # The same flag is also passed in additionalArguments below.
    api = {
      insecure = true
    }

    # Entrypoints
    ports = {
      # Plain HTTP: redirected to the websecure entrypoint.
      web = {
        port        = 8000
        exposedPort = 80
        protocol    = "TCP"
        http = {
          redirections = {
            entryPoint = {
              to     = "websecure"
              scheme = "https"
            }
          }
        }
      }

      # HTTPS with TLS enabled and HTTP/3 advertised on 443.
      websecure = {
        port        = 8443
        exposedPort = 443
        protocol    = "TCP"
        http = {
          tls = {
            enabled = true
          }
        }
        http3 = {
          enabled        = true
          advertisedPort = 443
        }
      }

      # Raw TCP entrypoints, exposed on the service with the same port number.
      "whisper-tcp" = {
        port        = 10300
        exposedPort = 10300
        protocol    = "TCP"
        expose      = { default = true }
      }
      "piper-tcp" = {
        port        = 10200
        exposedPort = 10200
        protocol    = "TCP"
        expose      = { default = true }
      }
      "ollama-tcp" = {
        port        = 11434
        exposedPort = 11434
        protocol    = "TCP"
        expose      = { default = true }
      }
    }

    service = {
      type = "LoadBalancer"
      annotations = {
        "metallb.universe.tf/loadBalancerIPs" = "10.0.20.202"
      }
      spec = {
        externalTrafficPolicy = "Local"
      }
    }

    # Plugins (versions mirror the archives fetched by the init container)
    experimental = {
      plugins = {
        "crowdsec-bouncer" = {
          moduleName = "github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin"
          version    = "v1.4.2"
        }
        "rewrite-body" = {
          moduleName = "github.com/packruler/rewrite-body"
          version    = "v1.2.0"
        }
      }
    }

    # Prometheus metrics
    metrics = {
      prometheus = {
        entryPoint           = "metrics"
        addEntryPointsLabels = true
        addRoutersLabels     = true
        addServicesLabels    = true
      }
    }

    # Access logs
    logs = {
      access = {
        enabled = true
      }
    }

    additionalArguments = [
      "--api.insecure=true",
      "--global.checknewversion=false",
      "--global.sendanonymoususage=false",
      # Skip TLS verification for self-signed backend certs (proxmox, idrac, etc.)
      "--serversTransport.insecureSkipVerify=true",
      # Increase timeouts for services like Immich
      "--serversTransport.forwardingTimeouts.dialTimeout=60s",
      "--serversTransport.forwardingTimeouts.responseHeaderTimeout=0s",
      "--serversTransport.forwardingTimeouts.idleConnTimeout=90s",
      # Accept X-Forwarded-* headers from ANY client: `insecure=true` turns
      # off the trusted-IP check rather than restricting it to known proxies.
      "--entryPoints.websecure.forwardedHeaders.insecure=true",
      "--entryPoints.web.forwardedHeaders.insecure=true",
    ]

    # Only requests are set; there are no limits.
    resources = {
      requests = {
        cpu    = "100m"
        memory = "128Mi"
      }
    }

    nodeSelector = {
      "kubernetes.io/os" = "linux"
    }

    tolerations = []
  })]
}
|
||||
|
||||
# Dashboard resources
|
||||
# Invokes the shared setup_tls_secret module for the traefik namespace.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.traefik.metadata[0].name
}
|
||||
|
||||
# Service that targets port 8080 on pods labelled
# app.kubernetes.io/name=traefik (the dashboard/API port).
resource "kubernetes_service" "traefik_dashboard" {
  metadata {
    name      = "traefik-dashboard"
    namespace = kubernetes_namespace.traefik.metadata[0].name
    labels = {
      "app" = "traefik-dashboard"
    }
  }

  spec {
    selector = {
      "app.kubernetes.io/name" = "traefik"
    }

    port {
      name        = "http"
      protocol    = "TCP"
      port        = 8080
      target_port = 8080
    }
  }
}
|
||||
|
||||
# Ingress for the Traefik dashboard on host "traefik", created with
# `protected = true`.
module "ingress" {
  source          = "../../../../modules/kubernetes/ingress_factory"
  namespace       = kubernetes_namespace.traefik.metadata[0].name
  name            = "traefik"
  host            = "traefik"
  port            = 8080
  tls_secret_name = var.tls_secret_name
  protected       = true

  # Resolves to "traefik-dashboard" exactly as before; referencing the
  # resource gives Terraform an explicit dependency on the Service.
  service_name = kubernetes_service.traefik_dashboard.metadata[0].name
}
|
||||
243
stacks/platform/modules/traefik/middleware.tf
Normal file
243
stacks/platform/modules/traefik/middleware.tf
Normal file
|
|
@ -0,0 +1,243 @@
|
|||
# Shared Traefik Middleware CRDs
|
||||
# These are referenced by ingress resources via annotations like:
|
||||
# "traefik.ingress.kubernetes.io/router.middlewares" = "traefik-rate-limit@kubernetescrd"
|
||||
|
||||
# Default rate-limit middleware (average 5, burst 250).
resource "kubernetes_manifest" "middleware_rate_limit" {
  # Applied only after the Traefik release exists.
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "rate-limit"
    }
    spec = {
      rateLimit = { average = 5, burst = 250 }
    }
  }
}
|
||||
|
||||
# Forward-auth middleware: delegates authentication to the Authentik embedded
# outpost and copies the listed response headers onto the request.
resource "kubernetes_manifest" "middleware_authentik_forward_auth" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "authentik-forward-auth"
    }
    spec = {
      forwardAuth = {
        address            = "http://ak-outpost-authentik-embedded-outpost.authentik.svc.cluster.local:9000/outpost.goauthentik.io/auth/traefik"
        trustForwardHeader = true
        authResponseHeaders = [
          "X-authentik-username",
          "X-authentik-uid",
          "X-authentik-email",
          "X-authentik-name",
          "X-authentik-groups",
          "Set-Cookie",
        ]
      }
    }
  }
}
|
||||
|
||||
# IP allow-list middleware restricting access to the listed private
# IPv4 ranges and the fc00::/7 and fe80::/10 IPv6 ranges.
resource "kubernetes_manifest" "middleware_local_only" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "local-only"
    }
    spec = {
      ipAllowList = {
        sourceRange = [
          "192.168.1.0/24",
          "10.0.0.0/8",
          "fc00::/7",
          "fe80::/10",
        ]
      }
    }
  }
}
|
||||
|
||||
# Permanent redirect to the https scheme.
resource "kubernetes_manifest" "middleware_redirect_https" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "redirect-https"
    }
    spec = {
      redirectScheme = { scheme = "https", permanent = true }
    }
  }
}
|
||||
|
||||
# Default Content-Security-Policy header: framing is allowed only from the
# site itself and from viktorbarzin.me (including subdomains).
resource "kubernetes_manifest" "middleware_csp_headers" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "csp-headers"
    }
    spec = {
      headers = {
        contentSecurityPolicy = "frame-ancestors 'self' *.viktorbarzin.me viktorbarzin.me"
      }
    }
  }
}
|
||||
|
||||
# Middleware backed by the crowdsec-bouncer plugin, pointed at the in-cluster
# CrowdSec LAPI in "stream" mode.
resource "kubernetes_manifest" "middleware_crowdsec" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "crowdsec"
    }
    spec = {
      plugin = {
        "crowdsec-bouncer" = {
          crowdsecMode     = "stream"
          crowdsecLapiHost = "crowdsec-service.crowdsec.svc.cluster.local:8080"
          crowdsecLapiKey  = var.crowdsec_api_key
        }
      }
    }
  }
}
|
||||
|
||||
# TLSOption "mtls": requires and verifies a client certificate against the
# CA held in the secret "ca-secret".
resource "kubernetes_manifest" "tls_option_mtls" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "TLSOption"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "mtls"
    }
    spec = {
      clientAuth = {
        clientAuthType = "RequireAndVerifyClientCert"
        secretNames    = ["ca-secret"]
      }
    }
  }
}
|
||||
|
||||
# ServersTransport that disables backend certificate verification, for
# backends serving self-signed certificates.
resource "kubernetes_manifest" "servers_transport_insecure" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "ServersTransport"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "insecure-skip-verify"
    }
    spec = {
      insecureSkipVerify = true
    }
  }
}
|
||||
|
||||
# Blanks the X-authentik-* request headers before the request reaches the
# backend. Useful for backends (iDRAC, TP-Link) that break on extra headers.
resource "kubernetes_manifest" "middleware_strip_auth_headers" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "strip-auth-headers"
    }
    spec = {
      headers = {
        # Each header is set to the empty string.
        customRequestHeaders = {
          "X-authentik-username" = ""
          "X-authentik-uid"      = ""
          "X-authentik-email"    = ""
          "X-authentik-name"     = ""
          "X-authentik-groups"   = ""
        }
      }
    }
  }
}
|
||||
|
||||
# Rate limit for Immich with higher ceilings (average 100, burst 1000) to
# accommodate photo uploads.
resource "kubernetes_manifest" "middleware_immich_rate_limit" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "immich-rate-limit"
    }
    spec = {
      rateLimit = { average = 100, burst = 1000 }
    }
  }
}
|
||||
|
||||
# Blanks the Accept-Encoding request header so backends reply uncompressed.
# Paired with the rewrite-body plugin (rybbit analytics), which fails on some
# gzip responses ("flate: corrupt input before offset 5").
resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
  depends_on = [helm_release.traefik]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"
    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "strip-accept-encoding"
    }
    spec = {
      headers = {
        customRequestHeaders = {
          "Accept-Encoding" = ""
        }
      }
    }
  }
}
|
||||
172
stacks/platform/modules/uptime-kuma/main.tf
Normal file
172
stacks/platform/modules/uptime-kuma/main.tf
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
# Name of the TLS secret passed to the tls_secret and ingress modules.
variable "tls_secret_name" {}

# Value of the `tier` label on the namespace and deployment.
variable "tier" {
  type = string
}
|
||||
|
||||
# Namespace for Uptime Kuma, labelled with the configured tier.
# (An istio-injection label was previously considered and is not applied.)
resource "kubernetes_namespace" "uptime-kuma" {
  metadata {
    name   = "uptime-kuma"
    labels = { tier = var.tier }
  }
}
|
||||
|
||||
# Invokes the shared setup_tls_secret module for the uptime-kuma namespace.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.uptime-kuma.metadata[0].name
}
|
||||
|
||||
# Single-replica Uptime Kuma deployment; /app/data is an NFS mount.
resource "kubernetes_deployment" "uptime-kuma" {
  metadata {
    name      = "uptime-kuma"
    namespace = kubernetes_namespace.uptime-kuma.metadata[0].name

    annotations = {
      "reloader.stakater.com/search" = "true"
    }
    labels = {
      app  = "uptime-kuma"
      tier = var.tier
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "uptime-kuma"
      }
    }

    strategy {
      type = "Recreate"
    }

    template {
      metadata {
        labels = {
          app = "uptime-kuma"
        }
        annotations = {
          "diun.enable"       = "true"
          "diun.include_tags" = "latest"
        }
      }

      spec {
        container {
          name  = "uptime-kuma"
          image = "louislam/uptime-kuma:2"

          port {
            container_port = 3001
          }

          volume_mount {
            mount_path = "/app/data"
            name       = "data"
          }
        }

        volume {
          name = "data"

          nfs {
            path   = "/mnt/main/uptime-kuma"
            server = "10.0.10.15"
          }
        }
      }
    }
  }
}
|
||||
# Service mapping port 80 to the container's port 3001.
resource "kubernetes_service" "uptime-kuma" {
  metadata {
    name      = "uptime-kuma"
    namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
    labels = {
      "app" = "uptime-kuma"
    }
  }

  spec {
    selector = {
      app = "uptime-kuma"
    }

    port {
      target_port = "3001"
      port        = "80"
    }
  }
}
|
||||
# Ingress named "uptime" for the Uptime Kuma service, with gethomepage.dev
# widget annotations and a rybbit site id.
module "ingress" {
  source          = "../../../../modules/kubernetes/ingress_factory"
  namespace       = kubernetes_namespace.uptime-kuma.metadata[0].name
  name            = "uptime"
  tls_secret_name = var.tls_secret_name
  rybbit_site_id  = "8fef77b1f7fe"

  # Same value as the former "uptime-kuma" literal; the reference lets
  # Terraform order the ingress after the Service it routes to.
  service_name = kubernetes_service.uptime-kuma.metadata[0].name

  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/description"  = "Uptime monitor"
    "gethomepage.dev/icon"         = "uptime-kuma.png"
    "gethomepage.dev/name"         = "Uptime Kuma"
    "gethomepage.dev/widget.type"  = "uptimekuma"
    "gethomepage.dev/widget.url"   = "https://uptime.viktorbarzin.me"
    "gethomepage.dev/widget.slug"  = "cluster-internal"
    "gethomepage.dev/pod-selector" = ""
  }
}
|
||||
|
||||
# CronJob for daily SQLite backups. Disabled: no longer needed because we now use MySQL.
|
||||
# resource "kubernetes_cron_job_v1" "sqlite-backup" {
|
||||
# metadata {
|
||||
# name = "backup"
|
||||
# namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
|
||||
# }
|
||||
# spec {
|
||||
# concurrency_policy = "Replace"
|
||||
# failed_jobs_history_limit = 5
|
||||
# schedule = "0 0 * * *"
|
||||
# # schedule = "* * * * *"
|
||||
# starting_deadline_seconds = 10
|
||||
# successful_jobs_history_limit = 3
|
||||
# job_template {
|
||||
# metadata {}
|
||||
# spec {
|
||||
# active_deadline_seconds = 600 # should finish in 10 minutes
|
||||
# backoff_limit = 3
|
||||
# ttl_seconds_after_finished = 10
|
||||
# template {
|
||||
# metadata {}
|
||||
# spec {
|
||||
# container {
|
||||
# name = "backup"
|
||||
# image = "alpine/sqlite:latest"
|
||||
# command = ["/bin/sh", "-c", <<-EOT
|
||||
# set -e
|
||||
# export now=$(date +"%Y_%m_%d_%H_%M")
|
||||
# echo "Backing up SQLite database to /app/data/backup/backup_$now.sqlite"
|
||||
# sqlite3 /app/data/kuma.db ".backup /app/data/backup/backup_$now.sqlite"
|
||||
# echo "Backup completed. Deleting old backups..."
|
||||
|
||||
# # Rotate - delete last log file
|
||||
# cd /app/data/backup
|
||||
# find . -name "*.sqlite" -type f -mtime +7 -delete # 7 day retention of backups
|
||||
# echo "Old backups deleted."
|
||||
# EOT
|
||||
# ]
|
||||
# volume_mount {
|
||||
# name = "data"
|
||||
# mount_path = "/app/data"
|
||||
# }
|
||||
# }
|
||||
# volume {
|
||||
# name = "data"
|
||||
# nfs {
|
||||
# server = "10.0.10.15"
|
||||
# path = "/mnt/main/uptime-kuma"
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
134
stacks/platform/modules/vaultwarden/main.tf
Normal file
134
stacks/platform/modules/vaultwarden/main.tf
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
# Name of the TLS secret passed to the tls_secret and ingress modules.
variable "tls_secret_name" {}

# Value of the `tier` label on the namespace and deployment.
variable "tier" {
  type = string
}

# Injected into the container as the SMTP_PASSWORD environment variable.
variable "smtp_password" {}
|
||||
|
||||
# Namespace for Vaultwarden; the istio-injection label is set to "disabled".
resource "kubernetes_namespace" "vaultwarden" {
  metadata {
    name = "vaultwarden"

    labels = {
      tier              = var.tier
      "istio-injection" = "disabled"
    }
  }
}
|
||||
|
||||
# Invokes the shared setup_tls_secret module for the vaultwarden namespace.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.vaultwarden.metadata[0].name
}
|
||||
|
||||
# Single-replica Vaultwarden server; /data is an NFS mount and outgoing mail
# is configured through SMTP_* environment variables.
resource "kubernetes_deployment" "vaultwarden" {
  metadata {
    name      = "vaultwarden"
    namespace = kubernetes_namespace.vaultwarden.metadata[0].name

    annotations = {
      "reloader.stakater.com/search" = "true"
    }
    labels = {
      app  = "vaultwarden"
      tier = var.tier
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "vaultwarden"
      }
    }

    template {
      metadata {
        labels = {
          "app" = "vaultwarden"
        }
        annotations = {
          "diun.enable"       = "true"
          "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
        }
      }

      spec {
        container {
          name  = "vaultwarden"
          image = "vaultwarden/server:1.35.2"

          env {
            name  = "DOMAIN"
            value = "https://vaultwarden.viktorbarzin.me"
          }
          # ADMIN_TOKEN is intentionally not set.
          env {
            name  = "SMTP_HOST"
            value = "mail.viktorbarzin.me"
          }
          env {
            name  = "SMTP_FROM"
            value = "vaultwarden@viktorbarzin.me"
          }
          env {
            name  = "SMTP_PORT"
            value = "587"
          }
          env {
            name  = "SMTP_SECURITY"
            value = "starttls"
          }
          env {
            name  = "SMTP_USERNAME"
            value = "vaultwarden@viktorbarzin.me"
          }
          # NOTE(review): the password is placed in the pod spec as a plain
          # env value rather than a Secret reference - confirm this is intended.
          env {
            name  = "SMTP_PASSWORD"
            value = var.smtp_password
          }

          port {
            container_port = 80
          }

          volume_mount {
            mount_path = "/data"
            name       = "data"
          }
        }

        volume {
          name = "data"

          nfs {
            server = "10.0.10.15"
            path   = "/mnt/main/vaultwarden"
          }
        }
      }
    }
  }
}
|
||||
|
||||
# Service exposing Vaultwarden's HTTP port 80.
resource "kubernetes_service" "vaultwarden" {
  metadata {
    name      = "vaultwarden"
    namespace = kubernetes_namespace.vaultwarden.metadata[0].name
    labels = {
      "app" = "vaultwarden"
    }
  }

  spec {
    selector = {
      app = "vaultwarden"
    }

    port {
      name     = "http"
      protocol = "TCP"
      port     = "80"
    }
  }
}
|
||||
|
||||
# Ingress named "vaultwarden"; service name, host and port are left to the
# ingress_factory module's defaults.
module "ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "vaultwarden"
  namespace       = kubernetes_namespace.vaultwarden.metadata[0].name
  tls_secret_name = var.tls_secret_name
  rybbit_site_id  = "b8fc85e18683"
}
|
||||
4
stacks/platform/modules/wireguard/extra/clients.conf
Normal file
4
stacks/platform/modules/wireguard/extra/clients.conf
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
[Peer]
|
||||
# friendly_name = anca
|
||||
PublicKey = fr4DB6FHhxYyzrtnoNbhdT8Fqwvsz7QkhTnZpSQmBCY=
|
||||
AllowedIPs = 10.3.3.13/32
|
||||
1
stacks/platform/modules/wireguard/extra/last_ip.txt
Normal file
1
stacks/platform/modules/wireguard/extra/last_ip.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
# DO NOT MANUALLY EDIT THIS LINE. Last IP: 10.3.3.15/24
|
||||
227
stacks/platform/modules/wireguard/main.tf
Normal file
227
stacks/platform/modules/wireguard/main.tf
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
# Name of the TLS secret passed to the tls_secret module.
variable "tls_secret_name" {}

# Value of the `tier` label on the namespace and deployment.
variable "tier" {
  type = string
}

# Base wg0.conf contents; extra/clients.conf is appended to it.
variable "wg_0_conf" {}

# Stored as "setup-firewall.sh" in the wg0-conf ConfigMap.
variable "firewall_sh" {}

# WireGuard private key, stored as "wg0.key" in the wg0-key Secret.
# NOTE(review): not marked `sensitive` - confirm whether it should be.
variable "wg_0_key" {}
|
||||
|
||||
# Invokes the shared setup_tls_secret module for the wireguard namespace.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.wireguard.metadata[0].name
}
|
||||
|
||||
# Namespace for the WireGuard server, labelled with the configured tier.
resource "kubernetes_namespace" "wireguard" {
  metadata {
    name   = "wireguard"
    labels = { tier = var.tier }
  }
}
|
||||
# ConfigMap holding the firewall script and the assembled wg0.conf.
# Annotated with reloader.stakater.com/match = "true".
resource "kubernetes_config_map" "wg_0_conf" {
  metadata {
    name      = "wg0-conf"
    namespace = kubernetes_namespace.wireguard.metadata[0].name

    annotations = {
      "reloader.stakater.com/match" = "true"
    }
    labels = {
      app = "wireguard"
    }
  }

  data = {
    # wg0.conf = caller-supplied base config + peers from extra/clients.conf.
    "wg0.conf"          = format("%s%s", var.wg_0_conf, file("${path.module}/extra/clients.conf"))
    "setup-firewall.sh" = var.firewall_sh
  }
}
|
||||
|
||||
# Secret holding the WireGuard private key and a wg-ui JSON config.
resource "kubernetes_secret" "wg_0_key" {
  type = "generic"

  metadata {
    name      = "wg0-key"
    namespace = kubernetes_namespace.wireguard.metadata[0].name

    annotations = {
      "reloader.stakater.com/match" = "true"
    }
  }

  data = {
    "wg0.key" = var.wg_0_key
    # If the private key changes, the hard-coded public key below must be
    # updated manually.
    "wg-ui-config" = format("{\"PrivateKey\": \"%s\",\"PublicKey\": \"%s\",\"Users\": {}}", var.wg_0_key, "3OeDa6Z3Z6vPVxn/WKJujYL7DoDYPPpI5W+2glUYLHU=")
  }
}
|
||||
|
||||
|
||||
# WireGuard server pod: an init container enables IPv4 forwarding, the main
# container brings wg0 up/down through lifecycle hooks, and a sidecar exports
# Prometheus metrics on port 9586.
resource "kubernetes_deployment" "wireguard" {
  metadata {
    name      = "wireguard"
    namespace = kubernetes_namespace.wireguard.metadata[0].name
    labels = {
      app  = "wireguard"
      tier = var.tier
    }
    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    replicas = 1

    strategy {
      rolling_update {
        max_surge       = "2"
        max_unavailable = "0"
      }
    }

    selector {
      match_labels = {
        app = "wireguard"
      }
    }

    template {
      metadata {
        labels = {
          app = "wireguard"
        }
        annotations = {
          "prometheus.io/scrape" = "true"
          "prometheus.io/port"   = "9586"
        }
      }

      spec {
        # Privileged one-shot that writes 1 to /proc/sys/net/ipv4/ip_forward.
        init_container {
          name    = "sysctl-setup"
          image   = "busybox"
          command = ["/bin/sh", "-c", "echo 1 > /proc/sys/net/ipv4/ip_forward"]

          security_context {
            privileged = true
          }
        }

        container {
          name              = "wireguard"
          image             = "sclevine/wg:latest"
          image_pull_policy = "IfNotPresent"

          # The main process only keeps the container alive; the interface is
          # managed by the post_start / pre_stop hooks.
          command = ["tail", "-f", "/dev/null"]

          lifecycle {
            post_start {
              exec {
                command = ["wg-quick", "up", "wg0"]
              }
            }
            pre_stop {
              exec {
                command = ["wg-quick", "down", "wg0"]
              }
            }
          }

          port {
            container_port = 51820
            protocol       = "UDP"
          }

          volume_mount {
            name       = "wg0-key"
            mount_path = "/etc/wireguard/wg0.key"
            sub_path   = "wg0.key"
          }
          volume_mount {
            name       = "wg0-conf"
            mount_path = "/etc/wireguard/wg0.conf"
            sub_path   = "wg0.conf"
          }
          volume_mount {
            name       = "wg0-conf"
            mount_path = "/etc/wireguard/setup-firewall.sh"
            sub_path   = "setup-firewall.sh"
          }

          security_context {
            capabilities {
              add = ["NET_ADMIN", "SYS_MODULE"]
            }
          }
        }

        # Metrics sidecar; reads wg0.conf (passed via -n) from the ConfigMap.
        container {
          name              = "prometheus-exporter"
          image             = "mindflavor/prometheus-wireguard-exporter"
          image_pull_policy = "IfNotPresent"
          command           = ["prometheus_wireguard_exporter", "-a", "true", "-v", "true", "-n", "/etc/wireguard/wg0.conf"]

          volume_mount {
            name       = "wg0-conf"
            mount_path = "/etc/wireguard/wg0.conf"
            sub_path   = "wg0.conf"
          }

          security_context {
            capabilities {
              add = ["NET_ADMIN"]
            }
          }

          port {
            container_port = 9586
            protocol       = "TCP"
          }
        }

        # The Secret and ConfigMap are referenced through their resources
        # rather than by literal name. The resolved names are unchanged
        # ("wg0-key", "wg0-conf"), but Terraform now creates both objects
        # before the deployment that mounts them.
        volume {
          name = "wg0-key"

          secret {
            secret_name = kubernetes_secret.wg_0_key.metadata[0].name
          }
        }

        volume {
          name = "wg0-conf"

          config_map {
            name = kubernetes_config_map.wg_0_conf.metadata[0].name
          }
        }
      }
    }
  }
}
|
||||
|
||||
# LoadBalancer service exposing WireGuard on UDP/51820. The MetalLB
# allow-shared-ip annotation is set to "shared".
resource "kubernetes_service" "wireguard" {
  metadata {
    name      = "wireguard"
    namespace = kubernetes_namespace.wireguard.metadata[0].name

    labels = {
      "app" = "wireguard"
    }
    annotations = {
      "metallb.universe.tf/allow-shared-ip" = "shared"
    }
  }

  spec {
    type                    = "LoadBalancer"
    external_traffic_policy = "Cluster"

    selector = {
      app = "wireguard"
    }

    port {
      protocol = "UDP"
      port     = "51820"
    }
  }
}
|
||||
|
||||
|
||||
# Service for the metrics sidecar: port 9102 forwards to container port 9586
# on the wireguard pods.
resource "kubernetes_service" "wireguard_exporter" {
  metadata {
    name      = "wireguard-exporter"
    namespace = kubernetes_namespace.wireguard.metadata[0].name
    labels = {
      "app" = "wireguard-exporter"
    }
  }

  spec {
    selector = {
      app = "wireguard"
    }

    port {
      target_port = "9586"
      port        = "9102"
    }
  }
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue