state(dbaas): update encrypted state
This commit is contained in:
parent
67d1ce453c
commit
21bb3036af
34 changed files with 381 additions and 186 deletions
44
stacks/platform/.terraform.lock.hcl
generated
44
stacks/platform/.terraform.lock.hcl
generated
|
|
@ -1,30 +1,6 @@
|
|||
# This file is maintained automatically by "terraform init".
|
||||
# Manual edits may be lost in future updates.
|
||||
|
||||
provider "registry.terraform.io/cloudflare/cloudflare" {
|
||||
version = "4.52.5"
|
||||
constraints = "~> 4.0"
|
||||
hashes = [
|
||||
"h1:+rfzF+16ZcWZWnTyW/p1HHTzYbPKX8Zt2nIFtR/+f+E=",
|
||||
"h1:18bXaaOSq8MWKuMxo/4y7EB7/i7G90y5QsKHZRmkoDo=",
|
||||
"zh:1a3400cb38863b2585968d1876706bcfc67a148e1318a1d325c6c7704adc999b",
|
||||
"zh:4c5062cb9e9da1676f06ae92b8370186d98976cc4c7030d3cd76df12af54282a",
|
||||
"zh:52110f493b5f0587ef77a1cfd1a67001fd4c617b14c6502d732ab47352bdc2f7",
|
||||
"zh:5aa536f9eaeb43823aaf2aa80e7d39b25ef2b383405ed034aa16a28b446a9238",
|
||||
"zh:5cc39459a1c6be8a918f17054e4fbba573825ed5597dcada588fe99614d98a5b",
|
||||
"zh:629ae6a7ba298815131da826474d199312d21cec53a4d5ded4fa56a692e6f072",
|
||||
"zh:719cc7c75dc1d3eb30c22ff5102a017996d9788b948078c7e1c5b3446aeca661",
|
||||
"zh:8698635a3ca04383c1e93b21d6963346bdae54d27177a48e4b1435b7f731731c",
|
||||
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
|
||||
"zh:8a9993f1dcadf1dd6ca43b23348abe374605d29945a2fafc07fb3457644e6a54",
|
||||
"zh:b1b9a1e6bcc24d5863a664a411d2dc906373ae7a2399d2d65548ce7377057852",
|
||||
"zh:b270184cdeec277218e84b94cb136fead753da717f9b9dc378e51907f3f00bb0",
|
||||
"zh:dff2bc10071210181726ce270f954995fe42c696e61e2e8f874021fed02521e5",
|
||||
"zh:e8e87b40b6a87dc097b0fdc20d3f725cec0d82abc9cc3755c1f89f8f6e8b0036",
|
||||
"zh:ee964a6573d399a5dd22ce328fb38ca1207797a02248f14b2e4913ee390e7803",
|
||||
]
|
||||
}
|
||||
|
||||
provider "registry.terraform.io/hashicorp/helm" {
|
||||
version = "3.1.1"
|
||||
hashes = [
|
||||
|
|
@ -65,26 +41,6 @@ provider "registry.terraform.io/hashicorp/kubernetes" {
|
|||
]
|
||||
}
|
||||
|
||||
provider "registry.terraform.io/hashicorp/null" {
|
||||
version = "3.2.4"
|
||||
hashes = [
|
||||
"h1:L5V05xwp/Gto1leRryuesxjMfgZwjb7oool4WS1UEFQ=",
|
||||
"h1:hkf5w5B6q8e2A42ND2CjAvgvSN3puAosDmOJb3zCVQM=",
|
||||
"zh:59f6b52ab4ff35739647f9509ee6d93d7c032985d9f8c6237d1f8a59471bbbe2",
|
||||
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
|
||||
"zh:795c897119ff082133150121d39ff26cb5f89a730a2c8c26f3a9c1abf81a9c43",
|
||||
"zh:7b9c7b16f118fbc2b05a983817b8ce2f86df125857966ad356353baf4bff5c0a",
|
||||
"zh:85e33ab43e0e1726e5f97a874b8e24820b6565ff8076523cc2922ba671492991",
|
||||
"zh:9d32ac3619cfc93eb3c4f423492a8e0f79db05fec58e449dee9b2d5873d5f69f",
|
||||
"zh:9e15c3c9dd8e0d1e3731841d44c34571b6c97f5b95e8296a45318b94e5287a6e",
|
||||
"zh:b4c2ab35d1b7696c30b64bf2c0f3a62329107bd1a9121ce70683dec58af19615",
|
||||
"zh:c43723e8cc65bcdf5e0c92581dcbbdcbdcf18b8d2037406a5f2033b1e22de442",
|
||||
"zh:ceb5495d9c31bfb299d246ab333f08c7fb0d67a4f82681fbf47f2a21c3e11ab5",
|
||||
"zh:e171026b3659305c558d9804062762d168f50ba02b88b231d20ec99578a6233f",
|
||||
"zh:ed0fe2acdb61330b01841fa790be00ec6beaac91d41f311fb8254f74eb6a711f",
|
||||
]
|
||||
}
|
||||
|
||||
provider "registry.terraform.io/hashicorp/vault" {
|
||||
version = "4.8.0"
|
||||
constraints = "~> 4.0"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
|
||||
terraform {
|
||||
backend "local" {
|
||||
path = "/woodpecker/src/github.com/ViktorBarzin/infra/state/stacks/platform/terraform.tfstate"
|
||||
path = "/Users/viktorbarzin/code/infra/state/stacks/platform/terraform.tfstate"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,6 +49,9 @@ agent:
|
|||
- name: whitelist
|
||||
configMap:
|
||||
name: crowdsec-whitelist
|
||||
podAnnotations:
|
||||
dependency.kyverno.io/wait-for: "mysql.dbaas:3306"
|
||||
|
||||
lapi:
|
||||
resources:
|
||||
requests:
|
||||
|
|
|
|||
|
|
@ -101,8 +101,8 @@ resource "kubernetes_cron_job_v1" "backup-etcd" {
|
|||
container {
|
||||
name = "backup-etcd"
|
||||
image = "registry.k8s.io/etcd:3.5.21-0"
|
||||
command = ["etcdctl"]
|
||||
args = ["--endpoints=https://127.0.0.1:2379", "--cacert=/etc/kubernetes/pki/etcd/ca.crt", "--cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt", "--key=/etc/kubernetes/pki/etcd/healthcheck-client.key", "snapshot", "save", "/backup/etcd-snapshot-latest.db"]
|
||||
command = ["/bin/sh", "-c"]
|
||||
args = ["ETCDCTL_API=3 etcdctl --endpoints=https://127.0.0.1:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key snapshot save /backup/etcd-snapshot-$(date +%Y%m%d-%H%M%S).db"]
|
||||
env {
|
||||
name = "ETCDCTL_API"
|
||||
value = "3"
|
||||
|
|
|
|||
|
|
@ -160,6 +160,97 @@ resource "kubernetes_manifest" "policy_restrict_capabilities" {
|
|||
depends_on = [helm_release.kyverno]
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Image Pull Policy Governance
|
||||
# =============================================================================
|
||||
# Mutate imagePullPolicy to IfNotPresent for all containers with pinned tags
|
||||
# (non-:latest). This prevents pods from getting stuck in ImagePullBackOff
|
||||
# when the pull-through cache at 10.0.20.10 has transient failures.
|
||||
# For :latest or untagged images, set to Always so stale images don't persist.
|
||||
|
||||
resource "kubernetes_manifest" "policy_set_image_pull_policy" {
|
||||
manifest = {
|
||||
apiVersion = "kyverno.io/v1"
|
||||
kind = "ClusterPolicy"
|
||||
metadata = {
|
||||
name = "set-image-pull-policy"
|
||||
annotations = {
|
||||
"policies.kyverno.io/title" = "Set Image Pull Policy"
|
||||
"policies.kyverno.io/category" = "Best Practices"
|
||||
"policies.kyverno.io/severity" = "medium"
|
||||
"policies.kyverno.io/description" = "Set imagePullPolicy to IfNotPresent for pinned tags and Always for :latest to prevent ImagePullBackOff from transient cache failures."
|
||||
}
|
||||
}
|
||||
spec = {
|
||||
background = false
|
||||
rules = [
|
||||
{
|
||||
name = "set-ifnotpresent-for-pinned-tags"
|
||||
match = {
|
||||
any = [{
|
||||
resources = {
|
||||
kinds = ["Pod"]
|
||||
}
|
||||
}]
|
||||
}
|
||||
mutate = {
|
||||
foreach = [{
|
||||
list = "request.object.spec.containers"
|
||||
preconditions = {
|
||||
all = [{
|
||||
key = "{{ ends_with(element.image, ':latest') || !contains(element.image, ':') }}"
|
||||
operator = "Equals"
|
||||
value = false
|
||||
}]
|
||||
}
|
||||
patchStrategicMerge = {
|
||||
spec = {
|
||||
containers = [{
|
||||
name = "{{ element.name }}"
|
||||
imagePullPolicy = "IfNotPresent"
|
||||
}]
|
||||
}
|
||||
}
|
||||
}]
|
||||
}
|
||||
},
|
||||
{
|
||||
name = "set-always-for-latest"
|
||||
match = {
|
||||
any = [{
|
||||
resources = {
|
||||
kinds = ["Pod"]
|
||||
}
|
||||
}]
|
||||
}
|
||||
mutate = {
|
||||
foreach = [{
|
||||
list = "request.object.spec.containers"
|
||||
preconditions = {
|
||||
all = [{
|
||||
key = "{{ ends_with(element.image, ':latest') || !contains(element.image, ':') }}"
|
||||
operator = "Equals"
|
||||
value = true
|
||||
}]
|
||||
}
|
||||
patchStrategicMerge = {
|
||||
spec = {
|
||||
containers = [{
|
||||
name = "{{ element.name }}"
|
||||
imagePullPolicy = "Always"
|
||||
}]
|
||||
}
|
||||
}
|
||||
}]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
depends_on = [helm_release.kyverno]
|
||||
}
|
||||
|
||||
resource "kubernetes_manifest" "policy_require_trusted_registries" {
|
||||
manifest = {
|
||||
apiVersion = "kyverno.io/v1"
|
||||
|
|
|
|||
|
|
@ -500,6 +500,90 @@ serverFiles:
|
|||
severity: critical
|
||||
annotations:
|
||||
summary: "etcd backup CronJob has never completed successfully"
|
||||
- alert: PostgreSQLBackupStale
|
||||
expr: (time() - kube_cronjob_status_last_successful_time{cronjob="postgresql-backup", namespace="dbaas"}) > 129600
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "PostgreSQL backup is {{ $value | humanizeDuration }} old (threshold: 36h)"
|
||||
- alert: PostgreSQLBackupNeverSucceeded
|
||||
expr: kube_cronjob_status_last_successful_time{cronjob="postgresql-backup", namespace="dbaas"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "PostgreSQL backup CronJob has never completed successfully"
|
||||
- alert: MySQLBackupStale
|
||||
expr: (time() - kube_cronjob_status_last_successful_time{cronjob="mysql-backup", namespace="dbaas"}) > 129600
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "MySQL backup is {{ $value | humanizeDuration }} old (threshold: 36h)"
|
||||
- alert: MySQLBackupNeverSucceeded
|
||||
expr: kube_cronjob_status_last_successful_time{cronjob="mysql-backup", namespace="dbaas"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "MySQL backup CronJob has never completed successfully"
|
||||
- alert: VaultBackupStale
|
||||
expr: (time() - kube_cronjob_status_last_successful_time{cronjob="vault-raft-backup", namespace="vault"}) > 129600
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Vault backup is {{ $value | humanizeDuration }} old (threshold: 36h)"
|
||||
- alert: VaultBackupNeverSucceeded
|
||||
expr: kube_cronjob_status_last_successful_time{cronjob="vault-raft-backup", namespace="vault"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Vault backup CronJob has never completed successfully"
|
||||
- alert: VaultwardenBackupStale
|
||||
expr: (time() - kube_cronjob_status_last_successful_time{cronjob="vaultwarden-backup", namespace="vaultwarden"}) > 129600
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Vaultwarden backup is {{ $value | humanizeDuration }} old (threshold: 36h)"
|
||||
- alert: VaultwardenBackupNeverSucceeded
|
||||
expr: kube_cronjob_status_last_successful_time{cronjob="vaultwarden-backup", namespace="vaultwarden"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Vaultwarden backup CronJob has never completed successfully"
|
||||
- alert: RedisBackupStale
|
||||
expr: (time() - kube_cronjob_status_last_successful_time{cronjob="redis-backup", namespace="redis"}) > 14400
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Redis backup is {{ $value | humanizeDuration }} old (threshold: 4h)"
|
||||
- alert: RedisBackupNeverSucceeded
|
||||
expr: kube_cronjob_status_last_successful_time{cronjob="redis-backup", namespace="redis"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Redis backup CronJob has never completed successfully"
|
||||
- alert: CSIDriverCrashLoop
|
||||
expr: kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", namespace=~"nfs-csi|iscsi-csi"} > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "CSI driver CrashLoopBackOff in {{ $labels.namespace }}/{{ $labels.pod }} — storage-layer failure risk"
|
||||
- alert: BackupCronJobFailed
|
||||
expr: kube_job_status_failed{job_name=~".*backup.*"} > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Backup job failed: {{ $labels.namespace }}/{{ $labels.job_name }}"
|
||||
- alert: NewTailscaleClient
|
||||
expr: irate(headscale_machine_registrations_total{action="reauth"}[5m]) > 0
|
||||
for: 5m
|
||||
|
|
|
|||
|
|
@ -283,12 +283,15 @@ resource "kubernetes_cron_job_v1" "redis-backup" {
|
|||
image = "redis:7-alpine"
|
||||
command = ["/bin/sh", "-c", <<-EOT
|
||||
set -eux
|
||||
TIMESTAMP=$(date +%Y%m%d-%H%M)
|
||||
# Trigger a fresh RDB save on the master
|
||||
redis-cli -h redis.redis BGSAVE
|
||||
sleep 5
|
||||
# Copy the RDB via redis-cli --rdb
|
||||
redis-cli -h redis.redis --rdb /backup/dump.rdb
|
||||
echo "Backup complete: $(ls -lh /backup/dump.rdb)"
|
||||
redis-cli -h redis.redis --rdb /backup/redis-$TIMESTAMP.rdb
|
||||
# Rotate — 7-day retention
|
||||
find /backup -name 'redis-*.rdb' -type f -mtime +7 -delete
|
||||
echo "Backup complete: redis-$TIMESTAMP.rdb"
|
||||
EOT
|
||||
]
|
||||
volume_mount {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue