diff --git a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl index 7c5cd7e6..365a98a9 100755 --- a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl @@ -636,6 +636,20 @@ serverFiles: severity: critical annotations: summary: "Vaultwarden has no available replicas — password manager down" + - alert: VaultwardenSQLiteCorrupt + expr: vaultwarden_sqlite_integrity_ok == 0 + for: 0m + labels: + severity: critical + annotations: + summary: "Vaultwarden SQLite database failed integrity check — data corruption detected" + - alert: VaultwardenIntegrityCheckStale + expr: (time() - vaultwarden_sqlite_integrity_check_timestamp) > 7200 + for: 15m + labels: + severity: warning + annotations: + summary: "Vaultwarden integrity check hasn't run in {{ $value | humanizeDuration }} (expected hourly)" - alert: RedisBackupStale expr: (time() - kube_cronjob_status_last_successful_time{cronjob="redis-backup", namespace="redis"}) > 691200 for: 30m diff --git a/stacks/platform/modules/vaultwarden/main.tf b/stacks/platform/modules/vaultwarden/main.tf index 7de11e85..820d6cf0 100644 --- a/stacks/platform/modules/vaultwarden/main.tf +++ b/stacks/platform/modules/vaultwarden/main.tf @@ -306,3 +306,87 @@ resource "kubernetes_cron_job_v1" "vaultwarden-backup" { } } } + +# ----------------------------------------------------------------------------- +# Integrity Check — Hourly SQLite PRAGMA check, pushes metric to Prometheus +# ----------------------------------------------------------------------------- + +resource "kubernetes_cron_job_v1" "vaultwarden-integrity-check" { + metadata { + name = "vaultwarden-integrity-check" + namespace = kubernetes_namespace.vaultwarden.metadata[0].name + } + spec { + concurrency_policy = "Replace" + failed_jobs_history_limit = 5 + schedule = "30 * * * *" + starting_deadline_seconds = 10 + successful_jobs_history_limit = 3 + job_template { + metadata {} + spec { + backoff_limit = 1 + ttl_seconds_after_finished = 10 + template { + metadata {} + spec { + affinity { + pod_affinity { + required_during_scheduling_ignored_during_execution { + label_selector { + match_labels = { + app = "vaultwarden" + } + } + topology_key = "kubernetes.io/hostname" + } + } + } + container { + name = "integrity-check" + image = "docker.io/library/alpine" + command = ["/bin/sh", "-c", <<-EOT + set -euo pipefail + apk add --no-cache sqlite curl >/dev/null 2>&1 + PUSHGW="http://prometheus-prometheus-pushgateway.monitoring.svc.cluster.local:9091" + result=$(sqlite3 /data/db.sqlite3 "PRAGMA integrity_check;" 2>&1) + if echo "$$result" | grep -q "^ok$$"; then + echo "SQLite integrity check passed" + cat </dev/null 2>&1 + PUSHGW="http://prometheus-prometheus-pushgateway.monitoring.svc.cluster.local:9091" + result=$(sqlite3 /data/db.sqlite3 "PRAGMA integrity_check;" 2>&1) + if echo "$$result" | grep -q "^ok$$"; then + echo "SQLite integrity check passed" + cat <