add hourly SQLite integrity check for vaultwarden with Prometheus alerting
- New CronJob runs `PRAGMA integrity_check` every hour
- Pushes the `vaultwarden_sqlite_integrity_ok` metric to the Prometheus Pushgateway
- `VaultwardenSQLiteCorrupt` alert fires immediately on corruption (critical)
- `VaultwardenIntegrityCheckStale` alert fires if the check hasn't run in 2h (warning)
- Prevents running for days on a corrupted DB unnoticed
This commit is contained in:
parent
3b89a7d7e4
commit
311ff5dd9e
3 changed files with 182 additions and 0 deletions
|
|
@ -636,6 +636,20 @@ serverFiles:
|
|||
severity: critical
|
||||
annotations:
|
||||
summary: "Vaultwarden has no available replicas — password manager down"
|
||||
# Fires as soon as the integrity-check job reports a failed check
# (vaultwarden_sqlite_integrity_ok pushed as 0). No hold period: `for: 0m`.
- alert: VaultwardenSQLiteCorrupt
  expr: vaultwarden_sqlite_integrity_ok == 0
  for: 0m
  labels: { severity: critical }
  annotations:
    summary: "Vaultwarden SQLite database failed integrity check — data corruption detected"
|
||||
# Fires when the last reported integrity check is older than 2 hours (7200 s)
# and stays that way for 15 minutes. $value is the age of the last check in seconds.
- alert: VaultwardenIntegrityCheckStale
  expr: (time() - vaultwarden_sqlite_integrity_check_timestamp) > 7200
  for: 15m
  labels:
    severity: warning
  annotations:
    summary: "Vaultwarden integrity check hasn't run in {{ $value | humanizeDuration }} (expected hourly)"
# The staleness expression above evaluates to an empty vector when the
# timestamp series does not exist at all (job never pushed successfully, or the
# Pushgateway lost its state), so it can never fire in that situation.
# This companion rule covers the "no data" case explicitly.
- alert: VaultwardenIntegrityCheckMissing
  expr: absent(vaultwarden_sqlite_integrity_check_timestamp)
  for: 2h
  labels:
    severity: warning
  annotations:
    summary: "Vaultwarden integrity check metric is missing — the check has never reported or the Pushgateway lost its state"
|
||||
- alert: RedisBackupStale
|
||||
expr: (time() - kube_cronjob_status_last_successful_time{cronjob="redis-backup", namespace="redis"}) > 691200
|
||||
for: 30m
|
||||
|
|
|
|||
|
|
@ -306,3 +306,87 @@ resource "kubernetes_cron_job_v1" "vaultwarden-backup" {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Integrity Check — Hourly SQLite PRAGMA check, pushes metric to Prometheus
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# CronJob that runs `PRAGMA integrity_check` against the Vaultwarden SQLite
# database at minute 30 of every hour and pushes two gauges to the Prometheus
# Pushgateway:
#   vaultwarden_sqlite_integrity_ok              1 = check passed, 0 = check failed
#   vaultwarden_sqlite_integrity_check_timestamp Unix time of the check
resource "kubernetes_cron_job_v1" "vaultwarden-integrity-check" {
  metadata {
    name      = "vaultwarden-integrity-check"
    namespace = kubernetes_namespace.vaultwarden.metadata[0].name
  }
  spec {
    concurrency_policy            = "Replace"
    failed_jobs_history_limit     = 5
    schedule                      = "30 * * * *"
    starting_deadline_seconds     = 10
    successful_jobs_history_limit = 3
    job_template {
      metadata {}
      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 10
        template {
          metadata {}
          spec {
            # Require scheduling onto the same node as a pod labelled
            # app=vaultwarden.
            # NOTE(review): presumably needed because the data PVC is
            # node-local / ReadWriteOnce — confirm against the PVC definition.
            affinity {
              pod_affinity {
                required_during_scheduling_ignored_during_execution {
                  label_selector {
                    match_labels = {
                      app = "vaultwarden"
                    }
                  }
                  topology_key = "kubernetes.io/hostname"
                }
              }
            }
            container {
              name  = "integrity-check"
              image = "docker.io/library/alpine"
              # Shell variables are written with a single `$`. In a Terraform
              # heredoc only `$${` is an escape sequence; a bare `$$name` is
              # passed through unchanged and the shell would then expand `$$`
              # to its own PID, breaking both the result check and the push URL.
              command = ["/bin/sh", "-c", <<-EOT
                set -euo pipefail
                apk add --no-cache sqlite curl >/dev/null 2>&1
                PUSHGW="http://prometheus-prometheus-pushgateway.monitoring.svc.cluster.local:9091"

                # push_result <0|1>: publish the check outcome and the current time.
                push_result() {
                  printf 'vaultwarden_sqlite_integrity_ok %s\nvaultwarden_sqlite_integrity_check_timestamp %s\n' "$1" "$(date +%s)" |
                    curl -s --data-binary @- "$PUSHGW/metrics/job/vaultwarden-integrity/instance/vaultwarden"
                }

                # `|| true` keeps `set -e` from aborting when sqlite3 itself exits
                # non-zero (for example on an unreadable or malformed file); that
                # case must still reach the failure branch and be reported.
                result=$(sqlite3 /data/db.sqlite3 "PRAGMA integrity_check;" 2>&1) || true

                if printf '%s\n' "$result" | grep -qx 'ok'; then
                  echo "SQLite integrity check passed"
                  push_result 1
                else
                  echo "ERROR: SQLite integrity check FAILED: $result"
                  push_result 0
                  exit 1
                fi
              EOT
              ]
              volume_mount {
                name       = "data"
                mount_path = "/data"
                read_only  = true
              }
            }
            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.vaultwarden_data.metadata[0].name
              }
            }
            dns_config {
              option {
                name  = "ndots"
                value = "2"
              }
            }
          }
        }
      }
    }
  }
}
|
||||
|
|
|
|||
|
|
@ -306,3 +306,87 @@ resource "kubernetes_cron_job_v1" "vaultwarden-backup" {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Integrity Check — Hourly SQLite PRAGMA check, pushes metric to Prometheus
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# CronJob that runs `PRAGMA integrity_check` against the Vaultwarden SQLite
# database at minute 30 of every hour and pushes two gauges to the Prometheus
# Pushgateway:
#   vaultwarden_sqlite_integrity_ok              1 = check passed, 0 = check failed
#   vaultwarden_sqlite_integrity_check_timestamp Unix time of the check
resource "kubernetes_cron_job_v1" "vaultwarden-integrity-check" {
  metadata {
    name      = "vaultwarden-integrity-check"
    namespace = kubernetes_namespace.vaultwarden.metadata[0].name
  }
  spec {
    concurrency_policy            = "Replace"
    failed_jobs_history_limit     = 5
    schedule                      = "30 * * * *"
    starting_deadline_seconds     = 10
    successful_jobs_history_limit = 3
    job_template {
      metadata {}
      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 10
        template {
          metadata {}
          spec {
            # Require scheduling onto the same node as a pod labelled
            # app=vaultwarden.
            # NOTE(review): presumably needed because the data PVC is
            # node-local / ReadWriteOnce — confirm against the PVC definition.
            affinity {
              pod_affinity {
                required_during_scheduling_ignored_during_execution {
                  label_selector {
                    match_labels = {
                      app = "vaultwarden"
                    }
                  }
                  topology_key = "kubernetes.io/hostname"
                }
              }
            }
            container {
              name  = "integrity-check"
              image = "docker.io/library/alpine"
              # Shell variables are written with a single `$`. In a Terraform
              # heredoc only `$${` is an escape sequence; a bare `$$name` is
              # passed through unchanged and the shell would then expand `$$`
              # to its own PID, breaking both the result check and the push URL.
              command = ["/bin/sh", "-c", <<-EOT
                set -euo pipefail
                apk add --no-cache sqlite curl >/dev/null 2>&1
                PUSHGW="http://prometheus-prometheus-pushgateway.monitoring.svc.cluster.local:9091"

                # push_result <0|1>: publish the check outcome and the current time.
                push_result() {
                  printf 'vaultwarden_sqlite_integrity_ok %s\nvaultwarden_sqlite_integrity_check_timestamp %s\n' "$1" "$(date +%s)" |
                    curl -s --data-binary @- "$PUSHGW/metrics/job/vaultwarden-integrity/instance/vaultwarden"
                }

                # `|| true` keeps `set -e` from aborting when sqlite3 itself exits
                # non-zero (for example on an unreadable or malformed file); that
                # case must still reach the failure branch and be reported.
                result=$(sqlite3 /data/db.sqlite3 "PRAGMA integrity_check;" 2>&1) || true

                if printf '%s\n' "$result" | grep -qx 'ok'; then
                  echo "SQLite integrity check passed"
                  push_result 1
                else
                  echo "ERROR: SQLite integrity check FAILED: $result"
                  push_result 0
                  exit 1
                fi
              EOT
              ]
              volume_mount {
                name       = "data"
                mount_path = "/data"
                read_only  = true
              }
            }
            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.vaultwarden_data.metadata[0].name
              }
            }
            dns_config {
              option {
                name  = "ndots"
                value = "2"
              }
            }
          }
        }
      }
    }
  }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue