diff --git a/modules/kubernetes/immich/main.tf b/modules/kubernetes/immich/main.tf
index aca51fe6..aa80ea99 100644
--- a/modules/kubernetes/immich/main.tf
+++ b/modules/kubernetes/immich/main.tf
@@ -372,6 +372,10 @@ resource "kubernetes_deployment" "immich-machine-learning" {
             protocol = "TCP"
             name = "immich-ml"
           }
+          env {
+            name = "MACHINE_LEARNING_MODEL_TTL"
+            value = "0" # NOTE(review): presumably disables idle model unloading - confirm against the Immich docs
+          }
           env {
             name = "TRANSFORMERS_CACHE"
             value = "/cache"
diff --git a/modules/kubernetes/monitoring/prometheus_chart_values.tpl b/modules/kubernetes/monitoring/prometheus_chart_values.tpl
index 075a5d2e..0a85d08d 100644
--- a/modules/kubernetes/monitoring/prometheus_chart_values.tpl
+++ b/modules/kubernetes/monitoring/prometheus_chart_values.tpl
@@ -199,13 +199,6 @@ serverFiles:
               severity: page
             annotations:
               summary: No iDRAC amperage reading. Can signal that prometheus is not scraping
-          - alert: HighRAMUsage
-            expr: clamp_min((1 - (node_memory_MemAvailable_bytes{instance="pve-node-r730"} / node_memory_MemTotal_bytes{instance="pve-node-r730"})) * 100, 0) > 90
-            for: 30m
-            labels:
-              severity: page
-            annotations:
-              summary: "High memory usage: {{ $value }}. Risk of OOM-ing."
           - alert: HighSystemLoad
             expr: scalar(node_load1{instance="pve-node-r730"}) * 100 / count(count(node_cpu_seconds_total{instance="pve-node-r730"}) by (cpu)) > 50
             for: 30m
@@ -310,7 +303,7 @@ serverFiles:
             annotations:
               summary: "High CPU usage on {{ $labels.node }} - {{ $value }}"
           - alert: NodeLowFreeMemory
-            expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) or on() vector(1)) > 0.9
+            expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) or on() vector(1)) > 0.95
             for: 10m
             labels:
               severity: page
diff --git a/modules/kubernetes/nextcloud/main.tf b/modules/kubernetes/nextcloud/main.tf
old mode 100644
new mode 100755
index f86319bb..fc8c854b
--- a/modules/kubernetes/nextcloud/main.tf
+++ b/modules/kubernetes/nextcloud/main.tf
@@ -182,3 +182,155 @@ module "whiteboard_ingress" {
     "nginx.ingress.kubernetes.io/proxy-send-timeout" : "6000s",
   }
 }
+
+# Backup and restore helper scripts; the nextcloud-backup CronJob mounts them at /scripts.
+resource "kubernetes_config_map" "backup-script" {
+  metadata {
+    name = "nextcloud-backup-script"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+
+  data = {
+    "backup.sh" = <<-EOF
+      #!/bin/bash
+      set -e
+
+      BACKUP_DIR="/backup"
+      DATA_DIR="/nextcloud-data"
+      DATE=$(date +%Y%m%d_%H%M%S)
+      BACKUP_PATH="$BACKUP_DIR/$DATE"
+
+      echo "Starting Nextcloud backup at $(date)"
+
+      # Note: Maintenance mode is skipped because occ is not available in the NFS mount.
+      # For a proper backup with maintenance mode, exec into the nextcloud pod:
+      # kubectl exec -n nextcloud deployment/nextcloud -- php occ maintenance:mode --on
+
+      # Create backup directory
+      mkdir -p "$BACKUP_PATH"
+
+      # Backup everything (config, data, custom_apps, themes, etc.)
+      echo "Backing up Nextcloud installation..."
+      rsync -a "$DATA_DIR/" "$BACKUP_PATH/"
+
+      # Keep only last 7 backups
+      echo "Cleaning old backups..."
+      cd "$BACKUP_DIR"
+      ls -dt */ | tail -n +8 | xargs -r rm -rf
+
+      echo "Backup completed at $(date)"
+      echo "Backup stored at: $BACKUP_PATH"
+    EOF
+
+    "restore.sh" = <<-EOF
+      #!/bin/bash
+      # Restore script - run manually when needed
+      # Usage: ./restore.sh BACKUP_TIMESTAMP
+      # Example: ./restore.sh 20250117_030000
+      #
+      # Before restoring, enable maintenance mode:
+      # kubectl exec -n nextcloud deployment/nextcloud -- php occ maintenance:mode --on
+      # After restoring, disable it:
+      # kubectl exec -n nextcloud deployment/nextcloud -- php occ maintenance:mode --off
+
+      set -e
+
+      if [ -z "$1" ]; then
+        echo "Usage: $0 BACKUP_TIMESTAMP"
+        echo "Available backups:"
+        ls -1 /backup/
+        exit 1
+      fi
+
+      BACKUP_PATH="/backup/$1"
+      DATA_DIR="/nextcloud-data"
+
+      if [ ! -d "$BACKUP_PATH" ]; then
+        echo "Backup not found: $BACKUP_PATH"
+        exit 1
+      fi
+
+      echo "Restoring from $BACKUP_PATH"
+
+      # Restore everything
+      echo "Restoring Nextcloud installation..."
+      rsync -a "$BACKUP_PATH/" "$DATA_DIR/"
+
+      echo "Restore completed!"
+      echo "Remember to run: kubectl exec -n nextcloud deployment/nextcloud -- php occ maintenance:mode --off"
+    EOF
+  }
+}
+
+# Scheduled job that rsyncs the Nextcloud NFS export into a timestamped directory on the backup export.
+resource "kubernetes_cron_job_v1" "nextcloud-backup" {
+  metadata {
+    name = "nextcloud-backup"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+
+  spec {
+    schedule = "0 3 * * 0" # Sunday at 3 AM
+    successful_jobs_history_limit = 3
+    failed_jobs_history_limit = 3
+    concurrency_policy = "Forbid"
+
+    job_template {
+      metadata {}
+      spec {
+        template {
+          metadata {}
+          spec {
+            restart_policy = "OnFailure"
+
+            container {
+              name = "backup"
+              image = "alpine:3.21" # pinned tag so scheduled runs do not silently pick up a new base image
+
+              command = ["/bin/sh", "-c", "apk add --no-cache rsync bash && /scripts/backup.sh"] # installs rsync and bash, then runs the backup script
+
+              volume_mount {
+                name = "nextcloud-data"
+                mount_path = "/nextcloud-data"
+              }
+
+              volume_mount {
+                name = "backup"
+                mount_path = "/backup"
+              }
+
+              volume_mount {
+                name = "scripts"
+                mount_path = "/scripts"
+              }
+            }
+
+            volume {
+              name = "nextcloud-data"
+              nfs {
+                server = "10.0.10.15"
+                path = "/mnt/main/nextcloud"
+              }
+            }
+
+            volume {
+              name = "backup"
+              nfs {
+                server = "10.0.10.15"
+                path = "/mnt/main/nextcloud-backup"
+              }
+            }
+
+            volume {
+              name = "scripts"
+              config_map {
+                name = kubernetes_config_map.backup-script.metadata[0].name
+                default_mode = "0755" # scripts must be executable to be invoked directly
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/terraform.tfstate b/terraform.tfstate
index 24946bc1..562765a9 100644
Binary files a/terraform.tfstate and b/terraform.tfstate differ