infra/stacks/nextcloud/chart_values.yaml
Viktor Barzin ddd582a28c backup: stop offsite-copying regenerable data; shrink nextcloud backup; pin nextcloud image
The offsite Synology hit 97% — the Backup share grew +670G in a week, traced
to the 2026-05-26 change that began mirroring large regenerable services
offsite, plus an unbounded nextcloud.log bloating its backups to 87G.

- nfs-mirror: re-exclude ollama, prometheus-backup, audiblez, ebook2audiobook
  (regenerable; live-only on sdc). Keep *-backup DB dumps (real safety copies).
- offsite-sync Step 2: nfs-ssd leg is now immich-only; ollama/llamacpp on the
  SSD no longer ship offsite (re-pullable models).
- daily-backup: skip nextcloud/nextcloud-data-proxmox (orphaned pre-encryption
  PV, still backed up weekly).
- nextcloud: cap+rotate the log (log_rotate_size=10MB); the dedicated backup
  now excludes html/ (app code, from image), logs, and preview cache and keeps
  only the latest copy (pvc-data holds version history) → <5G (was 87G).
- nextcloud: pin image to 32.0.9 in chart_values. A 2026-05-26 Keel bump moved
  the live pod to 32.0.9 (data migrated to 32.0.9.2) but TF still defaulted to
  32.0.3; reconciling that drift this session rolled a 32.0.3 pod that
  CrashLooped on the downgrade. Pinning eliminates the drift.

Docs: backup-dr.md + infra CLAUDE.md updated (add nfs-mirror, new exclusions).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-01 15:15:26 +00:00

188 lines
5.3 KiB
YAML

# Pin the image to 32.0.9 (apache). On 2026-05-26 Keel bumped the live
# Deployment 32.0.3 → 32.0.9-apache and the DATA migrated to 32.0.9.2; Keel
# was then disabled but chart_values was never pinned, so it kept defaulting
# to the chart's appVersion (32.0.3). A 2026-06-01 `terragrunt apply`
# reconciled that drift, rolled a 32.0.3 pod, and Nextcloud refused to
# downgrade (data 32.0.9.2 > image 32.0.3.2) → CrashLoopBackOff. Pinning here
# keeps TF the source of truth and matches the on-disk data version.
image:
  flavor: apache
  # Quoted so the version stays a string and is never re-typed by a parser.
  tag: "32.0.9"
# Nextcloud application settings: public host, container environment,
# config.php / php.ini fragments, and extra volumes mounted into the pod.
nextcloud:
  host: nextcloud.viktorbarzin.me
  trustedDomains:
    - nextcloud.viktorbarzin.me
  # mail:
  #   enabled: true
  #   # the user we send email as
  #   fromAddress: nextcloud@viktorbarzin.me
  #   # the domain we send email from
  #   domain: viktorbarzin.me
  #   smtp:
  #     host: mail.viktorbarzin.me
  #     secure: starttls
  #     port: 587
  #     authtype: LOGIN
  #     name: nextcloud@viktorbarzin.me
  #     password:
  extraEnv:
    - name: TRUSTED_PROXIES
      value: "10.0.0.0/8"
    - name: PHP_MEMORY_LIMIT
      value: "512M"
    - name: PHP_UPLOAD_LIMIT
      value: "16G"
    # - name: mail_smtpdebug
    #   value: "true"
    # - name: loglevel
    #   value: "0"
  # Each key below is a file name; the literal block scalar is its content.
  # Block-scalar bodies must be indented deeper than their key, and YAML
  # comments cannot be placed inside them (they would become file content).
  configs:
    zzz-redis.config.php: |
      <?php
      // Redis via HAProxy master-only service. HAProxy (3 replicas, PDB
      // minAvailable=2) health-checks all v2 pods via `INFO replication` and
      // routes to the current role:master. Sentinel failover takes <30s, and
      // HAProxy detects the new master via its 1s tcp-check interval and
      // starts routing within ~3s of detection. Removed the old in-process
      // sentinel-query loop on 2026-04-19 after the Redis rework — see
      // beads code-v2b and infra/docs/architecture/databases.md.
      $CONFIG = array(
        'memcache.distributed' => '\\OC\\Memcache\\Redis',
        'memcache.locking' => '\\OC\\Memcache\\Redis',
        'redis' => array(
          'host' => 'redis-master.redis.svc.cluster.local',
          'port' => 6379,
          'password' => '',
          'timeout' => 1.5,
          'read_timeout' => 1.5,
        ),
      );
    performance.config.php: |
      <?php
      $CONFIG = array(
        'loglevel' => 2,
        // Cap + rotate nextcloud.log. Without this it grew unbounded to
        // 10GB+ and bloated every backup (2026-06-01 space incident).
        // At 10MB the log rotates to nextcloud.log.1 (1 kept) → ~20MB max.
        'log_rotate_size' => 10485760,
        'mail_smtpdebug' => false,
      );
    zzz-mysql.config.php: |
      <?php
      $CONFIG = array(
        'mysql.utf8mb4' => true,
      );
  phpConfigs:
    zzz-custom.ini: |
      max_execution_time = 300
      max_input_time = 300
      default_socket_timeout = 300
      opcache.enable_file_override = 1
      apc.shm_size = 128M
  extraVolumes:
    - name: apache-tuning
      configMap:
        name: nextcloud-apache-tuning
    - name: db-password-sync
      configMap:
        name: nextcloud-db-password-sync
        # Must stay an unquoted integer (Kubernetes expects an int here);
        # 0755 is read as octal by YAML 1.1 parsers. Presumably set so the
        # mounted hook script is executable — confirm.
        defaultMode: 0755
    - name: pve-nfs
      persistentVolumeClaim:
        claimName: nextcloud-pve-nfs-root
    - name: pve-nfs-ssd
      persistentVolumeClaim:
        claimName: nextcloud-pve-nfs-ssd-root
  extraVolumeMounts:
    # subPath mounts only the single mpm_prefork.conf file over the stock one.
    - name: apache-tuning
      mountPath: /etc/apache2/mods-available/mpm_prefork.conf
      subPath: mpm_prefork.conf
    - name: db-password-sync
      mountPath: /docker-entrypoint-hooks.d/before-starting
    - name: pve-nfs
      mountPath: /mnt/pve-nfs
    - name: pve-nfs-ssd
      mountPath: /mnt/pve-nfs-ssd
# Internal database is off; the database is configured under
# `externalDatabase` (type mysql).
internalDatabase:
  enabled: false
# Chart-level external Redis wiring is off; the Redis connection is set by
# hand in the `zzz-redis.config.php` fragment under `nextcloud.configs`.
externalRedis:
  enabled: false
# External MySQL database. `${mysql_host}` is a template placeholder that is
# substituted before this file is parsed as YAML.
externalDatabase:
  enabled: true
  type: mysql
  host: ${mysql_host}
  user: nextcloud
  database: nextcloud
  # Credentials are read from keys of the `nextcloud-db-creds` Secret.
  # NOTE(review): the upstream chart appears to gate this on
  # `existingSecret.enabled` — confirm it is set elsewhere or not required.
  existingSecret:
    secretName: nextcloud-db-creds
    usernameKey: db-username
    passwordKey: DB_PASSWORD
# Data volume: reuse the existing claim `nextcloud-data-encrypted`.
persistence:
  enabled: true
  existingClaim: nextcloud-data-encrypted
  accessMode: ReadWriteOnce
  size: 20Gi
# Startup probe: after a 30s delay, up to 60 failed checks at 10s intervals
# (about 10 minutes) are tolerated before startup is considered failed.
startupProbe:
  enabled: true
  initialDelaySeconds: 30
  periodSeconds: 10
  timeoutSeconds: 30
  failureThreshold: 60
  successThreshold: 1
# Liveness probe: checked every 60s; 10 consecutive failures (about
# 10 minutes) are needed before the container is treated as dead.
livenessProbe:
  enabled: true
  initialDelaySeconds: 30
  periodSeconds: 60
  timeoutSeconds: 30
  failureThreshold: 10
  successThreshold: 1
# Readiness probe: checked every 60s; 5 consecutive failures (about
# 5 minutes) mark the pod not ready.
readinessProbe:
  enabled: true
  initialDelaySeconds: 30
  periodSeconds: 60
  timeoutSeconds: 30
  failureThreshold: 5
  successThreshold: 1
# Pod annotations read by cluster add-ons (diun, Kyverno, Stakater Reloader).
# Annotation values must be strings, hence the quoting.
podAnnotations:
  diun.enable: "true"
  # Tag filter regex. The dots are unescaped, so each matches any character.
  diun.include_tags: "^[0-9]+(?:.[0-9]+)?(?:.[0-9]+)?.*"
  # host:port pairs for the MySQL and Redis dependencies to wait for.
  dependency.kyverno.io/wait-for: "mysql.dbaas:3306,redis-master.redis:6379"
  # Names the Secret the Reloader annotation refers to.
  secret.reloader.stakater.com/reload: "nextcloud-db-creds"
# OnRootMismatch: kubelet only recursively chowns the volume to fsGroup if the
# root dir's GID doesn't already match. Without this, every pod restart triggers
# a ~30-min recursive chown of /srv/nfs and /srv/nfs-ssd (600k+ files) — the
# default policy "Always" recurses every time. Locks fsGroup=33 explicitly so
# this block fully replaces the chart's default {fsGroup: 33}.
# NOTE(review): restored as a top-level key because it follows other top-level
# keys — confirm this is the path the chart version in use reads.
securityContext:
  fsGroup: 33
  fsGroupChangePolicy: OnRootMismatch
# Collabora is not deployed.
collabora:
  enabled: false  # Using onlyoffice instead
# Container resources: small guaranteed request, 8Gi memory ceiling, no CPU
# limit.
# NOTE(review): restored as a top-level key (main Nextcloud container) rather
# than nested under the disabled `collabora` block — confirm.
resources:
  limits:
    memory: 8Gi
  requests:
    cpu: 50m
    memory: 256Mi
# Cron settings with their own resources; the memory request equals the limit
# (384Mi).
cronjob:
  enabled: true
  resources:
    limits:
      memory: 384Mi
    requests:
      cpu: 25m
      memory: 384Mi