paperless-ngx: add Bulgarian OCR (bul+eng) + raise data PVC ceiling to 30Gi
Preparing Paperless for Emo's document import from the NAS. His archive is Bulgarian (Cyrillic) + English, but OCR was English-only (tesseract had no 'bul' pack and PAPERLESS_OCR_LANGUAGE was unset/defaulted to eng), so scanned BG documents would OCR to garbage and be unsearchable. Add bul to the install list and set OCR_LANGUAGE=bul+eng. Also raise the data PVC autoresize ceiling from 5Gi to 30Gi: everything (originals + archive via PAPERLESS_MEDIA_ROOT=../data) lives on the single encrypted PVC, and the ~2.7GB in-scope import would blow past the 5Gi cap mid-ingest. The topolvm autoresizer grows the volume on demand up to the ceiling; 30Gi gives ample headroom. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
82a7b2585b
commit
7988a690ed
1 changed file with 15 additions and 1 deletion
|
|
@ -77,7 +77,7 @@ resource "kubernetes_persistent_volume_claim" "data_encrypted" {
|
||||||
annotations = {
|
annotations = {
|
||||||
"resize.topolvm.io/threshold" = "10%"
|
"resize.topolvm.io/threshold" = "10%"
|
||||||
"resize.topolvm.io/increase" = "100%"
|
"resize.topolvm.io/increase" = "100%"
|
||||||
"resize.topolvm.io/storage_limit" = "5Gi"
|
"resize.topolvm.io/storage_limit" = "30Gi"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
spec {
|
spec {
|
||||||
|
|
@ -186,6 +186,20 @@ resource "kubernetes_deployment" "paperless-ngx" {
|
||||||
name = "PAPERLESS_OCR_USER_ARGS"
|
name = "PAPERLESS_OCR_USER_ARGS"
|
||||||
value = "{\"invalidate_digital_signatures\": true}"
|
value = "{\"invalidate_digital_signatures\": true}"
|
||||||
}
|
}
|
||||||
|
# OCR language(s) used per document. bul+eng covers the Bulgarian
|
||||||
|
# (Cyrillic) + English document set being imported (e.g. Emo's
|
||||||
|
# archive). Multiple langs => tesseract tries all; join with "+", not a space.
|
||||||
|
env {
|
||||||
|
name = "PAPERLESS_OCR_LANGUAGE"
|
||||||
|
value = "bul+eng"
|
||||||
|
}
|
||||||
|
# Language data packages installed at container start (space-
|
||||||
|
# separated). The image ships eng (+deu/fra/ita/spa); bul must be
|
||||||
|
# apt-installed here so OCR_LANGUAGE=bul+eng resolves.
|
||||||
|
env {
|
||||||
|
name = "PAPERLESS_OCR_LANGUAGES"
|
||||||
|
value = "bul eng"
|
||||||
|
}
|
||||||
volume_mount {
|
volume_mount {
|
||||||
name = "data"
|
name = "data"
|
||||||
mount_path = "/usr/src/paperless/data"
|
mount_path = "/usr/src/paperless/data"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue