From 59f2070e56922bb07204960982fc9ec512e255c9 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 22 Jun 2026 15:52:09 +0000 Subject: [PATCH] tripit: switch mail-ingest LLM_MODEL qwen3-8b -> qwen3vl-8b (qwen3-8b segfaults) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The qwen3-8b GGUF segfaults on load on the current llama-swap :cuda image ("common_init_from_params: failed to create context"; llama-swap returns 502), which broke ALL tripit mail ingest text extraction — booking emails AND forwarded reels (status=failed, "no place could be read"). The GGUF isn't corrupt (valid header, full size, worked for weeks) — it's a llama.cpp/image regression. Rather than pin the SHARED llama-swap image (cross-user blast radius), repoint the ingest-plans CronJob at qwen3vl-8b, an already-provisioned 8B model that loads fine and extracts flight numbers + places reliably. Restores the auto-path (reels resolve via the Nominatim geocoder; bookings parse again). The broken qwen3-8b GGUF is a separate, non-urgent llama-cpp cleanup. Co-Authored-By: Claude Opus 4.8 --- stacks/tripit/main.tf | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/stacks/tripit/main.tf b/stacks/tripit/main.tf index aa5fdde5..7dd3ff0d 100644 --- a/stacks/tripit/main.tf +++ b/stacks/tripit/main.tf @@ -571,11 +571,15 @@ locals { extra_env = { LLM_MODE = "llamacpp" LLM_ENDPOINT = "http://llama-swap.llama-cpp.svc.cluster.local:8080" - # Text body extraction uses the 8B text model (reliably emits flight_number); + # Text body extraction uses an 8B model (reliably emits flight_number); # boarding-pass image attachments use the 4B vision model. llama-swap loads # each on demand. Was qwen3vl-4b for both, which dropped flight numbers and - # duplicated schedule-change emails (2026-06-16). - LLM_MODEL = "qwen3-8b" + # duplicated schedule-change emails (2026-06-16). Switched qwen3-8b -> + # qwen3vl-8b (2026-06-22): the qwen3-8b GGUF SEGFAULTS on the current + # llama-swap :cuda image ("failed to create context"), which broke ALL mail + # ingest; qwen3vl-8b loads and extracts flight numbers + places reliably. + # (ADR-0033 adds a claude-agent-service fallback for the next llama outage.) + LLM_MODEL = "qwen3vl-8b" LLM_VISION_MODEL = "qwen3vl-4b" MAIL_INGEST_ENABLED = "true" # Reel→Wishlist ingest (tripit ADR-0031): geocode forwarded-reel venues at