From 27989cd9f1c7a7594f1f69f78369e909deaf0f6d Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Thu, 4 Jun 2026 21:52:58 +0000
Subject: [PATCH] fire-planner: bulk Reddit FIRE examples ingest + qwen3-8b
 model upgrade
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Enable the bulk ingest Job (run_examples_bulk_ingest=true) to populate
  the fire_example table from top/all + top/year across 12 FIRE
  subreddits. Job fire-planner-examples-bulk-202606042150 was still
  running at the time of this commit.
- Upgrade examples_llm_model from qwen3vl-4b to qwen3-8b. The GPU has
  ~10.7 GB free (immich-ml uses ~4 GB of 15 GB total), so the
  higher-quality model fits.
- Add LLM_CONCURRENCY=3 to bulk job container — claude-agent-service is
  now bounded-concurrency (MAX_CONCURRENCY=10), no longer single-flight.
  Strictly serial extraction (default 1) is no longer necessary.

TODO: once the Job completes, set run_examples_bulk_ingest=false and
re-apply to push the weekly CronJob model upgrade (qwen3vl-4b→qwen3-8b),
which did not land in this apply because Terraform timed out waiting
for the Job to complete.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 stacks/fire-planner/main.tf        | 4 ++++
 stacks/fire-planner/terragrunt.hcl | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/stacks/fire-planner/main.tf b/stacks/fire-planner/main.tf
index 5c6b934a..6af26754 100644
--- a/stacks/fire-planner/main.tf
+++ b/stacks/fire-planner/main.tf
@@ -806,6 +806,10 @@ resource "kubernetes_job_v1" "examples_bulk_ingest" {
             name  = "LLM_MODEL"
             value = var.examples_llm_model
           }
+          env {
+            name  = "LLM_CONCURRENCY"
+            value = "3"
+          }
         }
       }
     }
diff --git a/stacks/fire-planner/terragrunt.hcl b/stacks/fire-planner/terragrunt.hcl
index c1d2e468..15fa4d4f 100644
--- a/stacks/fire-planner/terragrunt.hcl
+++ b/stacks/fire-planner/terragrunt.hcl
@@ -25,4 +25,9 @@ dependency "dbaas" {
 inputs = {
   # fire-planner repo HEAD — bump on every deploy.
   image_tag = "latest"
+
+  # One-shot bulk ingest toggle: set to true, apply, monitor the Job, then reset to false and re-apply.
+  run_examples_bulk_ingest = true
+  # qwen3-8b (was qwen3vl-4b): fits in the ~10.7 GB of GPU memory free (immich-ml uses ~4 GB of 15 GB total).
+  examples_llm_model = "qwen3-8b"
 }