examples: RawPost + ExtractedExample + Summary Pydantic schemas

2026-05-28 22:12:01 +00:00 · 2026-05-28 22:12:01 +00:00 · c9bdf537ac
commit c9bdf537ac
parent 8f2a80f563
3 changed files with 133 additions and 0 deletions
--- a/fire_planner/examples/__init__.py
+++ b/fire_planner/examples/__init__.py
@@ -0,0 +1,21 @@
+"""Reddit FIRE examples ingest + lookup.
+
+Scrapes a curated set of FIRE subreddits, extracts structured fields
+with a local LLM, and exposes per-country summaries to the simulator
+and API. Informational overlay only — does not drive scenario inputs.
+"""
+from fire_planner.examples.models import (
+    ExtractedExample,
+    FiStatus,
+    RawPost,
+    Summary,
+    SummaryStats,
+)
+
+__all__ = [  # public names, all re-exported from fire_planner.examples.models
+    "ExtractedExample",
+    "FiStatus",
+    "RawPost",
+    "Summary",
+    "SummaryStats",
+]
--- a/fire_planner/examples/models.py
+++ b/fire_planner/examples/models.py
@@ -0,0 +1,66 @@
+"""Pydantic schemas for the Reddit examples pipeline.
+
+`RawPost`         — what PRAW gives us (title + body + metadata).
+`ExtractedExample`— what the LLM returns (all nullable; confidence-gated).
+`Summary`         — per-country headline stats served from the API.
+"""
+from __future__ import annotations
+
+from datetime import date
+from decimal import Decimal
+from enum import StrEnum
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class FiStatus(StrEnum):  # StrEnum: every member is also a str equal to its value
+    ACCUMULATING = "accumulating"
+    COAST_FIRE = "coastFIRE"  # values are mixed-case; lookup by value is case-sensitive
+    BARISTA_FIRE = "baristaFIRE"
+    LEAN_FIRE = "leanFIRE"
+    FIRE = "FIRE"
+    FAT_FIRE = "fatFIRE"
+    UNKNOWN = "unknown"  # NOTE(review): presumably the fallback when no stage applies — confirm
+
+
+class RawPost(BaseModel):
+    """A single Reddit post fetched from PRAW (no LLM processing yet)."""
+
+    model_config = ConfigDict(frozen=True)  # frozen: assigning to a field afterwards raises
+
+    reddit_id: str  # all six fields are required (none declares a default)
+    source_sub: str  # subreddit name; the tests pass it without an "r/" prefix
+    url: str
+    title: str
+    body: str
+    created_at: date  # typed as a calendar date, not a datetime
+
+
+class ExtractedExample(BaseModel):
+    """LLM output — all extracted fields nullable except confidence + model."""
+
+    country: str | None = None  # every field down to is_retired defaults to None
+    city: str | None = None
+    portfolio_native: Decimal | None = None  # presumably in raw_currency units — TODO confirm
+    annual_exp_native: Decimal | None = None  # presumably in raw_currency units — TODO confirm
+    raw_currency: str | None = None  # plain str: no currency-code format is enforced here
+    age: int | None = Field(default=None, ge=0, le=120)  # if given, must be within 0..120
+    family_size: int | None = Field(default=None, ge=1, le=20)  # if given, must be within 1..20
+    fi_status: FiStatus | None = None
+    is_retired: bool | None = None
+    confidence: Decimal = Field(ge=Decimal("0"), le=Decimal("1"))  # required; inclusive [0, 1]
+    llm_model: str  # required; the tests pass a model name such as "qwen3-8b"
+
+
+class SummaryStats(BaseModel):  # NOTE(review): p25/p75 presumably 25th/75th percentiles — confirm
+    median: Decimal | None  # no default: must be passed explicitly, but None is accepted
+    p25: Decimal | None  # same: required key, nullable value
+    p75: Decimal | None  # same: required key, nullable value
+
+
+class Summary(BaseModel):  # per-country headline stats; all five fields are required
+    country: str
+    count: int  # plain int: no lower bound is enforced by the schema
+    portfolio_gbp: SummaryStats  # NOTE(review): presumably GBP-converted portfolio values — confirm
+    annual_exp_gbp: SummaryStats  # NOTE(review): presumably GBP-converted annual spend — confirm
+    sample_links: list[str]  # plain strings; not validated as URLs here
--- a/tests/test_examples_filters.py
+++ b/tests/test_examples_filters.py
@@ -0,0 +1,46 @@
+"""Tests for fire_planner.examples.models — Pydantic schemas."""
+from __future__ import annotations
+
+from datetime import date
+from decimal import Decimal
+
+import pytest
+from pydantic import ValidationError
+
+from fire_planner.examples import ExtractedExample, FiStatus, RawPost, SummaryStats
+
+
+def test_raw_post_minimal() -> None:  # RawPost builds when all six required fields are given
+    p = RawPost(
+        reddit_id="abc123",
+        source_sub="financialindependence",
+        url="https://reddit.com/r/financialindependence/abc123",
+        title="Hit FIRE at 38",
+        body="Net worth £1.2m, living in Lisbon, family of 3, retired last year.",
+        created_at=date(2026, 1, 1),
+    )
+    assert p.reddit_id == "abc123"  # only checks that the id is stored unchanged
+
+
+def test_extracted_example_confidence_bounds() -> None:  # exercises only the upper bound (le=1)
+    with pytest.raises(ValidationError):
+        ExtractedExample(
+            country="Portugal",
+            confidence=Decimal("1.5"),  # out of range
+            llm_model="qwen3-8b",
+        )
+
+
+def test_extracted_example_fi_status_enum() -> None:  # enum member round-trips through the model
+    ex = ExtractedExample(
+        country="Philippines",
+        fi_status=FiStatus.FIRE,
+        confidence=Decimal("0.8"),
+        llm_model="qwen3-8b",
+    )
+    assert ex.fi_status == "FIRE"  # holds because FiStatus is a StrEnum (member == its value)
+
+
+def test_summary_stats_optional_fields() -> None:  # all three stats may be None
+    s = SummaryStats(median=None, p25=None, p75=None)  # passed explicitly: fields have no defaults
+    assert s.median is None