"""Pydantic schemas for the Reddit examples pipeline. `RawPost` — what PRAW gives us (title + body + metadata). `ExtractedExample`— what the LLM returns (all nullable; confidence-gated). `Summary` — per-country headline stats served from the API. """ from __future__ import annotations from datetime import date from decimal import Decimal from enum import StrEnum from pydantic import BaseModel, ConfigDict, Field class FiStatus(StrEnum): ACCUMULATING = "accumulating" COAST_FIRE = "coastFIRE" BARISTA_FIRE = "baristaFIRE" LEAN_FIRE = "leanFIRE" FIRE = "FIRE" FAT_FIRE = "fatFIRE" UNKNOWN = "unknown" class RawPost(BaseModel): """A single Reddit post fetched from PRAW (no LLM processing yet).""" model_config = ConfigDict(frozen=True) reddit_id: str source_sub: str url: str title: str body: str created_at: date class ExtractedExample(BaseModel): """LLM output — all extracted fields nullable except confidence + model.""" country: str | None = None city: str | None = None portfolio_native: Decimal | None = None annual_exp_native: Decimal | None = None raw_currency: str | None = None age: int | None = Field(default=None, ge=0, le=120) family_size: int | None = Field(default=None, ge=1, le=20) fi_status: FiStatus | None = None is_retired: bool | None = None confidence: Decimal = Field(ge=Decimal("0"), le=Decimal("1")) llm_model: str class SummaryStats(BaseModel): median: Decimal | None p25: Decimal | None p75: Decimal | None class Summary(BaseModel): country: str count: int portfolio_gbp: SummaryStats annual_exp_gbp: SummaryStats sample_links: list[str]