66 lines
1.7 KiB
Python
66 lines
1.7 KiB
Python
"""Pydantic schemas for the Reddit examples pipeline.
|
|
|
|
`RawPost` — what PRAW gives us (title + body + metadata).
`ExtractedExample` — what the LLM returns (all extracted fields nullable; confidence-gated).
`Summary` — per-country headline stats served from the API.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from datetime import date
|
|
from decimal import Decimal
|
|
from enum import StrEnum
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
|
|
class FiStatus(StrEnum):
|
|
ACCUMULATING = "accumulating"
|
|
COAST_FIRE = "coastFIRE"
|
|
BARISTA_FIRE = "baristaFIRE"
|
|
LEAN_FIRE = "leanFIRE"
|
|
FIRE = "FIRE"
|
|
FAT_FIRE = "fatFIRE"
|
|
UNKNOWN = "unknown"
|
|
|
|
|
|
class RawPost(BaseModel):
    """A single Reddit post fetched from PRAW (no LLM processing yet)."""

    # frozen=True makes instances immutable: assigning to a field after
    # construction raises a validation error.
    model_config = ConfigDict(frozen=True)

    # All fields are required; none has a default.
    reddit_id: str
    # NOTE(review): presumably the subreddit the post was fetched from —
    # confirm against the fetcher.
    source_sub: str
    url: str
    title: str
    body: str
    # Typed as `date`, so only the calendar day is kept (no time of day).
    created_at: date
|
|
|
|
|
|
class ExtractedExample(BaseModel):
    """LLM output — all extracted fields nullable except confidence + model."""

    # --- Optional extracted facts: each defaults to None when absent. ---
    country: str | None = None
    city: str | None = None
    # NOTE(review): the `_native` suffix together with `raw_currency` suggests
    # these amounts are in the post's own currency — confirm with the
    # extractor before relying on it.
    portfolio_native: Decimal | None = None
    annual_exp_native: Decimal | None = None
    raw_currency: str | None = None
    # Range-checked only when a value is present:
    # 0 <= age <= 120 and 1 <= family_size <= 20 (bounds inclusive).
    age: int | None = Field(default=None, ge=0, le=120)
    family_size: int | None = Field(default=None, ge=1, le=20)
    fi_status: FiStatus | None = None
    is_retired: bool | None = None

    # --- Required fields: no default, so validation fails if missing. ---
    # Constrained to the inclusive range [0, 1].
    confidence: Decimal = Field(ge=Decimal("0"), le=Decimal("1"))
    llm_model: str
|
|
|
|
|
|
class SummaryStats(BaseModel):
    # Three summary figures for a single metric.
    # Each field is required (there is no default) but accepts None, so
    # callers must pass all three explicitly even when a value is unavailable.
    # NOTE(review): p25 / p75 are presumably the 25th and 75th percentiles —
    # confirm with the code that computes them.
    # (Documented with comments rather than a class docstring so the generated
    # JSON schema description stays unchanged.)
    median: Decimal | None
    p25: Decimal | None
    p75: Decimal | None
|
|
|
|
|
|
class Summary(BaseModel):
    # Per-country headline stats served from the API (see the module
    # docstring). All fields are required.
    country: str
    # NOTE(review): presumably the number of examples behind the stats below —
    # confirm with the aggregation code.
    count: int
    # One SummaryStats (median / p25 / p75) per metric.
    # NOTE(review): the `_gbp` suffix suggests values converted to GBP —
    # confirm where the conversion happens.
    portfolio_gbp: SummaryStats
    annual_exp_gbp: SummaryStats
    sample_links: list[str]
|