examples: RawPost + ExtractedExample + Summary Pydantic schemas
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
Viktor Barzin 2026-05-28 22:12:01 +00:00
parent 8f2a80f563
commit c9bdf537ac
3 changed files with 133 additions and 0 deletions

View file

@ -0,0 +1,21 @@
"""Reddit FIRE examples ingest + lookup.
Scrapes a curated set of FIRE subreddits, extracts structured fields
with a local LLM, and exposes per-country summaries to the simulator
and API. Informational overlay only — it does not drive scenario inputs.
"""
from fire_planner.examples.models import (
ExtractedExample,
FiStatus,
RawPost,
Summary,
SummaryStats,
)
# Public API of the package: the schema classes imported above from
# `fire_planner.examples.models`.
__all__ = [
"ExtractedExample",
"FiStatus",
"RawPost",
"Summary",
"SummaryStats",
]

View file

@ -0,0 +1,66 @@
"""Pydantic schemas for the Reddit examples pipeline.
`RawPost` — what PRAW gives us (title + body + metadata).
`ExtractedExample` — what the LLM returns (all nullable; confidence-gated).
`Summary` — per-country headline stats served from the API.
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from enum import StrEnum
from pydantic import BaseModel, ConfigDict, Field
class FiStatus(StrEnum):
ACCUMULATING = "accumulating"
COAST_FIRE = "coastFIRE"
BARISTA_FIRE = "baristaFIRE"
LEAN_FIRE = "leanFIRE"
FIRE = "FIRE"
FAT_FIRE = "fatFIRE"
UNKNOWN = "unknown"
class RawPost(BaseModel):
    """A single Reddit post fetched from PRAW (no LLM processing yet).

    The model is configured with ``frozen=True``, so attributes cannot be
    reassigned after construction. All six fields are required.
    """

    model_config = ConfigDict(frozen=True)

    reddit_id: str
    # Presumably the subreddit the post was scraped from — confirm against
    # the scraper.
    source_sub: str
    url: str
    title: str
    body: str
    # Stored as a calendar date only; any time-of-day is not kept.
    created_at: date
class ExtractedExample(BaseModel):
    """LLM output — all extracted fields nullable except confidence + model.

    Every extracted field defaults to ``None`` so a partial extraction still
    validates. `confidence` and `llm_model` have no default and must always
    be supplied.
    """

    country: str | None = None
    city: str | None = None
    # NOTE(review): the "_native" amounts are presumably expressed in the
    # currency named by `raw_currency` — confirm against the extractor.
    portfolio_native: Decimal | None = None
    annual_exp_native: Decimal | None = None
    raw_currency: str | None = None
    # Range-checked when present: 0..120 inclusive.
    age: int | None = Field(default=None, ge=0, le=120)
    # Range-checked when present: 1..20 inclusive.
    family_size: int | None = Field(default=None, ge=1, le=20)
    fi_status: FiStatus | None = None
    is_retired: bool | None = None
    # Required; must lie in the closed interval [0, 1].
    confidence: Decimal = Field(ge=Decimal("0"), le=Decimal("1"))
    # Required; presumably the identifier of the model that produced this
    # extraction — confirm against the caller.
    llm_model: str
class SummaryStats(BaseModel):
    """Three summary figures for one metric: median, p25 and p75.

    Each field is required but may be ``None`` (there is no default, so the
    key must be passed explicitly even when the value is missing).
    """

    median: Decimal | None
    # NOTE(review): p25 / p75 are presumably the 25th and 75th percentiles —
    # confirm against the code that computes them.
    p25: Decimal | None
    p75: Decimal | None
class Summary(BaseModel):
    """Per-country headline stats served from the API.

    All fields are required. The two `SummaryStats` members carry the
    median / p25 / p75 figures for portfolio size and annual expenses.
    """

    country: str
    # Presumably the number of examples behind the stats — confirm against
    # the aggregation code.
    count: int
    # NOTE(review): the "_gbp" suffix suggests amounts converted to GBP —
    # confirm where the conversion happens.
    portfolio_gbp: SummaryStats
    annual_exp_gbp: SummaryStats
    # Presumably URLs of a few source posts — confirm against the caller.
    sample_links: list[str]