Fix live logs: replace monkey-patch with direct log injection
The previous approach monkey-patched task.update_state on the Celery Task instance, but the assignment did not take effect (Celery's Task singleton may prevent instance-method shadowing). Additionally, Celery's worker setup left the celery.task logger at WARNING, which silenced all INFO-level log capture.

Fix:
- Replace the wrapper with an _update_task_state() helper that directly injects logs from a module-level _active_log_buffer into every meta dict.
- Attach TaskLogHandler to BOTH the celery.task and uvicorn.error loggers.
- Force both loggers to INFO level during task execution.
- Route every task.update_state call site through _update_task_state().
This commit is contained in:
parent
b4837e1603
commit
7e8f1f0339
1 changed file with 68 additions and 57 deletions
|
|
@ -34,6 +34,10 @@ if not celery_logger.handlers:
|
||||||
SCRAPE_LOCK_NAME = "scrape_listings"
|
SCRAPE_LOCK_NAME = "scrape_listings"
|
||||||
LOG_BUFFER_MAX_LINES = 200
|
LOG_BUFFER_MAX_LINES = 200
|
||||||
|
|
||||||
|
# Module-level log buffer — active only during task execution.
|
||||||
|
# The TaskLogHandler appends here; _update_task_state reads from here.
|
||||||
|
_active_log_buffer: deque[str] | None = None
|
||||||
|
|
||||||
|
|
||||||
class TaskLogHandler(logging.Handler):
|
class TaskLogHandler(logging.Handler):
|
||||||
"""Captures log records into a deque for inclusion in task state updates."""
|
"""Captures log records into a deque for inclusion in task state updates."""
|
||||||
|
|
@ -49,6 +53,13 @@ class TaskLogHandler(logging.Handler):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _update_task_state(task: Task, state: str, meta: dict[str, Any]) -> None:
|
||||||
|
"""Call task.update_state with logs injected from the active log buffer."""
|
||||||
|
if _active_log_buffer is not None:
|
||||||
|
meta["logs"] = list(_active_log_buffer)
|
||||||
|
task.update_state(state=state, meta=meta)
|
||||||
|
|
||||||
|
|
||||||
@app.task(bind=True, pydantic=True)
|
@app.task(bind=True, pydantic=True)
|
||||||
def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
|
def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
|
||||||
with redis_lock(SCRAPE_LOCK_NAME) as acquired:
|
with redis_lock(SCRAPE_LOCK_NAME) as acquired:
|
||||||
|
|
@ -86,39 +97,44 @@ async def dump_listings_full(
|
||||||
*, task: Task, parameters: QueryParameters
|
*, task: Task, parameters: QueryParameters
|
||||||
) -> list[Listing]:
|
) -> list[Listing]:
|
||||||
"""Fetches all listings, images as well as detects floorplans"""
|
"""Fetches all listings, images as well as detects floorplans"""
|
||||||
# Set up log capture: a ring buffer handler that we inject into every
|
global _active_log_buffer
|
||||||
# task.update_state() call so the frontend can display live logs.
|
|
||||||
|
# Set up log capture into a module-level buffer so _update_task_state
|
||||||
|
# can inject logs into every state update.
|
||||||
log_buffer: deque[str] = deque(maxlen=LOG_BUFFER_MAX_LINES)
|
log_buffer: deque[str] = deque(maxlen=LOG_BUFFER_MAX_LINES)
|
||||||
log_handler = TaskLogHandler(log_buffer)
|
log_handler = TaskLogHandler(log_buffer)
|
||||||
log_handler.setFormatter(
|
log_handler.setFormatter(
|
||||||
logging.Formatter("%(asctime)s %(message)s", datefmt="%H:%M:%S")
|
logging.Formatter("%(asctime)s %(message)s", datefmt="%H:%M:%S")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Attach handler to both loggers used in the codebase, and ensure
|
||||||
|
# they accept INFO-level messages (Celery's worker setup may leave
|
||||||
|
# the celery.task logger at WARNING).
|
||||||
|
_prev_celery_level = celery_logger.level
|
||||||
|
_prev_logger_level = logger.level
|
||||||
celery_logger.addHandler(log_handler)
|
celery_logger.addHandler(log_handler)
|
||||||
|
logger.addHandler(log_handler)
|
||||||
|
if celery_logger.level == logging.NOTSET or celery_logger.level > logging.INFO:
|
||||||
|
celery_logger.setLevel(logging.INFO)
|
||||||
|
if logger.level == logging.NOTSET or logger.level > logging.INFO:
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
# Wrap task.update_state so every call automatically includes logs
|
_active_log_buffer = log_buffer
|
||||||
_original_update_state = task.update_state
|
|
||||||
|
|
||||||
def _update_state_with_logs(
|
|
||||||
state: str | None = None, meta: dict[str, Any] | None = None, **kwargs: Any
|
|
||||||
) -> None:
|
|
||||||
if meta is None:
|
|
||||||
meta = {}
|
|
||||||
meta["logs"] = list(log_buffer)
|
|
||||||
_original_update_state(state=state, meta=meta, **kwargs)
|
|
||||||
|
|
||||||
task.update_state = _update_state_with_logs # type: ignore[assignment]
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return await _dump_listings_full_inner(task=task, parameters=parameters)
|
return await _dump_listings_full_inner(task=task, parameters=parameters)
|
||||||
finally:
|
finally:
|
||||||
|
_active_log_buffer = None
|
||||||
celery_logger.removeHandler(log_handler)
|
celery_logger.removeHandler(log_handler)
|
||||||
task.update_state = _original_update_state # type: ignore[assignment]
|
logger.removeHandler(log_handler)
|
||||||
|
celery_logger.setLevel(_prev_celery_level)
|
||||||
|
logger.setLevel(_prev_logger_level)
|
||||||
|
|
||||||
|
|
||||||
async def _dump_listings_full_inner(
|
async def _dump_listings_full_inner(
|
||||||
*, task: Task, parameters: QueryParameters
|
*, task: Task, parameters: QueryParameters
|
||||||
) -> list[Listing]:
|
) -> list[Listing]:
|
||||||
"""Inner implementation — called with log-capturing update_state wrapper."""
|
"""Inner implementation with log capture active."""
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
celery_logger.info("=" * 60)
|
celery_logger.info("=" * 60)
|
||||||
celery_logger.info("PHASE 1: Initializing listing fetch")
|
celery_logger.info("PHASE 1: Initializing listing fetch")
|
||||||
|
|
@ -126,7 +142,7 @@ async def _dump_listings_full_inner(
|
||||||
|
|
||||||
repository = ListingRepository(engine)
|
repository = ListingRepository(engine)
|
||||||
|
|
||||||
task.update_state(state="Identifying missing listings", meta={"phase": "splitting", "progress": 0})
|
_update_task_state(task, "Identifying missing listings", {"phase": "splitting", "progress": 0})
|
||||||
celery_logger.info("Querying Rightmove API to identify new listings...")
|
celery_logger.info("Querying Rightmove API to identify new listings...")
|
||||||
ids_to_process = await get_ids_to_process(
|
ids_to_process = await get_ids_to_process(
|
||||||
parameters=parameters, repository=repository, task=task
|
parameters=parameters, repository=repository, task=task
|
||||||
|
|
@ -139,10 +155,10 @@ async def _dump_listings_full_inner(
|
||||||
elapsed = time.time() - start_time
|
elapsed = time.time() - start_time
|
||||||
celery_logger.info(f"No new listings found. Completed in {elapsed:.1f}s")
|
celery_logger.info(f"No new listings found. Completed in {elapsed:.1f}s")
|
||||||
invalidate_cache()
|
invalidate_cache()
|
||||||
task.update_state(
|
_update_task_state(task, "No new listings found", {
|
||||||
state="No new listings found",
|
"phase": "completed", "progress": 1, "processed": 0, "total": 0,
|
||||||
meta={"phase": "completed", "progress": 1, "processed": 0, "total": 0, "message": "All listings are up to date"},
|
"message": "All listings are up to date",
|
||||||
)
|
})
|
||||||
return []
|
return []
|
||||||
|
|
||||||
celery_logger.info("=" * 60)
|
celery_logger.info("=" * 60)
|
||||||
|
|
@ -165,16 +181,11 @@ async def _dump_listings_full_inner(
|
||||||
invalidate_cache()
|
invalidate_cache()
|
||||||
|
|
||||||
# Send final state so the frontend has rich data even after task completes
|
# Send final state so the frontend has rich data even after task completes
|
||||||
task.update_state(
|
_update_task_state(task, "Completed", {
|
||||||
state="Completed",
|
"phase": "completed", "progress": 1,
|
||||||
meta={
|
"processed": len(result), "total": len(ids_to_process),
|
||||||
"phase": "completed",
|
"message": f"Processed {len(result)} listings in {elapsed:.0f}s",
|
||||||
"progress": 1,
|
})
|
||||||
"processed": len(result),
|
|
||||||
"total": len(ids_to_process),
|
|
||||||
"message": f"Processed {len(result)} listings in {elapsed:.0f}s",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -232,9 +243,10 @@ async def dump_listings_and_monitor(
|
||||||
)
|
)
|
||||||
last_progress = progress_ratio
|
last_progress = progress_ratio
|
||||||
|
|
||||||
task.update_state(
|
_update_task_state(
|
||||||
state=f"Processing: {progress_ratio * 100:.0f}% ({progress}/{len(missing_ids)})",
|
task,
|
||||||
meta={
|
f"Processing: {progress_ratio * 100:.0f}% ({progress}/{len(missing_ids)})",
|
||||||
|
{
|
||||||
"phase": "processing",
|
"phase": "processing",
|
||||||
"progress": progress_ratio,
|
"progress": progress_ratio,
|
||||||
"processed": progress,
|
"processed": progress,
|
||||||
|
|
@ -315,7 +327,7 @@ async def get_ids_to_process(
|
||||||
def on_progress(phase: str, message: str, **kwargs: Any) -> None:
|
def on_progress(phase: str, message: str, **kwargs: Any) -> None:
|
||||||
meta: dict[str, Any] = {"phase": phase, "message": message}
|
meta: dict[str, Any] = {"phase": phase, "message": message}
|
||||||
meta.update(kwargs)
|
meta.update(kwargs)
|
||||||
task.update_state(state=message, meta=meta)
|
_update_task_state(task, message, meta)
|
||||||
celery_logger.info(f"[{phase}] {message}")
|
celery_logger.info(f"[{phase}] {message}")
|
||||||
|
|
||||||
celery_logger.info("Starting query splitting and probing...")
|
celery_logger.info("Starting query splitting and probing...")
|
||||||
|
|
@ -323,10 +335,9 @@ async def get_ids_to_process(
|
||||||
try:
|
try:
|
||||||
async with create_session(config) as session:
|
async with create_session(config) as session:
|
||||||
# Phase 1 & 2: Split and probe queries
|
# Phase 1 & 2: Split and probe queries
|
||||||
task.update_state(
|
_update_task_state(task, "Analyzing query and splitting by price bands...", {
|
||||||
state="Analyzing query and splitting by price bands...",
|
"phase": "splitting", "progress": 0,
|
||||||
meta={"phase": "splitting", "progress": 0},
|
})
|
||||||
)
|
|
||||||
subqueries = await splitter.split(parameters, session, on_progress)
|
subqueries = await splitter.split(parameters, session, on_progress)
|
||||||
|
|
||||||
total_estimated = splitter.calculate_total_estimated_results(subqueries)
|
total_estimated = splitter.calculate_total_estimated_results(subqueries)
|
||||||
|
|
@ -340,9 +351,10 @@ async def get_ids_to_process(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Phase 3: Fetch all pages for each subquery
|
# Phase 3: Fetch all pages for each subquery
|
||||||
task.update_state(
|
_update_task_state(
|
||||||
state=f"Fetching listings from {len(subqueries)} subqueries...",
|
task,
|
||||||
meta={
|
f"Fetching listings from {len(subqueries)} subqueries...",
|
||||||
|
{
|
||||||
"phase": "fetching",
|
"phase": "fetching",
|
||||||
"subqueries_completed": 0,
|
"subqueries_completed": 0,
|
||||||
"subqueries_total": len(subqueries),
|
"subqueries_total": len(subqueries),
|
||||||
|
|
@ -368,9 +380,10 @@ async def get_ids_to_process(
|
||||||
estimated = sq.estimated_results or 0
|
estimated = sq.estimated_results or 0
|
||||||
if estimated == 0:
|
if estimated == 0:
|
||||||
completed_subqueries += 1
|
completed_subqueries += 1
|
||||||
task.update_state(
|
_update_task_state(
|
||||||
state=f"Fetching: {completed_subqueries}/{len(subqueries)} subqueries",
|
task,
|
||||||
meta={
|
f"Fetching: {completed_subqueries}/{len(subqueries)} subqueries",
|
||||||
|
{
|
||||||
"phase": "fetching",
|
"phase": "fetching",
|
||||||
"subqueries_completed": completed_subqueries,
|
"subqueries_completed": completed_subqueries,
|
||||||
"subqueries_total": len(subqueries),
|
"subqueries_total": len(subqueries),
|
||||||
|
|
@ -436,9 +449,10 @@ async def get_ids_to_process(
|
||||||
break
|
break
|
||||||
|
|
||||||
completed_subqueries += 1
|
completed_subqueries += 1
|
||||||
task.update_state(
|
_update_task_state(
|
||||||
state=f"Fetching: {completed_subqueries}/{len(subqueries)} subqueries",
|
task,
|
||||||
meta={
|
f"Fetching: {completed_subqueries}/{len(subqueries)} subqueries",
|
||||||
|
{
|
||||||
"phase": "fetching",
|
"phase": "fetching",
|
||||||
"subqueries_completed": completed_subqueries,
|
"subqueries_completed": completed_subqueries,
|
||||||
"subqueries_total": len(subqueries),
|
"subqueries_total": len(subqueries),
|
||||||
|
|
@ -499,14 +513,11 @@ async def get_ids_to_process(
|
||||||
f"{len(new_ids)} new to process"
|
f"{len(new_ids)} new to process"
|
||||||
)
|
)
|
||||||
|
|
||||||
task.update_state(
|
_update_task_state(task, f"Found {len(new_ids)} new listings to process", {
|
||||||
state=f"Found {len(new_ids)} new listings to process",
|
"phase": "filtering",
|
||||||
meta={
|
"total_found": len(identifiers),
|
||||||
"phase": "filtering",
|
"existing_in_db": len(all_listing_ids),
|
||||||
"total_found": len(identifiers),
|
"new_listings": len(new_ids),
|
||||||
"existing_in_db": len(all_listing_ids),
|
})
|
||||||
"new_listings": len(new_ids),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
return new_ids
|
return new_ids
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue