Refactor codebase following Clean Code principles and add 229 tests
- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher) - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens - Consolidate duplicate filter methods in listing_repository - Move hardcoded config to env vars with backward-compatible defaults - Simplify CLI decorator to auto-build QueryParameters - Add deprecation docstring to data_access.py - Test count: 158 → 387 (all passing)
This commit is contained in:
parent
7e05b3c971
commit
150342bb9e
48 changed files with 5029 additions and 990 deletions
|
|
@ -5,6 +5,16 @@ Manages background task operations using Celery.
|
|||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
import json
|
||||
import logging
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Task states that Celery defines itself. A status outside this set is
# treated as a custom state, and its name doubles as the human-readable
# status message.
_CELERY_STANDARD_STATES = frozenset(
    ("PENDING", "STARTED", "SUCCESS", "FAILURE", "REVOKED", "RETRY")
)
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -21,6 +31,68 @@ class TaskStatus:
|
|||
traceback: str | None # Full traceback if failed
|
||||
|
||||
|
||||
def _make_system_user(email: str) -> Any:
    """Build a placeholder ``User`` that carries only an email address.

    The returned object is *not* a real authenticated user: ``sub`` and
    ``name`` are empty strings. It exists solely so that RedisRepository can
    derive the per-user storage key from the email.

    Args:
        email: Email address to place on the placeholder user.

    Returns:
        A ``User`` with ``sub=""``, ``name=""`` and the given ``email``.
    """
    # Imported lazily: api.auth imports from api.app, which eventually imports
    # services, so a module-level import would create a circular dependency.
    from api.auth import User as _User

    placeholder = _User(sub="", email=email, name="")
    return placeholder
|
||||
|
||||
|
||||
def _extract_result(task_result: Any) -> tuple[Any, str | None]:
|
||||
"""Extract a serialisable result and an error string from a Celery AsyncResult.
|
||||
|
||||
Returns:
|
||||
(result, error) -- exactly one of the two will be non-None (or both None
|
||||
for tasks that haven't produced output yet).
|
||||
"""
|
||||
if task_result.failed():
|
||||
error = str(task_result.result) if task_result.result else None
|
||||
return None, error
|
||||
|
||||
try:
|
||||
result = json.loads(json.dumps(task_result.result))
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
result = str(task_result.result) if task_result.result else None
|
||||
return result, None
|
||||
|
||||
|
||||
def _extract_progress_info(task_result: Any) -> dict[str, Any]:
    """Collect progress metadata from a Celery AsyncResult.

    Args:
        task_result: Object exposing ``info`` and ``status``.

    Returns:
        A dict with exactly the keys ``progress``, ``processed``, ``total``
        and ``message``, each of which may be ``None``. The first three are
        copied from ``info`` when it is a non-empty dict. ``message`` is
        ``info["message"]``, else ``info["reason"]``, else the task status
        when that status is not one of the standard Celery states.
    """
    # Start with every field unset; the order here is the order of the result.
    fields: dict[str, Any] = dict.fromkeys(
        ("progress", "processed", "total", "message")
    )

    meta = task_result.info
    if meta and isinstance(meta, dict):
        for key in ("progress", "processed", "total"):
            fields[key] = meta.get(key)
        # Prefer 'message'; SKIPPED tasks report their text under 'reason'.
        fields["message"] = meta.get("message") or meta.get("reason")

    # A custom state (e.g. "Fetching listings") is itself the message when
    # the task metadata did not supply one.
    if not fields["message"] and task_result.status not in _CELERY_STANDARD_STATES:
        fields["message"] = task_result.status

    return fields
|
||||
|
||||
|
||||
def get_task_status(task_id: str) -> TaskStatus:
|
||||
"""Get the status of a background task.
|
||||
|
||||
|
|
@ -33,55 +105,24 @@ def get_task_status(task_id: str) -> TaskStatus:
|
|||
Returns:
|
||||
TaskStatus with current state
|
||||
"""
|
||||
# Lazy import: listing_tasks imports the Celery app which in turn
|
||||
# pulls in broker configuration; importing at module level would
|
||||
# create a circular dependency chain.
|
||||
from tasks.listing_tasks import dump_listings_task
|
||||
|
||||
task_result = dump_listings_task.AsyncResult(task_id)
|
||||
|
||||
# Try to serialize result
|
||||
result = None
|
||||
error = None
|
||||
if task_result.failed():
|
||||
# Extract error message from failed task
|
||||
error = str(task_result.result) if task_result.result else None
|
||||
else:
|
||||
try:
|
||||
result = json.loads(json.dumps(task_result.result))
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
result = str(task_result.result) if task_result.result else None
|
||||
|
||||
# Extract traceback if available
|
||||
result, error = _extract_result(task_result)
|
||||
task_traceback = task_result.traceback if task_result.failed() else None
|
||||
|
||||
# Extract progress, processed, total, and message from task meta
|
||||
progress = None
|
||||
processed = None
|
||||
total = None
|
||||
message = None
|
||||
|
||||
if task_result.info and isinstance(task_result.info, dict):
|
||||
progress = task_result.info.get("progress")
|
||||
processed = task_result.info.get("processed")
|
||||
total = task_result.info.get("total")
|
||||
# Use 'message' if available, fall back to 'reason' for SKIPPED tasks
|
||||
message = task_result.info.get("message") or task_result.info.get("reason")
|
||||
|
||||
# For custom states (like "Fetching listings"), use the state as message
|
||||
# if no message was provided in info
|
||||
if not message and task_result.status not in (
|
||||
"PENDING", "STARTED", "SUCCESS", "FAILURE", "REVOKED", "RETRY"
|
||||
):
|
||||
message = task_result.status
|
||||
progress_info = _extract_progress_info(task_result)
|
||||
|
||||
return TaskStatus(
|
||||
task_id=task_id,
|
||||
status=task_result.status,
|
||||
result=result,
|
||||
progress=progress,
|
||||
processed=processed,
|
||||
total=total,
|
||||
message=message,
|
||||
error=error,
|
||||
traceback=task_traceback,
|
||||
**progress_info,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -97,12 +138,12 @@ def get_user_tasks(user_email: str) -> list[str]:
|
|||
Returns:
|
||||
List of task IDs
|
||||
"""
|
||||
# Lazy import: RedisRepository depends on redis which may not be
|
||||
# available at import time in all contexts (CLI, tests).
|
||||
from redis_repository import RedisRepository
|
||||
from api.auth import User
|
||||
|
||||
redis_repo = RedisRepository.instance()
|
||||
# Create a minimal User object for the lookup
|
||||
user = User(sub="", email=user_email, name="")
|
||||
user = _make_system_user(user_email)
|
||||
return redis_repo.get_tasks_for_user(user)
|
||||
|
||||
|
||||
|
|
@ -116,11 +157,11 @@ def add_task_for_user(user_email: str, task_id: str) -> None:
|
|||
user_email: The user's email address
|
||||
task_id: The Celery task ID
|
||||
"""
|
||||
# Lazy import: see get_user_tasks for rationale.
|
||||
from redis_repository import RedisRepository
|
||||
from api.auth import User
|
||||
|
||||
redis_repo = RedisRepository.instance()
|
||||
user = User(sub="", email=user_email, name="")
|
||||
user = _make_system_user(user_email)
|
||||
redis_repo.add_task_for_user(user, task_id)
|
||||
|
||||
|
||||
|
|
@ -134,8 +175,10 @@ def cancel_task(task_id: str, user_email: str | None = None) -> bool:
|
|||
Returns:
|
||||
True if task was cancelled successfully
|
||||
"""
|
||||
# Lazy import: celery_app bootstraps the broker connection.
|
||||
from celery_app import app as celery_app
|
||||
|
||||
logger.info("Cancelling task %s (user=%s)", task_id, user_email)
|
||||
# Revoke the task in Celery
|
||||
celery_app.control.revoke(task_id, terminate=True)
|
||||
|
||||
|
|
@ -158,11 +201,11 @@ def remove_task_from_user(user_email: str, task_id: str) -> bool:
|
|||
Returns:
|
||||
True if task was removed, False if not found
|
||||
"""
|
||||
# Lazy import: see get_user_tasks for rationale.
|
||||
from redis_repository import RedisRepository
|
||||
from api.auth import User
|
||||
|
||||
redis_repo = RedisRepository.instance()
|
||||
user = User(sub="", email=user_email, name="")
|
||||
user = _make_system_user(user_email)
|
||||
return redis_repo.remove_task_for_user(user, task_id)
|
||||
|
||||
|
||||
|
|
@ -176,12 +219,14 @@ def clear_all_tasks(user_email: str, revoke: bool = True) -> int:
|
|||
Returns:
|
||||
Number of tasks cleared
|
||||
"""
|
||||
# Lazy imports: see get_user_tasks and cancel_task for rationale.
|
||||
from redis_repository import RedisRepository
|
||||
from celery_app import app as celery_app
|
||||
from api.auth import User
|
||||
|
||||
redis_repo = RedisRepository.instance()
|
||||
user = User(sub="", email=user_email, name="")
|
||||
user = _make_system_user(user_email)
|
||||
|
||||
logger.info("Clearing all tasks for user %s (revoke=%s)", user_email, revoke)
|
||||
|
||||
# Get tasks before clearing to revoke them
|
||||
if revoke:
|
||||
|
|
@ -189,7 +234,9 @@ def clear_all_tasks(user_email: str, revoke: bool = True) -> int:
|
|||
for task_id in tasks:
|
||||
try:
|
||||
celery_app.control.revoke(task_id, terminate=True)
|
||||
except Exception:
|
||||
pass # Best effort, continue clearing
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to revoke task %s: %s", task_id, e
|
||||
)
|
||||
|
||||
return redis_repo.clear_tasks_for_user(user)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue