Refactor codebase following Clean Code principles and add 229 tests

- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
This commit is contained in:
Viktor Barzin 2026-02-07 20:19:57 +00:00
parent 7e05b3c971
commit 150342bb9e
No known key found for this signature in database
GPG key ID: 0EB088298288D958
48 changed files with 5029 additions and 990 deletions

View file

@ -5,6 +5,16 @@ Manages background task operations using Celery.
from dataclasses import dataclass
from typing import Any
import json
import logging
logger = logging.getLogger(__name__)
# Standard Celery states; anything else is treated as a custom state
# whose name is used as the human-readable status message.
_CELERY_STANDARD_STATES = frozenset(
{"PENDING", "STARTED", "SUCCESS", "FAILURE", "REVOKED", "RETRY"}
)
@dataclass
@ -21,6 +31,68 @@ class TaskStatus:
traceback: str | None # Full traceback if failed
def _make_system_user(email: str) -> Any:
"""Create a minimal User object used only for Redis key generation.
These are *not* real authenticated users -- they exist solely so that
RedisRepository can derive the per-user storage key from the email.
"""
# Lazy import: api.auth imports from api.app which eventually imports
# services, so importing at module level would create a circular dependency.
from api.auth import User
return User(sub="", email=email, name="")
def _extract_result(task_result: Any) -> tuple[Any, str | None]:
"""Extract a serialisable result and an error string from a Celery AsyncResult.
Returns:
(result, error) -- exactly one of the two will be non-None (or both None
for tasks that haven't produced output yet).
"""
if task_result.failed():
error = str(task_result.result) if task_result.result else None
return None, error
try:
result = json.loads(json.dumps(task_result.result))
except (TypeError, json.JSONDecodeError):
result = str(task_result.result) if task_result.result else None
return result, None
def _extract_progress_info(task_result: Any) -> dict[str, Any]:
"""Extract progress metadata from a Celery AsyncResult's ``info`` dict.
Returns a dict with keys ``progress``, ``processed``, ``total``, and
``message`` (any of which may be None).
"""
progress: float | None = None
processed: int | None = None
total: int | None = None
message: str | None = None
if task_result.info and isinstance(task_result.info, dict):
progress = task_result.info.get("progress")
processed = task_result.info.get("processed")
total = task_result.info.get("total")
# Use 'message' if available, fall back to 'reason' for SKIPPED tasks
message = task_result.info.get("message") or task_result.info.get("reason")
# For custom states (like "Fetching listings"), use the state as message
# if no message was provided in info
if not message and task_result.status not in _CELERY_STANDARD_STATES:
message = task_result.status
return {
"progress": progress,
"processed": processed,
"total": total,
"message": message,
}
def get_task_status(task_id: str) -> TaskStatus:
"""Get the status of a background task.
@ -33,55 +105,24 @@ def get_task_status(task_id: str) -> TaskStatus:
Returns:
TaskStatus with current state
"""
# Lazy import: listing_tasks imports the Celery app which in turn
# pulls in broker configuration; importing at module level would
# create a circular dependency chain.
from tasks.listing_tasks import dump_listings_task
task_result = dump_listings_task.AsyncResult(task_id)
# Try to serialize result
result = None
error = None
if task_result.failed():
# Extract error message from failed task
error = str(task_result.result) if task_result.result else None
else:
try:
result = json.loads(json.dumps(task_result.result))
except (TypeError, json.JSONDecodeError):
result = str(task_result.result) if task_result.result else None
# Extract traceback if available
result, error = _extract_result(task_result)
task_traceback = task_result.traceback if task_result.failed() else None
# Extract progress, processed, total, and message from task meta
progress = None
processed = None
total = None
message = None
if task_result.info and isinstance(task_result.info, dict):
progress = task_result.info.get("progress")
processed = task_result.info.get("processed")
total = task_result.info.get("total")
# Use 'message' if available, fall back to 'reason' for SKIPPED tasks
message = task_result.info.get("message") or task_result.info.get("reason")
# For custom states (like "Fetching listings"), use the state as message
# if no message was provided in info
if not message and task_result.status not in (
"PENDING", "STARTED", "SUCCESS", "FAILURE", "REVOKED", "RETRY"
):
message = task_result.status
progress_info = _extract_progress_info(task_result)
return TaskStatus(
task_id=task_id,
status=task_result.status,
result=result,
progress=progress,
processed=processed,
total=total,
message=message,
error=error,
traceback=task_traceback,
**progress_info,
)
@ -97,12 +138,12 @@ def get_user_tasks(user_email: str) -> list[str]:
Returns:
List of task IDs
"""
# Lazy import: RedisRepository depends on redis which may not be
# available at import time in all contexts (CLI, tests).
from redis_repository import RedisRepository
from api.auth import User
redis_repo = RedisRepository.instance()
# Create a minimal User object for the lookup
user = User(sub="", email=user_email, name="")
user = _make_system_user(user_email)
return redis_repo.get_tasks_for_user(user)
@ -116,11 +157,11 @@ def add_task_for_user(user_email: str, task_id: str) -> None:
user_email: The user's email address
task_id: The Celery task ID
"""
# Lazy import: see get_user_tasks for rationale.
from redis_repository import RedisRepository
from api.auth import User
redis_repo = RedisRepository.instance()
user = User(sub="", email=user_email, name="")
user = _make_system_user(user_email)
redis_repo.add_task_for_user(user, task_id)
@ -134,8 +175,10 @@ def cancel_task(task_id: str, user_email: str | None = None) -> bool:
Returns:
True if task was cancelled successfully
"""
# Lazy import: celery_app bootstraps the broker connection.
from celery_app import app as celery_app
logger.info("Cancelling task %s (user=%s)", task_id, user_email)
# Revoke the task in Celery
celery_app.control.revoke(task_id, terminate=True)
@ -158,11 +201,11 @@ def remove_task_from_user(user_email: str, task_id: str) -> bool:
Returns:
True if task was removed, False if not found
"""
# Lazy import: see get_user_tasks for rationale.
from redis_repository import RedisRepository
from api.auth import User
redis_repo = RedisRepository.instance()
user = User(sub="", email=user_email, name="")
user = _make_system_user(user_email)
return redis_repo.remove_task_for_user(user, task_id)
@ -176,12 +219,14 @@ def clear_all_tasks(user_email: str, revoke: bool = True) -> int:
Returns:
Number of tasks cleared
"""
# Lazy imports: see get_user_tasks and cancel_task for rationale.
from redis_repository import RedisRepository
from celery_app import app as celery_app
from api.auth import User
redis_repo = RedisRepository.instance()
user = User(sub="", email=user_email, name="")
user = _make_system_user(user_email)
logger.info("Clearing all tasks for user %s (revoke=%s)", user_email, revoke)
# Get tasks before clearing to revoke them
if revoke:
@ -189,7 +234,9 @@ def clear_all_tasks(user_email: str, revoke: bool = True) -> int:
for task_id in tasks:
try:
celery_app.control.revoke(task_id, terminate=True)
except Exception:
pass # Best effort, continue clearing
except Exception as e:
logger.warning(
"Failed to revoke task %s: %s", task_id, e
)
return redis_repo.clear_tasks_for_user(user)