fix(recall): cap default limit to 30 + relevance-bound OR-broadening

memory_recall was returning almost the entire store instead of a small set of relevant matches. Two compounding causes, both fixed here: 1. Default limit was 10000 (commit d03a77a "effectively unlimited"). recall_memories/MemoryRecall and the memory_recall MCP tool now default to 30 (ceiling stays 10000 for callers that opt in). 2. The OR-broadening fallback (fires when the precise AND-match is sparse) ordered by the importance hybrid and padded up to `limit`, so with limit=10000 it flooded results with high-importance but irrelevant memories. It now orders OR-matches by ts_rank(relevance) DESC and applies a minimum-rank floor (OR_BROADEN_MIN_RANK=0.01) to drop rows that merely contain a query word incidentally. Tests: add test_recall_default_limit_is_capped (asserts 30 passed to fetch) and test_recall_or_broadening_is_relevance_bounded (asserts the OR query is relevance-ordered + rank-floored). Full suite 176 green, ruff + mypy clean. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 18:48:11 +00:00 · 2026-06-04 18:48:11 +00:00 · 1c0193f011
commit 1c0193f011
parent 1fa6c2031e
3 changed files with 64 additions and 4 deletions
--- a/src/claude_memory/api/app.py
+++ b/src/claude_memory/api/app.py
@ -190,6 +190,12 @@ async def store_memory(body: MemoryStore, user: AuthUser = Depends(get_current_u
    return MemoryResponse(id=row["id"], category=row["category"], importance=row["importance"])


+# OR-broadening fallback: when the precise AND-match is sparse we widen to an
+# OR-match to fill results, but only with rows whose relevance (ts_rank) clears
+# this floor. Below it a row merely contains one query word incidentally — noise.
+OR_BROADEN_MIN_RANK = 0.01
+
+
@app.post("/api/memories/recall")
 async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_current_user)) -> dict[str, Any]:
    pool = await get_pool()
@ -251,8 +257,9 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre
                    FROM memories, to_tsquery('english', $2) query
                    WHERE deleted_at IS NULL
                      AND search_vector @@ query
+                      AND ts_rank(search_vector, query) > {OR_BROADEN_MIN_RANK}
                      {or_cat_filter}
-                    ORDER BY {order_clause}
+                    ORDER BY ts_rank(search_vector, query) DESC
                    LIMIT $3
                    """,
                    *or_params,
@ -896,7 +903,7 @@ async def memory_store(content: str, category: str = "facts", tags: str = "",
@mcp_server.tool()
 async def memory_recall(context: str, expanded_query: str = "",
                        category: str | None = None, sort_by: str = "importance",
-                        limit: int = 10000) -> str:
+                        limit: int = 30) -> str:
    """Recall memories by semantic search."""
    pool = await get_pool()
    user_id = _current_user.get()
@ -957,8 +964,9 @@ async def memory_recall(context: str, expanded_query: str = "",
                    FROM memories, to_tsquery('english', $2) query
                    WHERE deleted_at IS NULL
                      AND search_vector @@ query
+                      AND ts_rank(search_vector, query) > {OR_BROADEN_MIN_RANK}
                      {or_cat_filter}
-                    ORDER BY {order_clause}
+                    ORDER BY ts_rank(search_vector, query) DESC
                    LIMIT $3
                    """,
                    *or_params,
--- a/src/claude_memory/api/models.py
+++ b/src/claude_memory/api/models.py
@ -17,7 +17,9 @@ class MemoryRecall(BaseModel):
    expanded_query: str = ""
    category: Optional[str] = None
    sort_by: Literal["importance", "relevance", "recency"] = "importance"
-    limit: int = Field(default=10000, ge=1, le=10000)
+    # Default to a small top-N so recall returns the most relevant matches, not
+    # the whole store. Ceiling stays high for callers that explicitly want more.
+    limit: int = Field(default=30, ge=1, le=10000)


 class MemoryResponse(BaseModel):