fix(recall): cap default limit to 30 + relevance-bound OR-broadening
memory_recall was returning almost the entire store instead of a small
set of relevant matches. Two compounding causes, both fixed here:
1. Default limit was 10000 (commit d03a77a "effectively unlimited").
recall_memories/MemoryRecall and the memory_recall MCP tool now
default to 30 (ceiling stays 10000 for callers that opt in).
2. The OR-broadening fallback (fires when the precise AND-match is
sparse) ordered by the importance hybrid and padded up to `limit`,
so with limit=10000 it flooded results with high-importance but
irrelevant memories. It now orders OR-matches by ts_rank(relevance)
DESC and applies a minimum-rank floor (OR_BROADEN_MIN_RANK=0.01) to
drop rows that merely contain a query word incidentally.
Tests: add test_recall_default_limit_is_capped (asserts 30 passed to
fetch) and test_recall_or_broadening_is_relevance_bounded (asserts the
OR query is relevance-ordered + rank-floored). Full suite 176 green,
ruff + mypy clean.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
1fa6c2031e
commit
1c0193f011
3 changed files with 64 additions and 4 deletions
|
|
@ -190,6 +190,12 @@ async def store_memory(body: MemoryStore, user: AuthUser = Depends(get_current_u
|
|||
return MemoryResponse(id=row["id"], category=row["category"], importance=row["importance"])
|
||||
|
||||
|
||||
# OR-broadening fallback: when the precise AND-match is sparse we widen to an
|
||||
# OR-match to fill results, but only with rows whose relevance (ts_rank) clears
|
||||
# this floor. Below it a row merely contains one query word incidentally — noise.
|
||||
OR_BROADEN_MIN_RANK = 0.01
|
||||
|
||||
|
||||
@app.post("/api/memories/recall")
|
||||
async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_current_user)) -> dict[str, Any]:
|
||||
pool = await get_pool()
|
||||
|
|
@ -251,8 +257,9 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre
|
|||
FROM memories, to_tsquery('english', $2) query
|
||||
WHERE deleted_at IS NULL
|
||||
AND search_vector @@ query
|
||||
AND ts_rank(search_vector, query) > {OR_BROADEN_MIN_RANK}
|
||||
{or_cat_filter}
|
||||
ORDER BY {order_clause}
|
||||
ORDER BY ts_rank(search_vector, query) DESC
|
||||
LIMIT $3
|
||||
""",
|
||||
*or_params,
|
||||
|
|
@ -896,7 +903,7 @@ async def memory_store(content: str, category: str = "facts", tags: str = "",
|
|||
@mcp_server.tool()
|
||||
async def memory_recall(context: str, expanded_query: str = "",
|
||||
category: str | None = None, sort_by: str = "importance",
|
||||
limit: int = 10000) -> str:
|
||||
limit: int = 30) -> str:
|
||||
"""Recall memories by semantic search."""
|
||||
pool = await get_pool()
|
||||
user_id = _current_user.get()
|
||||
|
|
@ -957,8 +964,9 @@ async def memory_recall(context: str, expanded_query: str = "",
|
|||
FROM memories, to_tsquery('english', $2) query
|
||||
WHERE deleted_at IS NULL
|
||||
AND search_vector @@ query
|
||||
AND ts_rank(search_vector, query) > {OR_BROADEN_MIN_RANK}
|
||||
{or_cat_filter}
|
||||
ORDER BY {order_clause}
|
||||
ORDER BY ts_rank(search_vector, query) DESC
|
||||
LIMIT $3
|
||||
""",
|
||||
*or_params,
|
||||
|
|
|
|||
|
|
@ -17,7 +17,9 @@ class MemoryRecall(BaseModel):
|
|||
expanded_query: str = ""
|
||||
category: Optional[str] = None
|
||||
sort_by: Literal["importance", "relevance", "recency"] = "importance"
|
||||
limit: int = Field(default=10000, ge=1, le=10000)
|
||||
# Default to a small top-N so recall returns the most relevant matches, not
|
||||
# the whole store. Ceiling stays high for callers that explicitly want more.
|
||||
limit: int = Field(default=30, ge=1, le=10000)
|
||||
|
||||
|
||||
class MemoryResponse(BaseModel):
|
||||
|
|
|
|||
|
|
@ -162,6 +162,56 @@ async def test_recall_returns_all_memories(client):
|
|||
assert results[0]["owner"] == "testuser"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_recall_default_limit_is_capped(client):
|
||||
"""Default recall limit must be a small top-N (30), not the whole store.
|
||||
|
||||
Regression for the 'recall returns the entire ~1460-memory store' bug:
|
||||
the default was 10000, so a default call returned everything.
|
||||
"""
|
||||
ac, conn, app_mod = client
|
||||
conn.fetch.return_value = [_make_memory_row(id=1)]
|
||||
|
||||
async with ac:
|
||||
resp = await ac.post(
|
||||
"/api/memories/recall",
|
||||
json={"context": "singleword"}, # 1 word -> no OR-broadening
|
||||
headers={"Authorization": "Bearer test-key"},
|
||||
)
|
||||
|
||||
assert resp.status_code == 200
|
||||
# fetch(sql, user_id, query_text, limit[, category]) -> limit is the 4th arg
|
||||
first_args = conn.fetch.call_args_list[0].args
|
||||
assert first_args[3] == 30, f"default recall limit should be 30, got {first_args[3]}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_recall_or_broadening_is_relevance_bounded(client):
|
||||
"""When the precise AND-match is sparse, the OR-broadening fallback must
|
||||
order by relevance (ts_rank) and apply a minimum-rank floor — not pad up to
|
||||
`limit` ordered by the importance hybrid (which floods with high-importance
|
||||
but irrelevant memories).
|
||||
"""
|
||||
ac, conn, app_mod = client
|
||||
conn.fetch.side_effect = [
|
||||
[_make_memory_row(id=1)], # AND-match: 1 row (< limit -> triggers OR)
|
||||
[_make_memory_row(id=2)], # OR-broadening result
|
||||
]
|
||||
|
||||
async with ac:
|
||||
resp = await ac.post(
|
||||
"/api/memories/recall",
|
||||
json={"context": "two words"}, # >1 word -> OR-broadening fires
|
||||
headers={"Authorization": "Bearer test-key"},
|
||||
)
|
||||
|
||||
assert resp.status_code == 200
|
||||
assert len(conn.fetch.call_args_list) == 2, "OR-broadening query should fire"
|
||||
or_sql = conn.fetch.call_args_list[1].args[0]
|
||||
assert "ts_rank(search_vector, query) DESC" in or_sql, "OR matches must be ordered by relevance"
|
||||
assert "ts_rank(search_vector, query) >" in or_sql, "OR matches must have a minimum-rank floor"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_recall_redacts_sensitive_memories(client):
|
||||
ac, conn, app_mod = client
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue