improve search relevance: hybrid scoring + AND-then-OR matching
- Blend BM25/ts_rank relevance with importance instead of sorting by one dimension only. Default mode: 40% relevance + 60% importance. Relevance mode: 70% relevance + 30% importance.
- Try AND-match first for precise results, and fall back to OR-match when too few results are found. This prevents single-word matches from flooding the results.
- Applied to both the SQLite (local) and PostgreSQL (API) search paths.
This commit is contained in:
parent
5a73dff622
commit
17206cd855
2 changed files with 59 additions and 20 deletions
|
|
@ -151,10 +151,13 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre
|
|||
|
||||
query_text = f"{body.context} {body.expanded_query}".strip()
|
||||
|
||||
order_clause = "ts_rank(search_vector, query) DESC"
|
||||
# Hybrid scoring: blend ts_rank relevance (0-1) with importance (0-1)
|
||||
hybrid_score = "(ts_rank(search_vector, query) * 0.7 + importance * 0.3)"
|
||||
if body.sort_by == "importance":
|
||||
order_clause = "importance DESC, ts_rank(search_vector, query) DESC"
|
||||
elif body.sort_by == "recency":
|
||||
hybrid_score = "(ts_rank(search_vector, query) * 0.4 + importance * 0.6)"
|
||||
|
||||
order_clause = f"{hybrid_score} DESC"
|
||||
if body.sort_by == "recency":
|
||||
order_clause = "created_at DESC"
|
||||
|
||||
category_filter = ""
|
||||
|
|
@ -164,6 +167,8 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre
|
|||
params.append(body.category)
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
# Try AND-match first (plainto_tsquery ANDs by default), fall back to
|
||||
# OR-match via individual word disjunction for broader results
|
||||
rows = await conn.fetch(
|
||||
f"""
|
||||
SELECT id, content, category, tags, importance, is_sensitive,
|
||||
|
|
@ -180,6 +185,35 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre
|
|||
*params,
|
||||
)
|
||||
|
||||
# If AND-match returned too few results, broaden to OR-match
|
||||
if len(rows) < body.limit and query_text:
|
||||
words = query_text.split()
|
||||
if len(words) > 1:
|
||||
or_tsquery = " | ".join(w for w in words if w)
|
||||
or_params: list = [user.user_id, or_tsquery, body.limit]
|
||||
or_cat_filter = ""
|
||||
if body.category:
|
||||
or_cat_filter = "AND category = $4"
|
||||
or_params.append(body.category)
|
||||
seen_ids = {r["id"] for r in rows}
|
||||
or_rows = await conn.fetch(
|
||||
f"""
|
||||
SELECT id, content, category, tags, importance, is_sensitive,
|
||||
ts_rank(search_vector, query) AS rank,
|
||||
created_at, updated_at
|
||||
FROM memories, to_tsquery('english', $2) query
|
||||
WHERE user_id = $1
|
||||
AND deleted_at IS NULL
|
||||
AND search_vector @@ query
|
||||
{or_cat_filter}
|
||||
ORDER BY {order_clause}
|
||||
LIMIT $3
|
||||
""",
|
||||
*or_params,
|
||||
)
|
||||
rows = list(rows) + [r for r in or_rows if r["id"] not in seen_ids]
|
||||
rows = rows[:body.limit]
|
||||
|
||||
results = []
|
||||
for row in rows:
|
||||
content = row["content"]
|
||||
|
|
|
|||
|
|
@ -435,30 +435,35 @@ class MemoryServer:
|
|||
import sqlite3
|
||||
|
||||
all_terms = f"{context} {expanded_query}".strip()
|
||||
words = all_terms.split()
|
||||
fts_query = " OR ".join(f'"{w.replace(chr(34), "")}"' for w in words if w)
|
||||
words = [w.replace(chr(34), "") for w in all_terms.split() if w]
|
||||
and_query = " AND ".join(f'"{w}"' for w in words)
|
||||
or_query = " OR ".join(f'"{w}"' for w in words)
|
||||
|
||||
# Hybrid scoring: blend BM25 relevance with importance
|
||||
# bm25() returns negative values (lower = better match), so negate it
|
||||
order = (
|
||||
"bm25(memories_fts), m.importance DESC"
|
||||
"(-bm25(memories_fts) * 0.7 + m.importance * 0.3) DESC"
|
||||
if sort_by == "relevance"
|
||||
else "m.importance DESC, m.created_at DESC"
|
||||
else "(-bm25(memories_fts) * 0.4 + m.importance * 0.6) DESC"
|
||||
)
|
||||
|
||||
base_select = (
|
||||
f"SELECT m.id, m.content, m.category, m.tags, m.importance, m.created_at "
|
||||
f"FROM memories m JOIN memories_fts fts ON m.id = fts.rowid "
|
||||
)
|
||||
cursor = self.sqlite_conn.cursor()
|
||||
try:
|
||||
if category:
|
||||
# Try AND first for precise matches, fall back to OR for broader results
|
||||
cat_filter = "AND m.category = ?" if category else ""
|
||||
for fts_query in (and_query, or_query):
|
||||
params = [fts_query, category, limit] if category else [fts_query, limit]
|
||||
cursor.execute(
|
||||
f"SELECT m.id, m.content, m.category, m.tags, m.importance, m.created_at "
|
||||
f"FROM memories m JOIN memories_fts fts ON m.id = fts.rowid "
|
||||
f"WHERE memories_fts MATCH ? AND m.category = ? ORDER BY {order} LIMIT ?",
|
||||
(fts_query, category, limit),
|
||||
f"{base_select}WHERE memories_fts MATCH ? {cat_filter} ORDER BY {order} LIMIT ?",
|
||||
tuple(p for p in params if p is not None),
|
||||
)
|
||||
else:
|
||||
cursor.execute(
|
||||
f"SELECT m.id, m.content, m.category, m.tags, m.importance, m.created_at "
|
||||
f"FROM memories m JOIN memories_fts fts ON m.id = fts.rowid "
|
||||
f"WHERE memories_fts MATCH ? ORDER BY {order} LIMIT ?",
|
||||
(fts_query, limit),
|
||||
)
|
||||
rows = cursor.fetchall()
|
||||
rows = cursor.fetchall()
|
||||
if rows:
|
||||
break
|
||||
except sqlite3.OperationalError:
|
||||
like = f"%{context}%"
|
||||
if category:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue