improve search relevance: hybrid scoring + AND-then-OR matching
- Blend BM25/ts_rank relevance with importance instead of sorting by one dimension only. Default mode: 40% relevance + 60% importance. Relevance mode: 70% relevance + 30% importance.
- Try AND-match first for precise results, then fall back to OR-match when too few results are found. This prevents single-word matches from flooding results.
- Applied to both SQLite (local) and PostgreSQL (API) search paths.
This commit is contained in:
parent
5a73dff622
commit
17206cd855
2 changed files with 59 additions and 20 deletions
|
|
@ -151,10 +151,13 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre
|
||||||
|
|
||||||
query_text = f"{body.context} {body.expanded_query}".strip()
|
query_text = f"{body.context} {body.expanded_query}".strip()
|
||||||
|
|
||||||
order_clause = "ts_rank(search_vector, query) DESC"
|
# Hybrid scoring: blend ts_rank relevance (0-1) with importance (0-1)
|
||||||
|
hybrid_score = "(ts_rank(search_vector, query) * 0.7 + importance * 0.3)"
|
||||||
if body.sort_by == "importance":
|
if body.sort_by == "importance":
|
||||||
order_clause = "importance DESC, ts_rank(search_vector, query) DESC"
|
hybrid_score = "(ts_rank(search_vector, query) * 0.4 + importance * 0.6)"
|
||||||
elif body.sort_by == "recency":
|
|
||||||
|
order_clause = f"{hybrid_score} DESC"
|
||||||
|
if body.sort_by == "recency":
|
||||||
order_clause = "created_at DESC"
|
order_clause = "created_at DESC"
|
||||||
|
|
||||||
category_filter = ""
|
category_filter = ""
|
||||||
|
|
@ -164,6 +167,8 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre
|
||||||
params.append(body.category)
|
params.append(body.category)
|
||||||
|
|
||||||
async with pool.acquire() as conn:
|
async with pool.acquire() as conn:
|
||||||
|
# Try AND-match first (plainto_tsquery ANDs by default), fall back to
|
||||||
|
# OR-match via individual word disjunction for broader results
|
||||||
rows = await conn.fetch(
|
rows = await conn.fetch(
|
||||||
f"""
|
f"""
|
||||||
SELECT id, content, category, tags, importance, is_sensitive,
|
SELECT id, content, category, tags, importance, is_sensitive,
|
||||||
|
|
@ -180,6 +185,35 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre
|
||||||
*params,
|
*params,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# If AND-match returned too few results, broaden to OR-match
|
||||||
|
if len(rows) < body.limit and query_text:
|
||||||
|
words = query_text.split()
|
||||||
|
if len(words) > 1:
|
||||||
|
or_tsquery = " | ".join(w for w in words if w)
|
||||||
|
or_params: list = [user.user_id, or_tsquery, body.limit]
|
||||||
|
or_cat_filter = ""
|
||||||
|
if body.category:
|
||||||
|
or_cat_filter = "AND category = $4"
|
||||||
|
or_params.append(body.category)
|
||||||
|
seen_ids = {r["id"] for r in rows}
|
||||||
|
or_rows = await conn.fetch(
|
||||||
|
f"""
|
||||||
|
SELECT id, content, category, tags, importance, is_sensitive,
|
||||||
|
ts_rank(search_vector, query) AS rank,
|
||||||
|
created_at, updated_at
|
||||||
|
FROM memories, to_tsquery('english', $2) query
|
||||||
|
WHERE user_id = $1
|
||||||
|
AND deleted_at IS NULL
|
||||||
|
AND search_vector @@ query
|
||||||
|
{or_cat_filter}
|
||||||
|
ORDER BY {order_clause}
|
||||||
|
LIMIT $3
|
||||||
|
""",
|
||||||
|
*or_params,
|
||||||
|
)
|
||||||
|
rows = list(rows) + [r for r in or_rows if r["id"] not in seen_ids]
|
||||||
|
rows = rows[:body.limit]
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
content = row["content"]
|
content = row["content"]
|
||||||
|
|
|
||||||
|
|
@ -435,30 +435,35 @@ class MemoryServer:
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
all_terms = f"{context} {expanded_query}".strip()
|
all_terms = f"{context} {expanded_query}".strip()
|
||||||
words = all_terms.split()
|
words = [w.replace(chr(34), "") for w in all_terms.split() if w]
|
||||||
fts_query = " OR ".join(f'"{w.replace(chr(34), "")}"' for w in words if w)
|
and_query = " AND ".join(f'"{w}"' for w in words)
|
||||||
|
or_query = " OR ".join(f'"{w}"' for w in words)
|
||||||
|
|
||||||
|
# Hybrid scoring: blend BM25 relevance with importance
|
||||||
|
# bm25() returns negative values (lower = better match), so negate it
|
||||||
order = (
|
order = (
|
||||||
"bm25(memories_fts), m.importance DESC"
|
"(-bm25(memories_fts) * 0.7 + m.importance * 0.3) DESC"
|
||||||
if sort_by == "relevance"
|
if sort_by == "relevance"
|
||||||
else "m.importance DESC, m.created_at DESC"
|
else "(-bm25(memories_fts) * 0.4 + m.importance * 0.6) DESC"
|
||||||
|
)
|
||||||
|
|
||||||
|
base_select = (
|
||||||
|
f"SELECT m.id, m.content, m.category, m.tags, m.importance, m.created_at "
|
||||||
|
f"FROM memories m JOIN memories_fts fts ON m.id = fts.rowid "
|
||||||
)
|
)
|
||||||
cursor = self.sqlite_conn.cursor()
|
cursor = self.sqlite_conn.cursor()
|
||||||
try:
|
try:
|
||||||
if category:
|
# Try AND first for precise matches, fall back to OR for broader results
|
||||||
|
cat_filter = "AND m.category = ?" if category else ""
|
||||||
|
for fts_query in (and_query, or_query):
|
||||||
|
params = [fts_query, category, limit] if category else [fts_query, limit]
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
f"SELECT m.id, m.content, m.category, m.tags, m.importance, m.created_at "
|
f"{base_select}WHERE memories_fts MATCH ? {cat_filter} ORDER BY {order} LIMIT ?",
|
||||||
f"FROM memories m JOIN memories_fts fts ON m.id = fts.rowid "
|
tuple(p for p in params if p is not None),
|
||||||
f"WHERE memories_fts MATCH ? AND m.category = ? ORDER BY {order} LIMIT ?",
|
|
||||||
(fts_query, category, limit),
|
|
||||||
)
|
)
|
||||||
else:
|
rows = cursor.fetchall()
|
||||||
cursor.execute(
|
if rows:
|
||||||
f"SELECT m.id, m.content, m.category, m.tags, m.importance, m.created_at "
|
break
|
||||||
f"FROM memories m JOIN memories_fts fts ON m.id = fts.rowid "
|
|
||||||
f"WHERE memories_fts MATCH ? ORDER BY {order} LIMIT ?",
|
|
||||||
(fts_query, limit),
|
|
||||||
)
|
|
||||||
rows = cursor.fetchall()
|
|
||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
like = f"%{context}%"
|
like = f"%{context}%"
|
||||||
if category:
|
if category:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue