From 17206cd855f002e33b17cc90cdf5bd7434d8abc5 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 15 Mar 2026 22:36:35 +0000 Subject: [PATCH] improve search relevance: hybrid scoring + AND-then-OR matching - Blend BM25/ts_rank relevance with importance instead of sorting by one dimension only. Default mode: 40% relevance + 60% importance. Relevance mode: 70% relevance + 30% importance. - Try AND-match first for precise results, fall back to OR-match when too few results are found (API path: fewer rows than the requested limit for a multi-word query; SQLite path: the AND query returned no rows). Prevents single-word matches from flooding results. - Applied to both SQLite (local) and PostgreSQL (API) search paths. --- src/claude_memory/api/app.py | 40 ++++++++++++++++++++++++++++++--- src/claude_memory/mcp_server.py | 39 ++++++++++++++++++-------------- 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/src/claude_memory/api/app.py b/src/claude_memory/api/app.py index 5b3a173..5e0f220 100644 --- a/src/claude_memory/api/app.py +++ b/src/claude_memory/api/app.py @@ -151,10 +151,13 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre query_text = f"{body.context} {body.expanded_query}".strip() - order_clause = "ts_rank(search_vector, query) DESC" + # Hybrid scoring: blend ts_rank relevance (0-1) with importance (0-1) + hybrid_score = "(ts_rank(search_vector, query) * 0.7 + importance * 0.3)" if body.sort_by == "importance": - order_clause = "importance DESC, ts_rank(search_vector, query) DESC" - elif body.sort_by == "recency": + hybrid_score = "(ts_rank(search_vector, query) * 0.4 + importance * 0.6)" + + order_clause = f"{hybrid_score} DESC" + if body.sort_by == "recency": order_clause = "created_at DESC" category_filter = "" @@ -164,6 +167,8 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre params.append(body.category) async with pool.acquire() as conn: + # Try AND-match first (plainto_tsquery ANDs by default), fall back to + # OR-match via individual word disjunction for broader results rows = await 
conn.fetch( f""" SELECT id, content, category, tags, importance, is_sensitive, @@ -180,6 +185,35 @@ async def recall_memories(body: MemoryRecall, user: AuthUser = Depends(get_curre *params, ) + # If AND-match returned too few results, broaden to OR-match + if len(rows) < body.limit and query_text: + words = query_text.split() + if len(words) > 1: + or_tsquery = " | ".join(w for w in words if w) + or_params: list = [user.user_id, or_tsquery, body.limit] + or_cat_filter = "" + if body.category: + or_cat_filter = "AND category = $4" + or_params.append(body.category) + seen_ids = {r["id"] for r in rows} + or_rows = await conn.fetch( + f""" + SELECT id, content, category, tags, importance, is_sensitive, + ts_rank(search_vector, query) AS rank, + created_at, updated_at + FROM memories, to_tsquery('english', $2) query + WHERE user_id = $1 + AND deleted_at IS NULL + AND search_vector @@ query + {or_cat_filter} + ORDER BY {order_clause} + LIMIT $3 + """, + *or_params, + ) + rows = list(rows) + [r for r in or_rows if r["id"] not in seen_ids] + rows = rows[:body.limit] + results = [] for row in rows: content = row["content"] diff --git a/src/claude_memory/mcp_server.py b/src/claude_memory/mcp_server.py index 0a105a7..4e85a93 100644 --- a/src/claude_memory/mcp_server.py +++ b/src/claude_memory/mcp_server.py @@ -435,30 +435,35 @@ class MemoryServer: import sqlite3 all_terms = f"{context} {expanded_query}".strip() - words = all_terms.split() - fts_query = " OR ".join(f'"{w.replace(chr(34), "")}"' for w in words if w) + words = [w.replace(chr(34), "") for w in all_terms.split() if w] + and_query = " AND ".join(f'"{w}"' for w in words) + or_query = " OR ".join(f'"{w}"' for w in words) + + # Hybrid scoring: blend BM25 relevance with importance + # bm25() returns negative values (lower = better match), so negate it order = ( - "bm25(memories_fts), m.importance DESC" + "(-bm25(memories_fts) * 0.7 + m.importance * 0.3) DESC" if sort_by == "relevance" - else "m.importance DESC, 
m.created_at DESC" + else "(-bm25(memories_fts) * 0.4 + m.importance * 0.6) DESC" + ) + + base_select = ( + f"SELECT m.id, m.content, m.category, m.tags, m.importance, m.created_at " + f"FROM memories m JOIN memories_fts fts ON m.id = fts.rowid " ) cursor = self.sqlite_conn.cursor() try: - if category: + # Try AND first for precise matches, fall back to OR for broader results + cat_filter = "AND m.category = ?" if category else "" + for fts_query in (and_query, or_query): + params = [fts_query, category, limit] if category else [fts_query, limit] cursor.execute( - f"SELECT m.id, m.content, m.category, m.tags, m.importance, m.created_at " - f"FROM memories m JOIN memories_fts fts ON m.id = fts.rowid " - f"WHERE memories_fts MATCH ? AND m.category = ? ORDER BY {order} LIMIT ?", - (fts_query, category, limit), + f"{base_select}WHERE memories_fts MATCH ? {cat_filter} ORDER BY {order} LIMIT ?", + tuple(p for p in params if p is not None), ) - else: - cursor.execute( - f"SELECT m.id, m.content, m.category, m.tags, m.importance, m.created_at " - f"FROM memories m JOIN memories_fts fts ON m.id = fts.rowid " - f"WHERE memories_fts MATCH ? ORDER BY {order} LIMIT ?", - (fts_query, limit), - ) - rows = cursor.fetchall() + rows = cursor.fetchall() + if rows: + break except sqlite3.OperationalError: like = f"%{context}%" if category: