beadboard/.agents/skills/rlm-mem/brain/scripts/auto_linker.py

231 lines
No EOL
8.6 KiB
Python

"""
RLM-MEM - Auto-Linking System
D1.4: Automatic link generation between chunks.
Provides AutoLinker for automatic relationship generation between memories.
"""
import logging
from datetime import datetime, timedelta
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, List, Dict, Set, Any, Tuple
try:
from .memory_store import Chunk, ChunkStore, ChunkLinks
except ImportError:
# For running directly
from memory_store import Chunk, ChunkStore, ChunkLinks
logger = logging.getLogger(__name__)
@dataclass
class LinkStrength:
"""Link strength with reasoning."""
score: float
reason: Optional[str] = None
class AutoLinker:
"""
Automatic link generation between chunks.
Link Types:
- context_of: Same conversation_id (bidirectional)
- follows: Created within temporal window before this one (unidirectional)
- related_to: Shares any tag (bidirectional)
"""
def __init__(self, chunk_store: ChunkStore,
temporal_window_minutes: int = 5):
self.chunk_store = chunk_store
self.temporal_window = timedelta(minutes=temporal_window_minutes)
def link_on_create(self, new_chunk: Chunk) -> Chunk:
"""
Generate automatic links when chunk is created.
Args:
new_chunk: The newly created chunk
Returns:
The chunk with updated links
"""
chunk_id = new_chunk.id
conversation_id = new_chunk.metadata.conversation_id
# Support both .created and .created_at metadata fields
created_str = getattr(new_chunk.metadata, 'created', getattr(new_chunk.metadata, 'created_at', None))
tags = new_chunk.tags
# Parse creation timestamp
try:
created = datetime.fromisoformat(created_str.replace("Z", "+00:00"))
except (ValueError, AttributeError):
logger.warning(f"Invalid created timestamp for chunk {chunk_id}")
created = datetime.utcnow()
# 1. Find conversation context links
context_chunks = self._find_conversation_chunks(conversation_id, chunk_id)
for target_id in context_chunks:
if target_id not in new_chunk.links.context_of:
new_chunk.links.context_of.append(target_id)
# Bidirectional
self._add_reverse_link(target_id, chunk_id, "context_of")
# 2. Find temporal predecessors
predecessor_chunks = self._find_temporal_predecessors(
created, conversation_id, chunk_id
)
for target_id in predecessor_chunks:
if target_id not in new_chunk.links.follows:
new_chunk.links.follows.append(target_id)
# 3. Find tag-related chunks
related_chunks = self._find_tag_related(tags, chunk_id)
for target_id in related_chunks:
# Avoid duplicate links - if already context_of, skip weak related_to
if target_id not in new_chunk.links.context_of:
if target_id not in new_chunk.links.related_to:
new_chunk.links.related_to.append(target_id)
# Bidirectional - add to target chunk as well
self._add_related_to_link(target_id, chunk_id)
# Save updated chunk
self._save_chunk(new_chunk)
logger.info(f"Auto-linked chunk {chunk_id}: "
f"context={len(context_chunks)}, "
f"follows={len(predecessor_chunks)}, "
f"related={len(related_chunks)}")
return new_chunk
def _add_reverse_link(self, chunk_id: str, target_id: str, link_type: str):
"""
Add bidirectional link to existing chunk.
"""
chunk = self.chunk_store.get_chunk(chunk_id)
if chunk:
if link_type == "context_of":
if target_id not in chunk.links.context_of:
chunk.links.context_of.append(target_id)
self._save_chunk(chunk)
elif link_type == "related_to":
if target_id not in chunk.links.related_to:
chunk.links.related_to.append(target_id)
self._save_chunk(chunk)
def _add_related_to_link(self, target_id: str, new_chunk_id: str):
"""Add related_to link from target chunk to new chunk."""
chunk = self.chunk_store.get_chunk(target_id)
if chunk:
if new_chunk_id not in chunk.links.related_to:
chunk.links.related_to.append(new_chunk_id)
self._save_chunk(chunk)
def _save_chunk(self, chunk: Chunk):
"""Save chunk to storage without updating access tracking."""
if hasattr(self.chunk_store, "save_chunk"):
self.chunk_store.save_chunk(chunk)
return
chunk_path = self.chunk_store._get_chunk_path(chunk.id)
chunk_path.write_text(chunk.to_json(), encoding="utf-8")
def _find_conversation_chunks(self, conversation_id: str,
exclude: str) -> List[str]:
"""
Find other chunks from same conversation.
"""
chunks = self.chunk_store.list_chunks(
conversation_id=conversation_id
)
return [c for c in chunks if c != exclude]
def _find_temporal_predecessors(self, created: datetime,
conversation_id: str,
exclude: str) -> List[str]:
"""
Find chunks within temporal window before this one.
"""
window_start = created - self.temporal_window
# Get chunks from same conversation within time window
chunks = self.chunk_store.list_chunks(
conversation_id=conversation_id,
created_after=window_start,
created_before=created
)
return [c for c in chunks if c != exclude]
def _find_tag_related(self, tags: List[str], exclude: str) -> List[str]:
"""
Find chunks sharing any tag.
"""
if not tags:
return []
related = set()
for tag in tags:
# Check if tag_index exists (it might be mocked or missing in some adapters)
if hasattr(self.chunk_store, 'tag_index') and hasattr(self.chunk_store.tag_index, 'get_list'):
chunks = self.chunk_store.tag_index.get_list(tag)
related.update(chunks)
# Exclude the new chunk itself
related.discard(exclude)
return list(related)
def calculate_link_strength(source: Chunk, target: Chunk,
link_type: str) -> float:
"""
Calculate link strength based on link type and chunk attributes.
"""
if link_type == "context_of":
return 1.0
elif link_type == "follows":
# Time-decayed strength
try:
source_time_str = getattr(source.metadata, 'created', getattr(source.metadata, 'created_at', None))
target_time_str = getattr(target.metadata, 'created', getattr(target.metadata, 'created_at', None))
source_time = datetime.fromisoformat(source_time_str.replace("Z", "+00:00"))
target_time = datetime.fromisoformat(target_time_str.replace("Z", "+00:00"))
time_diff = (source_time - target_time).total_seconds()
minutes = abs(time_diff) / 60
return max(0.3, 1.0 - (minutes / 5))
except (ValueError, AttributeError):
return 0.5
elif link_type == "related_to":
# Based on shared tags
shared = len(set(source.tags) & set(target.tags))
return min(0.9, 0.3 + (shared * 0.2))
return 0.5
# Integration function for ChunkStore
def create_chunk_with_links(store: ChunkStore, linker: AutoLinker,
content: str, chunk_type: str,
conversation_id: str, tokens: int,
tags: List[str] = None,
confidence: float = 0.7) -> Chunk:
"""
Create chunk and auto-link it.
"""
chunk = store.create_chunk(
content=content,
chunk_type=chunk_type,
conversation_id=conversation_id,
tokens=tokens,
tags=tags,
confidence=confidence
)
return linker.link_on_create(chunk)