231 lines
No EOL
8.6 KiB
Python
231 lines
No EOL
8.6 KiB
Python
"""
|
|
RLM-MEM - Auto-Linking System
|
|
D1.4: Automatic link generation between chunks.
|
|
|
|
Provides AutoLinker for automatic relationship generation between memories.
|
|
"""
|
|
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Optional, List, Dict, Set, Any, Tuple
|
|
|
|
try:
|
|
from .memory_store import Chunk, ChunkStore, ChunkLinks
|
|
except ImportError:
|
|
# For running directly
|
|
from memory_store import Chunk, ChunkStore, ChunkLinks
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class LinkStrength:
    """Strength of a link paired with an optional explanation.

    Attributes:
        score: Numeric strength of the link.
        reason: Optional text explaining the score; ``None`` when omitted.
    """

    # Numeric strength of the link (required).
    score: float
    # Optional explanation for the score.
    reason: Optional[str] = field(default=None)
|
|
|
|
|
|
class AutoLinker:
    """
    Automatic link generation between chunks.

    Link Types:
    - context_of: Same conversation_id (bidirectional)
    - follows: Created within temporal window before this one (unidirectional)
    - related_to: Shares any tag (bidirectional)
    """

    # Link types that are mirrored onto the target chunk by _add_reverse_link.
    _BIDIRECTIONAL_LINK_TYPES = ("context_of", "related_to")

    def __init__(self, chunk_store: ChunkStore,
                 temporal_window_minutes: int = 5):
        """
        Args:
            chunk_store: Store used to look up, list and save chunks.
            temporal_window_minutes: Size of the look-back window, in minutes,
                used when searching for "follows" predecessors.
        """
        self.chunk_store = chunk_store
        self.temporal_window = timedelta(minutes=temporal_window_minutes)

    def link_on_create(self, new_chunk: Chunk) -> Chunk:
        """
        Generate automatic links when chunk is created.

        Mutates ``new_chunk.links`` in place, mirrors bidirectional links onto
        the target chunks, and saves every chunk that was modified.

        Args:
            new_chunk: The newly created chunk

        Returns:
            The chunk with updated links
        """
        chunk_id = new_chunk.id
        conversation_id = new_chunk.metadata.conversation_id
        tags = new_chunk.tags
        links = new_chunk.links
        created = self._resolve_created(new_chunk.metadata, chunk_id)

        # 1. Conversation context links (bidirectional).
        context_chunks = self._find_conversation_chunks(conversation_id, chunk_id)
        for target_id in context_chunks:
            if target_id not in links.context_of:
                links.context_of.append(target_id)
            # The reverse helper checks membership itself, so calling it
            # unconditionally is idempotent and keeps both sides in sync.
            self._add_reverse_link(target_id, chunk_id, "context_of")

        # 2. Temporal predecessors (unidirectional: only the new chunk is updated).
        predecessor_chunks = self._find_temporal_predecessors(
            created, conversation_id, chunk_id
        )
        for target_id in predecessor_chunks:
            if target_id not in links.follows:
                links.follows.append(target_id)

        # 3. Tag-related chunks (bidirectional).
        related_chunks = self._find_tag_related(tags, chunk_id)
        for target_id in related_chunks:
            # Avoid duplicate links - if already context_of, skip weak related_to
            if target_id in links.context_of:
                continue
            if target_id not in links.related_to:
                links.related_to.append(target_id)
            self._add_related_to_link(target_id, chunk_id)

        # Save updated chunk
        self._save_chunk(new_chunk)

        logger.info(f"Auto-linked chunk {chunk_id}: "
                    f"context={len(context_chunks)}, "
                    f"follows={len(predecessor_chunks)}, "
                    f"related={len(related_chunks)}")

        return new_chunk

    def _resolve_created(self, metadata: Any, chunk_id: str) -> datetime:
        """
        Return the creation time recorded on ``metadata``.

        Reads ``created`` and falls back to ``created_at`` when ``created`` is
        missing or empty. A value that is already a ``datetime`` is returned
        unchanged; otherwise it is parsed as an ISO-8601 string, with a
        trailing "Z" rewritten to "+00:00". If nothing usable is found a
        warning is logged and the current UTC time is returned.
        """
        raw = getattr(metadata, "created", None) or getattr(metadata, "created_at", None)
        if isinstance(raw, datetime):
            return raw
        try:
            return datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except (ValueError, AttributeError, TypeError):
            # AttributeError: value is None; TypeError: value is not a string.
            logger.warning(f"Invalid created timestamp for chunk {chunk_id}")
            # NOTE(review): utcnow() is timezone-naive while "Z" timestamps parse
            # as timezone-aware; confirm which form list_chunks expects.
            return datetime.utcnow()

    def _add_reverse_link(self, chunk_id: str, target_id: str, link_type: str):
        """
        Add bidirectional link to existing chunk.

        Appends ``target_id`` to the ``link_type`` list of the chunk identified
        by ``chunk_id`` and saves that chunk, unless the link already exists or
        the chunk cannot be found. Link types other than "context_of" and
        "related_to" are ignored.
        """
        if link_type not in self._BIDIRECTIONAL_LINK_TYPES:
            return
        chunk = self.chunk_store.get_chunk(chunk_id)
        if not chunk:
            return
        existing = getattr(chunk.links, link_type)
        if target_id not in existing:
            existing.append(target_id)
            self._save_chunk(chunk)

    def _add_related_to_link(self, target_id: str, new_chunk_id: str):
        """Add related_to link from target chunk to new chunk."""
        self._add_reverse_link(target_id, new_chunk_id, "related_to")

    def _save_chunk(self, chunk: Chunk):
        """Save chunk to storage without updating access tracking."""
        if hasattr(self.chunk_store, "save_chunk"):
            self.chunk_store.save_chunk(chunk)
            return

        # Fallback for stores without save_chunk: write the JSON form directly
        # to the path the store reports for this chunk.
        chunk_path = self.chunk_store._get_chunk_path(chunk.id)
        chunk_path.write_text(chunk.to_json(), encoding="utf-8")

    def _find_conversation_chunks(self, conversation_id: str,
                                  exclude: str) -> List[str]:
        """
        Find other chunks from same conversation.

        Returns what ``chunk_store.list_chunks`` yields for ``conversation_id``
        minus ``exclude``.
        """
        # NOTE(review): behaviour for conversation_id=None depends on
        # list_chunks; verify it does not return every chunk in the store.
        chunks = self.chunk_store.list_chunks(
            conversation_id=conversation_id
        )
        return [c for c in chunks if c != exclude]

    def _find_temporal_predecessors(self, created: datetime,
                                    conversation_id: str,
                                    exclude: str) -> List[str]:
        """
        Find chunks within temporal window before this one.

        Queries the same conversation between ``created - temporal_window``
        and ``created``, then drops ``exclude``.
        """
        window_start = created - self.temporal_window

        chunks = self.chunk_store.list_chunks(
            conversation_id=conversation_id,
            created_after=window_start,
            created_before=created
        )

        return [c for c in chunks if c != exclude]

    def _find_tag_related(self, tags: List[str], exclude: str) -> List[str]:
        """
        Find chunks sharing any tag.

        Returns a sorted, de-duplicated list so the resulting link order is
        deterministic. Returns an empty list when there are no tags or the
        store has no usable ``tag_index.get_list``.
        """
        if not tags:
            return []

        # The tag index might be mocked or missing in some adapters; resolve it
        # once instead of re-checking on every tag.
        tag_index = getattr(self.chunk_store, "tag_index", None)
        get_list = getattr(tag_index, "get_list", None)
        if get_list is None:
            return []

        related = set()
        for tag in tags:
            # Tolerate an index that answers None for an unknown tag.
            related.update(get_list(tag) or ())

        # Exclude the new chunk itself
        related.discard(exclude)

        return sorted(related)
|
|
|
|
|
|
def _chunk_created_time(chunk: Chunk) -> datetime:
    """
    Return the creation time stored on ``chunk.metadata`` as a datetime.

    Reads ``created`` and falls back to ``created_at`` when ``created`` is
    missing or empty. A ``datetime`` value is returned unchanged; a string is
    parsed as ISO-8601 with a trailing "Z" rewritten to "+00:00".

    Raises:
        AttributeError: No timestamp is present (value is None).
        ValueError: The string is not a valid ISO-8601 timestamp.
        TypeError: The value is neither a string nor a datetime.
    """
    metadata = chunk.metadata
    raw = getattr(metadata, "created", None) or getattr(metadata, "created_at", None)
    if isinstance(raw, datetime):
        return raw
    return datetime.fromisoformat(raw.replace("Z", "+00:00"))


def calculate_link_strength(source: Chunk, target: Chunk,
                            link_type: str,
                            temporal_window_minutes: float = 5) -> float:
    """
    Calculate link strength based on link type and chunk attributes.

    Args:
        source: Chunk the link originates from.
        target: Chunk the link points to.
        link_type: One of "context_of", "follows" or "related_to"; any other
            value yields the neutral score 0.5.
        temporal_window_minutes: Minutes over which a "follows" link decays
            linearly from 1.0; the result never drops below 0.3. Defaults
            to 5.

    Returns:
        - "context_of": always 1.0.
        - "follows": ``max(0.3, 1 - minutes_apart / temporal_window_minutes)``,
          or 0.5 when either timestamp is missing, malformed, or the two
          cannot be compared (e.g. one naive and one timezone-aware).
        - "related_to": ``min(0.9, 0.3 + 0.2 * shared_tag_count)``.
        - anything else: 0.5.

    Raises:
        ValueError: ``temporal_window_minutes`` is not positive (only checked
            for "follows" links).
    """
    if link_type == "context_of":
        return 1.0

    if link_type == "follows":
        if temporal_window_minutes <= 0:
            raise ValueError("temporal_window_minutes must be positive")
        try:
            elapsed = _chunk_created_time(source) - _chunk_created_time(target)
        except (ValueError, AttributeError, TypeError):
            # TypeError covers subtracting a naive from an aware datetime.
            return 0.5
        # Time-decayed strength; the sign of the difference is irrelevant.
        minutes = abs(elapsed.total_seconds()) / 60
        return max(0.3, 1.0 - (minutes / temporal_window_minutes))

    if link_type == "related_to":
        # Based on shared tags; a missing (None) tag list counts as empty.
        shared = len(set(source.tags or ()) & set(target.tags or ()))
        return min(0.9, 0.3 + (shared * 0.2))

    return 0.5
|
|
|
|
|
|
# Integration function for ChunkStore
|
|
def create_chunk_with_links(store: ChunkStore, linker: AutoLinker,
                            content: str, chunk_type: str,
                            conversation_id: str, tokens: int,
                            tags: List[str] = None,
                            confidence: float = 0.7) -> Chunk:
    """
    Create a chunk through the store, then auto-link it.

    Args:
        store: Object whose ``create_chunk`` builds the new chunk.
        linker: Object whose ``link_on_create`` links the new chunk.
        content, chunk_type, conversation_id, tokens, tags, confidence:
            Forwarded unchanged, as keyword arguments, to
            ``store.create_chunk``.

    Returns:
        Whatever ``linker.link_on_create`` returns for the created chunk.
    """
    creation_args = {
        "content": content,
        "chunk_type": chunk_type,
        "conversation_id": conversation_id,
        "tokens": tokens,
        "tags": tags,
        "confidence": confidence,
    }
    fresh_chunk = store.create_chunk(**creation_args)
    linked_chunk = linker.link_on_create(fresh_chunk)
    return linked_chunk