# beadboard/.agents/skills/rlm-mem/brain/scripts/memory_schema.py
#
# 140 lines
# 4.2 KiB
# Python

"""
Layered memory schema validation utilities.
"""
import json
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
# Keys every memory record must provide with a truthy value.
REQUIRED_FIELDS = (
    "id",
    "created_at",
    "scope",
    "entry_type",
    "content",
    "project_id",
)

# Scope values a record is allowed to declare.
ALLOWED_SCOPES = {"project_agent", "project_global", "user_agent", "user_global"}

# Scopes whose records must additionally carry an ``agent_id``.
AGENT_SCOPES = {"project_agent", "user_agent"}

# Structured warning payload emitted for a rejected line.
WarningDict = Dict[str, Any]
# A validated (and normalized) memory record.
RecordDict = Dict[str, Any]
def _warning(
*,
code: str,
message: str,
source_path: Union[str, Path],
line_number: int,
**extra: Any,
) -> WarningDict:
result: WarningDict = {
"code": code,
"message": message,
"path": str(source_path),
"line": line_number,
}
result.update(extra)
return result
def validate_record(
    record: Any, line_number: int, source_path: Union[str, Path]
) -> Tuple[Optional[RecordDict], Optional[WarningDict]]:
    """Validate a single memory record against required layered schema.

    Checks run in order and stop at the first failure:

    1. ``record`` must be a ``dict``.
    2. Every key in ``REQUIRED_FIELDS`` must be present with a truthy value
       (so empty strings, ``0`` and ``None`` count as missing).
    3. ``scope`` must be a string listed in ``ALLOWED_SCOPES``.
    4. Records whose scope is in ``AGENT_SCOPES`` need a truthy ``agent_id``.

    Args:
        record: Candidate record; anything other than a dict is rejected.
        line_number: Line number reported in the warning payload.
        source_path: Source path reported in the warning payload.

    Returns:
        ``(normalized, None)`` when the record is valid, where ``normalized``
        is a shallow copy of ``record`` with defaults filled in for ``tags``,
        ``confidence``, ``source`` and ``expires_at``; otherwise
        ``(None, warning)`` describing the first failed check.
    """
    if not isinstance(record, dict):
        return None, _warning(
            code="invalid_record_type",
            message="Memory record must be a JSON object.",
            source_path=source_path,
            line_number=line_number,
            actual_type=type(record).__name__,
        )

    missing_fields = [name for name in REQUIRED_FIELDS if not record.get(name)]
    if missing_fields:
        return None, _warning(
            code="missing_required_fields",
            message="Record missing required fields.",
            source_path=source_path,
            line_number=line_number,
            missing_fields=missing_fields,
        )

    scope = record.get("scope")
    # Check the type first: a list/dict scope is unhashable, and testing it
    # for set membership would raise TypeError instead of yielding a warning.
    if not isinstance(scope, str) or scope not in ALLOWED_SCOPES:
        return None, _warning(
            code="invalid_scope",
            message="Record scope is not supported.",
            source_path=source_path,
            line_number=line_number,
            scope=scope,
            allowed_scopes=sorted(ALLOWED_SCOPES),
        )

    if scope in AGENT_SCOPES and not record.get("agent_id"):
        return None, _warning(
            code="invalid_agent_scope",
            message="Agent scope records require agent_id.",
            source_path=source_path,
            line_number=line_number,
            scope=scope,
        )

    # Work on a shallow copy so the caller's dict is not modified.
    normalized = dict(record)
    if normalized.get("tags") is None:
        normalized["tags"] = []
    if normalized.get("confidence") is None:
        normalized["confidence"] = 0.7
    # Any falsy source (missing, None, "") is replaced by the placeholder.
    if not normalized.get("source"):
        normalized["source"] = "unknown"
    # An explicit expires_at (even None) is kept; only a missing key is added.
    normalized.setdefault("expires_at", None)
    return normalized, None
def load_jsonl_records(path: Union[str, Path]) -> Tuple[List[RecordDict], List[WarningDict]]:
    """Load JSONL file and return valid records plus structured validation warnings.

    Each non-blank line is decoded as JSON and passed to ``validate_record``.
    Lines that fail to decode or validate are skipped and reported as a
    warning; they never abort the load. Blank lines are ignored but still
    count towards the reported line numbers.

    Args:
        path: Location of the JSONL file.

    Returns:
        ``(valid_records, warnings)``. Both lists are empty when ``path``
        does not exist.
    """
    source_path = Path(path)
    valid_records: List[RecordDict] = []
    warnings: List[WarningDict] = []
    if not source_path.exists():
        return valid_records, warnings

    # "utf-8-sig" strips a leading byte-order mark if present and otherwise
    # behaves like "utf-8". With plain "utf-8" the BOM stays on the first
    # line, json.loads rejects it, and the first record would be dropped.
    with source_path.open("r", encoding="utf-8-sig") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            line = raw_line.strip()
            if not line:
                continue

            try:
                parsed = json.loads(line)
            except json.JSONDecodeError as exc:
                warnings.append(
                    _warning(
                        code="invalid_json",
                        message="Could not decode JSON line.",
                        source_path=source_path,
                        line_number=line_number,
                        error=str(exc),
                    )
                )
                continue

            validated, warning = validate_record(parsed, line_number, source_path)
            if warning is not None:
                warnings.append(warning)
                continue
            valid_records.append(validated)

    return valid_records, warnings