beadboard/.agents/skills/rlm-mem/brain/scripts/migration_tool.py

"""
Migration tool for legacy JSON memory chunks to Layered JSONL format.

Usage:
    python -m brain.scripts.migration_tool --src brain/memory --dest .agents/memory/global --scope project_global
"""

import argparse
import json
import shutil
import sys
from pathlib import Path
from datetime import datetime

try:
    from .layered_memory_store import LayeredMemoryStore
    from .memory_policy import MemoryPolicy
    from .layered_adapter import LayeredChunkStoreAdapter
except ImportError:
    # Allow running as script
    sys.path.append(str(Path.cwd()))
    from brain.scripts.layered_memory_store import LayeredMemoryStore
    from brain.scripts.memory_policy import MemoryPolicy
    from brain.scripts.layered_adapter import LayeredChunkStoreAdapter

def migrate_chunks(src_dir: Path, dest_layer: str, default_scope: str, dry_run: bool = False, backup: bool = False):
    """
    Migrate legacy JSON chunks to layered store with idempotency and safety rails.
    """
    if not src_dir.exists():
        print(f"Error: Source directory {src_dir} does not exist.")
        return

    # Setup store
    policy = MemoryPolicy(project_root=Path.cwd())

    # Ensure target layer is allowed for writes during migration
    if dest_layer not in policy.write_layers:
        policy.write_layers.append(dest_layer)

    store = LayeredMemoryStore(policy=policy, agent_id="migration-tool")
    adapter = LayeredChunkStoreAdapter(store)

    # 0. Backup destination if requested
    if backup and not dry_run:
        dest_path = store._paths.get(dest_layer)
        if dest_path and dest_path.exists():
            timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
            backup_path = dest_path.with_suffix(f".{timestamp}.bak")
            print(f"Backing up destination {dest_layer} to {backup_path}")
            shutil.copy2(dest_path, backup_path)

    # 1. Load existing IDs to prevent duplicates (Idempotency)
    existing_chunks = set(adapter.list_chunks())
    print(f"Loaded {len(existing_chunks)} existing chunks for deduplication.")

    count = 0
    skipped = 0
    errors = 0

    # Find all JSON files in subdirectories (e.g. 2026-02/chunk-*.json)
    files = list(src_dir.rglob("chunk-*.json"))
    print(f"Found {len(files)} legacy chunks to migrate.")

    if dry_run:
        print("--- DRY RUN MODE: No writes will be performed ---")

    for file_path in files:
        try:
            content = file_path.read_text(encoding="utf-8")
            data = json.loads(content)

            chunk_id = data.get("id")

            # Idempotency Check
            if chunk_id in existing_chunks:
                skipped += 1
                continue

            # Map legacy fields to new schema
            record = {
                "id": chunk_id,
                "content": data.get("content"),
                "entry_type": data.get("type", "note"),
                "scope": default_scope,
                "project_id": "rlm-mem", # Default
                "tags": data.get("tags", []),
                "created_at": data.get("metadata", {}).get("created_at", datetime.utcnow().isoformat() + "Z"),
                "metadata": {
                    "migrated_from": str(file_path),
                    "original_metadata": data.get("metadata", {})
                }
            }

            if not dry_run:
                store.append_entry(dest_layer, record)
            else:
                print(f"[DRY RUN] Would migrate {chunk_id}")

            count += 1
            if count % 10 == 0 and not dry_run:
                print(f"Migrated {count} chunks...", end="\r")

        except Exception as e:
            print(f"\nFailed to migrate {file_path}: {e}")
            errors += 1

    print(f"\nMigration complete.")
    if dry_run:
        print(f"Would have migrated: {count}")
    else:
        print(f"Successfully migrated: {count}")
    print(f"Skipped (duplicates): {skipped}")
    print(f"Errors: {errors}")

def main():
    parser = argparse.ArgumentParser(description="Migrate legacy memory chunks")
    parser.add_argument("--src", default="brain/memory", help="Source directory (legacy)")
    parser.add_argument("--layer", default="project_global", help="Target layer (e.g. project_global)")
    parser.add_argument("--scope", default="project_global", help="Scope label for records")
    parser.add_argument("--dry-run", action="store_true", help="Do not write changes")
    parser.add_argument("--backup", action="store_true", help="Back up destination file before writing")

    args = parser.parse_args()

    migrate_chunks(Path(args.src), args.layer, args.scope, dry_run=args.dry_run, backup=args.backup)

if __name__ == "__main__":
    main()