From 8a7239fb77afe2a2589307b906d9906ec574433e Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 14 Mar 2026 10:34:45 +0000 Subject: [PATCH] feat: add Alembic for database migrations Replace inline migration logic with proper Alembic migrations: - 001: Initial schema (creates memories table with FTS) - 002: Add multi-user and secrets columns (user_id, is_sensitive, vault_path, encrypted_content) Migrations run automatically on app startup. Existing databases are handled gracefully with IF NOT EXISTS / column existence checks. --- alembic.ini | 36 +++++++ docker/Dockerfile | 3 +- migrations/env.py | 40 ++++++++ migrations/script.py.mako | 26 ++++++ migrations/versions/001_initial_schema.py | 50 ++++++++++ .../002_add_multi_user_and_secrets.py | 52 +++++++++++ pyproject.toml | 2 +- src/claude_memory/api/database.py | 93 ++++++++----------- 8 files changed, 244 insertions(+), 58 deletions(-) create mode 100644 alembic.ini create mode 100644 migrations/env.py create mode 100644 migrations/script.py.mako create mode 100644 migrations/versions/001_initial_schema.py create mode 100644 migrations/versions/002_add_multi_user_and_secrets.py diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..8ac5640 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,36 @@ +[alembic] +script_location = migrations +sqlalchemy.url = env:DATABASE_URL + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/docker/Dockerfile b/docker/Dockerfile index 0d8927e..ddff614 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 
+2,9 @@ FROM python:3.12-slim AS base WORKDIR /app -COPY pyproject.toml README.md ./ +COPY pyproject.toml README.md alembic.ini ./ COPY src/ src/ +COPY migrations/ migrations/ RUN pip install --no-cache-dir ".[api]" diff --git a/migrations/env.py b/migrations/env.py new file mode 100644 index 0000000..4f14ca7 --- /dev/null +++ b/migrations/env.py @@ -0,0 +1,40 @@ +"""Alembic environment configuration.""" + +import os +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import create_engine, pool + +config = context.config + +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# Override sqlalchemy.url from environment variable +db_url = os.environ.get("DATABASE_URL", "") +if db_url: + config.set_main_option("sqlalchemy.url", db_url) + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode.""" + url = config.get_main_option("sqlalchemy.url") + context.configure(url=url, target_metadata=None, literal_binds=True, dialect_opts={"paramstyle": "named"}) + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + connectable = create_engine(config.get_main_option("sqlalchemy.url"), poolclass=pool.NullPool) + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=None) + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/migrations/script.py.mako b/migrations/script.py.mako new file mode 100644 index 0000000..fbc4b07 --- /dev/null +++ b/migrations/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, 
used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/migrations/versions/001_initial_schema.py b/migrations/versions/001_initial_schema.py new file mode 100644 index 0000000..a632b1c --- /dev/null +++ b/migrations/versions/001_initial_schema.py @@ -0,0 +1,50 @@ +"""Initial schema with memories table. + +Revision ID: 001 +Revises: +Create Date: 2026-03-14 +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +revision: str = "001" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + conn = op.get_bind() + # Check if table already exists (handles pre-Alembic installations) + result = conn.execute( + sa.text("SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'memories')") + ) + if result.scalar(): + return + + op.execute(""" + CREATE TABLE memories ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + category VARCHAR(50) DEFAULT 'facts', + tags TEXT DEFAULT '', + expanded_keywords TEXT DEFAULT '', + importance REAL DEFAULT 0.5, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + search_vector tsvector GENERATED ALWAYS AS ( + setweight(to_tsvector('english', coalesce(content, '')), 'A') || + setweight(to_tsvector('english', coalesce(expanded_keywords, '')), 'B') || + setweight(to_tsvector('english', coalesce(tags, '')), 'C') || + setweight(to_tsvector('english', coalesce(category, '')), 'D') + ) STORED + ) + """) + op.execute("CREATE INDEX idx_memories_search ON memories USING 
GIN(search_vector)") + + +def downgrade() -> None: + op.drop_index("idx_memories_search") + op.drop_table("memories") diff --git a/migrations/versions/002_add_multi_user_and_secrets.py b/migrations/versions/002_add_multi_user_and_secrets.py new file mode 100644 index 0000000..203ee98 --- /dev/null +++ b/migrations/versions/002_add_multi_user_and_secrets.py @@ -0,0 +1,52 @@ +"""Add multi-user support and secret management columns. + +Revision ID: 002 +Revises: 001 +Create Date: 2026-03-14 +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +revision: str = "002" +down_revision: Union[str, None] = "001" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _column_exists(conn, column_name: str) -> bool: + result = conn.execute( + sa.text( + "SELECT EXISTS(SELECT 1 FROM information_schema.columns " + "WHERE table_name = 'memories' AND column_name = :col)" + ), + {"col": column_name}, + ) + return result.scalar() + + +def upgrade() -> None: + conn = op.get_bind() + + if not _column_exists(conn, "user_id"): + op.add_column("memories", sa.Column("user_id", sa.String(100), nullable=False, server_default="default")) + + if not _column_exists(conn, "is_sensitive"): + op.add_column("memories", sa.Column("is_sensitive", sa.Boolean(), server_default="false")) + + if not _column_exists(conn, "vault_path"): + op.add_column("memories", sa.Column("vault_path", sa.Text(), nullable=True)) + + if not _column_exists(conn, "encrypted_content"): + op.add_column("memories", sa.Column("encrypted_content", sa.LargeBinary(), nullable=True)) + + op.execute("CREATE INDEX IF NOT EXISTS idx_memories_user ON memories(user_id)") + + +def downgrade() -> None: + op.drop_index("idx_memories_user") + op.drop_column("memories", "encrypted_content") + op.drop_column("memories", "vault_path") + op.drop_column("memories", "is_sensitive") + op.drop_column("memories", "user_id") diff --git 
a/pyproject.toml b/pyproject.toml index ee579c5..fd2c9a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ ] [project.optional-dependencies] -api = ["fastapi>=0.115", "asyncpg>=0.30", "uvicorn>=0.34", "pydantic>=2.0"] +api = ["fastapi>=0.115", "asyncpg>=0.30", "uvicorn>=0.34", "pydantic>=2.0", "alembic>=1.14", "sqlalchemy>=2.0", "psycopg2-binary>=2.9"] vault = ["hvac>=2.0"] dev = ["pytest>=8.0", "pytest-asyncio>=0.24", "ruff>=0.8", "mypy>=1.13", "httpx>=0.28", "cryptography>=43.0"] diff --git a/src/claude_memory/api/database.py b/src/claude_memory/api/database.py index 5eb871d..063a004 100644 --- a/src/claude_memory/api/database.py +++ b/src/claude_memory/api/database.py @@ -1,74 +1,55 @@ +import logging import os import asyncpg +logger = logging.getLogger(__name__) + DATABASE_URL = os.environ.get("DATABASE_URL", "") pool: asyncpg.Pool | None = None +def run_migrations() -> None: + """Run Alembic migrations to latest revision.""" + try: + from alembic import command + from alembic.config import Config + + alembic_cfg = Config() + # Find migrations directory relative to this file or project root + migrations_dir = os.environ.get("ALEMBIC_MIGRATIONS_DIR", "") + if not migrations_dir: + # Check common locations + for candidate in [ + os.path.join(os.path.dirname(__file__), "..", "..", "..", "migrations"), + os.path.join(os.getcwd(), "migrations"), + "/app/migrations", + ]: + if os.path.isdir(candidate): + migrations_dir = candidate + break + + if not migrations_dir or not os.path.isdir(migrations_dir): + logger.warning("Alembic migrations directory not found, skipping migrations") + return + + alembic_cfg.set_main_option("script_location", migrations_dir) + alembic_cfg.set_main_option("sqlalchemy.url", DATABASE_URL) + command.upgrade(alembic_cfg, "head") + logger.info("Database migrations completed successfully") + except Exception as e: + logger.warning("Failed to run Alembic migrations: %s", e) + + async def init_pool() -> asyncpg.Pool: global pool + 
run_migrations() pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10) - async with pool.acquire() as conn: - # Check if table exists - exists = await conn.fetchval( - "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'memories')" - ) - if not exists: - await conn.execute(""" - CREATE TABLE memories ( - id SERIAL PRIMARY KEY, - user_id VARCHAR(100) NOT NULL DEFAULT 'default', - content TEXT NOT NULL, - category VARCHAR(50) DEFAULT 'facts', - tags TEXT DEFAULT '', - expanded_keywords TEXT DEFAULT '', - importance REAL DEFAULT 0.5, - is_sensitive BOOLEAN DEFAULT FALSE, - vault_path TEXT DEFAULT NULL, - encrypted_content BYTEA DEFAULT NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - search_vector tsvector GENERATED ALWAYS AS ( - setweight(to_tsvector('english', coalesce(content, '')), 'A') || - setweight(to_tsvector('english', coalesce(expanded_keywords, '')), 'B') || - setweight(to_tsvector('english', coalesce(tags, '')), 'C') || - setweight(to_tsvector('english', coalesce(category, '')), 'D') - ) STORED - ) - """) - else: - # Migrate existing table: add new columns if missing - columns = [row["column_name"] for row in await conn.fetch( - "SELECT column_name FROM information_schema.columns WHERE table_name = 'memories'" - )] - if "user_id" not in columns: - await conn.execute( - "ALTER TABLE memories ADD COLUMN user_id VARCHAR(100) NOT NULL DEFAULT 'default'" - ) - if "is_sensitive" not in columns: - await conn.execute( - "ALTER TABLE memories ADD COLUMN is_sensitive BOOLEAN DEFAULT FALSE" - ) - if "vault_path" not in columns: - await conn.execute( - "ALTER TABLE memories ADD COLUMN vault_path TEXT DEFAULT NULL" - ) - if "encrypted_content" not in columns: - await conn.execute( - "ALTER TABLE memories ADD COLUMN encrypted_content BYTEA DEFAULT NULL" - ) - await conn.execute( - "CREATE INDEX IF NOT EXISTS idx_memories_search ON memories USING GIN(search_vector)" - ) - 
await conn.execute( - "CREATE INDEX IF NOT EXISTS idx_memories_user ON memories(user_id)" - ) return pool -async def close_pool(): +async def close_pool() -> None: global pool if pool: await pool.close()