From 8a7239fb77afe2a2589307b906d9906ec574433e Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 14 Mar 2026 10:34:45 +0000 Subject: [PATCH] feat: add Alembic for database migrations Replace inline migration logic with proper Alembic migrations: - 001: Initial schema (creates memories table with FTS) - 002: Add multi-user and secrets columns (user_id, is_sensitive, vault_path, encrypted_content) Migrations run automatically on app startup. Existing databases are handled gracefully with IF NOT EXISTS / column existence checks. --- alembic.ini | 36 +++++++ docker/Dockerfile | 3 +- migrations/env.py | 40 ++++++++ migrations/script.py.mako | 26 ++++++ migrations/versions/001_initial_schema.py | 50 ++++++++++ .../002_add_multi_user_and_secrets.py | 52 +++++++++++ pyproject.toml | 2 +- src/claude_memory/api/database.py | 93 ++++++++----------- 8 files changed, 244 insertions(+), 58 deletions(-) create mode 100644 alembic.ini create mode 100644 migrations/env.py create mode 100644 migrations/script.py.mako create mode 100644 migrations/versions/001_initial_schema.py create mode 100644 migrations/versions/002_add_multi_user_and_secrets.py diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..8ac5640 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,36 @@ +[alembic] +script_location = migrations +sqlalchemy.url = env:DATABASE_URL + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/docker/Dockerfile b/docker/Dockerfile index 0d8927e..ddff614 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 
+2,9 @@ FROM python:3.12-slim AS base WORKDIR /app -COPY pyproject.toml README.md ./ +COPY pyproject.toml README.md alembic.ini ./ COPY src/ src/ +COPY migrations/ migrations/ RUN pip install --no-cache-dir ".[api]" diff --git a/migrations/env.py b/migrations/env.py new file mode 100644 index 0000000..4f14ca7 --- /dev/null +++ b/migrations/env.py @@ -0,0 +1,40 @@ +"""Alembic environment configuration.""" + +import os +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import create_engine, pool + +config = context.config + +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# Override sqlalchemy.url from environment variable +db_url = os.environ.get("DATABASE_URL", "") +if db_url: + config.set_main_option("sqlalchemy.url", db_url) + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode.""" + url = config.get_main_option("sqlalchemy.url") + context.configure(url=url, target_metadata=None, literal_binds=True, dialect_opts={"paramstyle": "named"}) + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + connectable = create_engine(config.get_main_option("sqlalchemy.url"), poolclass=pool.NullPool) + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=None) + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/migrations/script.py.mako b/migrations/script.py.mako new file mode 100644 index 0000000..fbc4b07 --- /dev/null +++ b/migrations/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, 
used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/migrations/versions/001_initial_schema.py b/migrations/versions/001_initial_schema.py new file mode 100644 index 0000000..a632b1c --- /dev/null +++ b/migrations/versions/001_initial_schema.py @@ -0,0 +1,50 @@ +"""Initial schema with memories table. + +Revision ID: 001 +Revises: +Create Date: 2026-03-14 +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +revision: str = "001" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + conn = op.get_bind() + # Check if table already exists (handles pre-Alembic installations) + result = conn.execute( + sa.text("SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'memories')") + ) + if result.scalar(): + return + + op.execute(""" + CREATE TABLE memories ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + category VARCHAR(50) DEFAULT 'facts', + tags TEXT DEFAULT '', + expanded_keywords TEXT DEFAULT '', + importance REAL DEFAULT 0.5, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + search_vector tsvector GENERATED ALWAYS AS ( + setweight(to_tsvector('english', coalesce(content, '')), 'A') || + setweight(to_tsvector('english', coalesce(expanded_keywords, '')), 'B') || + setweight(to_tsvector('english', coalesce(tags, '')), 'C') || + setweight(to_tsvector('english', coalesce(category, '')), 'D') + ) STORED + ) + """) + op.execute("CREATE INDEX idx_memories_search ON memories USING 
GIN(search_vector)") + + +def downgrade() -> None: + op.drop_index("idx_memories_search") + op.drop_table("memories") diff --git a/migrations/versions/002_add_multi_user_and_secrets.py b/migrations/versions/002_add_multi_user_and_secrets.py new file mode 100644 index 0000000..203ee98 --- /dev/null +++ b/migrations/versions/002_add_multi_user_and_secrets.py @@ -0,0 +1,52 @@ +"""Add multi-user support and secret management columns. + +Revision ID: 002 +Revises: 001 +Create Date: 2026-03-14 +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +revision: str = "002" +down_revision: Union[str, None] = "001" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _column_exists(conn, column_name: str) -> bool: + result = conn.execute( + sa.text( + "SELECT EXISTS(SELECT 1 FROM information_schema.columns " + "WHERE table_name = 'memories' AND column_name = :col)" + ), + {"col": column_name}, + ) + return result.scalar() + + +def upgrade() -> None: + conn = op.get_bind() + + if not _column_exists(conn, "user_id"): + op.add_column("memories", sa.Column("user_id", sa.String(100), nullable=False, server_default="default")) + + if not _column_exists(conn, "is_sensitive"): + op.add_column("memories", sa.Column("is_sensitive", sa.Boolean(), server_default="false")) + + if not _column_exists(conn, "vault_path"): + op.add_column("memories", sa.Column("vault_path", sa.Text(), nullable=True)) + + if not _column_exists(conn, "encrypted_content"): + op.add_column("memories", sa.Column("encrypted_content", sa.LargeBinary(), nullable=True)) + + op.execute("CREATE INDEX IF NOT EXISTS idx_memories_user ON memories(user_id)") + + +def downgrade() -> None: + op.drop_index("idx_memories_user") + op.drop_column("memories", "encrypted_content") + op.drop_column("memories", "vault_path") + op.drop_column("memories", "is_sensitive") + op.drop_column("memories", "user_id") diff --git 
a/pyproject.toml b/pyproject.toml index ee579c5..fd2c9a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ ] [project.optional-dependencies] -api = ["fastapi>=0.115", "asyncpg>=0.30", "uvicorn>=0.34", "pydantic>=2.0"] +api = ["fastapi>=0.115", "asyncpg>=0.30", "uvicorn>=0.34", "pydantic>=2.0", "alembic>=1.14", "sqlalchemy>=2.0", "psycopg2-binary>=2.9"] vault = ["hvac>=2.0"] dev = ["pytest>=8.0", "pytest-asyncio>=0.24", "ruff>=0.8", "mypy>=1.13", "httpx>=0.28", "cryptography>=43.0"] diff --git a/src/claude_memory/api/database.py b/src/claude_memory/api/database.py index 5eb871d..063a004 100644 --- a/src/claude_memory/api/database.py +++ b/src/claude_memory/api/database.py @@ -1,74 +1,55 @@ +import logging import os import asyncpg +logger = logging.getLogger(__name__) + DATABASE_URL = os.environ.get("DATABASE_URL", "") pool: asyncpg.Pool | None = None +def run_migrations() -> None: + """Run Alembic migrations to latest revision.""" + try: + from alembic import command + from alembic.config import Config + + alembic_cfg = Config() + # Find migrations directory relative to this file or project root + migrations_dir = os.environ.get("ALEMBIC_MIGRATIONS_DIR", "") + if not migrations_dir: + # Check common locations + for candidate in [ + os.path.join(os.path.dirname(__file__), "..", "..", "..", "migrations"), + os.path.join(os.getcwd(), "migrations"), + "/app/migrations", + ]: + if os.path.isdir(candidate): + migrations_dir = candidate + break + + if not migrations_dir or not os.path.isdir(migrations_dir): + logger.warning("Alembic migrations directory not found, skipping migrations") + return + + alembic_cfg.set_main_option("script_location", migrations_dir) + alembic_cfg.set_main_option("sqlalchemy.url", DATABASE_URL) + command.upgrade(alembic_cfg, "head") + logger.info("Database migrations completed successfully") + except Exception as e: + logger.warning("Failed to run Alembic migrations: %s", e) + + async def init_pool() -> asyncpg.Pool: global pool + 
run_migrations() pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10) - async with pool.acquire() as conn: - # Check if table exists - exists = await conn.fetchval( - "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'memories')" - ) - if not exists: - await conn.execute(""" - CREATE TABLE memories ( - id SERIAL PRIMARY KEY, - user_id VARCHAR(100) NOT NULL DEFAULT 'default', - content TEXT NOT NULL, - category VARCHAR(50) DEFAULT 'facts', - tags TEXT DEFAULT '', - expanded_keywords TEXT DEFAULT '', - importance REAL DEFAULT 0.5, - is_sensitive BOOLEAN DEFAULT FALSE, - vault_path TEXT DEFAULT NULL, - encrypted_content BYTEA DEFAULT NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - search_vector tsvector GENERATED ALWAYS AS ( - setweight(to_tsvector('english', coalesce(content, '')), 'A') || - setweight(to_tsvector('english', coalesce(expanded_keywords, '')), 'B') || - setweight(to_tsvector('english', coalesce(tags, '')), 'C') || - setweight(to_tsvector('english', coalesce(category, '')), 'D') - ) STORED - ) - """) - else: - # Migrate existing table: add new columns if missing - columns = [row["column_name"] for row in await conn.fetch( - "SELECT column_name FROM information_schema.columns WHERE table_name = 'memories'" - )] - if "user_id" not in columns: - await conn.execute( - "ALTER TABLE memories ADD COLUMN user_id VARCHAR(100) NOT NULL DEFAULT 'default'" - ) - if "is_sensitive" not in columns: - await conn.execute( - "ALTER TABLE memories ADD COLUMN is_sensitive BOOLEAN DEFAULT FALSE" - ) - if "vault_path" not in columns: - await conn.execute( - "ALTER TABLE memories ADD COLUMN vault_path TEXT DEFAULT NULL" - ) - if "encrypted_content" not in columns: - await conn.execute( - "ALTER TABLE memories ADD COLUMN encrypted_content BYTEA DEFAULT NULL" - ) - await conn.execute( - "CREATE INDEX IF NOT EXISTS idx_memories_search ON memories USING GIN(search_vector)" - ) - 
await conn.execute( - "CREATE INDEX IF NOT EXISTS idx_memories_user ON memories(user_id)" - ) return pool -async def close_pool(): +async def close_pool() -> None: global pool if pool: await pool.close()