feat: add Alembic for database migrations

Replace inline migration logic with proper Alembic migrations:
- 001: Initial schema (creates memories table with FTS)
- 002: Add multi-user and secrets columns (user_id, is_sensitive,
  vault_path, encrypted_content)

Migrations run automatically on app startup. Existing databases
are handled gracefully with IF NOT EXISTS / column existence checks.
This commit is contained in:
Viktor Barzin 2026-03-14 10:34:45 +00:00
parent 63205dbd0c
commit 8a7239fb77
No known key found for this signature in database
GPG key ID: 0EB088298288D958
8 changed files with 244 additions and 58 deletions

36
alembic.ini Normal file
View file

@ -0,0 +1,36 @@
[alembic]
script_location = migrations
sqlalchemy.url = env:DATABASE_URL
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View file

@ -2,8 +2,9 @@ FROM python:3.12-slim AS base
WORKDIR /app
COPY pyproject.toml README.md ./
COPY pyproject.toml README.md alembic.ini ./
COPY src/ src/
COPY migrations/ migrations/
RUN pip install --no-cache-dir ".[api]"

40
migrations/env.py Normal file
View file

@ -0,0 +1,40 @@
"""Alembic environment configuration."""
import os
from logging.config import fileConfig
from alembic import context
from sqlalchemy import create_engine, pool
config = context.config
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# Override sqlalchemy.url from environment variable
db_url = os.environ.get("DATABASE_URL", "")
if db_url:
config.set_main_option("sqlalchemy.url", db_url)
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode."""
url = config.get_main_option("sqlalchemy.url")
context.configure(url=url, target_metadata=None, literal_binds=True, dialect_opts={"paramstyle": "named"})
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
connectable = create_engine(config.get_main_option("sqlalchemy.url"), poolclass=pool.NullPool)
with connectable.connect() as connection:
context.configure(connection=connection, target_metadata=None)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

26
migrations/script.py.mako Normal file
View file

@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View file

@ -0,0 +1,50 @@
"""Initial schema with memories table.
Revision ID: 001
Revises:
Create Date: 2026-03-14
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
revision: str = "001"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
conn = op.get_bind()
# Check if table already exists (handles pre-Alembic installations)
result = conn.execute(
sa.text("SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'memories')")
)
if result.scalar():
return
op.execute("""
CREATE TABLE memories (
id SERIAL PRIMARY KEY,
content TEXT NOT NULL,
category VARCHAR(50) DEFAULT 'facts',
tags TEXT DEFAULT '',
expanded_keywords TEXT DEFAULT '',
importance REAL DEFAULT 0.5,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
search_vector tsvector GENERATED ALWAYS AS (
setweight(to_tsvector('english', coalesce(content, '')), 'A') ||
setweight(to_tsvector('english', coalesce(expanded_keywords, '')), 'B') ||
setweight(to_tsvector('english', coalesce(tags, '')), 'C') ||
setweight(to_tsvector('english', coalesce(category, '')), 'D')
) STORED
)
""")
op.execute("CREATE INDEX idx_memories_search ON memories USING GIN(search_vector)")
def downgrade() -> None:
op.drop_index("idx_memories_search")
op.drop_table("memories")

View file

@ -0,0 +1,52 @@
"""Add multi-user support and secret management columns.
Revision ID: 002
Revises: 001
Create Date: 2026-03-14
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
revision: str = "002"
down_revision: Union[str, None] = "001"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def _column_exists(conn, column_name: str) -> bool:
result = conn.execute(
sa.text(
"SELECT EXISTS(SELECT 1 FROM information_schema.columns "
"WHERE table_name = 'memories' AND column_name = :col)"
),
{"col": column_name},
)
return result.scalar()
def upgrade() -> None:
conn = op.get_bind()
if not _column_exists(conn, "user_id"):
op.add_column("memories", sa.Column("user_id", sa.String(100), nullable=False, server_default="default"))
if not _column_exists(conn, "is_sensitive"):
op.add_column("memories", sa.Column("is_sensitive", sa.Boolean(), server_default="false"))
if not _column_exists(conn, "vault_path"):
op.add_column("memories", sa.Column("vault_path", sa.Text(), nullable=True))
if not _column_exists(conn, "encrypted_content"):
op.add_column("memories", sa.Column("encrypted_content", sa.LargeBinary(), nullable=True))
op.execute("CREATE INDEX IF NOT EXISTS idx_memories_user ON memories(user_id)")
def downgrade() -> None:
op.drop_index("idx_memories_user")
op.drop_column("memories", "encrypted_content")
op.drop_column("memories", "vault_path")
op.drop_column("memories", "is_sensitive")
op.drop_column("memories", "user_id")

View file

@ -16,7 +16,7 @@ classifiers = [
]
[project.optional-dependencies]
api = ["fastapi>=0.115", "asyncpg>=0.30", "uvicorn>=0.34", "pydantic>=2.0"]
api = ["fastapi>=0.115", "asyncpg>=0.30", "uvicorn>=0.34", "pydantic>=2.0", "alembic>=1.14", "sqlalchemy>=2.0"]
vault = ["hvac>=2.0"]
dev = ["pytest>=8.0", "pytest-asyncio>=0.24", "ruff>=0.8", "mypy>=1.13", "httpx>=0.28", "cryptography>=43.0"]

View file

@ -1,74 +1,55 @@
import logging
import os
import asyncpg
logger = logging.getLogger(__name__)
DATABASE_URL = os.environ.get("DATABASE_URL", "")
pool: asyncpg.Pool | None = None
def run_migrations() -> None:
"""Run Alembic migrations to latest revision."""
try:
from alembic import command
from alembic.config import Config
alembic_cfg = Config()
# Find migrations directory relative to this file or project root
migrations_dir = os.environ.get("ALEMBIC_MIGRATIONS_DIR", "")
if not migrations_dir:
# Check common locations
for candidate in [
os.path.join(os.path.dirname(__file__), "..", "..", "..", "migrations"),
os.path.join(os.getcwd(), "migrations"),
"/app/migrations",
]:
if os.path.isdir(candidate):
migrations_dir = candidate
break
if not migrations_dir or not os.path.isdir(migrations_dir):
logger.warning("Alembic migrations directory not found, skipping migrations")
return
alembic_cfg.set_main_option("script_location", migrations_dir)
alembic_cfg.set_main_option("sqlalchemy.url", DATABASE_URL)
command.upgrade(alembic_cfg, "head")
logger.info("Database migrations completed successfully")
except Exception as e:
logger.warning("Failed to run Alembic migrations: %s", e)
async def init_pool() -> asyncpg.Pool:
global pool
run_migrations()
pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
async with pool.acquire() as conn:
# Check if table exists
exists = await conn.fetchval(
"SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'memories')"
)
if not exists:
await conn.execute("""
CREATE TABLE memories (
id SERIAL PRIMARY KEY,
user_id VARCHAR(100) NOT NULL DEFAULT 'default',
content TEXT NOT NULL,
category VARCHAR(50) DEFAULT 'facts',
tags TEXT DEFAULT '',
expanded_keywords TEXT DEFAULT '',
importance REAL DEFAULT 0.5,
is_sensitive BOOLEAN DEFAULT FALSE,
vault_path TEXT DEFAULT NULL,
encrypted_content BYTEA DEFAULT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
search_vector tsvector GENERATED ALWAYS AS (
setweight(to_tsvector('english', coalesce(content, '')), 'A') ||
setweight(to_tsvector('english', coalesce(expanded_keywords, '')), 'B') ||
setweight(to_tsvector('english', coalesce(tags, '')), 'C') ||
setweight(to_tsvector('english', coalesce(category, '')), 'D')
) STORED
)
""")
else:
# Migrate existing table: add new columns if missing
columns = [row["column_name"] for row in await conn.fetch(
"SELECT column_name FROM information_schema.columns WHERE table_name = 'memories'"
)]
if "user_id" not in columns:
await conn.execute(
"ALTER TABLE memories ADD COLUMN user_id VARCHAR(100) NOT NULL DEFAULT 'default'"
)
if "is_sensitive" not in columns:
await conn.execute(
"ALTER TABLE memories ADD COLUMN is_sensitive BOOLEAN DEFAULT FALSE"
)
if "vault_path" not in columns:
await conn.execute(
"ALTER TABLE memories ADD COLUMN vault_path TEXT DEFAULT NULL"
)
if "encrypted_content" not in columns:
await conn.execute(
"ALTER TABLE memories ADD COLUMN encrypted_content BYTEA DEFAULT NULL"
)
await conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memories_search ON memories USING GIN(search_vector)"
)
await conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memories_user ON memories(user_id)"
)
return pool
async def close_pool():
async def close_pool() -> None:
global pool
if pool:
await pool.close()