feat: standalone claude-memory-mcp with multi-user support and Vault integration
Extracted from private infra repo into standalone open-source project. Three operating modes: - Local: SQLite + FTS5 (zero dependencies) - Server: PostgreSQL via HTTP API with multi-user auth - Full: PostgreSQL + HashiCorp Vault for secret management Features: - MCP stdio server with 5 tools (store/recall/list/delete/secret_get) - FastAPI HTTP API with multi-user Bearer token auth (API_KEYS JSON map) - Regex-based credential detection with auto-redaction - AES-256-GCM encryption fallback for non-Vault deployments - Vault KV v2 client (stdlib urllib, K8s SA auto-auth) - Per-user data isolation (all queries scoped by user_id) - Secret migration endpoint for existing plain-text credentials - Backward-compatible env var aliases (CLAUDE_MEMORY_API_URL) Infrastructure: - Docker + docker-compose (API + PostgreSQL + optional Vault) - Woodpecker CI (test → build → push → kubectl deploy) - GitHub Actions CI (Python 3.11/3.12/3.13) + Release (GHCR + PyPI) - Helm chart + raw Kubernetes manifests 96 tests passing across 6 test files.
This commit is contained in:
commit
0ed5e1e016
40 changed files with 3381 additions and 0 deletions
76
src/claude_memory/credential_detector.py
Normal file
76
src/claude_memory/credential_detector.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
"""Detect credentials and secrets in text content."""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
|
||||
class DetectedCredential:
|
||||
type: str # e.g. "password", "api_key", "private_key", "connection_string", "token"
|
||||
confidence: float # 0.0 to 1.0
|
||||
start: int # position in text
|
||||
end: int # position in text
|
||||
matched_text: str # the actual matched text (for redaction)
|
||||
|
||||
|
||||
# Patterns ordered by confidence
|
||||
_PATTERNS: list[tuple[str, str, float]] = [
|
||||
# High confidence (0.9+)
|
||||
("private_key", r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----", 0.95),
|
||||
("connection_string", r"(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^\s'\"]+", 0.9),
|
||||
("aws_key", r"(?:AKIA|ASIA)[A-Z0-9]{16}", 0.95),
|
||||
("github_token", r"gh[pousr]_[A-Za-z0-9_]{36,}", 0.95),
|
||||
|
||||
# Medium confidence (0.7-0.89)
|
||||
("api_key", r"(?:api[_-]?key|apikey)\s*[:=]\s*['\"]?([A-Za-z0-9_\-]{20,})['\"]?", 0.8),
|
||||
("password", r"(?:password|passwd|pwd)\s*[:=]\s*['\"]?([^\s'\"]{8,})['\"]?", 0.8),
|
||||
("token", r"(?:token|secret|bearer)\s*[:=]\s*['\"]?([A-Za-z0-9_\-\.]{20,})['\"]?", 0.75),
|
||||
("basic_auth", r"(?:Basic\s+)[A-Za-z0-9+/=]{20,}", 0.85),
|
||||
("bearer_token", r"Bearer\s+[A-Za-z0-9_\-\.]{20,}", 0.85),
|
||||
|
||||
# Lower confidence (0.5-0.69)
|
||||
("generic_secret", r"(?:secret|credential|auth)\s*[:=]\s*['\"]?([^\s'\"]{12,})['\"]?", 0.6),
|
||||
("hex_key", r"(?:key|secret)\s*[:=]\s*['\"]?([0-9a-fA-F]{32,})['\"]?", 0.65),
|
||||
]
|
||||
|
||||
|
||||
def detect_credentials(text: str, min_confidence: float = 0.5) -> list[DetectedCredential]:
|
||||
"""Scan text for potential credentials and secrets."""
|
||||
results: list[DetectedCredential] = []
|
||||
for cred_type, pattern, confidence in _PATTERNS:
|
||||
if confidence < min_confidence:
|
||||
continue
|
||||
for match in re.finditer(pattern, text, re.IGNORECASE):
|
||||
results.append(DetectedCredential(
|
||||
type=cred_type,
|
||||
confidence=confidence,
|
||||
start=match.start(),
|
||||
end=match.end(),
|
||||
matched_text=match.group(0),
|
||||
))
|
||||
# Deduplicate overlapping matches, keeping highest confidence
|
||||
results.sort(key=lambda c: (-c.confidence, c.start))
|
||||
filtered: list[DetectedCredential] = []
|
||||
for cred in results:
|
||||
if not any(c.start <= cred.start and c.end >= cred.end for c in filtered):
|
||||
filtered.append(cred)
|
||||
return sorted(filtered, key=lambda c: c.start)
|
||||
|
||||
|
||||
def redact_credentials(text: str, credentials: list[DetectedCredential]) -> str:
|
||||
"""Replace detected credentials with [REDACTED] markers."""
|
||||
if not credentials:
|
||||
return text
|
||||
parts: list[str] = []
|
||||
last_end = 0
|
||||
for cred in sorted(credentials, key=lambda c: c.start):
|
||||
parts.append(text[last_end:cred.start])
|
||||
parts.append(f"[REDACTED:{cred.type}]")
|
||||
last_end = cred.end
|
||||
parts.append(text[last_end:])
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
def is_sensitive(text: str, min_confidence: float = 0.7) -> bool:
|
||||
"""Quick check if text likely contains credentials."""
|
||||
return len(detect_credentials(text, min_confidence)) > 0
|
||||
Loading…
Add table
Add a link
Reference in a new issue