resilient memory sync: decouple push/pull, startup full resync, auth failure handling
- Decouple push and pull in _sync_once() so pull always runs even if push fails
- Add startup full resync to catch drift from other agents and schema changes
- Add periodic full resync every ~10 minutes for continuous drift correction
- Add auth failure detection (401/403) with graceful SQLite-only degradation
- Add /api/auth-check endpoint for lightweight key validation
- Add retry cap (5 attempts) on pending ops to prevent infinite queue buildup
- Add orphan reconciliation: push local-only records with content dedup
- Add memory_count MCP tool for sync diagnostics
- Add version-based SQLite schema migration (PRAGMA user_version)
- Fix API key in ~/.claude.json to match server
- Update README with sync resilience docs, test structure, project layout
- Add 30 new tests covering all new behaviors (155 total, all passing)
This commit is contained in:
parent
a18b94d310
commit
e47efee6b6
8 changed files with 948 additions and 134 deletions
|
|
@ -3,8 +3,9 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.error
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -154,21 +155,25 @@ class TestPushPendingOps:
|
|||
"""A 404 on delete means already deleted on server — should still clear queue."""
|
||||
engine.enqueue_delete(42)
|
||||
|
||||
import urllib.error
|
||||
with patch.object(engine, "_api_request") as mock_api:
|
||||
mock_api.side_effect = RuntimeError("API error 404: not found")
|
||||
mock_api.side_effect = urllib.error.HTTPError(
|
||||
url="http://fake", code=404, msg="Not Found", hdrs=None, fp=None
|
||||
)
|
||||
engine._push_pending_ops()
|
||||
|
||||
cursor = engine._conn.execute("SELECT COUNT(*) as cnt FROM pending_ops")
|
||||
assert cursor.fetchone()["cnt"] == 0
|
||||
|
||||
def test_push_failure_keeps_queue_returns_false(self, engine):
    """Push failure should keep the op in queue and return False (not raise)."""
    engine.enqueue_store(1, "test", "facts", "", "kw", 0.5)

    # Every API call fails, simulating an unreachable server.
    with patch.object(engine, "_api_request") as mock_api:
        mock_api.side_effect = RuntimeError("Connection refused")
        result = engine._push_pending_ops()

    # Failure is reported through the return value rather than an exception.
    assert result is False
    # The op that could not be pushed is still queued for a later attempt.
    cursor = engine._conn.execute("SELECT COUNT(*) as cnt FROM pending_ops")
    assert cursor.fetchone()["cnt"] == 1
|
||||
|
||||
|
|
@ -393,3 +398,361 @@ class TestFullSyncCycle:
|
|||
# Should be gone locally
|
||||
cursor = engine._conn.execute("SELECT * FROM memories WHERE server_id = 500")
|
||||
assert cursor.fetchone() is None
|
||||
|
||||
|
||||
class TestAuthFailureHandling:
    """Tests for the engine's reaction to HTTP 401/403 from ``_api_request``."""

    @staticmethod
    def _http_error(code, msg):
        """Build the ``HTTPError`` used to simulate a rejected API request."""
        return urllib.error.HTTPError(
            url="http://fake", code=code, msg=msg, hdrs=None, fp=None
        )

    @staticmethod
    def _pending_count(engine):
        """Return the number of rows currently in the ``pending_ops`` table."""
        cursor = engine._conn.execute("SELECT COUNT(*) as cnt FROM pending_ops")
        return cursor.fetchone()["cnt"]

    def test_auth_flag_set_on_401(self, engine):
        """401 from _api_request should set _auth_failed flag."""
        engine.enqueue_store(1, "test", "facts", "", "kw", 0.5)

        with patch.object(engine, "_api_request") as mock_api:
            mock_api.side_effect = self._http_error(401, "Unauthorized")
            result = engine._push_pending_ops()

        assert result is False
        assert engine._auth_failed is True

    def test_auth_flag_set_on_403(self, engine):
        """403 from _api_request should set _auth_failed flag."""
        engine.enqueue_store(1, "test", "facts", "", "kw", 0.5)

        with patch.object(engine, "_api_request") as mock_api:
            mock_api.side_effect = self._http_error(403, "Forbidden")
            result = engine._push_pending_ops()

        assert result is False
        assert engine._auth_failed is True

    def test_push_aborts_on_auth_failure(self, engine):
        """On 401, push should abort immediately — no further ops attempted."""
        engine.enqueue_store(1, "test1", "facts", "", "kw", 0.5)
        engine.enqueue_store(2, "test2", "facts", "", "kw", 0.5)

        with patch.object(engine, "_api_request") as mock_api:
            mock_api.side_effect = self._http_error(401, "Unauthorized")
            engine._push_pending_ops()

        # Queue size alone cannot distinguish "aborted after the first op" from
        # "tried both and both failed", so also count the push requests made.
        # Only POSTs are counted so that a non-POST auxiliary request (if the
        # engine issues one) does not cause a false failure.
        post_calls = [
            call for call in mock_api.call_args_list
            if "POST" in call.args or call.kwargs.get("method") == "POST"
        ]
        assert len(post_calls) == 1

        # Both ops should still be in queue (aborted before processing second)
        assert self._pending_count(engine) == 2

    def test_try_sync_store_queues_when_auth_failed(self, engine):
        """When auth is failed, try_sync_store should queue without attempting API call."""
        engine._auth_failed = True

        # Patch the API so the "no call attempted" claim is actually observed.
        with patch.object(engine, "_api_request") as mock_api:
            result = engine.try_sync_store(1, "test", "facts", "", "kw", 0.5)

        mock_api.assert_not_called()
        assert result is None
        assert self._pending_count(engine) == 1

    def test_try_sync_delete_queues_when_auth_failed(self, engine):
        """With _auth_failed set, try_sync_delete returns False and queues the op."""
        engine._auth_failed = True

        result = engine.try_sync_delete(42)

        assert result is False
        assert self._pending_count(engine) == 1

    def test_check_auth_clears_flag_on_success(self, engine):
        """A successful API response makes _check_auth return True and clear the flag."""
        engine._auth_failed = True

        with patch.object(engine, "_api_request") as mock_api:
            mock_api.return_value = {"status": "ok", "user_id": "test"}
            result = engine._check_auth()

        assert result is True
        assert engine._auth_failed is False

    def test_check_auth_stays_failed_on_401(self, engine):
        """A 401 during _check_auth leaves the flag set and returns False."""
        engine._auth_failed = True

        with patch.object(engine, "_api_request") as mock_api:
            mock_api.side_effect = self._http_error(401, "Unauthorized")
            # Also mock urlopen for /health fallback
            with patch("urllib.request.urlopen") as mock_urlopen:
                mock_urlopen.return_value.__enter__ = MagicMock()
                mock_urlopen.return_value.__exit__ = MagicMock(return_value=False)
                result = engine._check_auth()

        assert result is False
        assert engine._auth_failed is True
|
||||
|
||||
|
||||
class TestRetryCount:
    """Tests for per-op retry accounting in ``_push_pending_ops``."""

    def test_retry_count_incremented_on_failure(self, engine):
        """One failed push bumps the queued op's retry_count to 1."""
        engine.enqueue_store(1, "test", "facts", "", "kw", 0.5)

        with patch.object(engine, "_api_request") as mock_api:
            mock_api.side_effect = RuntimeError("Connection refused")
            engine._push_pending_ops()

        # Exactly one op was queued, so read that row directly instead of
        # depending on which row id the queue assigned to it.
        cursor = engine._conn.execute("SELECT retry_count FROM pending_ops")
        assert cursor.fetchone()["retry_count"] == 1

    def test_op_skipped_after_max_retries(self, engine):
        """An op already at the retry cap is dropped without calling the API."""
        engine.enqueue_store(1, "test", "facts", "", "kw", 0.5)
        # Set retry_count to max (the only queued op)
        engine._conn.execute("UPDATE pending_ops SET retry_count = 5")
        engine._conn.commit()

        with patch.object(engine, "_api_request") as mock_api:
            engine._push_pending_ops()

        # Op should be deleted (skipped), API never called
        cursor = engine._conn.execute("SELECT COUNT(*) as cnt FROM pending_ops")
        assert cursor.fetchone()["cnt"] == 0
        mock_api.assert_not_called()

    def test_retry_count_persists_across_pushes(self, engine):
        """Three consecutive failed pushes accumulate to retry_count == 3."""
        engine.enqueue_store(1, "test", "facts", "", "kw", 0.5)

        with patch.object(engine, "_api_request") as mock_api:
            mock_api.side_effect = RuntimeError("fail")
            for _ in range(3):
                engine._push_pending_ops()

        cursor = engine._conn.execute("SELECT retry_count FROM pending_ops")
        assert cursor.fetchone()["retry_count"] == 3
|
||||
|
||||
|
||||
class TestDecoupledPushPull:
    """Tests that ``_sync_once`` runs its pull phase independently of push."""

    def test_pull_runs_even_when_push_fails(self, engine):
        """Pull should execute even if push fails — they're decoupled."""
        engine.enqueue_store(1, "test", "facts", "", "kw", 0.5)
        now = datetime.now(timezone.utc).isoformat()

        def mock_api(method, path, body=None):
            # Every push (POST) fails; anything else is answered as a pull.
            if method == "POST":
                raise RuntimeError("Push failed")
            # GET for pull
            return {
                "memories": [{
                    "id": 99, "content": "from server", "category": "facts",
                    "tags": "", "expanded_keywords": "", "importance": 0.5,
                    "is_sensitive": False, "created_at": now, "updated_at": now,
                    "deleted_at": None,
                }],
                "server_time": now,
            }

        with patch.object(engine, "_api_request", side_effect=mock_api):
            engine._sync_once()

        # Pull should have inserted the server memory
        cursor = engine._conn.execute("SELECT * FROM memories WHERE server_id = 99")
        assert cursor.fetchone() is not None

    def test_sync_once_returns_normally_on_partial_failure(self, engine):
        """If push fails but pull succeeds, _sync_once should not raise."""
        engine.enqueue_store(1, "test", "facts", "", "kw", 0.5)

        def mock_api(method, path, body=None):
            if method == "POST":
                raise RuntimeError("Push failed")
            return {"memories": [], "server_time": "2026-03-16T12:00:00+00:00"}

        with patch.object(engine, "_api_request", side_effect=mock_api):
            # Should not raise
            engine._sync_once()
|
||||
|
||||
|
||||
class TestFullResync:
    """Tests for ``_full_resync`` against a mocked server listing."""

    # Insert statement for a local row that already carries a server_id.
    _INSERT_SYNCED = (
        "INSERT INTO memories (content, category, tags, expanded_keywords, importance, "
        "is_sensitive, created_at, updated_at, server_id) VALUES (?,?,?,?,?,?,?,?,?)"
    )
    # Insert statement for a local-only row (server_id left unset).
    _INSERT_ORPHAN = (
        "INSERT INTO memories (content, category, tags, expanded_keywords, importance, "
        "is_sensitive, created_at, updated_at) VALUES (?,?,?,?,?,?,?,?)"
    )

    @staticmethod
    def _server_row(mem_id, content, category, importance, stamp, tags=""):
        """Build one memory dict in the shape the mocked API returns."""
        return {
            "id": mem_id, "content": content, "category": category,
            "tags": tags, "expanded_keywords": "", "importance": importance,
            "is_sensitive": False, "created_at": stamp, "updated_at": stamp,
        }

    @staticmethod
    def _run_resync(engine, memories, server_time):
        """Run ``_full_resync`` with ``_api_request`` returning the given listing."""
        payload = {"memories": memories, "server_time": server_time}
        with patch.object(engine, "_api_request", return_value=payload):
            engine._full_resync()

    def test_full_resync_inserts_server_records(self, engine):
        """Two records returned by the server end up as two local rows."""
        stamp = datetime.now(timezone.utc).isoformat()
        listing = [
            self._server_row(1, "server mem 1", "facts", 0.5, stamp),
            self._server_row(2, "server mem 2", "projects", 0.8, stamp),
        ]

        self._run_resync(engine, listing, stamp)

        row = engine._conn.execute("SELECT COUNT(*) as cnt FROM memories").fetchone()
        assert row["cnt"] == 2

    def test_full_resync_removes_stale_local_records(self, engine):
        """Local records with server_ids not on server should be deleted."""
        stamp = datetime.now(timezone.utc).isoformat()
        # Insert a local record with server_id=999 (not on server)
        engine._conn.execute(
            self._INSERT_SYNCED,
            ("stale", "facts", "", "", 0.5, 0, stamp, stamp, 999),
        )
        engine._conn.commit()

        self._run_resync(
            engine, [self._server_row(1, "current", "facts", 0.5, stamp)], stamp
        )

        # Stale record should be gone
        stale = engine._conn.execute("SELECT * FROM memories WHERE server_id = 999")
        assert stale.fetchone() is None
        # Current record should exist
        current = engine._conn.execute("SELECT * FROM memories WHERE server_id = 1")
        assert current.fetchone() is not None

    def test_full_resync_deletes_orphans_after_push(self, engine):
        """Orphans (server_id IS NULL) should be cleaned up after push attempt."""
        stamp = datetime.now(timezone.utc).isoformat()
        engine._conn.execute(
            self._INSERT_ORPHAN,
            ("orphan", "facts", "", "", 0.5, 0, stamp, stamp),
        )
        engine._conn.commit()

        self._run_resync(engine, [], stamp)

        leftovers = engine._conn.execute("SELECT * FROM memories WHERE server_id IS NULL")
        assert leftovers.fetchone() is None

    def test_full_resync_updates_last_sync_ts(self, engine):
        """After a resync, last_sync_ts equals the server_time from the response."""
        server_time = "2026-03-16T15:00:00+00:00"

        self._run_resync(engine, [], server_time)

        assert engine.last_sync_ts == server_time

    def test_full_resync_updates_existing_records(self, engine):
        """A local row whose server_id matches is overwritten with server values."""
        stamp = datetime.now(timezone.utc).isoformat()
        engine._conn.execute(
            self._INSERT_SYNCED,
            ("old content", "facts", "", "", 0.5, 0, stamp, stamp, 10),
        )
        engine._conn.commit()

        refreshed = self._server_row(
            10, "new content", "projects", 0.9, stamp, tags="updated"
        )
        self._run_resync(engine, [refreshed], stamp)

        row = engine._conn.execute(
            "SELECT * FROM memories WHERE server_id = 10"
        ).fetchone()
        assert row["content"] == "new content"
        assert row["category"] == "projects"
        assert row["importance"] == 0.9
|
||||
|
||||
|
||||
class TestPushOrphans:
    """Tests for ``_push_orphans``: pushing local rows that have no server_id."""

    def test_push_orphans_skips_duplicates(self, engine):
        """An orphan whose content already exists on the server is not POSTed."""
        now = datetime.now(timezone.utc).isoformat()
        # Insert orphan with content matching server
        engine._conn.execute(
            "INSERT INTO memories (content, category, tags, expanded_keywords, importance, "
            "is_sensitive, created_at, updated_at) VALUES (?,?,?,?,?,?,?,?)",
            ("duplicate content", "facts", "", "", 0.5, 0, now, now),
        )
        engine._conn.commit()

        call_log = []

        def mock_api(method, path, body=None):
            # Record every request so the HTTP methods used can be inspected.
            call_log.append((method, path))
            return {
                "memories": [{"id": 1, "content": "duplicate content", "category": "facts",
                              "tags": "", "expanded_keywords": "", "importance": 0.5,
                              "is_sensitive": False, "created_at": now, "updated_at": now}],
                "server_time": now,
            }

        with patch.object(engine, "_api_request", side_effect=mock_api):
            engine._push_orphans()

        # Should have called GET for sync but NOT POST (duplicate skipped).
        # The GET check keeps the POST check from passing vacuously on an
        # empty call log.
        methods = [method for method, _ in call_log]
        assert "GET" in methods
        assert "POST" not in methods

    def test_push_orphans_posts_unique(self, engine):
        """An orphan with content unknown to the server is POSTed and gets its id."""
        now = datetime.now(timezone.utc).isoformat()
        engine._conn.execute(
            "INSERT INTO memories (id, content, category, tags, expanded_keywords, importance, "
            "is_sensitive, created_at, updated_at) VALUES (?,?,?,?,?,?,?,?,?)",
            (1, "unique content", "facts", "", "", 0.5, 0, now, now),
        )
        engine._conn.commit()

        def mock_api(method, path, body=None):
            if method == "GET":
                # Server holds nothing, so the orphan cannot be a duplicate.
                return {"memories": [], "server_time": now}
            if method == "POST":
                return {"id": 100, "category": "facts", "importance": 0.5}
            return {}

        with patch.object(engine, "_api_request", side_effect=mock_api):
            engine._push_orphans()

        # Orphan should now have server_id
        cursor = engine._conn.execute("SELECT server_id FROM memories WHERE id = 1")
        assert cursor.fetchone()["server_id"] == 100
|
||||
|
||||
|
||||
class TestGetCounts:
    """Tests for the diagnostic summary returned by ``engine.get_counts()``."""

    def test_empty_counts(self, engine):
        """A fresh engine reports zero everywhere and no auth failure."""
        summary = engine.get_counts()

        expected_values = {
            "total": 0,
            "by_category": {},
            "orphans_no_server_id": 0,
            "pending_ops": 0,
        }
        for key, expected in expected_values.items():
            assert summary[key] == expected
        assert summary["auth_failed"] is False

    def test_counts_with_data(self, engine):
        """One synced row, one orphan and one queued op are each counted."""
        stamp = datetime.now(timezone.utc).isoformat()
        db = engine._conn

        # Row that already has a server_id.
        db.execute(
            "INSERT INTO memories (content, category, tags, expanded_keywords, importance, "
            "is_sensitive, created_at, updated_at, server_id) VALUES (?,?,?,?,?,?,?,?,?)",
            ("mem1", "facts", "", "", 0.5, 0, stamp, stamp, 1),
        )
        # Row without a server_id (counted as an orphan).
        db.execute(
            "INSERT INTO memories (content, category, tags, expanded_keywords, importance, "
            "is_sensitive, created_at, updated_at) VALUES (?,?,?,?,?,?,?,?)",
            ("orphan", "projects", "", "", 0.5, 0, stamp, stamp),
        )
        engine.enqueue_store(99, "queued", "facts", "", "", 0.5)
        db.commit()

        summary = engine.get_counts()
        assert summary["total"] == 2

        per_category = summary["by_category"]
        assert per_category["facts"] == 1
        assert per_category["projects"] == 1

        assert summary["orphans_no_server_id"] == 1
        assert summary["pending_ops"] == 1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue