537 lines
18 KiB
Python
537 lines
18 KiB
Python
"""
|
|
Tests for D1.1: JSON Storage Infrastructure
|
|
|
|
Run: python brain/scripts/test_storage.py
|
|
"""
|
|
|
|
import unittest
|
|
import json
|
|
import tempfile
|
|
import shutil
|
|
from pathlib import Path
|
|
from datetime import datetime, timedelta
|
|
|
|
from memory_store import (
|
|
ChunkStore, ChunkIndex, Chunk, ChunkMetadata,
|
|
ChunkLinks, ChunkType, init_storage
|
|
)
|
|
|
|
|
|
class TestChunkStoreInitialization(unittest.TestCase):
    """Test ChunkStore setup and directory creation."""

    def setUp(self):
        """Reserve a scratch directory and derive the store root beneath it."""
        self.temp_dir = tempfile.mkdtemp()
        # Cleanups registered with addCleanup run even if a later setUp step
        # raises, so the scratch directory is always removed.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.base_path = Path(self.temp_dir) / "brain" / "memory"

    def test_creates_directories(self):
        """Should create chunks, index, and archive directories."""
        # Only the constructor's side effects are under test, so the
        # instance itself is not kept.
        ChunkStore(str(self.base_path))

        for subdir in ("chunks", "index", "archive"):
            # subTest reports each missing directory separately instead of
            # stopping at the first failure.
            with self.subTest(subdir=subdir):
                self.assertTrue((self.base_path / subdir).exists())

    def test_init_storage_convenience(self):
        """init_storage() should return a ChunkStore rooted at the given path."""
        store = init_storage(str(self.base_path))

        self.assertIsInstance(store, ChunkStore)
        self.assertEqual(store.base_path, self.base_path)
|
|
|
|
|
|
class TestChunkCreation(unittest.TestCase):
    """Test creating chunks via ``ChunkStore.create_chunk``."""

    def setUp(self):
        """Build a ChunkStore rooted in a throw-away temporary directory."""
        self.temp_dir = tempfile.mkdtemp()
        # tearDown is skipped when setUp raises; addCleanup is not, so the
        # temporary directory cannot leak if ChunkStore() fails below.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.store = ChunkStore(Path(self.temp_dir) / "brain" / "memory")

    def test_create_basic_chunk(self):
        """Should create chunk with required fields."""
        chunk = self.store.create_chunk(
            content="Test content",
            chunk_type="note",
            conversation_id="conv-123",
            tokens=10,
        )

        self.assertIsNotNone(chunk.id)
        self.assertTrue(chunk.id.startswith("chunk-"))
        self.assertEqual(chunk.content, "Test content")
        self.assertEqual(chunk.tokens, 10)
        self.assertEqual(chunk.type, "note")

    def test_create_with_tags(self):
        """Should create chunk with tags."""
        chunk = self.store.create_chunk(
            content="Test",
            chunk_type="fact",
            conversation_id="conv-123",
            tokens=5,
            tags=["test", "important"],
        )

        self.assertEqual(chunk.tags, ["test", "important"])

    def test_create_with_confidence(self):
        """Should create chunk with confidence score."""
        chunk = self.store.create_chunk(
            content="Test",
            chunk_type="fact",
            conversation_id="conv-123",
            tokens=5,
            confidence=0.95,
        )

        self.assertEqual(chunk.metadata.confidence, 0.95)

    def test_chunk_id_format(self):
        """Chunk ID should contain the (UTC) creation date as YYYY-MM-DD."""
        # Sample the date on both sides of the call: if creation straddles
        # UTC midnight, either date is a legitimate match.
        # datetime.now(timezone.utc) replaces the deprecated utcnow().
        date_before = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        chunk = self.store.create_chunk(
            content="Test",
            chunk_type="note",
            conversation_id="conv-123",
            tokens=5,
        )
        date_after = datetime.now(timezone.utc).strftime("%Y-%m-%d")

        self.assertTrue(
            date_before in chunk.id or date_after in chunk.id,
            f"{chunk.id!r} contains neither {date_before} nor {date_after}",
        )

    def test_file_created(self):
        """Chunk file should be created on disk and hold valid JSON."""
        chunk = self.store.create_chunk(
            content="Test content",
            chunk_type="note",
            conversation_id="conv-123",
            tokens=10,
        )

        chunk_path = self.store._get_chunk_path(chunk.id)
        self.assertTrue(chunk_path.exists())

        # json.loads raises if the file is not valid JSON.
        data = json.loads(chunk_path.read_text())
        self.assertEqual(data["content"], "Test content")
|
|
|
|
|
|
class TestChunkRetrieval(unittest.TestCase):
    """Test retrieving chunks via ``ChunkStore.get_chunk``."""

    def setUp(self):
        """Create a store in a temporary directory holding one known chunk."""
        self.temp_dir = tempfile.mkdtemp()
        # tearDown is skipped when setUp raises; addCleanup is not, so the
        # directory is removed even if store or chunk creation fails below.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.store = ChunkStore(Path(self.temp_dir) / "brain" / "memory")
        self.chunk = self.store.create_chunk(
            content="Test content",
            chunk_type="note",
            conversation_id="conv-123",
            tokens=10,
        )

    def test_get_existing_chunk(self):
        """Should retrieve existing chunk."""
        retrieved = self.store.get_chunk(self.chunk.id)

        self.assertIsNotNone(retrieved)
        self.assertEqual(retrieved.id, self.chunk.id)
        self.assertEqual(retrieved.content, "Test content")

    def test_get_nonexistent_chunk(self):
        """Should return None for non-existent chunk."""
        result = self.store.get_chunk("chunk-nonexistent-12345678")
        self.assertIsNone(result)

    def test_get_invalid_id_format(self):
        """Should return None for invalid chunk ID."""
        # A path-traversal style ID must not resolve to anything.
        result = self.store.get_chunk("../../../etc/passwd")
        self.assertIsNone(result)

    def test_access_count_increments(self):
        """Access count should increment on retrieval."""
        initial_count = self.chunk.metadata.access_count

        retrieved = self.store.get_chunk(self.chunk.id)
        self.assertEqual(retrieved.metadata.access_count, initial_count + 1)

        # Retrieve again: the counter is expected to keep accumulating.
        retrieved2 = self.store.get_chunk(self.chunk.id)
        self.assertEqual(retrieved2.metadata.access_count, initial_count + 2)

    def test_last_accessed_updates(self):
        """Last accessed timestamp should update on retrieval."""
        # datetime.now(timezone.utc) replaces the deprecated utcnow().
        before = datetime.now(timezone.utc)
        retrieved = self.store.get_chunk(self.chunk.id)
        after = datetime.now(timezone.utc)

        self.assertIsNotNone(retrieved)
        # fromisoformat() on older Python versions does not accept a
        # trailing "Z", so normalise it to an explicit UTC offset first.
        accessed = datetime.fromisoformat(
            retrieved.metadata.last_accessed.replace("Z", "+00:00")
        )
        if accessed.tzinfo is None:
            # No offset in the stored value: treat it as UTC, matching what
            # the previous naive comparison assumed.
            accessed = accessed.replace(tzinfo=timezone.utc)

        self.assertLessEqual(before, accessed)
        self.assertLessEqual(accessed, after)
|
|
|
|
|
|
class TestChunkUpdate(unittest.TestCase):
    """Test updating chunks via ``ChunkStore.update_chunk``."""

    def setUp(self):
        """Create a store in a temporary directory holding one tagged chunk."""
        self.temp_dir = tempfile.mkdtemp()
        # tearDown is skipped when setUp raises; addCleanup is not, so the
        # directory is removed even if store or chunk creation fails below.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.store = ChunkStore(Path(self.temp_dir) / "brain" / "memory")
        self.chunk = self.store.create_chunk(
            content="Original content",
            chunk_type="note",
            conversation_id="conv-123",
            tokens=10,
            tags=["original"],
        )

    def test_update_content(self):
        """Should update chunk content and persist the change."""
        updated = self.store.update_chunk(
            self.chunk.id,
            content="Updated content",
        )

        # Guard first so a failed update is reported as such rather than as
        # an AttributeError on None.
        self.assertIsNotNone(updated)
        self.assertEqual(updated.content, "Updated content")

        # Verify persisted: a fresh read must see the new content.
        retrieved = self.store.get_chunk(self.chunk.id)
        self.assertIsNotNone(retrieved)
        self.assertEqual(retrieved.content, "Updated content")

    def test_update_confidence(self):
        """Should update confidence score."""
        updated = self.store.update_chunk(
            self.chunk.id,
            confidence=0.99,
        )

        self.assertIsNotNone(updated)
        self.assertEqual(updated.metadata.confidence, 0.99)

    def test_update_tags(self):
        """Should update tags."""
        updated = self.store.update_chunk(
            self.chunk.id,
            tags=["new", "tags"],
        )

        self.assertIsNotNone(updated)
        self.assertEqual(updated.tags, ["new", "tags"])

    def test_update_nonexistent_chunk(self):
        """Should return None for non-existent chunk."""
        result = self.store.update_chunk("chunk-nonexistent", content="Test")
        self.assertIsNone(result)
|
|
|
|
|
|
class TestChunkDeletion(unittest.TestCase):
    """Test deleting chunks via ``ChunkStore.delete_chunk``."""

    def setUp(self):
        """Create a store in a temporary directory holding one chunk to delete."""
        self.temp_dir = tempfile.mkdtemp()
        # tearDown is skipped when setUp raises; addCleanup is not, so the
        # directory is removed even if store or chunk creation fails below.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.store = ChunkStore(Path(self.temp_dir) / "brain" / "memory")
        self.chunk = self.store.create_chunk(
            content="To be deleted",
            chunk_type="note",
            conversation_id="conv-123",
            tokens=10,
        )
        self.chunk_id = self.chunk.id

    def _archive_file(self):
        """Return the path where an archived copy of the chunk is expected."""
        return self.store.archive_path / f"{self.chunk_id}.json"

    def test_soft_delete_moves_to_archive(self):
        """Soft delete should move chunk to archive."""
        result = self.store.delete_chunk(self.chunk_id)
        self.assertTrue(result)

        # Original should be gone
        self.assertIsNone(self.store.get_chunk(self.chunk_id))

        # Archive should exist
        self.assertTrue(self._archive_file().exists())

    def test_permanent_delete_removes_file(self):
        """Permanent delete should remove file completely."""
        result = self.store.delete_chunk(self.chunk_id, permanent=True)
        self.assertTrue(result)

        # Should not exist anywhere: neither retrievable nor archived.
        self.assertIsNone(self.store.get_chunk(self.chunk_id))
        self.assertFalse(self._archive_file().exists())

    def test_delete_nonexistent_chunk(self):
        """Should return False for non-existent chunk."""
        result = self.store.delete_chunk("chunk-nonexistent")
        self.assertFalse(result)
|
|
|
|
|
|
class TestChunkListing(unittest.TestCase):
    """Test listing chunks with filters via ``ChunkStore.list_chunks``."""

    def setUp(self):
        """Create a store in a temporary directory and seed it with three chunks."""
        self.temp_dir = tempfile.mkdtemp()
        # tearDown is skipped when setUp raises; addCleanup is not, so the
        # directory is removed even if seeding the store fails below.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.store = ChunkStore(Path(self.temp_dir) / "brain" / "memory")

        # Create test chunks: (content, chunk_type, conversation_id, tags).
        # Two conversations and overlapping tags give every filter test a
        # distinct expected count.
        fixtures = (
            ("Chunk 1", "note", "conv-a", ["tag1"]),
            ("Chunk 2", "fact", "conv-a", ["tag2"]),
            ("Chunk 3", "note", "conv-b", ["tag1", "tag2"]),
        )
        for content, chunk_type, conversation_id, tags in fixtures:
            self.store.create_chunk(
                content=content,
                chunk_type=chunk_type,
                conversation_id=conversation_id,
                tokens=5,
                tags=tags,
            )

    def test_list_all_chunks(self):
        """Should list all chunk IDs."""
        chunks = self.store.list_chunks()
        self.assertEqual(len(chunks), 3)

    def test_list_by_conversation(self):
        """Should filter by conversation_id."""
        chunks = self.store.list_chunks(conversation_id="conv-a")
        self.assertEqual(len(chunks), 2)  # chunk 1 and 2

    def test_list_by_tags(self):
        """Should filter by tags (intersection)."""
        chunks = self.store.list_chunks(tags=["tag1"])
        self.assertEqual(len(chunks), 2)  # chunk 1 and 3

    def test_list_by_multiple_tags(self):
        """Should require all tags."""
        chunks = self.store.list_chunks(tags=["tag1", "tag2"])
        self.assertEqual(len(chunks), 1)  # only chunk 3
|
|
|
|
|
|
class TestChunkIndex(unittest.TestCase):
    """Test ChunkIndex functionality."""

    def setUp(self):
        """Create a ChunkIndex backed by a file inside a temporary directory."""
        self.temp_dir = tempfile.mkdtemp()
        # tearDown is skipped when setUp raises; addCleanup is not, so the
        # directory is removed even if ChunkIndex() fails below.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.index_path = Path(self.temp_dir) / "test_index.json"
        self.index = ChunkIndex(self.index_path)

    def test_add_and_get(self):
        """Should add and retrieve entries."""
        self.index.add("key1", {"value": 123})

        result = self.index.get("key1")
        self.assertEqual(result, {"value": 123})

    def test_persistence(self):
        """Index should persist to disk."""
        self.index.add("key1", "value1")

        # Create new index instance (simulates reload): it must see the
        # entry written through the first instance.
        new_index = ChunkIndex(self.index_path)
        self.assertEqual(new_index.get("key1"), "value1")

    def test_list_operations(self):
        """Should support list-based indexes."""
        self.index.add_to_list("tag1", "chunk-a")
        self.index.add_to_list("tag1", "chunk-b")

        result = self.index.get_list("tag1")
        self.assertIn("chunk-a", result)
        self.assertIn("chunk-b", result)
|
|
|
|
|
|
class TestChunkSerialization(unittest.TestCase):
    """Check conversion of Chunk objects to and from dicts and JSON."""

    def test_chunk_to_dict(self):
        """to_dict() should expose id, content and tags under those keys."""
        meta = ChunkMetadata(
            created="2026-02-10T12:00:00Z",
            conversation_id="conv-123",
        )
        chunk = Chunk(
            id="chunk-test",
            content="Test",
            tokens=5,
            type="note",
            metadata=meta,
            links=ChunkLinks(),
            tags=["test"],
        )

        serialized = chunk.to_dict()

        expected_fields = {
            "id": "chunk-test",
            "content": "Test",
            "tags": ["test"],
        }
        for key, value in expected_fields.items():
            self.assertEqual(serialized[key], value)

    def test_chunk_from_dict(self):
        """from_dict() should rebuild a Chunk, including nested metadata."""
        metadata = {
            "created": "2026-02-10T12:00:00Z",
            "conversation_id": "conv-123",
            "source": "interaction",
            "confidence": 0.8,
            "access_count": 0,
            "last_accessed": None,
        }
        link_kinds = (
            "context_of",
            "follows",
            "related_to",
            "supports",
            "contradicts",
        )
        payload = {
            "id": "chunk-test",
            "content": "Test content",
            "tokens": 10,
            "type": "note",
            "metadata": metadata,
            # Every link kind starts out as its own empty list.
            "links": {kind: [] for kind in link_kinds},
            "tags": ["test"],
        }

        rebuilt = Chunk.from_dict(payload)

        self.assertEqual(rebuilt.id, "chunk-test")
        self.assertEqual(rebuilt.content, "Test content")
        self.assertEqual(rebuilt.metadata.confidence, 0.8)

    def test_chunk_json_roundtrip(self):
        """to_json() followed by from_json() should preserve key fields."""
        meta = ChunkMetadata(
            created="2026-02-10T12:00:00Z",
            conversation_id="conv-123",
            confidence=0.9,
        )
        original = Chunk(
            id="chunk-test",
            content="Test content",
            tokens=10,
            type="note",
            metadata=meta,
            links=ChunkLinks(),
            tags=["test"],
        )

        restored = Chunk.from_json(original.to_json())

        self.assertEqual(restored.id, original.id)
        self.assertEqual(restored.content, original.content)
        self.assertEqual(
            restored.metadata.confidence,
            original.metadata.confidence,
        )

    def test_invalid_json_handling(self):
        """from_json() should raise json.JSONDecodeError on malformed input."""
        self.assertRaises(
            json.JSONDecodeError,
            Chunk.from_json,
            "not valid json",
        )

    def test_missing_required_field(self):
        """from_dict() should raise KeyError or ValueError without "content"."""
        incomplete = {
            "id": "chunk-test",
            # "content" is deliberately left out
            "tokens": 10,
            "type": "note",
            "metadata": {},
        }

        self.assertRaises((KeyError, ValueError), Chunk.from_dict, incomplete)
|
|
|
|
|
|
class TestStats(unittest.TestCase):
    """Test statistics gathering via ``ChunkStore.get_stats``."""

    def setUp(self):
        """Build an empty ChunkStore rooted in a temporary directory."""
        self.temp_dir = tempfile.mkdtemp()
        # tearDown is skipped when setUp raises; addCleanup is not, so the
        # directory is removed even if ChunkStore() fails below.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.store = ChunkStore(Path(self.temp_dir) / "brain" / "memory")

    def test_empty_stats(self):
        """Stats for empty store."""
        stats = self.store.get_stats()

        self.assertEqual(stats["total_chunks"], 0)
        self.assertEqual(stats["archived_chunks"], 0)
        self.assertEqual(stats["by_type"], {})

    def test_stats_with_chunks(self):
        """Stats should count by type."""
        # Positional arguments: content, chunk_type, conversation_id, tokens.
        self.store.create_chunk("Note 1", "note", "conv-1", 5)
        self.store.create_chunk("Note 2", "note", "conv-1", 5)
        self.store.create_chunk("Fact 1", "fact", "conv-1", 5)

        stats = self.store.get_stats()

        self.assertEqual(stats["total_chunks"], 3)
        self.assertEqual(stats["by_type"]["note"], 2)
        self.assertEqual(stats["by_type"]["fact"], 1)
|
|
|
|
|
|
class TestIntegration(unittest.TestCase):
    """Integration tests for full workflow."""

    def setUp(self):
        """Build a ChunkStore rooted in a throw-away temporary directory."""
        self.temp_dir = tempfile.mkdtemp()
        # tearDown is skipped when setUp raises; addCleanup is not, so the
        # directory is removed even if ChunkStore() fails below.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.store = ChunkStore(Path(self.temp_dir) / "brain" / "memory")

    def test_full_lifecycle(self):
        """Test create → get → update → delete workflow."""
        # Create
        chunk = self.store.create_chunk(
            content="Original",
            chunk_type="note",
            conversation_id="conv-test",
            tokens=5,
            tags=["original"],
        )

        # Get
        retrieved = self.store.get_chunk(chunk.id)
        self.assertIsNotNone(retrieved)
        self.assertEqual(retrieved.content, "Original")

        # Update
        self.store.update_chunk(chunk.id, content="Updated", tags=["updated"])

        # Verify update through a fresh read rather than the returned object.
        updated = self.store.get_chunk(chunk.id)
        self.assertIsNotNone(updated)
        self.assertEqual(updated.content, "Updated")
        self.assertEqual(updated.tags, ["updated"])

        # Delete: the deletion tests in this file expect a truthy result on
        # success, so check it here too instead of discarding it.
        self.assertTrue(self.store.delete_chunk(chunk.id, permanent=True))

        # Verify deletion
        self.assertIsNone(self.store.get_chunk(chunk.id))
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the tests in this module in verbose mode.
    unittest.main(verbosity=2)
|