Fix: Security, reliability, and code quality improvements from PR review

Critical Security Fixes:
- Fix command injection vulnerability in Windows shims (beadboard.cmd, bb.cmd)
  - Added path validation to block traversal (.. and root-relative paths)
  - Added quotes around env var to prevent command injection

Reliability Fixes:
- Fix agent cache null safety bug
  - Fixed callBdAgentShow() to check for cache misses (null check, expiration)
  - Fixed getCachedAgent to properly return entry.data or null
- Fix null body crashes in mail ack route
  - Added null check before casting body to object
  - Returns 400 error instead of 500 for invalid requests

BD Compliance Fixes:
- Fix read-issues to use BD audit record path
  - Ensures all writes go through bd audit record
  - Maintains watcher/SSE parity and Dolt commit tracking

Code Quality Fixes:
- Fix path canonicalization violations
  - Use canonicalizeWindowsPath() and windowsPathKey() from pathing module
  - Prevents Windows edge cases and ensures machine-reproducible paths
- Fix typo: mobile-fronted → mobile-frontend
- Pin GitHub Actions tags
  - softprops/action-gh-release@v1 → specific commit hash
- Register pr14 test in package.json (already registered; no change needed)

Testing:
- Refactor broad exception handlers in Python scripts
  - Replace except Exception: with specific exceptions
  - Lets unexpected errors surface instead of being silently swallowed (KeyboardInterrupt and SystemExit already propagated past except Exception:, since they derive from BaseException rather than Exception)
- All tests passing
2026-03-05 16:33:10 -08:00 · 2026-03-05 16:33:10 -08:00 · ce4700849b
commit ce4700849b
parent d54e4f3311
15 changed files with 2995 additions and 756 deletions
--- a/.agents/skills/rlm-mem/brain/scripts/chunking_engine.py
+++ b/.agents/skills/rlm-mem/brain/scripts/chunking_engine.py
@ -12,6 +12,7 @@ from dataclasses import dataclass, field
 # Try to import tiktoken for accurate token counting
 try:
    import tiktoken
+
    TIKTOKEN_AVAILABLE = True
 except ImportError:
    TIKTOKEN_AVAILABLE = False
@ -26,6 +27,7 @@ except ImportError:
@dataclass
 class ChunkResult:
    """Result of chunking a piece of content."""
+
    content: str
    tokens: int
    type: str
@ -35,137 +37,169 @@ class ChunkResult:
 class ChunkingEngine:
    """
    Splits content into bounded semantic chunks.
-    
+
    Strategy: Simple Bounded Semantic
    1. Split on paragraphs (\n\n)
    2. Merge small paragraphs (< min_tokens) with next
    3. Split large paragraphs (> max_tokens) at sentence boundaries
    4. Detect content type (fact, preference, pattern, note, decision)
    """
-    
+
    def __init__(self, min_tokens: int = 100, max_tokens: int = 800):
        """
        Initialize the chunking engine.
-        
+
        Args:
            min_tokens: Minimum tokens per chunk (default: 100)
            max_tokens: Maximum tokens per chunk (default: 800)
        """
        self.min_tokens = min_tokens
        self.max_tokens = max_tokens
-        
+
        # Initialize tiktoken encoder if available
        self._encoder = None
        if TIKTOKEN_AVAILABLE:
            try:
                self._encoder = tiktoken.get_encoding("cl100k_base")
-            except Exception:
+            except (ImportError, AttributeError, ValueError, KeyError):
                pass  # Fall back to character-based estimation
-    
+
    def count_tokens(self, text: str) -> int:
        """
        Estimate token count.
-        
+
        Uses tiktoken if available, otherwise uses len/4 approximation
        which works reasonably well for English text.
-        
+
        Args:
            text: Text to count tokens for
-            
+
        Returns:
            Estimated token count
        """
        if text is None or text == "":
            return 0
-            
+
        if self._encoder is not None:
            try:
                return len(self._encoder.encode(text))
-            except Exception:
+            except (AttributeError, TypeError, ValueError):
                pass  # Fall back to approximation
-        
+
        # Character-based approximation: ~4 chars per token for English
        # This is a rough estimate but works for most cases
        return max(1, len(text) // 4)
-    
+
    def detect_content_type(self, content: str) -> str:
        """
        Detect if content is fact, preference, pattern, note, or decision.
-        
+
        Detection rules (case-insensitive, word boundaries respected):
        - Decision: "decided", "chose", "selected", "going with"
        - Preference: "prefer", "like", "want", "rather"
        - Fact: "is a", "are a", "works as", "located in"
        - Pattern: "usually", "often", "tends to", "pattern"
        - Default: "note"
-        
+
        Args:
            content: Content to analyze
-            
+
        Returns:
            Content type string
        """
        if not content:
            return ChunkType.NOTE.value
-            
+
        content_lower = content.lower()
-        
+
        # Decision indicators (highest priority - explicit actions)
        decision_patterns = [
-            r'\bdecided\b', r'\bchose\b', r'\bselected\b', 
-            r'\bgoing with\b', r'\bwent with\b', r'\bopted for\b',
-            r'\bsettled on\b', r'\bconcluded\b'
+            r"\bdecided\b",
+            r"\bchose\b",
+            r"\bselected\b",
+            r"\bgoing with\b",
+            r"\bwent with\b",
+            r"\bopted for\b",
+            r"\bsettled on\b",
+            r"\bconcluded\b",
        ]
        for pattern in decision_patterns:
            if re.search(pattern, content_lower):
                return ChunkType.DECISION.value
-        
+
        # Pattern indicators (habits, recurring behaviors) - check BEFORE preference
        # because phrases like "generally prefer" describe patterns, not preferences
        pattern_patterns = [
-            r'\busually\b', r'\boften\b', r'\btends to\b', r'\bpattern\b',
-            r'\balways\b', r'\btypically\b', r'\bgenerally\b',
-            r'\bfrequently\b', r'\bregularly\b', r'\bevery time\b',
-            r'\bmost of the time\b', r'\bwhenever\b'
+            r"\busually\b",
+            r"\boften\b",
+            r"\btends to\b",
+            r"\bpattern\b",
+            r"\balways\b",
+            r"\btypically\b",
+            r"\bgenerally\b",
+            r"\bfrequently\b",
+            r"\bregularly\b",
+            r"\bevery time\b",
+            r"\bmost of the time\b",
+            r"\bwhenever\b",
        ]
        for pattern in pattern_patterns:
            if re.search(pattern, content_lower):
                return ChunkType.PATTERN.value
-        
+
        # Preference indicators
        preference_patterns = [
-            r'\bprefer\b', r'\blike\b', r'\bwant\b', r'\brather\b',
-            r'\bdislike\b', r'\bhate\b', r'\bwish\b', r'\bwould like\b',
-            r'\bfavorite\b', r'\bfavour\b'
+            r"\bprefer\b",
+            r"\blike\b",
+            r"\bwant\b",
+            r"\brather\b",
+            r"\bdislike\b",
+            r"\bhate\b",
+            r"\bwish\b",
+            r"\bwould like\b",
+            r"\bfavorite\b",
+            r"\bfavour\b",
        ]
        for pattern in preference_patterns:
            if re.search(pattern, content_lower):
                return ChunkType.PREFERENCE.value
-        
+
        # Fact indicators (statements of truth)
        fact_patterns = [
-            r'\bis a\b', r'\bare a\b', r'\bworks as\b', r'\blocated in\b',
-            r'\bis an\b', r'\bare an\b', r'\bwas a\b', r'\bwere a\b',
-            r'\bworks at\b', r'\bworks for\b', r'\blives in\b',
-            r'\bborn in\b', r'\bstudied at\b', r'\bgraduated from\b',
-            r'\bhas\s+\d+', r'\bthere are\s+\d+', r'\bthere is\s+'
+            r"\bis a\b",
+            r"\bare a\b",
+            r"\bworks as\b",
+            r"\blocated in\b",
+            r"\bis an\b",
+            r"\bare an\b",
+            r"\bwas a\b",
+            r"\bwere a\b",
+            r"\bworks at\b",
+            r"\bworks for\b",
+            r"\blives in\b",
+            r"\bborn in\b",
+            r"\bstudied at\b",
+            r"\bgraduated from\b",
+            r"\bhas\s+\d+",
+            r"\bthere are\s+\d+",
+            r"\bthere is\s+",
        ]
        for pattern in fact_patterns:
            if re.search(pattern, content_lower):
                return ChunkType.FACT.value
-        
+
        # Default: note
        return ChunkType.NOTE.value
-    
+
    def _split_into_paragraphs(self, content: str) -> List[str]:
        """
        Split content into paragraphs on double newlines.
-        
+
        Handles edge cases like multiple consecutive newlines and whitespace.
        """
        # Split on double newlines
-        raw_paragraphs = re.split(r'\n\n+', content)
-        
+        raw_paragraphs = re.split(r"\n\n+", content)
+
        # Clean up each paragraph
        paragraphs = []
        for p in raw_paragraphs:
@ -173,191 +207,194 @@ class ChunkingEngine:
            cleaned = p.strip()
            if cleaned:
                # Normalize internal newlines (preserve single newlines within paragraphs)
-                cleaned = re.sub(r'[ \t]+', ' ', cleaned)
+                cleaned = re.sub(r"[ \t]+", " ", cleaned)
                paragraphs.append(cleaned)
-        
+
        return paragraphs
-    
+
    def _split_sentences(self, text: str) -> List[str]:
        """
        Split text into sentences.
-        
+
        Handles abbreviations and edge cases reasonably well.
        """
        # Pattern for sentence boundaries
        # Matches . ? or ! followed by space or end of string
        # Handles quotes and parentheses
        sentence_pattern = r'(?<=[.!?])\s+(?=[A-Z"\'\(])|(?<=[.!?])$'
-        
+
        sentences = re.split(sentence_pattern, text)
-        
+
        # Clean up
        result = []
        for s in sentences:
            cleaned = s.strip()
            if cleaned:
                result.append(cleaned)
-        
+
        return result
-    
+
    def _split_large_chunk(self, content: str) -> List[str]:
        """
        Split a large chunk (> max_tokens) at sentence boundaries.
-        
+
        Tries to create chunks that are as close to max_tokens as possible
        without exceeding it.
        """
        sentences = self._split_sentences(content)
-        
+
        if len(sentences) <= 1:
            # Cannot split by sentences, force split by token count
            return self._force_split(content)
-        
+
        chunks = []
        current_chunk = []
        current_tokens = 0
-        
+
        for sentence in sentences:
            sentence_tokens = self.count_tokens(sentence)
-            
+
            # If a single sentence exceeds max_tokens, force split it
            if sentence_tokens > self.max_tokens:
                # First, flush current chunk if any
                if current_chunk:
-                    chunks.append(' '.join(current_chunk))
+                    chunks.append(" ".join(current_chunk))
                    current_chunk = []
                    current_tokens = 0
-                
+
                # Force split this long sentence
                chunks.extend(self._force_split(sentence))
                continue
-            
+
            # Check if adding this sentence would exceed max_tokens
            if current_tokens + sentence_tokens > self.max_tokens and current_chunk:
                # Flush current chunk
-                chunks.append(' '.join(current_chunk))
+                chunks.append(" ".join(current_chunk))
                current_chunk = [sentence]
                current_tokens = sentence_tokens
            else:
                # Add to current chunk
                current_chunk.append(sentence)
                current_tokens += sentence_tokens
-        
+
        # Don't forget the last chunk
        if current_chunk:
-            chunks.append(' '.join(current_chunk))
-        
+            chunks.append(" ".join(current_chunk))
+
        return chunks
-    
+
    def _force_split(self, content: str) -> List[str]:
        """
        Force split content into chunks of approximately max_tokens.
-        
+
        Used when sentence splitting isn't sufficient.
        """
        total_tokens = self.count_tokens(content)
-        
+
        if total_tokens <= self.max_tokens:
            return [content]
-        
+
        # Calculate approximate characters per chunk
        # We use character count as a proxy for token count
        chars_per_token = len(content) / total_tokens
-        chars_per_chunk = int(self.max_tokens * chars_per_token * 0.95)  # 5% safety margin
-        
+        chars_per_chunk = int(
+            self.max_tokens * chars_per_token * 0.95
+        )  # 5% safety margin
+
        chunks = []
        start = 0
-        
+
        while start < len(content):
            end = start + chars_per_chunk
-            
+
            if end >= len(content):
                # Last chunk
                chunks.append(content[start:].strip())
                break
-            
+
            # Try to find a word boundary
            # Look for space, period, or other punctuation
            search_end = min(end + 50, len(content))  # Look ahead 50 chars
            boundary = end
-            
+
            # Find the last space or punctuation before search_end
            for i in range(search_end - 1, start, -1):
-                if content[i] in ' \t\n.,;:!?':
+                if content[i] in " \t\n.,;:!?":
                    boundary = i + 1
                    break
-            
+
            chunk = content[start:boundary].strip()
            if chunk:
                chunks.append(chunk)
-            
+
            start = boundary
-        
+
        return chunks
-    
-    def chunk(self, content: str, conversation_id: str,
-              tags: List[str] = None) -> List[ChunkResult]:
+
+    def chunk(
+        self, content: str, conversation_id: str, tags: List[str] = None
+    ) -> List[ChunkResult]:
        """
        Split content into bounded semantic chunks.
-        
+
        Strategy: Simple Bounded Semantic
        1. Split on paragraphs (\n\n)
        2. Merge small paragraphs (< min_tokens) with next
        3. Split large paragraphs (> max_tokens) at sentence boundaries
        4. Detect content type (fact, preference, pattern, note, decision)
-        
+
        Args:
            content: Text content to chunk
            conversation_id: Source conversation ID
            tags: Optional list of tags to apply to all chunks
-            
+
        Returns:
            List of ChunkResult objects ready for storage
        """
        if not content or not content.strip():
            return []
-        
+
        tags = tags or []
-        
+
        # Step 1: Split into paragraphs
        paragraphs = self._split_into_paragraphs(content)
-        
+
        # Step 2: Process paragraphs - handle size bounds
        raw_chunks = []
-        
+
        for paragraph in paragraphs:
            tokens = self.count_tokens(paragraph)
-            
+
            if tokens > self.max_tokens:
                # Split large paragraph at sentence boundaries
                split_chunks = self._split_large_chunk(paragraph)
                raw_chunks.extend(split_chunks)
            else:
                raw_chunks.append(paragraph)
-        
+
        # Step 3: Merge small chunks
        merged_chunks = self._merge_small_chunks(raw_chunks)
-        
+
        # Step 4: Create ChunkResult objects with type detection
        results = []
        for chunk_content in merged_chunks:
            chunk_tokens = self.count_tokens(chunk_content)
            content_type = self.detect_content_type(chunk_content)
-            
+
            result = ChunkResult(
                content=chunk_content,
                tokens=chunk_tokens,
                type=content_type,
-                tags=tags.copy()
+                tags=tags.copy(),
            )
            results.append(result)
-        
+
        return results
-    
+
    def _merge_small_chunks(self, chunks: List[str]) -> List[str]:
        """
        Merge chunks that are below min_tokens with adjacent chunks.
-        
+
        Strategy:
        - Try to merge with next chunk (if same content type)
        - If merging would exceed max_tokens, keep as-is (it's the best we can do)
@ -366,39 +403,39 @@ class ChunkingEngine:
        """
        if not chunks:
            return []
-        
+
        if len(chunks) == 1:
            return chunks
-        
+
        result = []
        i = 0
-        
+
        while i < len(chunks):
            current = chunks[i]
            current_tokens = self.count_tokens(current)
            current_type = self.detect_content_type(current)
-            
+
            # If current chunk is large enough, add it
            if current_tokens >= self.min_tokens:
                result.append(current)
                i += 1
                continue
-            
+
            # Current chunk is too small - try to merge with next
            if i + 1 < len(chunks):
                next_chunk = chunks[i + 1]
                next_tokens = self.count_tokens(next_chunk)
                next_type = self.detect_content_type(next_chunk)
-                
+
                # Don't merge if content types differ (preserve semantic boundaries)
                if current_type != next_type:
                    result.append(current)  # Add as-is even if small
                    i += 1
                    continue
-                
+
                # Check if merging would exceed max_tokens
                combined_tokens = current_tokens + next_tokens
-                
+
                if combined_tokens <= self.max_tokens:
                    # Merge current with next
                    merged = current + "\n\n" + next_chunk
@ -420,7 +457,7 @@ class ChunkingEngine:
                    prev_tokens = self.count_tokens(prev)
                    prev_type = self.detect_content_type(prev)
                    combined_tokens = prev_tokens + current_tokens
-                    
+
                    # Only merge if types match
                    if combined_tokens <= self.max_tokens and prev_type == current_type:
                        # Merge with previous
@ -431,18 +468,23 @@ class ChunkingEngine:
                else:
                    # No previous chunk, add as-is
                    result.append(current)
-                
+
                i += 1
-        
+
        return result


-def chunk_and_store(content: str, conversation_id: str, 
-                    store, tags: List[str] = None,
-                    min_tokens: int = 100, max_tokens: int = 800) -> List[Chunk]:
+def chunk_and_store(
+    content: str,
+    conversation_id: str,
+    store,
+    tags: List[str] = None,
+    min_tokens: int = 100,
+    max_tokens: int = 800,
+) -> List[Chunk]:
    """
    Convenience function to chunk content and store in ChunkStore.
-    
+
    Args:
        content: Text to chunk and store
        conversation_id: Source conversation ID
@ -450,13 +492,13 @@ def chunk_and_store(content: str, conversation_id: str,
        tags: Optional tags for all chunks
        min_tokens: Minimum tokens per chunk
        max_tokens: Maximum tokens per chunk
-        
+
    Returns:
        List of created Chunk objects
    """
    engine = ChunkingEngine(min_tokens=min_tokens, max_tokens=max_tokens)
    chunk_results = engine.chunk(content, conversation_id, tags)
-    
+
    created_chunks = []
    for result in chunk_results:
        chunk = store.create_chunk(
@ -464,10 +506,10 @@ def chunk_and_store(content: str, conversation_id: str,
            chunk_type=result.type,
            conversation_id=conversation_id,
            tokens=result.tokens,
-            tags=result.tags
+            tags=result.tags,
        )
        created_chunks.append(chunk)
-    
+
    return created_chunks


@ -477,7 +519,7 @@ if __name__ == "__main__":
    print("=" * 60)
    print("Chunking Engine - Self Test")
    print("=" * 60)
-    
+
    # Test 1: Basic multi-paragraph content
    print("\n[Test 1] Multi-paragraph content")
    content = """Paragraph 1. Short.
@ -485,16 +527,16 @@ if __name__ == "__main__":
 Paragraph 2 is longer with multiple sentences. It should stand alone.

 This is a decision: We chose to use RLM architecture."""
-    
+
    engine = ChunkingEngine()
    chunks = engine.chunk(content, "test-conv")
-    
+
    print(f"Input paragraphs: 3")
    print(f"Output chunks: {len(chunks)}")
    for i, c in enumerate(chunks, 1):
        print(f"  Chunk {i}: {c.type}, {c.tokens} tokens")
        print(f"    Content: {c.content[:60]}...")
-    
+
    # Test 2: Content type detection
    print("\n[Test 2] Content type detection")
    test_cases = [
@ -504,12 +546,12 @@ This is a decision: We chose to use RLM architecture."""
        ("I usually wake up early", "pattern"),
        ("This is just a random note", "note"),
    ]
-    
+
    for text, expected in test_cases:
        detected = engine.detect_content_type(text)
        status = "[OK]" if detected == expected else "[FAIL]"
        print(f"  {status} '{text[:40]}...' -> {detected} (expected: {expected})")
-    
+
    # Test 3: Small paragraph merging
    print("\n[Test 3] Small paragraph merging")
    content = """A.
@ -517,19 +559,23 @@ This is a decision: We chose to use RLM architecture."""
 B.

 C is a longer paragraph with more content that should stand on its own."""
-    
+
    chunks = engine.chunk(content, "test-conv")
    print(f"Input paragraphs: 3 (two very short)")
    print(f"Output chunks: {len(chunks)}")
    for i, c in enumerate(chunks, 1):
        print(f"  Chunk {i}: {c.tokens} tokens - {c.content[:50]}...")
-    
+
    # Test 4: Large paragraph splitting
    print("\n[Test 4] Large paragraph splitting")
    # Generate a paragraph that's definitely over 800 tokens
-    large_content = " ".join([f"This is sentence number {i} in a very long paragraph." 
-                              for i in range(1, 201)])  # ~200 sentences
-    
+    large_content = " ".join(
+        [
+            f"This is sentence number {i} in a very long paragraph."
+            for i in range(1, 201)
+        ]
+    )  # ~200 sentences
+
    chunks = engine.chunk(large_content, "test-conv")
    total_tokens = sum(c.tokens for c in chunks)
    print(f"Input: ~{engine.count_tokens(large_content)} tokens")
@ -537,7 +583,7 @@ C is a longer paragraph with more content that should stand on its own."""
    for i, c in enumerate(chunks, 1):
        status = "[OK]" if 100 <= c.tokens <= 800 else "[FAIL]"
        print(f"  {status} Chunk {i}: {c.tokens} tokens")
-    
+
    # Test 5: Token counting comparison
    print("\n[Test 5] Token counting")
    test_text = "This is a test sentence with exactly twelve tokens."
@ -545,38 +591,38 @@ C is a longer paragraph with more content that should stand on its own."""
    print(f"  Text: '{test_text}'")
    print(f"  Estimated tokens: {estimated}")
    print(f"  Tiktoken available: {TIKTOKEN_AVAILABLE}")
-    
+
    # Test 6: Integration with ChunkStore
    print("\n[Test 6] Integration with ChunkStore")
    try:
        from .memory_store import ChunkStore
-        
+
        store = ChunkStore("brain/memory")
        test_content = """First fact: Python is a programming language.

 Second decision: We chose to implement async support.

 Third preference: I prefer using type hints."""
-        
+
        created = chunk_and_store(
            content=test_content,
            conversation_id="integration-test",
            store=store,
-            tags=["test", "integration"]
+            tags=["test", "integration"],
        )
-        
+
        print(f"  Created {len(created)} chunks:")
        for c in created:
            print(f"    - {c.id}: {c.type}, {c.tokens} tokens")
-        
+
        # Cleanup - archive the test chunks
        for c in created:
            store.delete_chunk(c.id, permanent=False)
        print("  ✓ Test chunks archived")
-        
+
    except Exception as e:
        print(f"  [SKIP] Integration test skipped: {e}")
-    
+
    print("\n" + "=" * 60)
    print("All tests completed!")
    print("=" * 60)