homelab v0.8.2: fix memory recall truncating multibyte UTF-8 mid-character
emo's Claude Code sessions hit "UserPromptSubmit hook error" on almost every prompt.

Root cause: the homelab-memory-recall.py UserPromptSubmit hook runs `homelab memory recall <prompt>` and strict-decodes its stdout. printMemories truncated each memory's preview with a BYTE slice (c[:240]), which cuts through the middle of a 2-byte Cyrillic character and emits invalid UTF-8 (a dangling 0xd0 lead byte). The hook's subprocess.run(text=True) then raised UnicodeDecodeError — not caught by its `except (TimeoutExpired, OSError)` — so the hook exited non-zero and Claude surfaced the error. It is Cyrillic-specific (ASCII has no multibyte chars to split), so it bit emo (Bulgarian prompts) every turn while English users almost never saw it.

Two-layer fix:
- cli: truncatePreview() now counts RUNES, not bytes, so the preview never splits a character. Regression test asserts valid UTF-8 on a long Cyrillic string. Fixes the root for every consumer of `memory recall` / `memory list`.
- hook: subprocess.run gains errors="replace" and the except is broadened to honor the script's own "best-effort, exit 0" contract — so a truncated or otherwise odd payload can never again surface as a hook error.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
a3eb309e26
commit
0fa5852ec6
4 changed files with 48 additions and 7 deletions
|
|
@ -1 +1 @@
|
|||
v0.8.1
|
||||
v0.8.2
|
||||
|
|
|
|||
|
|
@ -54,10 +54,7 @@ func printMemories(raw []byte, jsonOut bool) error {
|
|||
return nil
|
||||
}
|
||||
for _, m := range r.Memories {
|
||||
c := strings.ReplaceAll(m.Content, "\n", " ")
|
||||
if len(c) > 240 {
|
||||
c = c[:240] + "…"
|
||||
}
|
||||
c := truncatePreview(strings.ReplaceAll(m.Content, "\n", " "), 240)
|
||||
fmt.Printf("#%d [%s] (%.2f) %s\n", m.ID, m.Category, m.Importance, c)
|
||||
if m.Tags != "" {
|
||||
fmt.Printf(" tags: %s\n", m.Tags)
|
||||
|
|
@ -66,6 +63,21 @@ func printMemories(raw []byte, jsonOut bool) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// truncatePreview shortens s to at most maxRunes runes, appending "…" when it
// trims. A negative maxRunes is treated as 0, so any non-empty s collapses to
// just the ellipsis instead of panicking.
//
// Counting runes (not bytes) is load-bearing: a byte slice like s[:240] can cut
// through the middle of a multibyte UTF-8 character (e.g. 2-byte Cyrillic),
// leaving a dangling lead byte = invalid UTF-8. That crashed strict decoders
// downstream — notably the homelab-memory-recall.py UserPromptSubmit hook
// (subprocess text=True), which surfaced as a recurring "UserPromptSubmit hook
// error" for Cyrillic-language users.
func truncatePreview(s string, maxRunes int) string {
	// Clamp the limit: a negative value would otherwise reach the slice
	// expression r[:maxRunes] below and panic at run time.
	if maxRunes < 0 {
		maxRunes = 0
	}
	// Every rune occupies at least one byte, so a string of at most maxRunes
	// bytes cannot exceed the rune limit. Return before allocating a rune
	// slice for the common short-content case.
	if len(s) <= maxRunes {
		return s
	}
	r := []rune(s)
	if len(r) <= maxRunes {
		// Multibyte content that is long in bytes but short in runes.
		return s
	}
	return string(r[:maxRunes]) + "…"
}
|
||||
|
||||
func memoryRecall(args []string) error {
|
||||
req := memRecallReq{}
|
||||
jsonOut := false
|
||||
|
|
|
|||
|
|
@ -5,8 +5,31 @@ import (
|
|||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestTruncatePreviewKeepsValidUTF8(t *testing.T) {
|
||||
// Byte-slicing a long Cyrillic string at 240 splits a 2-byte rune and emits
|
||||
// invalid UTF-8 — the bug that crashed the recall hook. truncatePreview must
|
||||
// cut on a rune boundary and always stay valid UTF-8.
|
||||
long := strings.Repeat("я", 300) // 300 runes / 600 bytes
|
||||
got := truncatePreview(long, 240)
|
||||
if !utf8.ValidString(got) {
|
||||
t.Fatalf("truncatePreview produced invalid UTF-8: %q", got)
|
||||
}
|
||||
if r := []rune(got); len(r) != 241 || string(r[:240]) != strings.Repeat("я", 240) || r[240] != '…' {
|
||||
t.Fatalf("truncatePreview = %d runes, want 240 Cyrillic + ellipsis", len(r))
|
||||
}
|
||||
// Short multibyte strings pass through untouched (no ellipsis).
|
||||
if got := truncatePreview("кратко", 240); got != "кратко" {
|
||||
t.Fatalf("short string altered: %q", got)
|
||||
}
|
||||
// ASCII boundary still works.
|
||||
if got := truncatePreview(strings.Repeat("a", 500), 240); got != strings.Repeat("a", 240)+"…" {
|
||||
t.Fatalf("ascii truncation wrong: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveMemoryBase(t *testing.T) {
|
||||
old1, old2 := os.Getenv("CLAUDE_MEMORY_API_URL"), os.Getenv("MEMORY_API_URL")
|
||||
defer func() { os.Setenv("CLAUDE_MEMORY_API_URL", old1); os.Setenv("MEMORY_API_URL", old2) }()
|
||||
|
|
|
|||
|
|
@ -45,9 +45,15 @@ def main() -> None:
|
|||
try:
|
||||
res = subprocess.run(
|
||||
[homelab, "memory", "recall", prompt, "--limit", "5"],
|
||||
capture_output=True, text=True, timeout=4, env=os.environ,
|
||||
capture_output=True, text=True, errors="replace", timeout=4,
|
||||
env=os.environ,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, OSError):
|
||||
except Exception:
|
||||
# Best-effort: ANY failure — timeout, OSError, or a UnicodeDecodeError on
|
||||
# truncated multibyte (Cyrillic) output — must silently skip recall this
|
||||
# turn, exactly like the MCP being unavailable. errors="replace" above
|
||||
# also keeps a mid-rune-truncated payload from raising here at all. Never
|
||||
# let this hook surface a "UserPromptSubmit hook error".
|
||||
return
|
||||
|
||||
out = (res.stdout or "").strip()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue