homelab v0.8.2: fix memory recall truncating multibyte UTF-8 mid-character
Some checks are pending
Build infra CLI / build (push) Waiting to run
ci/woodpecker/push/default Pipeline was successful

emo's Claude Code sessions hit "UserPromptSubmit hook error" on almost every
prompt. Root cause: the homelab-memory-recall.py UserPromptSubmit hook runs
`homelab memory recall <prompt>` and strict-decodes its stdout. printMemories
truncated each memory's preview with a BYTE slice (c[:240]), which cuts through
the middle of a 2-byte Cyrillic character and emits invalid UTF-8 (a dangling
0xd0 lead byte). The hook's subprocess.run(text=True) then raised
UnicodeDecodeError — not caught by its `except (TimeoutExpired, OSError)` — so
the hook exited non-zero and Claude surfaced the error. The bug affects any
non-ASCII text (ASCII has no multibyte chars to split), so it bit emo (Bulgarian
prompts) every turn while English users almost never saw it.

Two-layer fix:
- cli: truncatePreview() now counts RUNES, not bytes, so the preview never
  splits a character. Regression test asserts valid UTF-8 on a long Cyrillic
  string. Fixes the root cause for every consumer of `memory recall` /
  `memory list`.
- hook: subprocess.run gains errors="replace" and the except is broadened to
  honor the script's own "best-effort, exit 0" contract — so a truncated or
  otherwise odd payload can never again surface as a hook error.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-28 09:40:51 +00:00
parent a3eb309e26
commit 0fa5852ec6
4 changed files with 48 additions and 7 deletions

View file

@ -1 +1 @@
v0.8.1
v0.8.2

View file

@ -54,10 +54,7 @@ func printMemories(raw []byte, jsonOut bool) error {
return nil
}
for _, m := range r.Memories {
c := strings.ReplaceAll(m.Content, "\n", " ")
if len(c) > 240 {
c = c[:240] + "…"
}
c := truncatePreview(strings.ReplaceAll(m.Content, "\n", " "), 240)
fmt.Printf("#%d [%s] (%.2f) %s\n", m.ID, m.Category, m.Importance, c)
if m.Tags != "" {
fmt.Printf(" tags: %s\n", m.Tags)
@ -66,6 +63,21 @@ func printMemories(raw []byte, jsonOut bool) error {
return nil
}
// truncatePreview shortens s to at most maxRunes runes, appending "…" when it
// trims. A string that already fits is returned unchanged; a negative maxRunes
// is treated as 0.
//
// Counting runes (not bytes) is load-bearing: a byte slice like s[:240] can cut
// through the middle of a multibyte UTF-8 character (e.g. 2-byte Cyrillic),
// leaving a dangling lead byte = invalid UTF-8. That crashed strict decoders
// downstream — notably the homelab-memory-recall.py UserPromptSubmit hook
// (subprocess text=True), which surfaced as a recurring "UserPromptSubmit hook
// error" for Cyrillic-language users.
func truncatePreview(s string, maxRunes int) string {
	// A negative limit would make the slice expression below panic; clamp it
	// so the function degrades to "keep nothing" instead of crashing.
	if maxRunes < 0 {
		maxRunes = 0
	}
	// A string never holds more runes than bytes, so a byte-length check
	// settles the common short case without allocating a rune slice.
	if len(s) <= maxRunes {
		return s
	}
	r := []rune(s)
	if len(r) <= maxRunes {
		return s
	}
	// Slicing the rune slice always lands on a character boundary.
	return string(r[:maxRunes]) + "…"
}
func memoryRecall(args []string) error {
req := memRecallReq{}
jsonOut := false

View file

@ -5,8 +5,31 @@ import (
"os"
"strings"
"testing"
"unicode/utf8"
)
// TestTruncatePreviewKeepsValidUTF8 is the regression test for the recall-hook
// crash: truncatePreview must cut on a rune boundary so its output is always
// valid UTF-8, and must leave strings that already fit untouched.
func TestTruncatePreviewKeepsValidUTF8(t *testing.T) {
	const limit = 240

	// 300 two-byte runes (600 bytes): a byte-based cut at the limit could land
	// inside a character, which is exactly the failure being guarded against.
	cyrillic := strings.Repeat("я", 300)
	preview := truncatePreview(cyrillic, limit)
	if !utf8.ValidString(preview) {
		t.Fatalf("truncatePreview produced invalid UTF-8: %q", preview)
	}

	// Expect exactly `limit` Cyrillic runes followed by a single ellipsis.
	runes := []rune(preview)
	wantPrefix := strings.Repeat("я", limit)
	if len(runes) != limit+1 || string(runes[:limit]) != wantPrefix || runes[limit] != '…' {
		t.Fatalf("truncatePreview = %d runes, want 240 Cyrillic + ellipsis", len(runes))
	}

	// A short multibyte string is returned as-is, with no ellipsis appended.
	const short = "кратко"
	if out := truncatePreview(short, limit); out != short {
		t.Fatalf("short string altered: %q", out)
	}

	// Plain ASCII is still trimmed at the limit and gets the ellipsis.
	wantASCII := strings.Repeat("a", limit) + "…"
	if out := truncatePreview(strings.Repeat("a", 500), limit); out != wantASCII {
		t.Fatalf("ascii truncation wrong: %q", out)
	}
}
func TestResolveMemoryBase(t *testing.T) {
old1, old2 := os.Getenv("CLAUDE_MEMORY_API_URL"), os.Getenv("MEMORY_API_URL")
defer func() { os.Setenv("CLAUDE_MEMORY_API_URL", old1); os.Setenv("MEMORY_API_URL", old2) }()

View file

@ -45,9 +45,15 @@ def main() -> None:
try:
res = subprocess.run(
[homelab, "memory", "recall", prompt, "--limit", "5"],
capture_output=True, text=True, timeout=4, env=os.environ,
capture_output=True, text=True, errors="replace", timeout=4,
env=os.environ,
)
except (subprocess.TimeoutExpired, OSError):
except Exception:
# Best-effort: ANY failure — timeout, OSError, or a UnicodeDecodeError on
# truncated multibyte (Cyrillic) output — must silently skip recall this
# turn, exactly like the MCP being unavailable. errors="replace" above
# also keeps a mid-rune-truncated payload from raising here at all. Never
# let this hook surface a "UserPromptSubmit hook error".
return
out = (res.stdout or "").strip()