add fallback chain for judge: claude CLI → ollama → heuristic
- claude CLI: run from /tmp to avoid internet-mode-used marker prompts
- ollama: try small local models (qwen2.5:3b, llama3.2:3b, etc.)
- heuristic: pattern matching for corrections, preferences, decisions
- better JSON extraction: handles markdown fences and surrounding text
This commit is contained in:
parent
a8679d6cfb
commit
4456922294
1 changed files with 151 additions and 17 deletions
|
|
@ -173,10 +173,12 @@ def _parse_transcript(transcript_path: str, max_exchanges: int = 1) -> list[dict
|
||||||
entry = json.loads(line)
|
entry = json.loads(line)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
continue
|
continue
|
||||||
role = entry.get("role", "")
|
# Transcript format: role can be at top level or nested in message
|
||||||
|
msg = entry.get("message", entry)
|
||||||
|
role = msg.get("role", "") or entry.get("type", "")
|
||||||
if role not in ("user", "assistant"):
|
if role not in ("user", "assistant"):
|
||||||
continue
|
continue
|
||||||
content = entry.get("content", "")
|
content = msg.get("content", "")
|
||||||
if isinstance(content, list):
|
if isinstance(content, list):
|
||||||
content = " ".join(
|
content = " ".join(
|
||||||
b.get("text", "") for b in content
|
b.get("text", "") for b in content
|
||||||
|
|
@ -277,26 +279,158 @@ def _append_to_auto_memory(content: str, event_type: str) -> None:
|
||||||
f.write(f"- [{now}] **{event_type}**: {content}\n")
|
f.write(f"- [{now}] **{event_type}**: {content}\n")
|
||||||
|
|
||||||
|
|
||||||
def _call_judge(prompt: str) -> list[dict]:
|
def _parse_llm_response(response_text: str) -> list[dict]:
    """Parse raw judge output into a list of event dicts.

    When the text starts with a markdown code fence, every fence line is
    dropped. Anything before the first ``{`` or after the last ``}`` is then
    discarded, so a JSON object surrounded by prose still parses.

    Args:
        response_text: Raw text produced by the judge model.

    Returns:
        The dict items found in the object's ``"events"`` list. An empty list
        is returned when the key is missing or its value is not a list.

    Raises:
        json.JSONDecodeError: If the text cannot be decoded as JSON, or the
            decoded value is not a JSON object.
    """
    text = (response_text or "").strip()

    # Strip markdown code fences if present
    if text.startswith("```"):
        text = "\n".join(
            line for line in text.split("\n")
            if not line.strip().startswith("```")
        ).strip()

    # Sometimes the LLM adds text before/after the JSON: keep only the span
    # from the first "{" to the last "}".
    start = text.find("{")
    end = text.rfind("}") + 1
    if start >= 0 and end > start:
        text = text[start:end]

    parsed = json.loads(text)
    if not isinstance(parsed, dict):
        # Valid JSON but not an object (e.g. a bare list): report it as a parse
        # failure instead of letting ``.get`` raise AttributeError.
        raise json.JSONDecodeError("expected a JSON object", text, 0)

    events = parsed.get("events", [])
    if not isinstance(events, list):
        return []
    return [event for event in events if isinstance(event, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def _call_judge_claude(prompt: str) -> list[dict] | None:
|
||||||
|
"""Try claude CLI as judge. Returns None if unavailable."""
|
||||||
|
if not shutil.which("claude"):
|
||||||
|
return None
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
["claude", "-p", prompt, "--model", "haiku"],
|
["claude", "-p", prompt, "--model", "haiku"],
|
||||||
capture_output=True, text=True, timeout=45,
|
capture_output=True, text=True, timeout=60,
|
||||||
|
# Run from /tmp to avoid internet-mode-used marker prompts
|
||||||
|
# Clear CLAUDECODE to prevent recursion
|
||||||
|
cwd="/tmp",
|
||||||
env={**os.environ, "CLAUDECODE": ""},
|
env={**os.environ, "CLAUDECODE": ""},
|
||||||
)
|
)
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
return []
|
return None
|
||||||
response_text = result.stdout.strip()
|
return _parse_llm_response(result.stdout)
|
||||||
# Strip markdown code fences if present
|
|
||||||
if response_text.startswith("```"):
|
|
||||||
lines = response_text.split("\n")
|
|
||||||
lines = [l for l in lines if not l.strip().startswith("```")]
|
|
||||||
response_text = "\n".join(lines).strip()
|
|
||||||
judge_result = json.loads(response_text)
|
|
||||||
return judge_result.get("events", [])
|
|
||||||
except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError):
|
except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError):
|
||||||
return []
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _call_judge_ollama(prompt: str) -> list[dict] | None:
|
||||||
|
"""Try local ollama as judge. Returns None if unavailable."""
|
||||||
|
ollama_url = os.environ.get("OLLAMA_HOST", "http://localhost:11434")
|
||||||
|
# Prefer small models for speed
|
||||||
|
models_to_try = ["qwen2.5:3b", "llama3.2:3b", "gemma2:2b", "phi3:mini"]
|
||||||
|
for model in models_to_try:
|
||||||
|
try:
|
||||||
|
data = json.dumps({
|
||||||
|
"model": model,
|
||||||
|
"prompt": prompt,
|
||||||
|
"stream": False,
|
||||||
|
"options": {"temperature": 0, "num_predict": 512},
|
||||||
|
}).encode()
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{ollama_url}/api/generate",
|
||||||
|
data=data, method="POST",
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
result = json.loads(resp.read().decode())
|
||||||
|
return _parse_llm_response(result.get("response", ""))
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _call_judge_heuristic(entries: list[dict]) -> list[dict]:
    """
    Heuristic fallback: extract learnings via pattern matching.

    Less accurate than LLM but works without any external dependencies.

    Only entries whose ``role`` is ``"user"`` are scanned. Each message is
    split into sentences on newlines and ``". "``. For each category
    (correction, preference, decision) the trigger phrases are tried in order,
    and the first sentence that contains one and is longer than the category's
    minimum length becomes one event. Phrases match on word boundaries, so
    "no," does not fire inside "piano," and "i like" does not fire inside
    "I likely".

    Args:
        entries: Transcript entries; each is read for ``"role"`` and
            ``"content"``. Entries without string content are ignored.

    Returns:
        At most 5 event dicts with the keys ``type``, ``content`` (sentence
        truncated to 200 characters), ``importance``, ``tags`` and
        ``expanded_keywords`` (first 10 lowercased words of the sentence).
    """
    import re  # local import keeps the block self-contained

    max_events = 5
    # (event type, importance, minimum sentence length, trigger phrases)
    categories = (
        ("correction", 0.8, 20, (
            "actually", "that's wrong", "no,", "not correct", "instead of",
            "don't use", "never use", "always use", "the correct way",
            "the issue was", "the problem was", "root cause",
        )),
        ("preference", 0.7, 15, (
            "i prefer", "i like", "i want", "please always", "please never",
            "remember to", "from now on", "going forward",
        )),
        ("decision", 0.7, 20, (
            "let's go with", "we decided", "the approach is",
            "we'll use", "switched to", "migrated to",
        )),
    )

    def _phrase_regex(phrase: str) -> "re.Pattern[str]":
        # Anchor on word boundaries only where the phrase itself starts or
        # ends with a word character ("no," ends with punctuation).
        head = r"(?<!\w)" if phrase[0].isalnum() else ""
        tail = r"(?!\w)" if phrase[-1].isalnum() else ""
        return re.compile(head + re.escape(phrase) + tail)

    compiled = [
        (event_type, importance, min_len, [_phrase_regex(p) for p in phrases])
        for event_type, importance, min_len, phrases in categories
    ]

    events: list[dict] = []
    seen: set[tuple[str, str]] = set()

    for entry in entries or []:
        if entry.get("role") != "user":
            continue
        content = entry.get("content")
        if not isinstance(content, str) or not content:
            continue

        # Pair each sentence with a lowercase copy used only for matching;
        # curly apostrophes are normalised so "that’s wrong" still matches.
        candidates = [
            (sentence, sentence.lower().replace("\u2019", "'"))
            for sentence in (
                part.strip() for part in content.replace("\n", ". ").split(". ")
            )
            if sentence
        ]

        for event_type, importance, min_len, regexes in compiled:
            # Phrase-first search preserves the priority order of the phrases.
            hit = next(
                (
                    sentence
                    for regex in regexes
                    for sentence, lowered in candidates
                    if len(sentence) > min_len and regex.search(lowered)
                ),
                None,
            )
            if hit is None or (event_type, hit) in seen:
                continue
            seen.add((event_type, hit))
            events.append({
                "type": event_type,
                "content": hit[:200],
                "importance": importance,
                "tags": f"auto-learned,heuristic,{event_type}",
                "expanded_keywords": " ".join(hit.lower().split()[:10]),
            })
            if len(events) >= max_events:
                return events  # Max 5 events

    return events
|
||||||
|
|
||||||
|
|
||||||
|
def _call_judge(prompt: str, entries: list[dict] | None = None) -> list[dict]:
    """Run the judge fallback chain: claude CLI → ollama → heuristic.

    The LLM judges are asked in order. The first one that returns something
    other than ``None`` wins, even if that is an empty list. If both return
    ``None``, the heuristic judge runs on ``entries`` when any were supplied.

    Args:
        prompt: Judge prompt handed to the LLM-backed judges.
        entries: Transcript entries for the heuristic fallback; when omitted
            or empty the heuristic step is skipped.

    Returns:
        The events from the first judge that produced a result, or ``[]`` when
        none did.
    """
    for judge in (_call_judge_claude, _call_judge_ollama):
        events = judge(prompt)
        if events is not None:
            return events

    # No LLM judge was usable; pattern matching needs transcript entries.
    return _call_judge_heuristic(entries) if entries else []
|
||||||
|
|
||||||
|
|
||||||
def _format_conversation(entries: list[dict]) -> str:
|
def _format_conversation(entries: list[dict]) -> str:
|
||||||
|
|
@ -409,7 +543,7 @@ def main() -> None:
|
||||||
n_exchanges=n_exchanges,
|
n_exchanges=n_exchanges,
|
||||||
conversation=conversation[:8000], # Cap total context
|
conversation=conversation[:8000], # Cap total context
|
||||||
)
|
)
|
||||||
events = _call_judge(prompt)
|
events = _call_judge(prompt, entries)
|
||||||
state["last_deep_turn"] = turn_count
|
state["last_deep_turn"] = turn_count
|
||||||
else:
|
else:
|
||||||
# Single-turn extraction: just the last exchange
|
# Single-turn extraction: just the last exchange
|
||||||
|
|
@ -434,7 +568,7 @@ def main() -> None:
|
||||||
user_message=user_msg,
|
user_message=user_msg,
|
||||||
assistant_response=assistant_msg[:2000],
|
assistant_response=assistant_msg[:2000],
|
||||||
)
|
)
|
||||||
events = _call_judge(prompt)
|
events = _call_judge(prompt, entries)
|
||||||
|
|
||||||
# Store events
|
# Store events
|
||||||
if events:
|
if events:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue