Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ memory/L4_raw_sessions/*
# Code Review Principles
!memory/code_review_principles.md

# Review Mode SOP
!memory/review_sop.md

# Visual Studio
.vs/
restore_commit.txt
Expand Down
135 changes: 80 additions & 55 deletions frontends/cost_tracker.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
"""Per-thread LLM token usage, captured via llmcore monkey-patches.

`install()` wraps `llmcore._record_usage` (covers all three API modes) and
`llmcore.print` (the `messages` SSE path emits the final `output_tokens`
only via `[Output] tokens=N`, never through `_record_usage`). Tracking is
keyed by `threading.current_thread().name`; each TUI session runs the
agent on a uniquely named thread (`ga-tui-agent-<id>`), so `/cost` is a
thread lookup.
"""Per-thread LLM token usage via llmcore monkey-patches.

`install()` wraps `llmcore._record_usage` + `llmcore.print` (the SSE
`messages` path only emits final `output_tokens` through `[Output] tokens=N`).
Trackers are keyed by `threading.current_thread().name`; each TUI session
runs its agent on `ga-tui-agent-<id>`, so `/cost` is a thread lookup.

Subagent processes are out-of-process, so `scan_subagent_logs` parses the
same `[Cache]` / `[Output]` print lines from `temp/*/stdout.log`.
"""
import re, threading, time
import glob, os, re, threading, time
from dataclasses import dataclass, field


Expand All @@ -18,8 +19,9 @@ class TokenStats:
output: int = 0
cache_create: int = 0
cache_read: int = 0
# Latest request's effective prompt size — used for the % context-left line.
# Latest single-LLM-call sizes — drive the spinner's `↑ N · ↓ M`.
last_input: int = 0
last_output: int = 0
started_at: float = field(default_factory=time.time)

def total_input_side(self) -> int:
Expand All @@ -36,50 +38,69 @@ def elapsed_seconds(self) -> float:
return max(0.0, time.time() - self.started_at)


# Best-effort model → context window. `startswith` match; None hides the line.
#
# Each entry is `(lower-case model-name prefix, limit)`. The lookup walks the
# list top to bottom and returns the first prefix that matches, so ORDER
# MATTERS: a more specific prefix must come before the shorter prefix it
# extends (`claude-sonnet-4-5` before `claude-sonnet-4`, `gpt-5-pro` before
# `gpt-5`, `gpt-4o` / `gpt-4-turbo` before `gpt-4`, `gemini-2.5` before
# `gemini-2`).
# NOTE(review): the limits are presumably token counts — confirm against the
# provider documentation before relying on exact values.
_CTX_LIMITS: list[tuple[str, int]] = [
    ("claude-sonnet-4-5", 1_000_000),
    ("claude-opus-4", 200_000),
    ("claude-haiku-4", 200_000),
    ("claude-sonnet-4", 200_000),
    ("claude-3-5-sonnet", 200_000),
    ("claude-3-5-haiku", 200_000),
    ("claude-3-7-sonnet", 200_000),
    ("claude-3-opus", 200_000),
    ("claude-3-haiku", 200_000),
    ("claude-3-sonnet", 200_000),
    ("gpt-5-pro", 400_000),
    ("gpt-5", 256_000),
    ("gpt-4o", 128_000),
    ("gpt-4-turbo", 128_000),
    ("gpt-4", 8_192),
    ("o1", 200_000),
    ("o3", 200_000),
    ("o4", 200_000),
    ("gemini-2.5", 2_000_000),
    ("gemini-2", 1_000_000),
    ("gemini-1.5", 1_000_000),
    ("glm-5", 256_000),
    ("glm-4", 128_000),
    ("qwen", 128_000),
    ("deepseek", 64_000),
    ("kimi", 200_000),
    ("moonshot", 200_000),
]


def context_limit_for(model: str | None) -> int | None:
    """Return the context-window limit for *model*, or None when unknown.

    The model name is lower-cased and compared against each prefix in
    `_CTX_LIMITS` in list order; the first prefix that matches wins.
    An empty or missing model name yields None.
    """
    if not model:
        return None
    lowered = model.lower()
    return next(
        (limit for prefix, limit in _CTX_LIMITS if lowered.startswith(prefix)),
        None,
    )
# GA's real context budget lives on `BaseSession.context_win` (chars). The
# trim trigger is `context_win * 3` (see llmcore.trim_messages_history), so
# `/cost` compares actual-history chars against that cap for consistent units.
def context_window_chars(backend) -> int:
    """Return `context_win * 3`, the char cap used for the `/cost` context row.

    `context_win` is read from *backend* on every call, so a runtime override
    of that attribute is picked up.

    Returns 0 whenever no usable cap can be derived, so the caller can hide
    the row: missing attribute, `None`, a non-numeric value, a non-finite
    float, or a negative number.
    """
    try:
        cap = int(getattr(backend, 'context_win', 0)) * 3
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: non-numeric string
        # or NaN; OverflowError: int(float('inf')).
        return 0
    # A negative window is as meaningless as a missing one.
    return max(cap, 0)


def current_input_chars(backend) -> int:
    """Total JSON-serialized length, in characters, of `backend.history`.

    Each history entry is dumped with `ensure_ascii=False` and the lengths
    are added up. Best-effort: a missing/empty history gives 0, and any
    failure (for example an entry that is not JSON-serializable) makes the
    whole call return 0 rather than raise.
    """
    try:
        import json as _json
        entries = getattr(backend, 'history', None)
        if not entries:
            entries = []
        total = 0
        for entry in entries:
            total += len(_json.dumps(entry, ensure_ascii=False))
        return total
    except Exception:
        return 0


# Per-thread usage records, keyed by thread name.
_trackers: dict[str, TokenStats] = {}
# NOTE(review): presumably guards `_trackers`; the code that acquires it is
# not visible in this section — confirm.
_lock = threading.Lock()
# `[Output] tokens=N` — group 1 is N. Used with `.match`, so the tag must be
# at the very start of the string.
_OUT_RE = re.compile(r'\[Output\]\s+tokens=(\d+)')
# `[Cache] input=N creation=C read=R` — groups are (N, C, R).
_CACHE_RE_NEW = re.compile(r'\[Cache\]\s+input=(\d+)\s+creation=(\d+)\s+read=(\d+)')
# `[Cache] input=N cached=R` — groups are (N, R).
_CACHE_RE_OLD = re.compile(r'\[Cache\]\s+input=(\d+)\s+cached=(\d+)')
# NOTE(review): looks like an "already patched" flag for `install()` — the
# code that sets it is not visible here; confirm.
_INSTALLED = False
# Relative glob for subagent logs: `temp/<anything>/stdout.log`.
_SUBAGENT_GLOB = os.path.join("temp", "*", "stdout.log")


def scan_subagent_logs(since: float = 0.0, root: str | None = None) -> TokenStats:
    """Aggregate subagent token usage from `temp/<task>/stdout.log` files.

    Args:
        since: Epoch seconds. When non-zero, log files whose mtime is older
            are skipped, and (when positive) it becomes the returned
            `started_at`. Pass the TUI start time to scope to this run.
        root: Directory that contains `temp/`. Defaults to the current
            working directory (the glob is relative). Taken literally:
            glob metacharacters in the path are escaped.

    Returns:
        A fresh `TokenStats` with `input`, `output`, `cache_create`,
        `cache_read` and `requests` summed over all matching logs.

    Best-effort: a log that cannot be stat'ed, opened or read is skipped.
    """
    out = TokenStats()
    if since > 0:
        out.started_at = since
    if root:
        # Escape `root` so a project path containing `[`, `*` or `?` is not
        # interpreted as a pattern; only `_SUBAGENT_GLOB`'s own `*` expands.
        pattern = os.path.join(glob.escape(root), _SUBAGENT_GLOB)
    else:
        pattern = _SUBAGENT_GLOB
    for path in glob.glob(pattern):
        try:
            if since and os.path.getmtime(path) < since:
                continue
            with open(path, encoding="utf-8", errors="ignore") as f:
                for line in f:
                    if line.startswith("[Output]"):
                        m = _OUT_RE.match(line)
                        if m:
                            # One `[Output]` line is counted as one request.
                            out.output += int(m.group(1))
                            out.requests += 1
                    elif line.startswith("[Cache]"):
                        # messages → `input=N creation=C read=R` (input excl. cache);
                        # chat_completions / responses → `input=N cached=R` (input incl. cached).
                        m = _CACHE_RE_NEW.match(line)
                        if m:
                            i, c, r = int(m.group(1)), int(m.group(2)), int(m.group(3))
                            out.input += i
                            out.cache_create += c
                            out.cache_read += r
                            continue
                        m = _CACHE_RE_OLD.match(line)
                        if m:
                            i, r = int(m.group(1)), int(m.group(2))
                            # `input` includes the cached part here; subtract
                            # it (never below zero) to avoid counting it twice.
                            out.input += max(0, i - r)
                            out.cache_read += r
        except OSError:
            continue
    return out


def get(thread_name: str) -> TokenStats:
Expand Down Expand Up @@ -107,31 +128,32 @@ def install() -> None:
orig_record, orig_print = llmcore._record_usage, print

def record_patched(usage, api_mode):
# Handles INPUT / CACHE only; OUTPUT comes via `[Output]` print_patched
# below (the SSE path emits it that way; double-counting was the prior bug).
try:
if usage:
t = get(threading.current_thread().name)
t.requests += 1
if api_mode == 'messages':
# SSE delivers final output via [Output] print; non-stream
# delivers it here. `output_tokens` in stream message_start
# is a 0–1 placeholder, acceptable noise.
inp = int(usage.get('input_tokens', 0) or 0)
cc = int(usage.get('cache_creation_input_tokens', 0) or 0)
cr = int(usage.get('cache_read_input_tokens', 0) or 0)
t.input += inp; t.cache_create += cc; t.cache_read += cr
t.output += int(usage.get('output_tokens', 0) or 0)
# Non-stream `messages` skips the [Output] print, so count
# output_tokens here; SSE message_start carries a 1-token
# placeholder to skip.
out = int(usage.get('output_tokens', 0) or 0)
if out > 1: t.output += out; t.last_output = out
t.last_input = inp + cc + cr
elif api_mode == 'chat_completions':
cached = int((usage.get('prompt_tokens_details') or {}).get('cached_tokens', 0) or 0)
inp = int(usage.get('prompt_tokens', 0) or 0) - cached
t.input += inp; t.cache_read += cached
t.output += int(usage.get('completion_tokens', 0) or 0)
t.last_input = inp + cached
elif api_mode == 'responses':
cached = int((usage.get('input_tokens_details') or {}).get('cached_tokens', 0) or 0)
inp = int(usage.get('input_tokens', 0) or 0) - cached
t.input += inp; t.cache_read += cached
t.output += int(usage.get('output_tokens', 0) or 0)
t.last_input = inp + cached
except Exception: pass
return orig_record(usage, api_mode)
Expand All @@ -141,7 +163,10 @@ def print_patched(*args, **kwargs):
try:
if args and isinstance(args[0], str):
m = _OUT_RE.match(args[0])
if m: get(threading.current_thread().name).output += int(m.group(1))
if m:
t = get(threading.current_thread().name)
n = int(m.group(1))
t.output += n; t.last_output = n
except Exception: pass
return orig_print(*args, **kwargs)
llmcore.print = print_patched
Expand Down
180 changes: 180 additions & 0 deletions frontends/plan_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
"""Plan / todo state — pure stdlib, no UI framework dependency.

API:
extract(text) → [(content, "open"|"done"), …]
is_active(agent, messages=None) → plan mode on (stash OR per-session msg ref)
resolve_path(agent, messages=None) → live plan.md path (or None)
find_path_in_messages(messages) → most recent plan.md path mentioned
current_step(messages) → latest `当前步骤:…` snippet (or "")
summary(items) → (n_done, n_total)
is_complete(items) → all done (or empty)

Supported task-line shapes (all matched by `extract`):
- [ ] foo ← bullet + open
- [x] foo ← bullet + done
1. [✓] foo ← numbered + done
2. [✓ 2026-05-16] foo ← numbered + timestamped done, content after bracket
3. [✓ 已生成: foo] ← numbered + done with description *inside* bracket
4. [D][P] foo ← two marker groups (delegate + parallel), still open
5. [D] foo ← non-standard marker "D" → open (not done)
"""
from __future__ import annotations
import os, re
from typing import Optional

# Glyphs that mark a bracket group as "done".
_DONE_CHARS = set("xX✓✔√☑")
# Newline-insert before a bullet stuck to JSON debris (`{"content": "- [ ] …`).
# Group 1 is the bullet (`-`/`*`/`+` or `N.`-style number), one space and the
# opening `[`; the lookbehind skips bullets that already start a line.
_GLUE_RE = re.compile(r"(?<!\n)((?:[-*+]|\d+\s*[.)、:)]) \[)")
# Leading bullet or list number (ASCII or CJK delimiter) plus the whitespace
# that follows it.
_BULLET_RE = re.compile(r"^\s*(?:[-*+]|\d+\s*[.)、:)])\s+")
# One `[...]` group; group 1 is the text between the brackets (may be empty).
_BRACKET_RE = re.compile(r"\[([^\]]*)\]")
# Strip `✓ ` / `x ` / timestamp prefix when bracket content is used as title.
# The optional timestamp part only matches a date followed by a time
# (`YYYY-MM-DD H:MM[:SS]`); a bare date is left in place.
_INLINE_STRIP_RE = re.compile(
    r"^[" + re.escape("".join(_DONE_CHARS)) + r"]\s*(?:\d{4}-\d{2}-\d{2}\s+\d{1,2}:\d{2}(?::\d{2})?\s*)?"
)
# Cut everything from the first `"`, backslash or `<` to the end of the string.
_DEBRIS_RE = re.compile(r'["\\<].*$')
# Strip markdown emphasis since planbar renders rich.Text, not Markdown.
# Alternatives, in order: `**bold**`, `*italic*`, `__bold__`, `_italic_`,
# and backtick code; exactly one capture group holds the inner text.
_MD_EMPHASIS_RE = re.compile(
    r"\*\*([^*\n]+)\*\*|\*([^*\n]+)\*|__([^_\n]+)__|_([^_\n]+)_|`([^`\n]+)`"
)
def _strip_md(s: str) -> str:
    """Return *s* with markdown emphasis / inline-code markers removed.

    Every `_MD_EMPHASIS_RE` match is replaced by its inner text.
    """
    def _inner_text(match):
        # Each alternative of the pattern has exactly one capture group, so
        # the last matched group is the one holding the inner text.
        return match.group(match.lastindex)

    return _MD_EMPHASIS_RE.sub(_inner_text, s)


def _has_done_glyph(marker: str) -> bool:
    """Return True when bracket content *marker* marks a task as done.

    The dedicated check glyphs in `_DONE_CHARS` (everything except the
    letters `x` / `X`) count anywhere in the marker. The letters `x` / `X`
    are ordinary text as well, so they only count as a done glyph when they
    lead the marker and stand alone: `x`, `X`, or `x <timestamp/description>`.
    This keeps non-standard word markers such as `fix` or `index` open
    instead of flagging them done because they contain an `x`.
    """
    body = marker.strip()
    if not body:
        return False
    if any(c in _DONE_CHARS and c not in "xX" for c in body):
        return True
    first = body[0]
    if first in "xX" and first in _DONE_CHARS:
        return len(body) == 1 or body[1].isspace()
    return False


def extract(text: str) -> list[tuple[str, str]]:
    """Parse task lines out of *text* into `(content, status)` pairs.

    A task line is a bullet or list number followed by one or more
    consecutive `[...]` groups. Status is `"done"` when any group carries a
    done glyph, otherwise `"open"`. The title is the text after the last
    group; when that is empty and the task is done, the title is taken from
    inside the first done group instead (glyph and timestamp stripped).

    Titles are cleaned of trailing JSON debris and markdown emphasis and
    de-duplicated in first-seen order; a duplicate seen as done overrides an
    earlier open.
    """
    if not text:
        return []
    # Literal backslash-n sequences (text lifted from JSON) become newlines.
    normalized = text.replace("\\n", "\n")
    normalized = _GLUE_RE.sub(r"\n\1", normalized)
    statuses: dict[str, str] = {}
    for raw_line in normalized.splitlines():
        bullet = _BULLET_RE.match(raw_line)
        if bullet is None:
            continue
        tail = raw_line[bullet.end():]
        # Peel off every leading `[...]` group — covers `[D][P]` chains as
        # well as the plain `[ ]` / `[x]` single form.
        markers: list[str] = []
        bracket = _BRACKET_RE.match(tail)
        while bracket is not None:
            markers.append(bracket.group(1))
            tail = tail[bracket.end():]
            bracket = _BRACKET_RE.match(tail)
        if not markers:
            continue
        done_markers = [g for g in markers if _has_done_glyph(g)]
        title = tail.strip()
        if not title:
            if not done_markers:
                continue
            # `[✓ description]` shape — the description sits inside the
            # bracket next to the glyph.
            title = _INLINE_STRIP_RE.sub("", done_markers[0]).strip()
        key = _strip_md(_DEBRIS_RE.sub("", title).strip())
        if not key:
            continue
        if done_markers:
            # Done always wins, even over an earlier open entry.
            statuses[key] = "done"
        else:
            statuses.setdefault(key, "open")
    return list(statuses.items())


def _stashed_plan_path(agent) -> str:
# First non-empty `working['in_plan_mode']` from (handler, agent).
for src in (getattr(agent, "handler", None), agent):
p = ((getattr(src, "working", None) or {}).get("in_plan_mode") or "").strip()
if p: return p
return ""


def _resolve_stashed(p: str) -> Optional[str]:
if not p: return None
rel = p.lstrip("./\\")
cwd = os.getcwd()
for c in (p, os.path.join(cwd, "temp", rel), os.path.join(cwd, rel)):
if os.path.isfile(c) and os.path.getsize(c) > 0: return c
return None


# Strict per-session discovery — scan this session's own messages only.
# Matches `plan_<id>/plan.md` with an optional leading `./` and/or `temp/`;
# `<id>` is letters, digits, `_` or `-`. Only forward slashes are recognised.
_PATH_RE = re.compile(r"""((?:\.\/)?(?:temp\/)?plan_[A-Za-z0-9_\-]+\/plan\.md)""")


def _slice(messages, start_idx: int):
if not messages: return []
if start_idx <= 0: return list(messages)
return list(messages)[start_idx:]


def find_path_in_messages(messages, start_idx: int = 0) -> Optional[str]:
    """Return the most recent `plan_XXX/plan.md` mentioned at or after *start_idx*.

    Messages are scanned newest first, and within a message the last
    mention is tried first. Items may be `ChatMessage`-like (text in
    `.content`) or plain strings; anything else is ignored. A mention only
    counts when it resolves to a file on disk; None is returned when
    nothing resolves.
    """
    window = _slice(messages, start_idx)
    for msg in window[::-1]:
        body = getattr(msg, "content", None)
        if body is None:
            body = msg if isinstance(msg, str) else ""
        # Cheap substring test before running the regex.
        if not body or "plan.md" not in body:
            continue
        mentions = _PATH_RE.findall(body)
        for mention in mentions[::-1]:
            resolved = _resolve_stashed(mention.strip().strip("\"'"))
            if resolved:
                return resolved
    return None


# Prefer concise `<summary>` narrative over the long plan-item echo;
# treat `❌ 当前步骤:` as "step done", not "current step".
# (`当前步骤` means "current step"; both the full-width and the ASCII colon
# are accepted after it.)
#
# `<summary>` form: group 1 is the step text, up to 160 chars with no `<` or
# newline, running to the closing tag. The lazy `[^<]*?` before `当前步骤`
# admits any prefix text inside the tag, including a `❌`.
_SUMMARY_STEP_RE = re.compile(
    r"<summary>[^<]*?当前步骤[::]\s*([^<\n]{1,160})</summary>", re.DOTALL)
# `📌`-prefixed form: group 1 is the step text, cut at the first newline or
# sentence-ending punctuation and capped at 160 chars.
_STEP_RE = re.compile(r"📌\s*当前步骤[::]\s*([^\n。!!??]{1,160})")
# `❌`-prefixed marker: matches only the prefix and the colon, no capture.
_DONE_STEP_RE = re.compile(r"❌\s*当前步骤[::]")


def current_step(messages, start_idx: int = 0, max_len: int = 60) -> str:
    """Return the latest "current step" snippet from *messages*, or "".

    Messages at or after *start_idx* are scanned newest first. Within a
    message the `<summary>` form is preferred over the `📌` form, and the
    last occurrence wins. Occurrences marked with the `❌` prefix mean
    "step done" and are skipped in both forms; if a message only has
    skipped occurrences, the scan continues with older messages.

    The snippet has whitespace collapsed, trailing colons/dashes and
    markdown emphasis removed, and is trimmed to *max_len* chars (with a
    trailing `…`) so it fits the 5-row plan card.
    """
    sliced = _slice(messages, start_idx)
    if not sliced: return ""

    def _clean(s: str) -> str:
        return _strip_md(re.sub(r"\s+", " ", s).strip().rstrip(" ::—-"))

    def _cap(s: str) -> str:
        s = _clean(s)
        if len(s) <= max_len: return s
        return s[:max_len - 1].rstrip() + "…"

    def _latest_live(pattern, text: str) -> Optional[str]:
        # The done check must see the WHOLE match: the `❌` prefix sits
        # before the captured group, never inside it.
        for hit in reversed(list(pattern.finditer(text))):
            if _DONE_STEP_RE.search(hit.group(0)): continue
            return hit.group(1)
        return None

    for m in reversed(sliced):
        text = getattr(m, "content", None)
        if text is None: text = m if isinstance(m, str) else ""
        if not text or "当前步骤" not in text: continue
        for pattern in (_SUMMARY_STEP_RE, _STEP_RE):
            snippet = _latest_live(pattern, text)
            if snippet is not None:
                return _cap(snippet)
    return ""


def is_active(agent, messages=None, start_idx: int = 0) -> bool:
    """Tell whether plan mode is on.

    True when the agent (or its handler) has a stashed
    `working['in_plan_mode']` value. Otherwise falls back to whether this
    session's messages reference an existing `plan_*/plan.md` (no global
    scan).
    """
    stashed = _stashed_plan_path(agent)
    if not stashed:
        return find_path_in_messages(messages, start_idx) is not None
    return True


def resolve_path(agent, messages=None, start_idx: int = 0) -> Optional[str]:
    """Return the live plan.md path, or None.

    The stashed plan path is tried first, if it resolves to an existing
    file; otherwise the session messages are searched.
    """
    stashed = _resolve_stashed(_stashed_plan_path(agent))
    return stashed or find_path_in_messages(messages, start_idx)


def summary(items: list[tuple[str, str]]) -> tuple[int, int]:
    """Return `(n_done, n_total)` for a list of `(content, status)` pairs."""
    statuses = [status for _, status in items]
    return statuses.count("done"), len(items)


def is_complete(items: list[tuple[str, str]]) -> bool:
    """Return True when every item is done; an empty list counts as complete."""
    for _, status in items:
        if status != "done":
            return False
    return True
7 changes: 5 additions & 2 deletions frontends/tuiapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,11 @@ def stash(match: re.Match[str]) -> str:
placeholders.append(match.group(0))
return f"\x00PH{len(placeholders) - 1}\x00"

safe = re.sub(r"`{4,}.*?`{4,}", stash, text, flags=re.DOTALL)
safe = re.sub(r"`{4,}[^`].*$", stash, safe, flags=re.DOTALL)
# Line-anchored fence matcher — see tuiapp_v2.fold_turns for rationale.
# Unanchored variant mis-paired backticks embedded in file_read output
# with later real fences, swallowing turn markers and ballooning the
# final "text" segment to MBs (1.85s markdown render on /continue).
safe = re.sub(r"^`{4,}.*?^`{4,}\n?", stash, text, flags=re.DOTALL | re.MULTILINE)
parts = re.split(r"(\**LLM Running \(Turn \d+\) \.\.\.\**)", safe)

def restore(part: str) -> str:
Expand Down
Loading