Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bin/ultracode
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ BASE_URL="http://127.0.0.1:${PORT}"
# append [1m] to 1M-capable Claude base ids before launch. Disable with
# UC_FORCE_1M=0; override the capable set with UC_1M_MODELS (comma-separated).
UC_FORCE_1M="${UC_FORCE_1M:-1}"
UC_1M_MODELS="${UC_1M_MODELS:-claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6}"
UC_1M_MODELS="${UC_1M_MODELS:-claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6,claude-opus}"
uc_add_1m() {
# Echo $1 with a [1m] suffix iff it is a bare, 1M-capable Claude id. Anything
# else (Auto Router, Gemini/GPT/Composer, Haiku, already-suffixed, empty) is
Expand Down
23 changes: 16 additions & 7 deletions docs/TROUBLESHOOTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -296,15 +296,24 @@ the 200k window even though Opus 4.8 / Opus 4.7 / Opus 4.6 / Sonnet 4.6 serve 1M
natively on the Anthropic API. Nothing is actually lost upstream — the window is
just mis-sized in the client.

**Fix.** The launcher now appends `[1m]` automatically when the chosen
orchestrator is a 1M-capable Claude model. Relaunch, pick the model, and confirm
**Fix.** Two parts work together. (1) The **launcher** appends `[1m]` to a
1M-capable Claude model chosen at launch. (2) The **proxy** also *advertises* the
`[1m]` suffix on `/v1/models` + `/healthz` for any **configured real-Claude
passthrough route** whose upstream model is 1M-capable (e.g. a `claude-opus` route
mapping to `claude-opus-4-8`) — so even an **in-session `/model` switch** (not just
the launch-time pick) gets the 1M window. The proxy strips the `[1m]` again before
routing, so it never reaches the backend. Relaunch, pick the model, and confirm
`/context` reads `/ 1M`.

- **Disable it** (back to bare ids): set `UC_FORCE_1M=0`.
- **Change the capable set:** set `UC_1M_MODELS` to a comma-separated list of base
ids (default `claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6`).
- **Not affected:** Haiku 4.5 (200k only), `claude-auto`, and non-Claude routes
(Gemini / GPT / Composer) are never given a `[1m]` suffix.
- **Disable launcher suffixing** (back to bare ids): set `UC_FORCE_1M=0`.
- **Change the launcher's capable set:** set `UC_1M_MODELS` to a comma-separated
list of base ids (default `claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6,claude-opus`).
- **Disable proxy advertising:** set `UC_ADVERTISE_1M=0`. To change which upstream
models count as 1M-capable, set `UC_1M_UPSTREAM` to a comma-separated list of upstream
model ids (default `claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6`).
- **Not affected:** Haiku 4.5 (200k only), `claude-auto`, worker (`Worker → …`)
entries, and non-Claude routes (Gemini / GPT / Composer) never get a `[1m]`
suffix.
- **Caveat:** if your Anthropic-passthrough hop can fall back to a backend that
only supports 200k, a conversation that grows past 200k may then fail there —
make sure that fallback also honors 1M.
Expand Down
67 changes: 55 additions & 12 deletions proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,46 @@
DIRECTIVES = {"planner": None, "strip": True} # filled from config in main()
_ROUTE_ALIASES = {} # normalized token -> concrete route id

# 1M context window: Claude Code sizes its context meter (and auto-compaction) to
# 1M only when the model id it holds carries a "[1m]" suffix. For a real-Claude
# passthrough route whose upstream model is 1M-capable, we ADVERTISE the picker id
# with that suffix on /v1/models + /healthz, so even an in-session /model switch
# (not just a launch-time pick) gets the 1M window. The suffix is a client-side
# convention, not an Anthropic model id: it is stripped before routing and
# normalized off the sticky orchestrator/worker selection, so internal ids stay
# clean. Disable with UC_ADVERTISE_1M=0. See docs/DIRECTIVES.md / PR #8 + #10.
# Client-side marker appended to a model id to request the 1M context window.
_ONEM_SUFFIX = "[1m]"

# Advertising is on by default; only the exact value "0" turns it off.
ADVERTISE_1M = os.environ.get("UC_ADVERTISE_1M", "1") != "0"

# Upstream model ids treated as 1M-capable. UC_1M_UPSTREAM overrides the default
# list; entries are comma-separated, whitespace-trimmed, and blanks are dropped.
_CONTEXT_1M_UPSTREAM = {
    token.strip()
    for token in os.environ.get(
        "UC_1M_UPSTREAM",
        "claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6",
    ).split(",")
    if token.strip()
}


def _strip_1m(mid):
    """Return *mid* with a trailing ``[1m]`` window suffix removed.

    The suffix is a client convention, not part of a real model id. Values
    that are not strings, or strings that do not end with the suffix, are
    returned unchanged.
    """
    carries_suffix = isinstance(mid, str) and mid.endswith(_ONEM_SUFFIX)
    if not carries_suffix:
        return mid
    return mid[:-len(_ONEM_SUFFIX)]


def _advertise_id(model_entry):
    """Return the id to advertise for a configured model on /v1/models + /healthz.

    Appends ``[1m]`` when ADVERTISE_1M is on and the model is a real-Claude
    PASSTHROUGH route to a 1M-capable upstream model, so Claude Code renders
    the 1M window for it (incl. in-session /model picks). Worker entries and
    non-passthrough routes are returned unchanged.

    Args:
        model_entry: a configured model dict carrying an ``"id"`` key. Any
            other value yields ``None``.

    Returns:
        The entry's id, with ``[1m]`` appended when it qualifies; the id
        unchanged otherwise; ``None`` when *model_entry* is not a dict or has
        no ``"id"``.

    Never raises.
    """
    mid = model_entry.get("id") if isinstance(model_entry, dict) else None
    if not (ADVERTISE_1M and isinstance(mid, str)):
        return mid
    # Already-suffixed ids and worker-prefixed entries are never (re)suffixed.
    if mid.endswith(_ONEM_SUFFIX) or mid.startswith(WORKER_ID_PREFIX):
        return mid
    slot = UC_SLOT_MAP.get(mid)
    # A route counts as passthrough (real Claude) only when its "type" is
    # absent/None or "anthropic".
    if not isinstance(slot, dict) or slot.get("type") not in (None, "anthropic"):
        return mid
    # The upstream model defaults to the picker id when the route names none.
    upstream = slot.get("model") or mid
    # Guard the set lookup: an unhashable "model" value (e.g. a list or dict
    # from malformed JSON config) would raise TypeError on ``in`` and break
    # the "never raises" contract on the /healthz and /v1/models paths.
    if isinstance(upstream, str) and upstream in _CONTEXT_1M_UPSTREAM:
        return mid + _ONEM_SUFFIX
    return mid

try:
UC_MODEL_MAP = json.loads(os.environ.get("UC_MODEL_MAP", "") or "{}")
if not isinstance(UC_MODEL_MAP, dict):
Expand Down Expand Up @@ -566,6 +606,7 @@ def _set_selection(orch=None, worker=None):
"""Directly pre-set the sticky orchestrator/worker selection (used by the
two-column pre-launch selector via POST /uc/select). Either may be None to
leave that tier unchanged. Returns the resolved active selection dict."""
orch, worker = _strip_1m(orch), _strip_1m(worker) # selections store clean ids
with _SEL_LOCK:
if orch is not None:
_ACTIVE["orch"] = orch or None
Expand All @@ -584,6 +625,7 @@ def _select_target(mid, tier: str):
fresh sessions behave exactly as before."""
if not ORCH_WORKER:
return mid
mid = _strip_1m(mid) # a [1m]-suffixed pick maps to its clean route id
with _SEL_LOCK:
if mid in _WORKER_MAP:
_ACTIVE["worker"] = _WORKER_MAP[mid]
Expand Down Expand Up @@ -893,14 +935,14 @@ def transform_messages_body(raw: bytes):
route = {}

# 1M context window: Claude Code appends a "[1m]" suffix to a model id to ask
# the client for the 1M window (it also sends the context-1m beta header; see
# the launchers' UC_FORCE_1M / [1m] handling). That suffix is a client-side
# convention, not an Anthropic model id, so it must not reach routing (it would
# not match a configured route or an orchestrator/worker pick) or the upstream.
# Strip it up front so "<id>[1m]" behaves exactly like "<id>" everywhere below;
# the 1M window is unaffected because it rides the beta header, left untouched.
if isinstance(model_before, str) and model_before.endswith("[1m]"):
model_before = model_before[:-len("[1m]")]
# the client for the 1M window (it also sends the context-1m beta header). That
# suffix is a client-side convention, not an Anthropic model id, so it must not
# reach routing (it wouldn't match a route) or the upstream. Strip it up front
# so "<id>[1m]" behaves exactly like "<id>" everywhere below; the 1M window is
# unaffected because it comes from the beta header, which we leave untouched.
stripped = _strip_1m(model_before)
if stripped != model_before:
model_before = stripped
body["model"] = model_before
changed = True

Expand Down Expand Up @@ -1918,7 +1960,7 @@ def _maybe_health(self) -> bool:
"candidates": [{"id": c["id"], "cost": c.get("cost")}
for c in _router_available_candidates()],
},
"custom_models": [{"id": m["id"], "display_name": m["display_name"]}
"custom_models": [{"id": _advertise_id(m), "display_name": m["display_name"]}
for m in UC_MODELS],
"stock_models": [{"id": m["id"], "display_name": m["display_name"]}
for m in _stock_models()],
Expand Down Expand Up @@ -2036,9 +2078,10 @@ def _handle_models(self) -> bool:
existing.add(m["id"])
stock_families.add(fam)
for m in UC_MODELS:
if m["id"] not in existing:
data.append(dict(m))
existing.add(m["id"])
adv = _advertise_id(m)
if adv not in existing:
data.append({**m, "id": adv})
existing.add(adv)
self._raw(200, "application/json", json.dumps(base).encode("utf-8"))
return True

Expand Down
37 changes: 21 additions & 16 deletions test_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,21 +263,6 @@ def main():
assert up._expand_env("Bearer ${MOCK_KEY}") == "Bearer secret123"
print("[ok] ${ENV} expansion in route auth")

# PR #8 companion: Claude Code's [1m] context-window suffix on the model id
# is stripped before routing, so "<id>[1m]" still matches "<id>"'s route
# (the 1M window itself rides the context-1m beta header, not the id). A
# naive exact-match lookup would otherwise miss the route once the launcher
# appends [1m] to a 1M-capable Claude pick.
_saved_slots = up.UC_SLOT_MAP
up.UC_SLOT_MAP = {"claude-big": {"type": "openai_compat", "model": "big-real",
"upstream": mock + "/v1", "auth": "Bearer ${MOCK_KEY}"}}
out_1m, _ = up.transform_messages_body(json.dumps({
"model": "claude-big[1m]", "max_tokens": 8,
"messages": [{"role": "user", "content": "hi"}]}).encode())
assert json.loads(out_1m)["model"] == "big-real", json.loads(out_1m)["model"]
up.UC_SLOT_MAP = _saved_slots
print("[ok] 1M [1m] window suffix stripped before routing")

# Stock Claude models: the built-in fallback so real Claude stays in
# /model even with no upstream list. Toggle + override are honored, and
# every advertised id obeys Claude Code's /^(claude|anthropic)/i rule.
Expand Down Expand Up @@ -457,6 +442,26 @@ def _pin(text):
# plan-mode detection drives the optional planner auto-route
assert up._is_plan_mode({"tools": [{"name": "ExitPlanMode"}]}) is True
assert up._is_plan_mode({"tools": [{"name": "Bash"}]}) is False
# 1M context-window suffix: "<id>[1m]" is stripped before routing, so it
# resolves to "<id>"'s route (the 1M window itself rides the beta header).
out1m, _ = up.transform_messages_body(json.dumps({
"model": "claude-composer[1m]", "max_tokens": 16,
"messages": [{"role": "user", "content": "hi"}]}).encode())
assert json.loads(out1m)["model"] == "cursor/composer-2.5", json.loads(out1m)["model"]
# advertise [1m] on a real-Claude PASSTHROUGH route to a 1M model, so the
# /model picker id carries it and Claude Code renders 1M even on in-session
# switches; worker + non-passthrough entries are left untouched
assert up._advertise_id({"id": "claude-opus"}) == "claude-opus[1m]"
assert up._advertise_id({"id": "claude-composer"}) == "claude-composer" # openai_compat
assert up._advertise_id({"id": "claude-worker-opus"}) == "claude-worker-opus"
assert up._strip_1m("claude-opus[1m]") == "claude-opus"
# a [1m]-suffixed pick still routes to its clean route (selection normalized)
up._ACTIVE.update({"orch": None, "worker": None, "worker_explicit": False})
out_adv, _ = up.transform_messages_body(json.dumps({
"model": "claude-opus[1m]", "max_tokens": 16,
"messages": [{"role": "user", "content": "hi"}]}).encode())
assert json.loads(out_adv)["model"] == "claude-opus-4-8", json.loads(out_adv)["model"]
up._ACTIVE.update({"orch": None, "worker": None, "worker_explicit": False})
# a name that maps to TWO routes (gpt-5.5 head AND a gpt-oss model head) is
# dropped as ambiguous -> resolves to nothing (regression for the docs/gpt gap)
_slots0, _models0 = up.UC_SLOT_MAP, up.UC_MODELS
Expand Down Expand Up @@ -485,7 +490,7 @@ def _pin(text):
assert _pin("@composer do it")[0] is None
up.UC_SLOT_MAP, up.UC_MODELS, up._ROUTE_ALIASES, up.DIRECTIVES = (
_saved[0], _saved[1], _saved[2], _saved[3])
print("[ok] routing directives: opt-in default-off / NL opt-in / surgical strip / planner-gated / gpt-collision / dispatch")
print("[ok] routing directives: opt-in default-off / NL opt-in / surgical strip / planner-gated / gpt-collision / dispatch / [1m] strip + advertise")

# issue #3: a rejected tool call (with or without a comment) must not leave
# an assistant tool_calls message unanswered, and tool replies must come
Expand Down
2 changes: 1 addition & 1 deletion windows/Start-UltraCode.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ function Add-Uc1m {
if ([string]::IsNullOrEmpty($ModelId)) { return $ModelId }
if ($ModelId.Contains("[1m]")) { return $ModelId }
$set = if ($env:UC_1M_MODELS) { $env:UC_1M_MODELS }
else { "claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6" }
else { "claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6,claude-opus" }
foreach ($id in $set.Split(",")) {
if ($ModelId -eq $id.Trim()) { return "${ModelId}[1m]" }
}
Expand Down
Loading