From cd7ce23ae375d3ff3a297e7ff6537163554ed4b7 Mon Sep 17 00:00:00 2001 From: Payne Date: Wed, 3 Jun 2026 22:28:22 +0300 Subject: [PATCH] Advertise [1m] so configured 1M-capable Claude routes get the 1M window Follow-up to #8 (launcher appends [1m]) and #10 (proxy strips [1m] before routing). Those give the 1M context window to a launch-time pick of a stock id, but an in-session /model switch to a CONFIGURED real-Claude route -- e.g. the shipped `claude-opus` route, which maps to claude-opus-4-8 -- used the bare gateway id, so /context showed 200k and auto-compaction was mis-keyed. Claude Code sizes its context meter to 1M only when the model id it holds carries the [1m] suffix (verified: it honors the suffix on a custom gateway id, not just native ids). So the proxy now ADVERTISES the suffix on /v1/models + /healthz for real-Claude PASSTHROUGH routes whose upstream model is 1M-capable. The /model picker id then carries [1m] and the 1M window engages even on in-session switches. - The suffix is stripped before routing (the inline strip from #10 is refactored into a shared _strip_1m helper) and normalized off the sticky orchestrator/worker selection, so internal route ids stay clean (claude-opus[1m] -> claude-opus). - Scope: real-Claude passthrough routes only. Worker ("Worker -> ...") entries and non-passthrough routes (openai_compat / codex / cursor) are never suffixed. - Launcher: add `claude-opus` (the shipped route) to the UC_1M_MODELS default so a launch/selector pick of it matches the /model behavior. Toggles: UC_ADVERTISE_1M=0 (off), UC_1M_UPSTREAM (the 1M-capable upstream set). Verified live: picking claude-opus via /model now shows /context = / 1M. Tests + doctor pass; TROUBLESHOOTING updated. 
Co-Authored-By: Claude Opus 4.8 --- bin/ultracode | 2 +- docs/TROUBLESHOOTING.md | 23 +++++++++---- proxy.py | 67 ++++++++++++++++++++++++++++++------- test_proxy.py | 37 +++++++++++--------- windows/Start-UltraCode.ps1 | 2 +- 5 files changed, 94 insertions(+), 37 deletions(-) diff --git a/bin/ultracode b/bin/ultracode index dc83190..c7f51ce 100755 --- a/bin/ultracode +++ b/bin/ultracode @@ -61,7 +61,7 @@ BASE_URL="http://127.0.0.1:${PORT}" # append [1m] to 1M-capable Claude base ids before launch. Disable with # UC_FORCE_1M=0; override the capable set with UC_1M_MODELS (comma-separated). UC_FORCE_1M="${UC_FORCE_1M:-1}" -UC_1M_MODELS="${UC_1M_MODELS:-claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6}" +UC_1M_MODELS="${UC_1M_MODELS:-claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6,claude-opus}" uc_add_1m() { # Echo $1 with a [1m] suffix iff it is a bare, 1M-capable Claude id. Anything # else (Auto Router, Gemini/GPT/Composer, Haiku, already-suffixed, empty) is diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index dd7bb36..02bd045 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -296,15 +296,24 @@ the 200k window even though Opus 4.8 / Opus 4.7 / Opus 4.6 / Sonnet 4.6 serve 1M natively on the Anthropic API. Nothing is actually lost upstream — the window is just mis-sized in the client. -**Fix.** The launcher now appends `[1m]` automatically when the chosen -orchestrator is a 1M-capable Claude model. Relaunch, pick the model, and confirm +**Fix.** Two parts work together. (1) The **launcher** appends `[1m]` to a +1M-capable Claude model chosen at launch. (2) The **proxy** also *advertises* the +`[1m]` suffix on `/v1/models` + `/healthz` for any **configured real-Claude +passthrough route** whose upstream model is 1M-capable (e.g. a `claude-opus` route +mapping to `claude-opus-4-8`) — so even an **in-session `/model` switch** (not just +the launch-time pick) gets the 1M window. 
The proxy strips the `[1m]` again before +routing, so it never reaches the backend. Relaunch, pick the model, and confirm `/context` reads `/ 1M`. -- **Disable it** (back to bare ids): set `UC_FORCE_1M=0`. -- **Change the capable set:** set `UC_1M_MODELS` to a comma-separated list of base - ids (default `claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6`). -- **Not affected:** Haiku 4.5 (200k only), `claude-auto`, and non-Claude routes - (Gemini / GPT / Composer) are never given a `[1m]` suffix. +- **Disable launcher suffixing** (back to bare ids): set `UC_FORCE_1M=0`. +- **Change the launcher's capable set:** set `UC_1M_MODELS` to a comma-separated + list of base ids (default `claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6,claude-opus`). +- **Disable proxy advertising:** set `UC_ADVERTISE_1M=0`. Change which upstream + models count as 1M with `UC_1M_UPSTREAM` (comma-separated upstream model ids; + default the Opus 4.6–4.8 + Sonnet 4.6 family). +- **Not affected:** Haiku 4.5 (200k only), `claude-auto`, worker (`Worker → …`) + entries, and non-Claude routes (Gemini / GPT / Composer) never get a `[1m]` + suffix. - **Caveat:** if your Anthropic-passthrough hop can fall back to a backend that only supports 200k, a conversation that grows past 200k may then fail there — make sure that fallback also honors 1M. diff --git a/proxy.py b/proxy.py index 2a8ceaf..0e7fa9a 100644 --- a/proxy.py +++ b/proxy.py @@ -146,6 +146,46 @@ DIRECTIVES = {"planner": None, "strip": True} # filled from config in main() _ROUTE_ALIASES = {} # normalized token -> concrete route id +# 1M context window: Claude Code sizes its context meter (and auto-compaction) to +# 1M only when the model id it holds carries a "[1m]" suffix. For a real-Claude +# passthrough route whose upstream model is 1M-capable, we ADVERTISE the picker id +# with that suffix on /v1/models + /healthz, so even an in-session /model switch +# (not just a launch-time pick) gets the 1M window. 
# 1M context window: the "[1m]" suffix is a client-side convention, not an
# Anthropic model id. It is stripped before routing and normalized off the sticky
# orchestrator/worker selection, so internal ids stay clean. Disable advertising
# with UC_ADVERTISE_1M=0. See docs/DIRECTIVES.md / PR #8 + #10.
_ONEM_SUFFIX = "[1m]"
ADVERTISE_1M = os.environ.get("UC_ADVERTISE_1M", "1") != "0"
# Upstream model ids treated as 1M-capable. Override with UC_1M_UPSTREAM
# (comma-separated); surrounding whitespace and blank entries are ignored.
_CONTEXT_1M_UPSTREAM = {
    token.strip()
    for token in os.environ.get(
        "UC_1M_UPSTREAM",
        "claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6",
    ).split(",")
    if token.strip()
}


def _strip_1m(mid):
    """Return ``mid`` without a trailing "[1m]" window suffix.

    Only a single trailing suffix on a ``str`` is removed; any other value
    (``None``, non-strings, ids without the suffix) is returned unchanged.
    """
    if isinstance(mid, str) and mid.endswith(_ONEM_SUFFIX):
        return mid[:-len(_ONEM_SUFFIX)]
    return mid


def _advertise_id(model_entry):
    """Return the id to advertise for a configured model on /v1/models + /healthz.

    Appends "[1m]" when ADVERTISE_1M is on and the entry's id names a
    passthrough route (slot ``type`` missing/None or "anthropic") whose upstream
    model -- the slot's ``model``, falling back to the id itself -- is listed in
    _CONTEXT_1M_UPSTREAM. Already-suffixed ids, worker ids (WORKER_ID_PREFIX),
    ids with no dict slot in UC_SLOT_MAP, and non-passthrough routes are returned
    unchanged. A non-dict ``model_entry`` yields ``None``.

    Never raises on malformed entries or slots: an upstream value that is not a
    string (e.g. a list from a bad route config) is simply treated as not
    1M-capable instead of failing the whole model listing.
    """
    mid = model_entry.get("id") if isinstance(model_entry, dict) else None
    if not (ADVERTISE_1M and isinstance(mid, str)):
        return mid
    if mid.endswith(_ONEM_SUFFIX) or mid.startswith(WORKER_ID_PREFIX):
        return mid
    slot = UC_SLOT_MAP.get(mid) if isinstance(UC_SLOT_MAP, dict) else None
    if not isinstance(slot, dict) or slot.get("type") not in (None, "anthropic"):
        return mid  # passthrough (real Claude) only
    upstream = slot.get("model") or mid
    # Guard the set lookup: an unhashable upstream would raise TypeError here.
    if isinstance(upstream, str) and upstream in _CONTEXT_1M_UPSTREAM:
        return mid + _ONEM_SUFFIX
    return mid
Returns the resolved active selection dict.""" + orch, worker = _strip_1m(orch), _strip_1m(worker) # selections store clean ids with _SEL_LOCK: if orch is not None: _ACTIVE["orch"] = orch or None @@ -584,6 +625,7 @@ def _select_target(mid, tier: str): fresh sessions behave exactly as before.""" if not ORCH_WORKER: return mid + mid = _strip_1m(mid) # a [1m]-suffixed pick maps to its clean route id with _SEL_LOCK: if mid in _WORKER_MAP: _ACTIVE["worker"] = _WORKER_MAP[mid] @@ -893,14 +935,14 @@ def transform_messages_body(raw: bytes): route = {} # 1M context window: Claude Code appends a "[1m]" suffix to a model id to ask - # the client for the 1M window (it also sends the context-1m beta header; see - # the launchers' UC_FORCE_1M / [1m] handling). That suffix is a client-side - # convention, not an Anthropic model id, so it must not reach routing (it would - # not match a configured route or an orchestrator/worker pick) or the upstream. - # Strip it up front so "[1m]" behaves exactly like "" everywhere below; - # the 1M window is unaffected because it rides the beta header, left untouched. - if isinstance(model_before, str) and model_before.endswith("[1m]"): - model_before = model_before[:-len("[1m]")] + # the client for the 1M window (it also sends the context-1m beta header). That + # suffix is a client-side convention, not an Anthropic model id, so it must not + # reach routing (it wouldn't match a route) or the upstream. Strip it up front + # so "[1m]" behaves exactly like "" everywhere below; the 1M window is + # unaffected because it comes from the beta header, which we leave untouched. 
+ stripped = _strip_1m(model_before) + if stripped != model_before: + model_before = stripped body["model"] = model_before changed = True @@ -1918,7 +1960,7 @@ def _maybe_health(self) -> bool: "candidates": [{"id": c["id"], "cost": c.get("cost")} for c in _router_available_candidates()], }, - "custom_models": [{"id": m["id"], "display_name": m["display_name"]} + "custom_models": [{"id": _advertise_id(m), "display_name": m["display_name"]} for m in UC_MODELS], "stock_models": [{"id": m["id"], "display_name": m["display_name"]} for m in _stock_models()], @@ -2036,9 +2078,10 @@ def _handle_models(self) -> bool: existing.add(m["id"]) stock_families.add(fam) for m in UC_MODELS: - if m["id"] not in existing: - data.append(dict(m)) - existing.add(m["id"]) + adv = _advertise_id(m) + if adv not in existing: + data.append({**m, "id": adv}) + existing.add(adv) self._raw(200, "application/json", json.dumps(base).encode("utf-8")) return True diff --git a/test_proxy.py b/test_proxy.py index 18ec568..79922a6 100755 --- a/test_proxy.py +++ b/test_proxy.py @@ -263,21 +263,6 @@ def main(): assert up._expand_env("Bearer ${MOCK_KEY}") == "Bearer secret123" print("[ok] ${ENV} expansion in route auth") - # PR #8 companion: Claude Code's [1m] context-window suffix on the model id - # is stripped before routing, so "[1m]" still matches ""'s route - # (the 1M window itself rides the context-1m beta header, not the id). A - # naive exact-match lookup would otherwise miss the route once the launcher - # appends [1m] to a 1M-capable Claude pick. 
- _saved_slots = up.UC_SLOT_MAP - up.UC_SLOT_MAP = {"claude-big": {"type": "openai_compat", "model": "big-real", - "upstream": mock + "/v1", "auth": "Bearer ${MOCK_KEY}"}} - out_1m, _ = up.transform_messages_body(json.dumps({ - "model": "claude-big[1m]", "max_tokens": 8, - "messages": [{"role": "user", "content": "hi"}]}).encode()) - assert json.loads(out_1m)["model"] == "big-real", json.loads(out_1m)["model"] - up.UC_SLOT_MAP = _saved_slots - print("[ok] 1M [1m] window suffix stripped before routing") - # Stock Claude models: the built-in fallback so real Claude stays in # /model even with no upstream list. Toggle + override are honored, and # every advertised id obeys Claude Code's /^(claude|anthropic)/i rule. @@ -457,6 +442,26 @@ def _pin(text): # plan-mode detection drives the optional planner auto-route assert up._is_plan_mode({"tools": [{"name": "ExitPlanMode"}]}) is True assert up._is_plan_mode({"tools": [{"name": "Bash"}]}) is False + # 1M context-window suffix: "[1m]" is stripped before routing, so it + # resolves to ""'s route (the 1M window itself rides the beta header). 
+ out1m, _ = up.transform_messages_body(json.dumps({ + "model": "claude-composer[1m]", "max_tokens": 16, + "messages": [{"role": "user", "content": "hi"}]}).encode()) + assert json.loads(out1m)["model"] == "cursor/composer-2.5", json.loads(out1m)["model"] + # advertise [1m] on a real-Claude PASSTHROUGH route to a 1M model, so the + # /model picker id carries it and Claude Code renders 1M even on in-session + # switches; worker + non-passthrough entries are left untouched + assert up._advertise_id({"id": "claude-opus"}) == "claude-opus[1m]" + assert up._advertise_id({"id": "claude-composer"}) == "claude-composer" # openai_compat + assert up._advertise_id({"id": "claude-worker-opus"}) == "claude-worker-opus" + assert up._strip_1m("claude-opus[1m]") == "claude-opus" + # a [1m]-suffixed pick still routes to its clean route (selection normalized) + up._ACTIVE.update({"orch": None, "worker": None, "worker_explicit": False}) + out_adv, _ = up.transform_messages_body(json.dumps({ + "model": "claude-opus[1m]", "max_tokens": 16, + "messages": [{"role": "user", "content": "hi"}]}).encode()) + assert json.loads(out_adv)["model"] == "claude-opus-4-8", json.loads(out_adv)["model"] + up._ACTIVE.update({"orch": None, "worker": None, "worker_explicit": False}) # a name that maps to TWO routes (gpt-5.5 head AND a gpt-oss model head) is # dropped as ambiguous -> resolves to nothing (regression for the docs/gpt gap) _slots0, _models0 = up.UC_SLOT_MAP, up.UC_MODELS @@ -485,7 +490,7 @@ def _pin(text): assert _pin("@composer do it")[0] is None up.UC_SLOT_MAP, up.UC_MODELS, up._ROUTE_ALIASES, up.DIRECTIVES = ( _saved[0], _saved[1], _saved[2], _saved[3]) - print("[ok] routing directives: opt-in default-off / NL opt-in / surgical strip / planner-gated / gpt-collision / dispatch") + print("[ok] routing directives: opt-in default-off / NL opt-in / surgical strip / planner-gated / gpt-collision / dispatch / [1m] strip + advertise") # issue #3: a rejected tool call (with or without a comment) 
must not leave # an assistant tool_calls message unanswered, and tool replies must come diff --git a/windows/Start-UltraCode.ps1 b/windows/Start-UltraCode.ps1 index a1d5a3a..14150a3 100644 --- a/windows/Start-UltraCode.ps1 +++ b/windows/Start-UltraCode.ps1 @@ -98,7 +98,7 @@ function Add-Uc1m { if ([string]::IsNullOrEmpty($ModelId)) { return $ModelId } if ($ModelId.Contains("[1m]")) { return $ModelId } $set = if ($env:UC_1M_MODELS) { $env:UC_1M_MODELS } - else { "claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6" } + else { "claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6,claude-opus" } foreach ($id in $set.Split(",")) { if ($ModelId -eq $id.Trim()) { return "${ModelId}[1m]" } }