From cd7ce23ae375d3ff3a297e7ff6537163554ed4b7 Mon Sep 17 00:00:00 2001
From: Payne <baboialex95@gmail.com>
Date: Wed, 3 Jun 2026 22:28:22 +0300
Subject: [PATCH] Advertise [1m] so configured 1M-capable Claude routes get the
 1M window

Follow-up to #8 (launcher appends [1m]) and #10 (proxy strips [1m] before
routing). Those give the 1M context window to a launch-time pick of a stock id,
but an in-session /model switch to a CONFIGURED real-Claude route -- e.g. the
shipped `claude-opus` route, which maps to claude-opus-4-8 -- used the bare
gateway id, so /context showed 200k and auto-compaction was mis-keyed.

Claude Code sizes its context meter to 1M only when the model id it holds carries
the [1m] suffix (verified: it honors the suffix on a custom gateway id, not just
native ids). So the proxy now ADVERTISES the suffix on /v1/models + /healthz for
real-Claude PASSTHROUGH routes whose upstream model is 1M-capable. The /model
picker id then carries [1m] and the 1M window engages even on in-session switches.

- The suffix is stripped before routing (the inline strip from #10 is refactored
  into a shared _strip_1m helper) and normalized off the sticky orchestrator/worker
  selection, so internal route ids stay clean (claude-opus[1m] -> claude-opus).
- Scope: real-Claude passthrough routes only. Worker ("Worker -> ...") entries and
  non-passthrough routes (openai_compat / codex / cursor) are never suffixed.
- Launcher: add `claude-opus` (the shipped route) to the UC_1M_MODELS default so a
  launch/selector pick of it matches the /model behavior.

Toggles: UC_ADVERTISE_1M=0 (off), UC_1M_UPSTREAM (the 1M-capable upstream set).
Verified live: after picking claude-opus via /model, /context now reads "/ 1M".
Tests + doctor pass; TROUBLESHOOTING updated.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 bin/ultracode               |  2 +-
 docs/TROUBLESHOOTING.md     | 23 +++++++++----
 proxy.py                    | 67 ++++++++++++++++++++++++++++++-------
 test_proxy.py               | 37 +++++++++++---------
 windows/Start-UltraCode.ps1 |  2 +-
 5 files changed, 94 insertions(+), 37 deletions(-)

diff --git a/bin/ultracode b/bin/ultracode
index dc83190..c7f51ce 100755
--- a/bin/ultracode
+++ b/bin/ultracode
@@ -61,7 +61,7 @@ BASE_URL="http://127.0.0.1:${PORT}"
 # append [1m] to 1M-capable Claude base ids before launch. Disable with
 # UC_FORCE_1M=0; override the capable set with UC_1M_MODELS (comma-separated).
 UC_FORCE_1M="${UC_FORCE_1M:-1}"
-UC_1M_MODELS="${UC_1M_MODELS:-claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6}"
+UC_1M_MODELS="${UC_1M_MODELS:-claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6,claude-opus}"
 uc_add_1m() {
   # Echo $1 with a [1m] suffix iff it is a bare, 1M-capable Claude id. Anything
   # else (Auto Router, Gemini/GPT/Composer, Haiku, already-suffixed, empty) is
diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md
index dd7bb36..02bd045 100644
--- a/docs/TROUBLESHOOTING.md
+++ b/docs/TROUBLESHOOTING.md
@@ -296,15 +296,24 @@ the 200k window even though Opus 4.8 / Opus 4.7 / Opus 4.6 / Sonnet 4.6 serve 1M
 natively on the Anthropic API. Nothing is actually lost upstream — the window is
 just mis-sized in the client.
 
-**Fix.** The launcher now appends `[1m]` automatically when the chosen
-orchestrator is a 1M-capable Claude model. Relaunch, pick the model, and confirm
+**Fix.** Two parts work together. (1) The **launcher** appends `[1m]` to a
+1M-capable Claude model chosen at launch. (2) The **proxy** also *advertises* the
+`[1m]` suffix on `/v1/models` + `/healthz` for any **configured real-Claude
+passthrough route** whose upstream model is 1M-capable (e.g. a `claude-opus` route
+mapping to `claude-opus-4-8`) — so even an **in-session `/model` switch** (not just
+the launch-time pick) gets the 1M window. The proxy strips the `[1m]` again before
+routing, so it never reaches the backend. Relaunch, pick the model, and confirm
 `/context` reads `/ 1M`.
 
-- **Disable it** (back to bare ids): set `UC_FORCE_1M=0`.
-- **Change the capable set:** set `UC_1M_MODELS` to a comma-separated list of base
-  ids (default `claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6`).
-- **Not affected:** Haiku 4.5 (200k only), `claude-auto`, and non-Claude routes
-  (Gemini / GPT / Composer) are never given a `[1m]` suffix.
+- **Disable launcher suffixing** (back to bare ids): set `UC_FORCE_1M=0`.
+- **Change the launcher's capable set:** set `UC_1M_MODELS` to a comma-separated
+  list of base ids (default `claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6,claude-opus`).
+- **Disable proxy advertising:** set `UC_ADVERTISE_1M=0`. Change which upstream
+  models count as 1M with `UC_1M_UPSTREAM` (comma-separated upstream model ids;
+  default `claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6`).
+- **Not affected:** Haiku 4.5 (200k only), `claude-auto`, worker (`Worker → …`)
+  entries, and non-Claude routes (Gemini / GPT / Composer) never get a `[1m]`
+  suffix.
 - **Caveat:** if your Anthropic-passthrough hop can fall back to a backend that
   only supports 200k, a conversation that grows past 200k may then fail there —
   make sure that fallback also honors 1M.
diff --git a/proxy.py b/proxy.py
index 2a8ceaf..0e7fa9a 100644
--- a/proxy.py
+++ b/proxy.py
@@ -146,6 +146,46 @@
 DIRECTIVES = {"planner": None, "strip": True}   # filled from config in main()
 _ROUTE_ALIASES = {}                              # normalized token -> concrete route id
 
+# 1M context window: Claude Code sizes its context meter (and auto-compaction) to
+# 1M only when the model id it holds carries a "[1m]" suffix. For a real-Claude
+# passthrough route whose upstream model is 1M-capable, we ADVERTISE the picker id
+# with that suffix on /v1/models + /healthz, so even an in-session /model switch
+# (not just a launch-time pick) gets the 1M window. The suffix is a client-side
+# convention, not an Anthropic model id: it is stripped before routing and
+# normalized off the sticky orchestrator/worker selection, so internal ids stay
+# clean. Disable with UC_ADVERTISE_1M=0. See docs/TROUBLESHOOTING.md / PR #8 + #10.
+_ONEM_SUFFIX = "[1m]"
+ADVERTISE_1M = os.environ.get("UC_ADVERTISE_1M", "1") != "0"
+_CONTEXT_1M_UPSTREAM = set(t.strip() for t in os.environ.get(
+    "UC_1M_UPSTREAM",
+    "claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6").split(",") if t.strip())
+
+
+def _strip_1m(mid):
+    """Model id without a trailing [1m] window suffix (the client convention)."""
+    if isinstance(mid, str) and mid.endswith(_ONEM_SUFFIX):
+        return mid[:-len(_ONEM_SUFFIX)]
+    return mid
+
+
+def _advertise_id(model_entry):
+    """The id to advertise for a configured model on /v1/models + /healthz. Appends
+    [1m] when ADVERTISE_1M is on and the model is a real-Claude PASSTHROUGH route to
+    a 1M-capable upstream model, so Claude Code renders the 1M window for it (incl.
+    in-session /model picks). Worker entries and non-passthrough routes are returned
+    unchanged. Never raises."""
+    mid = model_entry.get("id") if isinstance(model_entry, dict) else None
+    if not (ADVERTISE_1M and isinstance(mid, str)):
+        return mid
+    if mid.endswith(_ONEM_SUFFIX) or mid.startswith(WORKER_ID_PREFIX):
+        return mid
+    slot = UC_SLOT_MAP.get(mid)
+    if not isinstance(slot, dict) or slot.get("type") not in (None, "anthropic"):
+        return mid                                  # passthrough (real Claude) only
+    if (slot.get("model") or mid) in _CONTEXT_1M_UPSTREAM:
+        return mid + _ONEM_SUFFIX
+    return mid
+
 try:
     UC_MODEL_MAP = json.loads(os.environ.get("UC_MODEL_MAP", "") or "{}")
     if not isinstance(UC_MODEL_MAP, dict):
@@ -566,6 +606,7 @@ def _set_selection(orch=None, worker=None):
     """Directly pre-set the sticky orchestrator/worker selection (used by the
     two-column pre-launch selector via POST /uc/select). Either may be None to
     leave that tier unchanged. Returns the resolved active selection dict."""
+    orch, worker = _strip_1m(orch), _strip_1m(worker)   # selections store clean ids
     with _SEL_LOCK:
         if orch is not None:
             _ACTIVE["orch"] = orch or None
@@ -584,6 +625,7 @@ def _select_target(mid, tier: str):
     fresh sessions behave exactly as before."""
     if not ORCH_WORKER:
         return mid
+    mid = _strip_1m(mid)   # a [1m]-suffixed pick maps to its clean route id
     with _SEL_LOCK:
         if mid in _WORKER_MAP:
             _ACTIVE["worker"] = _WORKER_MAP[mid]
@@ -893,14 +935,14 @@ def transform_messages_body(raw: bytes):
     route = {}
 
     # 1M context window: Claude Code appends a "[1m]" suffix to a model id to ask
-    # the client for the 1M window (it also sends the context-1m beta header; see
-    # the launchers' UC_FORCE_1M / [1m] handling). That suffix is a client-side
-    # convention, not an Anthropic model id, so it must not reach routing (it would
-    # not match a configured route or an orchestrator/worker pick) or the upstream.
-    # Strip it up front so "<id>[1m]" behaves exactly like "<id>" everywhere below;
-    # the 1M window is unaffected because it rides the beta header, left untouched.
-    if isinstance(model_before, str) and model_before.endswith("[1m]"):
-        model_before = model_before[:-len("[1m]")]
+    # the client for the 1M window (it also sends the context-1m beta header). That
+    # suffix is a client-side convention, not an Anthropic model id, so it must not
+    # reach routing (it wouldn't match a route) or the upstream. Strip it up front
+    # so "<id>[1m]" behaves exactly like "<id>" everywhere below; the 1M window is
+    # unaffected because it comes from the beta header, which we leave untouched.
+    stripped = _strip_1m(model_before)
+    if stripped != model_before:
+        model_before = stripped
         body["model"] = model_before
         changed = True
 
@@ -1918,7 +1960,7 @@ def _maybe_health(self) -> bool:
                     "candidates": [{"id": c["id"], "cost": c.get("cost")}
                                    for c in _router_available_candidates()],
                 },
-                "custom_models": [{"id": m["id"], "display_name": m["display_name"]}
+                "custom_models": [{"id": _advertise_id(m), "display_name": m["display_name"]}
                                   for m in UC_MODELS],
                 "stock_models": [{"id": m["id"], "display_name": m["display_name"]}
                                  for m in _stock_models()],
@@ -2036,9 +2078,10 @@ def _handle_models(self) -> bool:
             existing.add(m["id"])
             stock_families.add(fam)
         for m in UC_MODELS:
-            if m["id"] not in existing:
-                data.append(dict(m))
-                existing.add(m["id"])
+            adv = _advertise_id(m)
+            if adv not in existing:
+                data.append({**m, "id": adv})
+                existing.add(adv)
         self._raw(200, "application/json", json.dumps(base).encode("utf-8"))
         return True
 
diff --git a/test_proxy.py b/test_proxy.py
index 18ec568..79922a6 100755
--- a/test_proxy.py
+++ b/test_proxy.py
@@ -263,21 +263,6 @@ def main():
         assert up._expand_env("Bearer ${MOCK_KEY}") == "Bearer secret123"
         print("[ok] ${ENV} expansion in route auth")
 
-        # PR #8 companion: Claude Code's [1m] context-window suffix on the model id
-        # is stripped before routing, so "<id>[1m]" still matches "<id>"'s route
-        # (the 1M window itself rides the context-1m beta header, not the id). A
-        # naive exact-match lookup would otherwise miss the route once the launcher
-        # appends [1m] to a 1M-capable Claude pick.
-        _saved_slots = up.UC_SLOT_MAP
-        up.UC_SLOT_MAP = {"claude-big": {"type": "openai_compat", "model": "big-real",
-                                         "upstream": mock + "/v1", "auth": "Bearer ${MOCK_KEY}"}}
-        out_1m, _ = up.transform_messages_body(json.dumps({
-            "model": "claude-big[1m]", "max_tokens": 8,
-            "messages": [{"role": "user", "content": "hi"}]}).encode())
-        assert json.loads(out_1m)["model"] == "big-real", json.loads(out_1m)["model"]
-        up.UC_SLOT_MAP = _saved_slots
-        print("[ok] 1M [1m] window suffix stripped before routing")
-
         # Stock Claude models: the built-in fallback so real Claude stays in
         # /model even with no upstream list. Toggle + override are honored, and
         # every advertised id obeys Claude Code's /^(claude|anthropic)/i rule.
@@ -457,6 +442,26 @@ def _pin(text):
         # plan-mode detection drives the optional planner auto-route
         assert up._is_plan_mode({"tools": [{"name": "ExitPlanMode"}]}) is True
         assert up._is_plan_mode({"tools": [{"name": "Bash"}]}) is False
+        # 1M context-window suffix: "<id>[1m]" is stripped before routing, so it
+        # resolves to "<id>"'s route (the 1M window itself rides the beta header).
+        out1m, _ = up.transform_messages_body(json.dumps({
+            "model": "claude-composer[1m]", "max_tokens": 16,
+            "messages": [{"role": "user", "content": "hi"}]}).encode())
+        assert json.loads(out1m)["model"] == "cursor/composer-2.5", json.loads(out1m)["model"]
+        # advertise [1m] on a real-Claude PASSTHROUGH route to a 1M model, so the
+        # /model picker id carries it and Claude Code renders 1M even on in-session
+        # switches; worker + non-passthrough entries are left untouched
+        assert up._advertise_id({"id": "claude-opus"}) == "claude-opus[1m]"
+        assert up._advertise_id({"id": "claude-composer"}) == "claude-composer"   # openai_compat
+        assert up._advertise_id({"id": "claude-worker-opus"}) == "claude-worker-opus"
+        assert up._strip_1m("claude-opus[1m]") == "claude-opus"
+        # a [1m]-suffixed pick still routes to its clean route (selection normalized)
+        up._ACTIVE.update({"orch": None, "worker": None, "worker_explicit": False})
+        out_adv, _ = up.transform_messages_body(json.dumps({
+            "model": "claude-opus[1m]", "max_tokens": 16,
+            "messages": [{"role": "user", "content": "hi"}]}).encode())
+        assert json.loads(out_adv)["model"] == "claude-opus-4-8", json.loads(out_adv)["model"]
+        up._ACTIVE.update({"orch": None, "worker": None, "worker_explicit": False})
         # a name that maps to TWO routes (gpt-5.5 head AND a gpt-oss model head) is
         # dropped as ambiguous -> resolves to nothing (regression for the docs/gpt gap)
         _slots0, _models0 = up.UC_SLOT_MAP, up.UC_MODELS
@@ -485,7 +490,7 @@ def _pin(text):
         assert _pin("@composer do it")[0] is None
         up.UC_SLOT_MAP, up.UC_MODELS, up._ROUTE_ALIASES, up.DIRECTIVES = (
             _saved[0], _saved[1], _saved[2], _saved[3])
-        print("[ok] routing directives: opt-in default-off / NL opt-in / surgical strip / planner-gated / gpt-collision / dispatch")
+        print("[ok] routing directives: opt-in default-off / NL opt-in / surgical strip / planner-gated / gpt-collision / dispatch / [1m] strip + advertise")
 
         # issue #3: a rejected tool call (with or without a comment) must not leave
         # an assistant tool_calls message unanswered, and tool replies must come
diff --git a/windows/Start-UltraCode.ps1 b/windows/Start-UltraCode.ps1
index a1d5a3a..14150a3 100644
--- a/windows/Start-UltraCode.ps1
+++ b/windows/Start-UltraCode.ps1
@@ -98,7 +98,7 @@ function Add-Uc1m {
     if ([string]::IsNullOrEmpty($ModelId)) { return $ModelId }
     if ($ModelId.Contains("[1m]")) { return $ModelId }
     $set = if ($env:UC_1M_MODELS) { $env:UC_1M_MODELS }
-           else { "claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6" }
+           else { "claude-opus-4-8,claude-opus-4-7,claude-opus-4-6,claude-sonnet-4-6,claude-opus" }
     foreach ($id in $set.Split(",")) {
         if ($ModelId -eq $id.Trim()) { return "${ModelId}[1m]" }
     }