diff --git a/src/ucode/agents/claude.py b/src/ucode/agents/claude.py
index d0d0380..1afe962 100644
--- a/src/ucode/agents/claude.py
+++ b/src/ucode/agents/claude.py
@@ -62,7 +62,11 @@ def _resolve_web_search_model(state: dict) -> str | None:
 
 
 WEB_SEARCH_MCP_NAME = "web_search"
-_CLAUDE_MODEL_RE = re.compile(r"^databricks-claude-(opus|sonnet)-(\d+)-(\d+)(.*)$")
+# Matches exactly the AI Gateway form (`databricks-claude-opus-4-8`) or the UC
+# model-services form (`system.ai.claude-opus-4-8`) — one prefix is required.
+_CLAUDE_MODEL_RE = re.compile(
+    r"^(?:system\.ai\.|databricks-)claude-(opus|sonnet)-(\d+)-(\d+)(.*)$"
+)
 
 # Env keys the MLflow Stop hook reads to route traces. Written into the
 # settings `env` block alongside the hook itself.
diff --git a/src/ucode/agents/codex.py b/src/ucode/agents/codex.py
index e0bb64b..5a64d4b 100644
--- a/src/ucode/agents/codex.py
+++ b/src/ucode/agents/codex.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import json
 import os
 import re
 from pathlib import Path
@@ -13,6 +14,7 @@
     backup_existing_file,
     deep_merge_dict,
     read_toml_safe,
+    write_text_file,
     write_toml_file,
 )
 from ucode.databricks import (
@@ -29,6 +31,14 @@
 CODEX_BACKUP_PATH = APP_DIR / "codex-ucode-config.backup.toml"
 LEGACY_CODEX_CONFIG_PATH = CODEX_CONFIG_DIR / "config.toml"
 LEGACY_CODEX_BACKUP_PATH = APP_DIR / "codex-config.backup.toml"
+# Static model catalog written when the workspace uses UC model-services
+# discovery. Pointing Codex at this file via `model_catalog_json` switches it
+# to `StaticModelsManager`, which bypasses the `GET /v1/models` listing the
+# AI Gateway currently rejects for `system.ai.*` (it requires a
+# `Databricks-Model-Provider-Service` header that Codex doesn't send). Schema
+# documented at openai/codex#14757; ucode emits the minimum required fields
+# and lets Codex fill in the rest from `model_info_from_slug`-style defaults.
+CODEX_MODEL_CATALOG_PATH = APP_DIR / "codex-model-catalog.json"
 CODEX_MODEL_PROVIDER_NAME = "ucode-databricks"
 MINIMUM_CODEX_VERSION = (0, 134, 0)
 MINIMUM_CODEX_VERSION_TEXT = "0.134.0"
@@ -45,6 +55,7 @@
 MANAGED_KEYS: list[list[str]] = [
     ["model_provider"],
     ["model"],
+    ["model_catalog_json"],
     ["model_providers", CODEX_MODEL_PROVIDER_NAME],
     ["model_providers", CODEX_MODEL_PROVIDER_NAME, "http_headers"],
 ]
@@ -120,12 +131,111 @@ def _provider_block(workspace: str, databricks_profile: str | None) -> dict:
     }
 
 
+def _model_catalog_entry(slug: str) -> dict:
+    """Minimum-viable Codex `ModelInfo` for a UC model-services slug.
+
+    Codex's `model_catalog_json` deserializer requires every non-`#[serde(default)]`
+    field to be present (Option-typed fields accept `null`). Only those keys
+    are emitted below; optional ones such as `context_window`,
+    `web_search_tool_type` and `input_modalities` are NOT set, leaving Codex
+    to apply its own fallback defaults for them.
+    NOTE(review): confirm those three are `#[serde(default)]` upstream.
+
+    Schema reference: openai/codex protocol/src/openai_models.rs:257-315 and
+    issue openai/codex#14757 for the field-by-field contract.
+    """
+    return {
+        "slug": slug,
+        "display_name": slug,
+        "description": None,
+        # Empty list keeps the picker quiet without claiming reasoning support.
+        "supported_reasoning_levels": [],
+        "shell_type": "default",
+        "visibility": "list",
+        "supported_in_api": True,
+        # Identical priority across entries — the picker breaks ties by slug
+        # which matches the alphabetic order users already see today.
+        "priority": 10,
+        "availability_nux": None,
+        "upgrade": None,
+        # Empty `base_instructions` leaves the bundled system prompt in place
+        # (Codex falls back to its default when the field is empty after
+        # personality substitution; see client.rs reasoning-field gating notes).
+        "base_instructions": "",
+        # Conservative defaults: don't claim reasoning summaries or verbosity
+        # support, since the gateway-fronted models may not implement either.
+        # Users can still send the request; this just keeps Codex from
+        # serializing fields the model can't honour.
+        "supports_reasoning_summaries": False,
+        "support_verbosity": False,
+        "default_verbosity": None,
+        # `freeform` enables apply_patch with the lark grammar that GPT-5
+        # variants are trained on. Setting this to null would silently drop
+        # apply_patch from the tool list, which is a worse UX than a runtime
+        # error if a particular model doesn't support it.
+        "apply_patch_tool_type": "freeform",
+        "truncation_policy": {"mode": "bytes", "limit": 10000},
+        "supports_parallel_tool_calls": True,
+        "experimental_supported_tools": [],
+    }
+
+
+def build_model_catalog(codex_models: list[str] | None) -> dict | None:
+    """Return the `{"models": [...]}` catalog document, or None when empty.
+
+    Codex rejects empty catalogs at startup, so a missing or empty model list
+    yields None and the caller skips writing the file altogether.
+    """
+    # `or []` folds the None case into the empty-list case.
+    entries = [_model_catalog_entry(slug) for slug in codex_models or []]
+    return {"models": entries} if entries else None
+
+
+def _write_model_catalog_file(state: dict) -> Path | None:
+    """Materialise the static catalog when this workspace uses model-services.
+
+    Returns the catalog path so the caller can wire it into the toml overlay.
+    Returns None on the AI-gateway path or when there are no Codex models to
+    list; in both cases a stale catalog from an earlier configure is removed.
+    """
+    catalog = None
+    if state.get("use_model_services"):
+        catalog = build_model_catalog(state.get("codex_models"))
+    if catalog is None:
+        # No catalog applies to this run, so drop whatever an earlier run left.
+        try:
+            CODEX_MODEL_CATALOG_PATH.unlink()
+        except OSError:
+            # Already absent or not removable — non-fatal either way: the
+            # caller omits `model_catalog_json`, so the file is never read.
+            pass
+        return None
+    write_text_file(CODEX_MODEL_CATALOG_PATH, json.dumps(catalog, indent=2))
+    return CODEX_MODEL_CATALOG_PATH
+
+
+def revert_model_catalog_file() -> bool:
+    """Remove the static catalog from configure; True only if a file was deleted."""
+    existed = CODEX_MODEL_CATALOG_PATH.exists()
+    try:
+        if existed:
+            CODEX_MODEL_CATALOG_PATH.unlink()
+    except OSError:
+        return False  # deletion failed; treat as "nothing removed"
+    return existed
+
+
 def render_overlay(
-    workspace: str, model: str | None = None, databricks_profile: str | None = None
+    workspace: str,
+    model: str | None = None,
+    databricks_profile: str | None = None,
+    model_catalog_path: Path | None = None,
 ) -> dict:
     overlay: dict = {"model_provider": CODEX_MODEL_PROVIDER_NAME}
     if model:
         overlay["model"] = model
+    if model_catalog_path is not None:
+        overlay["model_catalog_json"] = str(model_catalog_path)
     overlay["model_providers"] = {
         CODEX_MODEL_PROVIDER_NAME: _provider_block(workspace, databricks_profile),
     }
@@ -255,6 +365,10 @@ def _openai_model_id(model: str | None) -> str | None:
 
 
 def _codex_model_id(model: str | None) -> str | None:
+    # UC model-services ids (`system.ai.gpt-5`) route by name through the
+    # gateway, so they must be sent verbatim — not rewritten to an OpenAI id.
+    if model and model.startswith("system.ai."):
+        return model
     if model in CODEX_OPENAI_ID_INCOMPATIBLE_MODELS:
         return model
     return _openai_model_id(model)
@@ -263,7 +377,12 @@ def _codex_model_id(model: str | None) -> str | None:
 def _parse_gpt(model: str | None) -> tuple[int, int | None, int | None, str] | None:
     if not model:
         return None
-    match = _GPT_RE.fullmatch(model.split("/")[-1])
+    # Strip the UC model-services prefix so `system.ai.gpt-5` parses for version
+    # selection; the original id is preserved by callers that need it verbatim.
+    tail = model.split("/")[-1]
+    if tail.startswith("system.ai."):
+        tail = tail[len("system.ai.") :]
+    match = _GPT_RE.fullmatch(tail)
     if not match:
         return None
     major, minor, patch, suffix = match.groups()
@@ -296,8 +415,19 @@ def write_tool_config(state: dict, model: str | None = None) -> dict:
 
     _remove_legacy_ucode_profile()
     backup_existing_file(CODEX_CONFIG_PATH, CODEX_BACKUP_PATH)
-    overlay = render_overlay(workspace, chosen_model, databricks_profile)
+    # Static catalog written iff the workspace is on UC model-services. When
+    # set, Codex switches to StaticModelsManager and never calls /v1/models —
+    # which is currently rejected by the AI Gateway for `system.ai.*` ids.
+    catalog_path = _write_model_catalog_file(state)
+    overlay = render_overlay(
+        workspace, chosen_model, databricks_profile, model_catalog_path=catalog_path
+    )
     doc = read_toml_safe(CODEX_CONFIG_PATH)
+    # Strip a stale `model_catalog_json` if we're no longer in model-services
+    # mode — `deep_merge_dict` only adds/overwrites keys, it won't clear one
+    # that the new overlay omits.
+    if catalog_path is None:
+        doc.pop("model_catalog_json", None)
     deep_merge_dict(doc, overlay)
     write_toml_file(CODEX_CONFIG_PATH, doc)
     state = mark_tool_managed(state, "codex", MANAGED_KEYS)
diff --git a/src/ucode/cli.py b/src/ucode/cli.py
index c363e22..659e916 100644
--- a/src/ucode/cli.py
+++ b/src/ucode/cli.py
@@ -25,7 +25,10 @@
 from ucode.agents import (
     launch as launch_agent,
 )
-from ucode.agents.codex import revert_legacy_shared_config
+from ucode.agents.codex import (
+    revert_legacy_shared_config,
+    revert_model_catalog_file,
+)
 from ucode.agents.pi import PI_SETTINGS_BACKUP_PATH, PI_SETTINGS_PATH
 from ucode.config_io import restore_file, set_dry_run
 from ucode.databricks import (
@@ -33,6 +36,7 @@
     discover_claude_models,
     discover_codex_models,
     discover_gemini_models,
+    discover_model_services,
     ensure_ai_gateway_v2,
     ensure_databricks_auth,
     find_profile_name_for_host,
@@ -41,6 +45,7 @@
     install_databricks_cli,
     normalize_workspace_url,
     run_databricks_login,
+    use_model_services,
 )
 from ucode.mcp import (
     MCP_CLIENTS,
@@ -160,7 +165,13 @@ def configure_shared_state(
     don't error out. If ``None``, we resolve it from the host after login.
     """
     workspace = normalize_workspace_url(workspace)
-    previous_workspace = load_state().get("workspace")
+    prior_state = load_state()
+    previous_workspace = prior_state.get("workspace")
+    # The flag is sticky: an explicit env var wins, otherwise fall back to what
+    # was persisted when the workspace was configured. Without this, every
+    # launch re-runs discovery and a missing env var would silently revert a
+    # model-services workspace to the databricks-* gateway names.
+    model_services = use_model_services(default=bool(prior_state.get("use_model_services")))
     fetch_all = tools is None
     if force_login:
         run_databricks_login(workspace, profile)
@@ -184,19 +195,29 @@ def configure_shared_state(
     claude_reason: str | None = None
     gemini_reason: str | None = None
     codex_reason: str | None = None
-    with spinner("Fetching available models..."):
+    claude_models = {}
+    gemini_models = []
+    codex_models = []
+    if model_services:
+        # Opt-in: one UC model-services call yields all families as
+        # `system.ai.<model-name>` ids, bucketed by name. The single reason is
+        # shared across the families that were requested.
+        with spinner("Fetching available models (model services)..."):
+            ms_claude, ms_codex, ms_gemini, ms_reason = discover_model_services(workspace, token)
         if want_claude:
-            claude_models, claude_reason = discover_claude_models(workspace, token)
-        else:
-            claude_models = {}
+            claude_models, claude_reason = ms_claude, ms_reason
         if want_gemini:
-            gemini_models, gemini_reason = discover_gemini_models(workspace, token)
-        else:
-            gemini_models = []
+            gemini_models, gemini_reason = ms_gemini, ms_reason
         if want_codex:
-            codex_models, codex_reason = discover_codex_models(workspace, token)
-        else:
-            codex_models = []
+            codex_models, codex_reason = ms_codex, ms_reason
+    else:
+        with spinner("Fetching available models..."):
+            if want_claude:
+                claude_models, claude_reason = discover_claude_models(workspace, token)
+            if want_gemini:
+                gemini_models, gemini_reason = discover_gemini_models(workspace, token)
+            if want_codex:
+                codex_models, codex_reason = discover_codex_models(workspace, token)
     opencode_models: dict[str, list[str]] = {}
     if claude_models:
         opencode_models["anthropic"] = list(claude_models.values())
@@ -210,6 +231,9 @@ def configure_shared_state(
         state["profile"] = profile
     else:
         state.pop("profile", None)
+    # Persist the resolved flag so subsequent launches stay on the same
+    # discovery path without the env var being re-exported.
+    state["use_model_services"] = model_services
     state["base_urls"] = build_shared_base_urls(workspace)
     if want_claude:
         state["claude_models"] = claude_models
@@ -371,6 +395,12 @@ def status() -> int:
     profile = state.get("profile")
     if profile:
         print_kv("CLI profile", profile)
+    print_kv(
+        "Model discovery",
+        "model-services (system.ai.*)"
+        if state.get("use_model_services")
+        else "ai-gateway (databricks-*)",
+    )
 
     print_heading("Coding Agents")
     for tool, spec in TOOL_SPECS.items():
@@ -444,6 +474,9 @@ def revert() -> int:
     # Older Codex (< 0.134.0) had ucode edit the shared ~/.codex/config.toml in
     # place; restoring the per-profile file above does not undo that.
     legacy_codex_stripped = revert_legacy_shared_config()
+    # The static model catalog (only written when the workspace uses UC
+    # model-services) lives outside the toml backup, so clean it up here.
+    codex_catalog_removed = revert_model_catalog_file()
     clear_state()
 
     print_heading("Revert")
@@ -452,6 +485,8 @@ def revert() -> int:
         print_kv(f"{spec['display']} config", "restored" if results[tool] else "unchanged")
     if legacy_codex_stripped:
         print_kv("Codex shared config", "ucode entries removed")
+    if codex_catalog_removed:
+        print_kv("Codex model catalog", "removed")
     print_kv("Pi settings", "restored" if pi_settings_restored else "unchanged")
     for client, spec in MCP_CLIENTS.items():
         print_kv(
diff --git a/src/ucode/databricks.py b/src/ucode/databricks.py
index 2d45feb..4202b94 100644
--- a/src/ucode/databricks.py
+++ b/src/ucode/databricks.py
@@ -17,7 +17,7 @@
 from typing import Literal, cast, overload
 from urllib import error as urllib_error
 from urllib import request as urllib_request
-from urllib.parse import urlparse
+from urllib.parse import urlencode, urlparse
 
 from databricks.sql.exc import ServerOperationError
 
@@ -977,6 +977,188 @@ def build_auth_shell_command(workspace: str, profile: str | None = None) -> str:
     )
 
 
+def use_model_services(default: bool = False) -> bool:
+    """True when the opt-in UC model-services discovery path is enabled.
+
+    Set ``UCODE_USE_MODEL_SERVICES=1`` (or true/yes/on) to discover models via
+    the Unity Catalog model-services API and address them as
+    ``system.ai.<model-name>`` instead of the per-family AI Gateway listings.
+
+    A non-blank env var always wins (anything but 1/true/yes/on disables it).
+    ``default`` applies when the env var is unset or blank — callers pass the
+    value persisted in state, so a workspace configured with the flag keeps
+    using model services on later launches without re-exporting the variable.
+    """
+    raw = os.environ.get("UCODE_USE_MODEL_SERVICES")
+    if raw is None or not raw.strip():
+        return default
+    return raw.strip().lower() in {"1", "true", "yes", "on"}
+
+
+# A model-service's `name` is `model-services/system.ai.<model-name>`; the
+# part after the prefix is exactly the model string agents send (no
+# `databricks-` infix — that only appears on the inner destination name).
+_MODEL_SERVICE_NAME_PREFIX = "model-services/"
+# The metastore-scope listing returns services from EVERY schema (e.g.
+# `main.user.foo`, `temp.*`, internal DLT schemas). We only want the
+# Databricks-managed foundation models under `system.ai`.
+_MODEL_SERVICE_REQUIRED_PREFIX = "system.ai."
+
+
+def _model_service_id(service: dict) -> str | None:
+    """Return the `system.ai.<model-name>` id of one model-service entry.
+
+    None is returned when `name` is not a string or belongs to another schema,
+    which keeps user/internal model services out of the family buckets."""
+    raw = service.get("name")
+    if not isinstance(raw, str):
+        return None
+    model_id = raw.strip()
+    # The listing reports `model-services/<id>`; strip that wrapper if present.
+    wrapper_len = len(_MODEL_SERVICE_NAME_PREFIX)
+    if model_id[:wrapper_len] == _MODEL_SERVICE_NAME_PREFIX:
+        model_id = model_id[wrapper_len:]
+    return model_id if model_id.startswith(_MODEL_SERVICE_REQUIRED_PREFIX) else None
+
+
+# The model-services metastore listing is slow and flaky — large pages
+# routinely 504 with `Timeout listing model services under metastore`. A small
+# page is far more likely to come back, and each page gets a few retries before
+# we give up.
+_MODEL_SERVICES_PAGE_SIZE = 10
+_MODEL_SERVICES_PAGE_RETRIES = 4
+
+
+def _get_model_services_page(
+    url: str, token: str, *, retries: int = _MODEL_SERVICES_PAGE_RETRIES
+) -> tuple[dict | list | None, str | None]:
+    """GET one model-services page, retrying on failure.
+
+    The endpoint frequently 504s under load; a retry usually succeeds. Returns
+    the same (payload, reason) shape as ``_http_get_json`` — the last attempt's
+    result when all attempts fail. At least one attempt is always made."""
+    attempts = max(1, retries)  # retries <= 0 must not yield (None, None)
+    reason: str | None = None
+    for attempt in range(1, attempts + 1):
+        payload, reason = _http_get_json(url, token, timeout=30)
+        if payload is not None:
+            return payload, None
+        _debug("model-services page", f"attempt {attempt}/{attempts} failed: {reason}")
+    return None, reason
+
+
+def list_model_services(
+    workspace: str,
+    token: str,
+    *,
+    page_size: int = _MODEL_SERVICES_PAGE_SIZE,
+    max_pages: int = 100,
+) -> tuple[list[str], str | None]:
+    """List all `system.ai.*` model ids via the UC model-services API.
+
+    Pages through ``/api/2.1/unity-catalog/model-services`` (metastore scope)
+    and returns the de-duplicated, sorted list of ``system.ai.<model-name>``
+    ids. Uses a small page size with per-page retries because the endpoint is
+    slow and frequently 504s. Returns (ids, reason); reason is None only when
+    the listing completed and found ids. Otherwise it says why the list is
+    empty or possibly truncated (request failure, page cap, repeated token).
+    """
+    hostname = workspace_hostname(workspace)
+    ids: list[str] = []
+    page_token: str | None = None
+    seen_tokens: set[str] = set()
+    last_reason: str | None = None
+    for _ in range(max_pages):
+        params: dict[str, str] = {"page_size": str(page_size)}
+        if page_token:
+            params["page_token"] = page_token
+        url = f"https://{hostname}/api/2.1/unity-catalog/model-services?{urlencode(params)}"
+        payload, reason = _get_model_services_page(url, token)
+        if payload is None:
+            # Mid-pagination failure: keep whatever we collected, but propagate
+            # the reason so callers can warn the user that the list is truncated.
+            last_reason = reason or "model-services request failed"
+            break
+        data = cast(dict, payload) if isinstance(payload, dict) else {}
+        # `dict.get(key, default)` returns the present value even if it's None.
+        services = data.get("model_services") or []
+        for service in services:
+            if isinstance(service, dict):
+                model_id = _model_service_id(service)
+                if model_id:
+                    ids.append(model_id)
+        page_token = data.get("next_page_token") or None
+        if not page_token:
+            break  # Last page reached: the listing is complete.
+        if page_token in seen_tokens:
+            last_reason = "pagination repeated a page token; list may be incomplete"
+            break
+        seen_tokens.add(page_token)
+    else:
+        # Page cap hit with pages still pending: report it rather than look complete.
+        last_reason = f"stopped after {max_pages} model-services pages; list may be incomplete"
+
+    deduped = sorted(set(ids))
+    if deduped:
+        # Even on partial success, surface the reason so the caller can warn upstream.
+        return deduped, last_reason
+    if last_reason:
+        return [], last_reason
+    # Empty listing with no error. The metastore listing mixes `system.ai.*`
+    # foundation models with user-created services in non-deterministic order
+    # (verified against e2-dogfood 2026-06-10), so "saw 0 entries" vs "saw only
+    # user schemas" leads to the same advice: retry, or verify provisioning.
+    return [], "no `system.ai.*` model services found"
+
+
+def discover_model_services(
+    workspace: str, token: str
+) -> tuple[dict[str, str], list[str], list[str], str | None]:
+    """Discover models via UC model-services and bucket them by family name.
+
+    Returns (claude_models, codex_models, gemini_models, reason):
+
+    - ``claude_models`` maps ``opus``/``sonnet``/``haiku`` to the newest
+      matching ``system.ai.claude-*`` id, compared by numeric version parts.
+    - ``codex_models`` is the list of ``system.ai.*gpt-*`` ids.
+    - ``gemini_models`` is the list of ``system.ai.*gemini-*`` ids, newest first.
+
+    ``reason`` is None on full success, else explains why nothing was found or
+    why the listing may be truncated. Family bucketing is by name substring
+    because the model-services API does not expose per-model API dialects.
+    """
+    ids, reason = list_model_services(workspace, token)
+    if not ids:
+        return {}, [], [], reason
+
+    claude_models: dict[str, str] = {}
+    for family in ("opus", "sonnet", "haiku"):
+        candidates = [m for m in ids if f"claude-{family}-" in m]
+        if candidates:
+            # Compare numerically: a plain string sort ranks `-4-8` above `-4-10`.
+            claude_models[family] = max(
+                candidates, key=lambda m: ([int(p) for p in m.split("-") if p.isdecimal()], m)
+            )
+
+    codex_models = [m for m in ids if "gpt-" in m]
+    gemini_models = sorted([m for m in ids if "gemini-" in m], key=model_version_sort_key)
+
+    if not (claude_models or codex_models or gemini_models):
+        sample = ", ".join(ids[:5])
+        return (
+            {},
+            [],
+            [],
+            (
+                "model-services returned model ids but none matched "
+                f"claude/gpt/gemini families (got: {sample})"
+            ),
+        )
+    # Pass `reason` through even on success — `list_model_services` sets it on
+    # partial pagination so the CLI layer can warn about truncation.
+    return claude_models, codex_models, gemini_models, reason
+
+
 def discover_claude_models(workspace: str, token: str) -> tuple[dict[str, str], str | None]:
     """Discover Claude families on this workspace's AI Gateway.
 
diff --git a/src/ucode/usage.py b/src/ucode/usage.py
index ab9a0c4..2ce8e7b 100644
--- a/src/ucode/usage.py
+++ b/src/ucode/usage.py
@@ -172,9 +172,14 @@ def simplify_model_name(tool: str, model_name: str) -> str:
     if not normalized:
         return "-"
 
-    prefix = "databricks-"
-    if normalized.startswith(prefix):
-        normalized = normalized[len(prefix) :]
+    # Strip whichever family prefix is in use so usage rows stay consistent
+    # regardless of whether a workspace uses the AI Gateway path
+    # (`databricks-claude-...`) or the UC model-services path
+    # (`system.ai.claude-...`). Order doesn't matter — only one will match.
+    for prefix in ("databricks-", "system.ai."):
+        if normalized.startswith(prefix):
+            normalized = normalized[len(prefix) :]
+            break
 
     tool_prefixes = {
         "claude": "claude-",
diff --git a/tests/test_agent_claude.py b/tests/test_agent_claude.py
index ea33c63..9888efd 100644
--- a/tests/test_agent_claude.py
+++ b/tests/test_agent_claude.py
@@ -41,6 +41,14 @@ def test_does_not_duplicate_1m_suffix(self):
         overlay, _ = claude.render_overlay(WS, "databricks-claude-opus-4-7[1m]")
         assert overlay["env"]["ANTHROPIC_MODEL"] == "databricks-claude-opus-4-7[1m]"
 
+    def test_adds_1m_suffix_for_model_services_name(self):
+        overlay, _ = claude.render_overlay(WS, "system.ai.claude-opus-4-8")
+        assert overlay["env"]["ANTHROPIC_MODEL"] == "system.ai.claude-opus-4-8[1m]"
+
+    def test_no_1m_suffix_for_model_services_haiku(self):
+        overlay, _ = claude.render_overlay(WS, "system.ai.claude-haiku-4-6")
+        assert overlay["env"]["ANTHROPIC_MODEL"] == "system.ai.claude-haiku-4-6"
+
     def test_sets_anthropic_base_url(self):
         overlay, _ = claude.render_overlay(WS, "s4")
         assert overlay["env"]["ANTHROPIC_BASE_URL"] == f"{WS}/ai-gateway/anthropic"
diff --git a/tests/test_agent_codex.py b/tests/test_agent_codex.py
index b84b667..5772960 100644
--- a/tests/test_agent_codex.py
+++ b/tests/test_agent_codex.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import json
 import os
 
 from ucode.agents import codex
@@ -337,6 +338,17 @@ def test_openai_model_id_maps_databricks_naming(self):
     def test_codex_model_id_preserves_openai_incompatible_models(self):
         assert codex._codex_model_id("databricks-gpt-5-2-codex") == "databricks-gpt-5-2-codex"
         assert codex._codex_model_id("databricks-gpt-5-4-nano") == "databricks-gpt-5-4-nano"
         assert codex._codex_model_id("databricks-gpt-5-5") == "gpt-5.5"
+
+    def test_codex_model_id_passes_model_services_id_verbatim(self):
+        # UC model-services ids route by name, so they must not be rewritten
+        # to the OpenAI id form.
+        assert codex._codex_model_id("system.ai.gpt-5") == "system.ai.gpt-5"
+        assert codex._codex_model_id("system.ai.gpt-5-2-codex") == "system.ai.gpt-5-2-codex"
+
+    def test_default_model_selects_model_services_gpt(self):
+        models = ["system.ai.gpt-5", "system.ai.gpt-5-5", "system.ai.claude-opus-4-8"]
+
+        assert codex.default_model({"codex_models": models}) == "system.ai.gpt-5-5"
 
 
@@ -385,3 +397,230 @@ def fake_execvp(binary: str, args: list[str]) -> None:
 
         assert os.environ["OAUTH_TOKEN"] == "fresh-token"
         assert exec_calls == [("codex", ["codex", "--profile", "ucode", "--search"])]
+
+
+class TestBuildModelCatalog:
+    def test_returns_none_when_no_models(self):
+        # Codex rejects empty catalogs at startup, so we must skip writing the
+        # file entirely rather than emit `{"models": []}`.
+        assert codex.build_model_catalog([]) is None
+        assert codex.build_model_catalog(None) is None
+
+    def test_emits_one_entry_per_model(self):
+        catalog = codex.build_model_catalog(["system.ai.gpt-5", "system.ai.gpt-5-5"])
+
+        slugs = [entry["slug"] for entry in catalog["models"]]
+        assert slugs == ["system.ai.gpt-5", "system.ai.gpt-5-5"]
+
+    def test_each_entry_has_required_keys(self):
+        # The Codex protocol demands every non-`#[serde(default)]` field be
+        # present; an omission causes Codex to refuse to start. Guard the
+        # contract so a future field rename doesn't quietly break configure.
+        catalog = codex.build_model_catalog(["system.ai.gpt-5"])
+        entry = catalog["models"][0]
+
+        for key in (
+            "slug",
+            "display_name",
+            "description",
+            "supported_reasoning_levels",
+            "shell_type",
+            "visibility",
+            "supported_in_api",
+            "priority",
+            "availability_nux",
+            "upgrade",
+            "base_instructions",
+            "supports_reasoning_summaries",
+            "support_verbosity",
+            "default_verbosity",
+            "apply_patch_tool_type",
+            "truncation_policy",
+            "supports_parallel_tool_calls",
+            "experimental_supported_tools",
+        ):
+            assert key in entry, f"missing required ModelInfo field: {key}"
+
+    def test_truncation_policy_is_well_formed(self):
+        # Codex requires both `mode` and `limit`; a malformed sub-struct fails
+        # the whole catalog load with `unknown variant ...`.
+        entry = codex.build_model_catalog(["system.ai.gpt-5"])["models"][0]
+
+        assert entry["truncation_policy"] == {"mode": "bytes", "limit": 10000}
+
+    def test_visibility_is_protocol_compliant(self):
+        # `list` / `hide` / `none` are the only accepted strings — `custom` and
+        # other values fail catalog deserialization.
+        entry = codex.build_model_catalog(["system.ai.gpt-5"])["models"][0]
+
+        assert entry["visibility"] in {"list", "hide", "none"}
+
+    def test_apply_patch_uses_freeform_for_codex_models(self):
+        # GPT-5 variants ship with the lark-grammar apply_patch tool; we keep
+        # it on so users see the same agent capabilities regardless of which
+        # discovery path their workspace uses.
+        entry = codex.build_model_catalog(["system.ai.gpt-5"])["models"][0]
+
+        assert entry["apply_patch_tool_type"] == "freeform"
+
+
+class TestModelCatalogFile:
+    def _patch_paths(self, tmp_path, monkeypatch):
+        catalog_path = tmp_path / "codex-model-catalog.json"
+        monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path)
+        return catalog_path
+
+    def test_skips_file_when_use_model_services_false(self, tmp_path, monkeypatch):
+        catalog_path = self._patch_paths(tmp_path, monkeypatch)
+
+        result = codex._write_model_catalog_file(
+            {"codex_models": ["databricks-gpt-5"], "use_model_services": False}
+        )
+
+        assert result is None
+        assert not catalog_path.exists()
+
+    def test_skips_file_when_no_codex_models(self, tmp_path, monkeypatch):
+        # `use_model_services=True` without any GPT models would produce an
+        # empty catalog, which Codex rejects. Don't write the file at all.
+        catalog_path = self._patch_paths(tmp_path, monkeypatch)
+
+        result = codex._write_model_catalog_file({"codex_models": [], "use_model_services": True})
+
+        assert result is None
+        assert not catalog_path.exists()
+
+    def test_writes_catalog_when_use_model_services_true(self, tmp_path, monkeypatch):
+        catalog_path = self._patch_paths(tmp_path, monkeypatch)
+
+        result = codex._write_model_catalog_file(
+            {"codex_models": ["system.ai.gpt-5", "system.ai.gpt-5-5"], "use_model_services": True}
+        )
+
+        assert result == catalog_path
+        catalog = json.loads(catalog_path.read_text(encoding="utf-8"))
+        slugs = [entry["slug"] for entry in catalog["models"]]
+        assert slugs == ["system.ai.gpt-5", "system.ai.gpt-5-5"]
+
+    def test_removes_stale_catalog_when_toggling_off_model_services(self, tmp_path, monkeypatch):
+        # If a workspace previously had `UCODE_USE_MODEL_SERVICES=1` and now
+        # doesn't, the old catalog must go away — otherwise `model_catalog_json`
+        # in the toml would still resolve and Codex would silently pin the
+        # stale model list.
+        catalog_path = self._patch_paths(tmp_path, monkeypatch)
+        catalog_path.write_text('{"models": []}', encoding="utf-8")
+
+        result = codex._write_model_catalog_file(
+            {"codex_models": ["databricks-gpt-5"], "use_model_services": False}
+        )
+
+        assert result is None
+        assert not catalog_path.exists()
+
+
+class TestRevertModelCatalogFile:
+    def test_returns_false_when_no_catalog(self, tmp_path, monkeypatch):
+        catalog_path = tmp_path / "codex-model-catalog.json"
+        monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path)
+
+        assert codex.revert_model_catalog_file() is False
+
+    def test_removes_existing_catalog(self, tmp_path, monkeypatch):
+        catalog_path = tmp_path / "codex-model-catalog.json"
+        catalog_path.write_text('{"models": []}', encoding="utf-8")
+        monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path)
+
+        assert codex.revert_model_catalog_file() is True
+        assert not catalog_path.exists()
+
+
+class TestRenderOverlayWithModelCatalog:
+    def test_omits_model_catalog_json_by_default(self):
+        # AI-gateway path: Codex's `OpenAiModelsManager` should hit /v1/models
+        # the way it always has.
+        overlay = codex.render_overlay(WS, "databricks-gpt-5")
+
+        assert "model_catalog_json" not in overlay
+
+    def test_includes_model_catalog_json_when_path_provided(self, tmp_path):
+        # model-services path: forcing `StaticModelsManager` is the whole
+        # point of this knob, so the overlay must surface the path string.
+        catalog_path = tmp_path / "codex-model-catalog.json"
+
+        overlay = codex.render_overlay(WS, "system.ai.gpt-5", model_catalog_path=catalog_path)
+
+        assert overlay["model_catalog_json"] == str(catalog_path)
+
+
+class TestWriteToolConfigModelCatalog:
+    def test_writes_model_catalog_when_use_model_services_true(self, tmp_path, monkeypatch):
+        config_path = tmp_path / ".codex" / "ucode.config.toml"
+        backup_path = tmp_path / "codex-ucode-config.backup.toml"
+        catalog_path = tmp_path / "codex-model-catalog.json"
+        monkeypatch.setattr(codex, "CODEX_CONFIG_PATH", config_path)
+        monkeypatch.setattr(codex, "CODEX_BACKUP_PATH", backup_path)
+        monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path)
+        monkeypatch.setattr(codex, "agent_version", lambda binary: "0.134.0")
+        monkeypatch.setattr(codex, "save_state", lambda state: None)
+
+        codex.write_tool_config(
+            {
+                "workspace": WS,
+                "codex_models": ["system.ai.gpt-5", "system.ai.gpt-5-5"],
+                "use_model_services": True,
+            }
+        )
+
+        doc = read_toml_safe(config_path)
+        assert doc["model_catalog_json"] == str(catalog_path)
+        catalog = json.loads(catalog_path.read_text(encoding="utf-8"))
+        assert {entry["slug"] for entry in catalog["models"]} == {
+            "system.ai.gpt-5",
+            "system.ai.gpt-5-5",
+        }
+
+    def test_omits_model_catalog_json_when_ai_gateway(self, tmp_path, monkeypatch):
+        config_path = tmp_path / ".codex" / "ucode.config.toml"
+        backup_path = tmp_path / "codex-ucode-config.backup.toml"
+        catalog_path = tmp_path / "codex-model-catalog.json"
+        monkeypatch.setattr(codex, "CODEX_CONFIG_PATH", config_path)
+        monkeypatch.setattr(codex, "CODEX_BACKUP_PATH", backup_path)
+        monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path)
+        monkeypatch.setattr(codex, "agent_version", lambda binary: "0.134.0")
+        monkeypatch.setattr(codex, "save_state", lambda state: None)
+
+        codex.write_tool_config({"workspace": WS, "codex_models": ["databricks-gpt-5"]})
+
+        doc = read_toml_safe(config_path)
+        assert "model_catalog_json" not in doc
+        assert not catalog_path.exists()
+
+    def test_clears_stale_model_catalog_json_when_toggling_off(self, tmp_path, monkeypatch):
+        # Re-running `ucode configure` after dropping
+        # `UCODE_USE_MODEL_SERVICES=1` must not leave the static-catalog
+        # pointer behind — otherwise Codex still pins the cached list.
+        config_path = tmp_path / ".codex" / "ucode.config.toml"
+        backup_path = tmp_path / "codex-ucode-config.backup.toml"
+        catalog_path = tmp_path / "codex-model-catalog.json"
+        config_path.parent.mkdir(parents=True)
+        config_path.write_text(
+            'model_catalog_json = "/tmp/old.json"\nmodel = "system.ai.gpt-5"\n',
+            encoding="utf-8",
+        )
+        monkeypatch.setattr(codex, "CODEX_CONFIG_PATH", config_path)
+        monkeypatch.setattr(codex, "CODEX_BACKUP_PATH", backup_path)
+        monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path)
+        monkeypatch.setattr(codex, "agent_version", lambda binary: "0.134.0")
+        monkeypatch.setattr(codex, "save_state", lambda state: None)
+
+        codex.write_tool_config({"workspace": WS, "codex_models": ["databricks-gpt-5"]})
+
+        doc = read_toml_safe(config_path)
+        assert "model_catalog_json" not in doc
+
+
+class TestManagedKeysModelCatalog:
+    def test_managed_keys_include_model_catalog_json(self):
+        # Revert must strip `model_catalog_json` out of the toml; if it
+        # vanishes from MANAGED_KEYS the cleanup silently regresses.
+        assert ["model_catalog_json"] in codex.MANAGED_KEYS
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 9809d1a..e9355b6 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -196,6 +196,27 @@ def test_shows_mcp_servers_configured_by_ucode(self):
         assert "MCP Server:" not in result.output
         assert "Configured tools:" not in result.output
 
+    def test_shows_ai_gateway_discovery_when_flag_unset(self):
+        with patch("ucode.cli.load_state", return_value=MINIMAL_STATE):
+            result = runner.invoke(app, ["status"])
+
+        assert result.exit_code == 0, result.output
+        assert "Model discovery:" in result.output
+        assert "ai-gateway" in result.output
+        assert "system.ai" not in result.output
+
+    def test_shows_model_services_discovery_when_flag_set(self):
+        # Surface the active discovery path so users don't have to read
+        # state.json to remember whether a workspace is on UC model-services.
+        state = {**MINIMAL_STATE, "use_model_services": True}
+        with patch("ucode.cli.load_state", return_value=state):
+            result = runner.invoke(app, ["status"])
+
+        assert result.exit_code == 0, result.output
+        assert "Model discovery:" in result.output
+        assert "model-services" in result.output
+        assert "system.ai" in result.output
+
     def test_status_treats_available_tools_as_configured_agents(self):
         state = {
             **MINIMAL_STATE,
diff --git a/tests/test_databricks.py b/tests/test_databricks.py
index d3feeba..3e02707 100644
--- a/tests/test_databricks.py
+++ b/tests/test_databricks.py
@@ -132,6 +132,249 @@ def test_selects_opus_4_8_when_advertised(self, monkeypatch):
         assert models["opus"] == "databricks-claude-opus-4-8"
 
 
+def _model_service(model_id: str) -> dict:
+    """A model-services entry whose `name` strips to `model_id`."""
+    return {"name": f"model-services/{model_id}"}
+
+
+class TestUseModelServices:
+    def test_off_by_default(self, monkeypatch):
+        monkeypatch.delenv("UCODE_USE_MODEL_SERVICES", raising=False)
+        assert db_mod.use_model_services() is False
+
+    def test_truthy_values_enable(self, monkeypatch):
+        for value in ("1", "true", "TRUE", "yes", "on"):
+            monkeypatch.setenv("UCODE_USE_MODEL_SERVICES", value)
+            assert db_mod.use_model_services() is True
+
+    def test_falsey_values_disable(self, monkeypatch):
+        # A non-empty, non-truthy value explicitly disables — even over a
+        # persisted default of True.
+        for value in ("0", "false", "no"):
+            monkeypatch.setenv("UCODE_USE_MODEL_SERVICES", value)
+            assert db_mod.use_model_services(default=True) is False
+
+    def test_unset_falls_back_to_default(self, monkeypatch):
+        # Sticky behavior: when the env var is unset (or blank), the persisted
+        # default decides.
+        monkeypatch.delenv("UCODE_USE_MODEL_SERVICES", raising=False)
+        assert db_mod.use_model_services(default=True) is True
+        assert db_mod.use_model_services(default=False) is False
+        monkeypatch.setenv("UCODE_USE_MODEL_SERVICES", "")
+        assert db_mod.use_model_services(default=True) is True
+
+    def test_env_var_overrides_default(self, monkeypatch):
+        monkeypatch.setenv("UCODE_USE_MODEL_SERVICES", "1")
+        assert db_mod.use_model_services(default=False) is True
+
+
+class TestDiscoverModelServices:
+    def test_buckets_families_by_name(self, monkeypatch):
+        payload = {
+            "model_services": [
+                _model_service("system.ai.claude-opus-4-7"),
+                _model_service("system.ai.claude-opus-4-8"),
+                _model_service("system.ai.claude-sonnet-4-6"),
+                _model_service("system.ai.gpt-5"),
+                _model_service("system.ai.gemini-2-5-flash"),
+                _model_service("system.ai.gemini-3-5-flash"),
+                _model_service("system.ai.llama-4-maverick"),
+            ]
+        }
+        monkeypatch.setattr(
+            db_mod, "_http_get_json", lambda url, token, timeout=10: (payload, None)
+        )
+
+        claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token")
+
+        assert reason is None
+        # Newest opus wins; sonnet bucketed; haiku absent.
+        assert claude == {
+            "opus": "system.ai.claude-opus-4-8",
+            "sonnet": "system.ai.claude-sonnet-4-6",
+        }
+        assert codex == ["system.ai.gpt-5"]
+        # Gemini ordered newest-first via the shared sort key.
+        assert gemini[0] == "system.ai.gemini-3-5-flash"
+        # llama is not bucketed into any of the three families.
+        assert "system.ai.llama-4-maverick" not in codex + gemini
+
+    def test_paginates_via_next_page_token(self, monkeypatch):
+        pages = {
+            None: {
+                "model_services": [_model_service("system.ai.gpt-5")],
+                "next_page_token": "tok2",
+            },
+            "tok2": {
+                "model_services": [_model_service("system.ai.claude-opus-4-8")],
+            },
+        }
+
+        def fake_get(url, token, timeout=10):
+            token_param = None
+            if "page_token=" in url:
+                token_param = url.split("page_token=")[1].split("&")[0]
+            return pages[token_param], None
+
+        monkeypatch.setattr(db_mod, "_http_get_json", fake_get)
+
+        claude, codex, _, reason = db_mod.discover_model_services(WS, "token")
+
+        assert reason is None
+        assert codex == ["system.ai.gpt-5"]
+        assert claude == {"opus": "system.ai.claude-opus-4-8"}
+
+    def test_http_failure_returns_reason(self, monkeypatch):
+        monkeypatch.setattr(
+            db_mod, "_http_get_json", lambda url, token, timeout=10: (None, "HTTP 500 Server Error")
+        )
+
+        claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token")
+
+        assert (claude, codex, gemini) == ({}, [], [])
+        assert reason == "HTTP 500 Server Error"
+
+    def test_no_matching_families_reports_sample(self, monkeypatch):
+        payload = {"model_services": [_model_service("system.ai.llama-4-maverick")]}
+        monkeypatch.setattr(
+            db_mod, "_http_get_json", lambda url, token, timeout=10: (payload, None)
+        )
+
+        claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token")
+
+        assert (claude, codex, gemini) == ({}, [], [])
+        assert reason is not None and "llama-4-maverick" in reason
+
+    def test_ignores_non_system_ai_schemas(self, monkeypatch):
+        # The metastore listing returns services from every schema; only
+        # system.ai.* foundation models should be picked up.
+        payload = {
+            "model_services": [
+                _model_service("system.ai.gpt-5"),
+                _model_service("main.svenwb.gpt-5-5"),
+                _model_service("temp.erni.claude-opus-4-8"),
+                _model_service("dnasi_agent_cuj.default.dnasi-gpt55-test"),
+            ]
+        }
+        monkeypatch.setattr(
+            db_mod, "_http_get_json", lambda url, token, timeout=10: (payload, None)
+        )
+
+        claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token")
+
+        assert reason is None
+        assert codex == ["system.ai.gpt-5"]
+        assert claude == {}  # temp.erni.claude-* must not be bucketed
+        assert gemini == []
+
+    def test_retries_page_before_giving_up(self, monkeypatch):
+        payload = {"model_services": [_model_service("system.ai.gpt-5")]}
+        calls = {"n": 0}
+
+        def flaky_get(url, token, timeout=10):
+            calls["n"] += 1
+            if calls["n"] < 3:
+                return None, "HTTP 504 Gateway Timeout"
+            return payload, None
+
+        monkeypatch.setattr(db_mod, "_http_get_json", flaky_get)
+
+        ids, reason = db_mod.list_model_services(WS, "token")
+
+        assert reason is None
+        assert ids == ["system.ai.gpt-5"]
+        assert calls["n"] == 3  # two failures, third succeeds
+
+    def test_null_model_services_field_does_not_crash(self, monkeypatch):
+        # UC list endpoints sometimes serialize empty buckets as `null`.
+        payload = {"model_services": None}
+        monkeypatch.setattr(
+            db_mod, "_http_get_json", lambda url, token, timeout=10: (payload, None)
+        )
+
+        ids, reason = db_mod.list_model_services(WS, "token")
+
+        assert ids == []
+        assert reason == "no `system.ai.*` model services found"
+
+    def test_partial_pagination_failure_propagates_reason(self, monkeypatch):
+        # Surface the failure reason when a page still fails after retries.
+        pages = [
+            (
+                {
+                    "model_services": [_model_service("system.ai.gpt-5")],
+                    "next_page_token": "tok2",
+                },
+                None,
+            ),
+            (None, "HTTP 504 Gateway Timeout"),
+        ]
+        calls = {"i": 0}
+
+        def fake_get(url, token, timeout=10):
+            idx = min(calls["i"], len(pages) - 1)
+            calls["i"] += 1
+            return pages[idx]
+
+        # Disable per-page retries so the second-page failure isn't masked.
+        monkeypatch.setattr(db_mod, "_MODEL_SERVICES_PAGE_RETRIES", 1)
+        monkeypatch.setattr(db_mod, "_http_get_json", fake_get)
+
+        ids, reason = db_mod.list_model_services(WS, "token")
+
+        assert ids == ["system.ai.gpt-5"]  # still got the first page
+        assert reason == "HTTP 504 Gateway Timeout"  # but caller is warned
+
+    def test_empty_listing_returns_one_reason_regardless_of_user_services(self, monkeypatch):
+        for payload in (
+            {"model_services": []},
+            {
+                "model_services": [
+                    _model_service("main.svenwb.my-gpt"),
+                    _model_service("temp.erni.claude-opus-4-8"),
+                ]
+            },
+        ):
+            monkeypatch.setattr(
+                db_mod, "_http_get_json", lambda url, token, timeout=10, p=payload: (p, None)
+            )
+
+            ids, reason = db_mod.list_model_services(WS, "token")
+
+            assert ids == []
+            assert reason == "no `system.ai.*` model services found"
+
+    def test_partial_truncation_exposed_via_discover_too(self, monkeypatch):
+        # End-to-end: discover_model_services preserves the truncation reason
+        # so the CLI layer can warn about partial results.
+        pages = [
+            (
+                {
+                    "model_services": [_model_service("system.ai.gpt-5")],
+                    "next_page_token": "tok2",
+                },
+                None,
+            ),
+            (None, "HTTP 504 Gateway Timeout"),
+        ]
+        calls = {"i": 0}
+
+        def fake_get(url, token, timeout=10):
+            idx = min(calls["i"], len(pages) - 1)
+            calls["i"] += 1
+            return pages[idx]
+
+        monkeypatch.setattr(db_mod, "_MODEL_SERVICES_PAGE_RETRIES", 1)
+        monkeypatch.setattr(db_mod, "_http_get_json", fake_get)
+
+        claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token")
+
+        assert codex == ["system.ai.gpt-5"]
+        assert claude == {}
+        assert gemini == []
+        assert reason == "HTTP 504 Gateway Timeout"
+
+
 def _foundation_models_payload(names):
     return {
         "endpoints": [
diff --git a/tests/test_usage.py b/tests/test_usage.py
index d3c36bc..4f759be 100644
--- a/tests/test_usage.py
+++ b/tests/test_usage.py
@@ -206,6 +206,24 @@ def test_only_databricks_prefix_stripped_for_unknown_tool(self):
         result = simplify_model_name("opencode", "databricks-claude-sonnet-4")
         assert result == "claude-sonnet-4"
 
+    def test_strips_system_ai_prefix_for_model_services(self):
+        # Model-services-form ids use a `system.ai.` prefix instead of
+        # `databricks-`; the simplified label must match the AI Gateway form
+        # so usage rows stay consistent across discovery paths.
+        assert simplify_model_name("claude", "system.ai.claude-sonnet-4") == "sonnet-4"
+        assert simplify_model_name("codex", "system.ai.gpt-5-5") == "5-5"
+        assert simplify_model_name("gemini", "system.ai.gemini-3-5-flash") == "3-5-flash"
+
+    def test_system_ai_only_stripped_once(self):
+        # Defensive: a name that *starts with* `system.ai.` and embeds
+        # `databricks-` further in is not real, but we want to be sure
+        # we don't double-strip across the two family prefixes. Only the
+        # leading prefix is removed; the embedded one is left alone.
+        assert (
+            simplify_model_name("claude", "system.ai.databricks-claude-sonnet-4")
+            == "databricks-claude-sonnet-4"
+        )
+
 
 class TestExtractModelNames:
     def test_single_model(self):