diff --git a/src/ucode/agents/claude.py b/src/ucode/agents/claude.py index d0d0380..1afe962 100644 --- a/src/ucode/agents/claude.py +++ b/src/ucode/agents/claude.py @@ -62,7 +62,11 @@ def _resolve_web_search_model(state: dict) -> str | None: WEB_SEARCH_MCP_NAME = "web_search" -_CLAUDE_MODEL_RE = re.compile(r"^databricks-claude-(opus|sonnet)-(\d+)-(\d+)(.*)$") +# Matches both the AI Gateway form (`databricks-claude-opus-4-8`) and the UC +# model-services form (`system.ai.claude-opus-4-8`). +_CLAUDE_MODEL_RE = re.compile( + r"^(?:system\.ai\.)?(?:databricks-)?claude-(opus|sonnet)-(\d+)-(\d+)(.*)$" +) # Env keys the MLflow Stop hook reads to route traces. Written into the # settings `env` block alongside the hook itself. diff --git a/src/ucode/agents/codex.py b/src/ucode/agents/codex.py index e0bb64b..5a64d4b 100644 --- a/src/ucode/agents/codex.py +++ b/src/ucode/agents/codex.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import os import re from pathlib import Path @@ -13,6 +14,7 @@ backup_existing_file, deep_merge_dict, read_toml_safe, + write_text_file, write_toml_file, ) from ucode.databricks import ( @@ -29,6 +31,14 @@ CODEX_BACKUP_PATH = APP_DIR / "codex-ucode-config.backup.toml" LEGACY_CODEX_CONFIG_PATH = CODEX_CONFIG_DIR / "config.toml" LEGACY_CODEX_BACKUP_PATH = APP_DIR / "codex-config.backup.toml" +# Static model catalog written when the workspace uses UC model-services +# discovery. Pointing Codex at this file via `model_catalog_json` switches it +# to `StaticModelsManager`, which bypasses the `GET /v1/models` listing the +# AI Gateway currently rejects for `system.ai.*` (it requires a +# `Databricks-Model-Provider-Service` header that Codex doesn't send). Schema +# documented at openai/codex#14757; ucode emits the minimum required fields +# and lets Codex fill in the rest from `model_info_from_slug`-style defaults. 
+CODEX_MODEL_CATALOG_PATH = APP_DIR / "codex-model-catalog.json" CODEX_MODEL_PROVIDER_NAME = "ucode-databricks" MINIMUM_CODEX_VERSION = (0, 134, 0) MINIMUM_CODEX_VERSION_TEXT = "0.134.0" @@ -45,6 +55,7 @@ MANAGED_KEYS: list[list[str]] = [ ["model_provider"], ["model"], + ["model_catalog_json"], ["model_providers", CODEX_MODEL_PROVIDER_NAME], ["model_providers", CODEX_MODEL_PROVIDER_NAME, "http_headers"], ] @@ -120,12 +131,111 @@ def _provider_block(workspace: str, databricks_profile: str | None) -> dict: } +def _model_catalog_entry(slug: str) -> dict: + """Minimum-viable Codex `ModelInfo` for a UC model-services slug. + + Codex's `model_catalog_json` deserializer requires every non-`#[serde(default)]` + field to be present (Option-typed fields accept `null`). We surface only the + required keys and omit optional ones (`context_window`, + `web_search_tool_type`, `input_modalities`) so that Codex applies its own + fallback defaults, keeping behaviour close to what users would have got from + the `GET /v1/models` listing. + + Schema reference: openai/codex protocol/src/openai_models.rs:257-315 and + issue openai/codex#14757 for the field-by-field contract. + """ + return { + "slug": slug, + "display_name": slug, + "description": None, + # Empty list keeps the picker quiet without claiming reasoning support. + "supported_reasoning_levels": [], + "shell_type": "default", + "visibility": "list", + "supported_in_api": True, + # Identical priority across entries — the picker breaks ties by slug + # which matches the alphabetic order users already see today. + "priority": 10, + "availability_nux": None, + "upgrade": None, + # Empty `base_instructions` leaves the bundled system prompt in place + # (Codex falls back to its default when the field is empty after + # personality substitution; see client.rs reasoning-field gating notes). 
+ "base_instructions": "", + # Conservative defaults: don't claim reasoning summaries or verbosity + # support, since the gateway-fronted models may not implement either. + # Users can still send the request; this just keeps Codex from + # serializing fields the model can't honour. + "supports_reasoning_summaries": False, + "support_verbosity": False, + "default_verbosity": None, + # `freeform` enables apply_patch with the lark grammar that GPT-5 + # variants are trained on. Setting this to null would silently drop + # apply_patch from the tool list, which is a worse UX than a runtime + # error if a particular model doesn't support it. + "apply_patch_tool_type": "freeform", + "truncation_policy": {"mode": "bytes", "limit": 10000}, + "supports_parallel_tool_calls": True, + "experimental_supported_tools": [], + } + + +def build_model_catalog(codex_models: list[str] | None) -> dict | None: + """Build a `{"models": [...]}` document, or None if there are no entries. + + Codex rejects empty catalogs at startup, so we return None (and therefore + skip writing the file at all) when discovery returned no GPT models. + """ + if not codex_models: + return None + return {"models": [_model_catalog_entry(slug) for slug in codex_models]} + + +def _write_model_catalog_file(state: dict) -> Path | None: + """Materialise the static catalog when this workspace uses model-services. + + Returns the catalog path so the caller can wire it into the toml overlay. + Returns None when the workspace is on the AI-gateway path, in which case + we also remove any stale catalog left over from a previous configure run. + """ + if not state.get("use_model_services"): + if CODEX_MODEL_CATALOG_PATH.exists(): + try: + CODEX_MODEL_CATALOG_PATH.unlink() + except OSError: + # Non-fatal — Codex just re-fetches via /v1/models when + # `model_catalog_json` is unset. Leave the stale file alone. 
+ pass + return None + catalog = build_model_catalog(state.get("codex_models")) + if catalog is None: + return None + write_text_file(CODEX_MODEL_CATALOG_PATH, json.dumps(catalog, indent=2)) + return CODEX_MODEL_CATALOG_PATH + + +def revert_model_catalog_file() -> bool: + """Delete the static catalog written during configure. Idempotent.""" + if not CODEX_MODEL_CATALOG_PATH.exists(): + return False + try: + CODEX_MODEL_CATALOG_PATH.unlink() + return True + except OSError: + return False + + def render_overlay( - workspace: str, model: str | None = None, databricks_profile: str | None = None + workspace: str, + model: str | None = None, + databricks_profile: str | None = None, + model_catalog_path: Path | None = None, ) -> dict: overlay: dict = {"model_provider": CODEX_MODEL_PROVIDER_NAME} if model: overlay["model"] = model + if model_catalog_path is not None: + overlay["model_catalog_json"] = str(model_catalog_path) overlay["model_providers"] = { CODEX_MODEL_PROVIDER_NAME: _provider_block(workspace, databricks_profile), } @@ -255,6 +365,10 @@ def _openai_model_id(model: str | None) -> str | None: def _codex_model_id(model: str | None) -> str | None: + # UC model-services ids (`system.ai.gpt-5`) route by name through the + # gateway, so they must be sent verbatim — not rewritten to an OpenAI id. + if model and model.startswith("system.ai."): + return model if model in CODEX_OPENAI_ID_INCOMPATIBLE_MODELS: return model return _openai_model_id(model) @@ -263,7 +377,12 @@ def _codex_model_id(model: str | None) -> str | None: def _parse_gpt(model: str | None) -> tuple[int, int | None, int | None, str] | None: if not model: return None - match = _GPT_RE.fullmatch(model.split("/")[-1]) + # Strip the UC model-services prefix so `system.ai.gpt-5` parses for version + # selection; the original id is preserved by callers that need it verbatim. 
+ tail = model.split("/")[-1] + if tail.startswith("system.ai."): + tail = tail[len("system.ai.") :] + match = _GPT_RE.fullmatch(tail) if not match: return None major, minor, patch, suffix = match.groups() @@ -296,8 +415,19 @@ def write_tool_config(state: dict, model: str | None = None) -> dict: _remove_legacy_ucode_profile() backup_existing_file(CODEX_CONFIG_PATH, CODEX_BACKUP_PATH) - overlay = render_overlay(workspace, chosen_model, databricks_profile) + # Static catalog written iff the workspace is on UC model-services. When + # set, Codex switches to StaticModelsManager and never calls /v1/models — + # which is currently rejected by the AI Gateway for `system.ai.*` ids. + catalog_path = _write_model_catalog_file(state) + overlay = render_overlay( + workspace, chosen_model, databricks_profile, model_catalog_path=catalog_path + ) doc = read_toml_safe(CODEX_CONFIG_PATH) + # Strip a stale `model_catalog_json` if we're no longer in model-services + # mode — `deep_merge_dict` only adds/overwrites keys, it won't clear one + # that the new overlay omits. 
+ if catalog_path is None: + doc.pop("model_catalog_json", None) deep_merge_dict(doc, overlay) write_toml_file(CODEX_CONFIG_PATH, doc) state = mark_tool_managed(state, "codex", MANAGED_KEYS) diff --git a/src/ucode/cli.py b/src/ucode/cli.py index c363e22..659e916 100644 --- a/src/ucode/cli.py +++ b/src/ucode/cli.py @@ -25,7 +25,10 @@ from ucode.agents import ( launch as launch_agent, ) -from ucode.agents.codex import revert_legacy_shared_config +from ucode.agents.codex import ( + revert_legacy_shared_config, + revert_model_catalog_file, +) from ucode.agents.pi import PI_SETTINGS_BACKUP_PATH, PI_SETTINGS_PATH from ucode.config_io import restore_file, set_dry_run from ucode.databricks import ( @@ -33,6 +36,7 @@ discover_claude_models, discover_codex_models, discover_gemini_models, + discover_model_services, ensure_ai_gateway_v2, ensure_databricks_auth, find_profile_name_for_host, @@ -41,6 +45,7 @@ install_databricks_cli, normalize_workspace_url, run_databricks_login, + use_model_services, ) from ucode.mcp import ( MCP_CLIENTS, @@ -160,7 +165,13 @@ def configure_shared_state( don't error out. If ``None``, we resolve it from the host after login. """ workspace = normalize_workspace_url(workspace) - previous_workspace = load_state().get("workspace") + prior_state = load_state() + previous_workspace = prior_state.get("workspace") + # The flag is sticky: an explicit env var wins, otherwise fall back to what + # was persisted when the workspace was configured. Without this, every + # launch re-runs discovery and a missing env var would silently revert a + # model-services workspace to the databricks-* gateway names. 
+ model_services = use_model_services(default=bool(prior_state.get("use_model_services"))) fetch_all = tools is None if force_login: run_databricks_login(workspace, profile) @@ -184,19 +195,29 @@ def configure_shared_state( claude_reason: str | None = None gemini_reason: str | None = None codex_reason: str | None = None - with spinner("Fetching available models..."): + claude_models = {} + gemini_models = [] + codex_models = [] + if model_services: + # Opt-in: one UC model-services call yields all families as + # `system.ai.` ids, bucketed by name. The single reason is + # shared across the families that were requested. + with spinner("Fetching available models (model services)..."): + ms_claude, ms_codex, ms_gemini, ms_reason = discover_model_services(workspace, token) if want_claude: - claude_models, claude_reason = discover_claude_models(workspace, token) - else: - claude_models = {} + claude_models, claude_reason = ms_claude, ms_reason if want_gemini: - gemini_models, gemini_reason = discover_gemini_models(workspace, token) - else: - gemini_models = [] + gemini_models, gemini_reason = ms_gemini, ms_reason if want_codex: - codex_models, codex_reason = discover_codex_models(workspace, token) - else: - codex_models = [] + codex_models, codex_reason = ms_codex, ms_reason + else: + with spinner("Fetching available models..."): + if want_claude: + claude_models, claude_reason = discover_claude_models(workspace, token) + if want_gemini: + gemini_models, gemini_reason = discover_gemini_models(workspace, token) + if want_codex: + codex_models, codex_reason = discover_codex_models(workspace, token) opencode_models: dict[str, list[str]] = {} if claude_models: opencode_models["anthropic"] = list(claude_models.values()) @@ -210,6 +231,9 @@ def configure_shared_state( state["profile"] = profile else: state.pop("profile", None) + # Persist the resolved flag so subsequent launches stay on the same + # discovery path without the env var being re-exported. 
+ state["use_model_services"] = model_services state["base_urls"] = build_shared_base_urls(workspace) if want_claude: state["claude_models"] = claude_models @@ -371,6 +395,12 @@ def status() -> int: profile = state.get("profile") if profile: print_kv("CLI profile", profile) + print_kv( + "Model discovery", + "model-services (system.ai.*)" + if state.get("use_model_services") + else "ai-gateway (databricks-*)", + ) print_heading("Coding Agents") for tool, spec in TOOL_SPECS.items(): @@ -444,6 +474,9 @@ def revert() -> int: # Older Codex (< 0.134.0) had ucode edit the shared ~/.codex/config.toml in # place; restoring the per-profile file above does not undo that. legacy_codex_stripped = revert_legacy_shared_config() + # The static model catalog (only written when the workspace uses UC + # model-services) lives outside the toml backup, so clean it up here. + codex_catalog_removed = revert_model_catalog_file() clear_state() print_heading("Revert") @@ -452,6 +485,8 @@ def revert() -> int: print_kv(f"{spec['display']} config", "restored" if results[tool] else "unchanged") if legacy_codex_stripped: print_kv("Codex shared config", "ucode entries removed") + if codex_catalog_removed: + print_kv("Codex model catalog", "removed") print_kv("Pi settings", "restored" if pi_settings_restored else "unchanged") for client, spec in MCP_CLIENTS.items(): print_kv( diff --git a/src/ucode/databricks.py b/src/ucode/databricks.py index 2d45feb..4202b94 100644 --- a/src/ucode/databricks.py +++ b/src/ucode/databricks.py @@ -17,7 +17,7 @@ from typing import Literal, cast, overload from urllib import error as urllib_error from urllib import request as urllib_request -from urllib.parse import urlparse +from urllib.parse import urlencode, urlparse from databricks.sql.exc import ServerOperationError @@ -977,6 +977,188 @@ def build_auth_shell_command(workspace: str, profile: str | None = None) -> str: ) +def use_model_services(default: bool = False) -> bool: + """True when the opt-in UC 
model-services discovery path is enabled. + + Set ``UCODE_USE_MODEL_SERVICES=1`` (or true/yes/on) to discover models via + the Unity Catalog model-services API and address them as + ``system.ai.<model>`` instead of the per-family AI Gateway listings. + + The env var, when set to a non-blank value, wins. ``default`` is the fallback + used when the env var is unset or blank — callers pass the persisted state + value so a workspace configured with the flag keeps using model services on + later launches without the env var being re-exported each time. + """ + raw = os.environ.get("UCODE_USE_MODEL_SERVICES") + if raw is None or not raw.strip(): + return default + return raw.strip().lower() in {"1", "true", "yes", "on"} + + +# A model-service's `name` is `model-services/system.ai.<model>`; the +# part after the prefix is exactly the model string agents send (no +# `databricks-` infix — that only appears on the inner destination name). +_MODEL_SERVICE_NAME_PREFIX = "model-services/" +# The metastore-scope listing returns services from EVERY schema (e.g. +# `main.user.foo`, `temp.*`, internal DLT schemas). We only want the +# Databricks-managed foundation models under `system.ai`. +_MODEL_SERVICE_REQUIRED_PREFIX = "system.ai." + + +def _model_service_id(service: dict) -> str | None: + """Extract the `system.ai.<model>` id from one model-service entry. + + Returns None for services in any other schema, so user/internal model + services don't leak into the family buckets.""" + name = service.get("name") + if not isinstance(name, str): + return None + name = name.strip() + if name.startswith(_MODEL_SERVICE_NAME_PREFIX): + name = name[len(_MODEL_SERVICE_NAME_PREFIX) :] + if not name.startswith(_MODEL_SERVICE_REQUIRED_PREFIX): + return None + return name or None + + +# The model-services metastore listing is slow and flaky — large pages +# routinely 504 with `Timeout listing model services under metastore`. A small +# page is far more likely to come back, and each page gets a few retries before +# we give up. 
+_MODEL_SERVICES_PAGE_SIZE = 10 +_MODEL_SERVICES_PAGE_RETRIES = 4 + + +def _get_model_services_page( + url: str, token: str, *, retries: int = _MODEL_SERVICES_PAGE_RETRIES +) -> tuple[dict | list | None, str | None]: + """GET one model-services page, retrying on failure. + + The endpoint frequently 504s under load; a retry usually succeeds. Returns + the same (payload, reason) shape as ``_http_get_json`` — the last attempt's + result when all retries are exhausted.""" + payload: dict | list | None = None + reason: str | None = None + for attempt in range(retries): + payload, reason = _http_get_json(url, token, timeout=30) + if payload is not None: + return payload, None + _debug("model-services page", f"attempt {attempt + 1}/{retries} failed: {reason}") + return payload, reason + + +def list_model_services( + workspace: str, + token: str, + *, + page_size: int = _MODEL_SERVICES_PAGE_SIZE, + max_pages: int = 100, +) -> tuple[list[str], str | None]: + """List all `system.ai.*` model ids via the UC model-services API. + + Pages through ``/api/2.1/unity-catalog/model-services`` (metastore scope) + and returns the de-duplicated, sorted list of ``system.ai.`` + ids. Uses a small page size with per-page retries because the endpoint is + slow and frequently 504s. Returns (ids, reason); reason is None on success, + otherwise it says why the list is empty or truncated (HTTP/network error or + no services). 
+ """ + hostname = workspace_hostname(workspace) + ids: list[str] = [] + page_token: str | None = None + seen_tokens: set[str] = set() + last_reason: str | None = None + for _ in range(max_pages): + params: dict[str, str] = {"page_size": str(page_size)} + if page_token: + params["page_token"] = page_token + url = f"https://{hostname}/api/2.1/unity-catalog/model-services?{urlencode(params)}" + payload, reason = _get_model_services_page(url, token) + if payload is None: + # Mid-pagination failure: keep whatever we collected, but propagate + # the failure reason so callers can warn the user that the list is + # truncated. + last_reason = reason + break + data = cast(dict, payload) if isinstance(payload, dict) else {} + # `dict.get(key, default)` returns the present value even if it's None. + services = data.get("model_services") or [] + for service in services: + if isinstance(service, dict): + model_id = _model_service_id(service) + if model_id: + ids.append(model_id) + page_token = data.get("next_page_token") or None + if not page_token: + last_reason = None + break + if page_token in seen_tokens: + break + seen_tokens.add(page_token) + + deduped = sorted(set(ids)) + if deduped: + # Even on partial success, surface the reason so the caller can warn upstream. + return deduped, last_reason + if last_reason: + return [], last_reason + # Empty listing with no HTTP error. The metastore listing mixes + # `system.ai.*` foundation models with user-created services in + # non-deterministic order across page sizes (verified against + # e2-dogfood 2026-06-10), so distinguishing "saw 0 entries" from + # "saw N entries, all in user schemas" doesn't change what the user + # should do — retry, or verify foundation models are provisioned. 
+ return [], "no `system.ai.*` model services found" + + +def discover_model_services( + workspace: str, token: str +) -> tuple[dict[str, str], list[str], list[str], str | None]: + """Discover models via UC model-services and bucket them by family name. + + Returns (claude_models, codex_models, gemini_models, reason): + + - ``claude_models`` maps ``opus``/``sonnet``/``haiku`` to the newest + matching ``system.ai.claude-*`` id (mirrors ``discover_claude_models``). + - ``codex_models`` is the list of ``system.ai.*gpt-*`` ids. + - ``gemini_models`` is the list of ``system.ai.*gemini-*`` ids, newest first. + + ``reason`` is None on full success; otherwise it explains why nothing was + found or why the listing is truncated. Family bucketing is by name substring + because the model-services API does not expose per-model API dialects. + """ + ids, reason = list_model_services(workspace, token) + if not ids: + return {}, [], [], reason + + claude_models: dict[str, str] = {} + for family in ("opus", "sonnet", "haiku"): + candidates = sorted( + [m for m in ids if f"claude-{family}-" in m], + reverse=True, + ) + if candidates: + claude_models[family] = candidates[0] + + codex_models = [m for m in ids if "gpt-" in m] + gemini_models = sorted([m for m in ids if "gemini-" in m], key=model_version_sort_key) + + if not (claude_models or codex_models or gemini_models): + sample = ", ".join(ids[:5]) + return ( + {}, + [], + [], + ( + "model-services returned model ids but none matched " + f"claude/gpt/gemini families (got: {sample})" + ), + ) + # Pass `reason` through even on success — `list_model_services` sets it on + # partial pagination so the CLI layer can warn about truncation. + return claude_models, codex_models, gemini_models, reason + + def discover_claude_models(workspace: str, token: str) -> tuple[dict[str, str], str | None]: """Discover Claude families on this workspace's AI Gateway. 
diff --git a/src/ucode/usage.py b/src/ucode/usage.py index ab9a0c4..2ce8e7b 100644 --- a/src/ucode/usage.py +++ b/src/ucode/usage.py @@ -172,9 +172,14 @@ def simplify_model_name(tool: str, model_name: str) -> str: if not normalized: return "-" - prefix = "databricks-" - if normalized.startswith(prefix): - normalized = normalized[len(prefix) :] + # Strip whichever family prefix is in use so usage rows stay consistent + # regardless of whether a workspace uses the AI Gateway path + # (`databricks-claude-...`) or the UC model-services path + # (`system.ai.claude-...`). Order doesn't matter — only one will match. + for prefix in ("databricks-", "system.ai."): + if normalized.startswith(prefix): + normalized = normalized[len(prefix) :] + break tool_prefixes = { "claude": "claude-", diff --git a/tests/test_agent_claude.py b/tests/test_agent_claude.py index ea33c63..9888efd 100644 --- a/tests/test_agent_claude.py +++ b/tests/test_agent_claude.py @@ -41,6 +41,14 @@ def test_does_not_duplicate_1m_suffix(self): overlay, _ = claude.render_overlay(WS, "databricks-claude-opus-4-7[1m]") assert overlay["env"]["ANTHROPIC_MODEL"] == "databricks-claude-opus-4-7[1m]" + def test_adds_1m_suffix_for_model_services_name(self): + overlay, _ = claude.render_overlay(WS, "system.ai.claude-opus-4-8") + assert overlay["env"]["ANTHROPIC_MODEL"] == "system.ai.claude-opus-4-8[1m]" + + def test_no_1m_suffix_for_model_services_haiku(self): + overlay, _ = claude.render_overlay(WS, "system.ai.claude-haiku-4-6") + assert overlay["env"]["ANTHROPIC_MODEL"] == "system.ai.claude-haiku-4-6" + def test_sets_anthropic_base_url(self): overlay, _ = claude.render_overlay(WS, "s4") assert overlay["env"]["ANTHROPIC_BASE_URL"] == f"{WS}/ai-gateway/anthropic" diff --git a/tests/test_agent_codex.py b/tests/test_agent_codex.py index b84b667..5772960 100644 --- a/tests/test_agent_codex.py +++ b/tests/test_agent_codex.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import os from ucode.agents import 
codex @@ -337,6 +338,17 @@ def test_openai_model_id_maps_databricks_naming(self): def test_codex_model_id_preserves_openai_incompatible_models(self): assert codex._codex_model_id("databricks-gpt-5-2-codex") == "databricks-gpt-5-2-codex" assert codex._codex_model_id("databricks-gpt-5-4-nano") == "databricks-gpt-5-4-nano" + + def test_codex_model_id_passes_model_services_id_verbatim(self): + # UC model-services ids route by name, so they must not be rewritten + # to the OpenAI id form. + assert codex._codex_model_id("system.ai.gpt-5") == "system.ai.gpt-5" + assert codex._codex_model_id("system.ai.gpt-5-2-codex") == "system.ai.gpt-5-2-codex" + + def test_default_model_selects_model_services_gpt(self): + models = ["system.ai.gpt-5", "system.ai.gpt-5-5", "system.ai.claude-opus-4-8"] + + assert codex.default_model({"codex_models": models}) == "system.ai.gpt-5-5" assert codex._codex_model_id("databricks-gpt-5-5") == "gpt-5.5" @@ -385,3 +397,230 @@ def fake_execvp(binary: str, args: list[str]) -> None: assert os.environ["OAUTH_TOKEN"] == "fresh-token" assert exec_calls == [("codex", ["codex", "--profile", "ucode", "--search"])] + + +class TestBuildModelCatalog: + def test_returns_none_when_no_models(self): + # Codex rejects empty catalogs at startup, so we must skip writing the + # file entirely rather than emit `{"models": []}`. + assert codex.build_model_catalog([]) is None + assert codex.build_model_catalog(None) is None + + def test_emits_one_entry_per_model(self): + catalog = codex.build_model_catalog(["system.ai.gpt-5", "system.ai.gpt-5-5"]) + + slugs = [entry["slug"] for entry in catalog["models"]] + assert slugs == ["system.ai.gpt-5", "system.ai.gpt-5-5"] + + def test_each_entry_has_required_keys(self): + # The Codex protocol demands every non-`#[serde(default)]` field be + # present; an omission causes Codex to refuse to start. Guard the + # contract so a future field rename doesn't quietly break configure. 
+ catalog = codex.build_model_catalog(["system.ai.gpt-5"]) + entry = catalog["models"][0] + + for key in ( + "slug", + "display_name", + "description", + "supported_reasoning_levels", + "shell_type", + "visibility", + "supported_in_api", + "priority", + "availability_nux", + "upgrade", + "base_instructions", + "supports_reasoning_summaries", + "support_verbosity", + "default_verbosity", + "apply_patch_tool_type", + "truncation_policy", + "supports_parallel_tool_calls", + "experimental_supported_tools", + ): + assert key in entry, f"missing required ModelInfo field: {key}" + + def test_truncation_policy_is_well_formed(self): + # Codex requires both `mode` and `limit`; a malformed sub-struct fails + # the whole catalog load with `unknown variant ...`. + entry = codex.build_model_catalog(["system.ai.gpt-5"])["models"][0] + + assert entry["truncation_policy"] == {"mode": "bytes", "limit": 10000} + + def test_visibility_is_protocol_compliant(self): + # `list` / `hide` / `none` are the only accepted strings — `custom` and + # other values fail catalog deserialization. + entry = codex.build_model_catalog(["system.ai.gpt-5"])["models"][0] + + assert entry["visibility"] in {"list", "hide", "none"} + + def test_apply_patch_uses_freeform_for_codex_models(self): + # GPT-5 variants ship with the lark-grammar apply_patch tool; we keep + # it on so users see the same agent capabilities regardless of which + # discovery path their workspace uses. 
+ entry = codex.build_model_catalog(["system.ai.gpt-5"])["models"][0] + + assert entry["apply_patch_tool_type"] == "freeform" + + +class TestModelCatalogFile: + def _patch_paths(self, tmp_path, monkeypatch): + catalog_path = tmp_path / "codex-model-catalog.json" + monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path) + return catalog_path + + def test_skips_file_when_use_model_services_false(self, tmp_path, monkeypatch): + catalog_path = self._patch_paths(tmp_path, monkeypatch) + + result = codex._write_model_catalog_file( + {"codex_models": ["databricks-gpt-5"], "use_model_services": False} + ) + + assert result is None + assert not catalog_path.exists() + + def test_skips_file_when_no_codex_models(self, tmp_path, monkeypatch): + # `use_model_services=True` without any GPT models would produce an + # empty catalog, which Codex rejects. Don't write the file at all. + catalog_path = self._patch_paths(tmp_path, monkeypatch) + + result = codex._write_model_catalog_file({"codex_models": [], "use_model_services": True}) + + assert result is None + assert not catalog_path.exists() + + def test_writes_catalog_when_use_model_services_true(self, tmp_path, monkeypatch): + catalog_path = self._patch_paths(tmp_path, monkeypatch) + + result = codex._write_model_catalog_file( + {"codex_models": ["system.ai.gpt-5", "system.ai.gpt-5-5"], "use_model_services": True} + ) + + assert result == catalog_path + catalog = json.loads(catalog_path.read_text(encoding="utf-8")) + slugs = [entry["slug"] for entry in catalog["models"]] + assert slugs == ["system.ai.gpt-5", "system.ai.gpt-5-5"] + + def test_removes_stale_catalog_when_toggling_off_model_services(self, tmp_path, monkeypatch): + # If a workspace previously had `UCODE_USE_MODEL_SERVICES=1` and now + # doesn't, the old catalog must go away — otherwise `model_catalog_json` + # in the toml would still resolve and Codex would silently pin the + # stale model list. 
+ catalog_path = self._patch_paths(tmp_path, monkeypatch) + catalog_path.write_text('{"models": []}', encoding="utf-8") + + result = codex._write_model_catalog_file( + {"codex_models": ["databricks-gpt-5"], "use_model_services": False} + ) + + assert result is None + assert not catalog_path.exists() + + +class TestRevertModelCatalogFile: + def test_returns_false_when_no_catalog(self, tmp_path, monkeypatch): + catalog_path = tmp_path / "codex-model-catalog.json" + monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path) + + assert codex.revert_model_catalog_file() is False + + def test_removes_existing_catalog(self, tmp_path, monkeypatch): + catalog_path = tmp_path / "codex-model-catalog.json" + catalog_path.write_text('{"models": []}', encoding="utf-8") + monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path) + + assert codex.revert_model_catalog_file() is True + assert not catalog_path.exists() + + +class TestRenderOverlayWithModelCatalog: + def test_omits_model_catalog_json_by_default(self): + # AI-gateway path: Codex's `OpenAiModelsManager` should hit /v1/models + # the way it always has. + overlay = codex.render_overlay(WS, "databricks-gpt-5") + + assert "model_catalog_json" not in overlay + + def test_includes_model_catalog_json_when_path_provided(self, tmp_path): + # model-services path: forcing `StaticModelsManager` is the whole + # point of this knob, so the overlay must surface the path string. 
+ catalog_path = tmp_path / "codex-model-catalog.json" + + overlay = codex.render_overlay(WS, "system.ai.gpt-5", model_catalog_path=catalog_path) + + assert overlay["model_catalog_json"] == str(catalog_path) + + +class TestWriteToolConfigModelCatalog: + def test_writes_model_catalog_when_use_model_services_true(self, tmp_path, monkeypatch): + config_path = tmp_path / ".codex" / "ucode.config.toml" + backup_path = tmp_path / "codex-ucode-config.backup.toml" + catalog_path = tmp_path / "codex-model-catalog.json" + monkeypatch.setattr(codex, "CODEX_CONFIG_PATH", config_path) + monkeypatch.setattr(codex, "CODEX_BACKUP_PATH", backup_path) + monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path) + monkeypatch.setattr(codex, "agent_version", lambda binary: "0.134.0") + monkeypatch.setattr(codex, "save_state", lambda state: None) + + codex.write_tool_config( + { + "workspace": WS, + "codex_models": ["system.ai.gpt-5", "system.ai.gpt-5-5"], + "use_model_services": True, + } + ) + + doc = read_toml_safe(config_path) + assert doc["model_catalog_json"] == str(catalog_path) + catalog = json.loads(catalog_path.read_text(encoding="utf-8")) + assert {entry["slug"] for entry in catalog["models"]} == { + "system.ai.gpt-5", + "system.ai.gpt-5-5", + } + + def test_omits_model_catalog_json_when_ai_gateway(self, tmp_path, monkeypatch): + config_path = tmp_path / ".codex" / "ucode.config.toml" + backup_path = tmp_path / "codex-ucode-config.backup.toml" + catalog_path = tmp_path / "codex-model-catalog.json" + monkeypatch.setattr(codex, "CODEX_CONFIG_PATH", config_path) + monkeypatch.setattr(codex, "CODEX_BACKUP_PATH", backup_path) + monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path) + monkeypatch.setattr(codex, "agent_version", lambda binary: "0.134.0") + monkeypatch.setattr(codex, "save_state", lambda state: None) + + codex.write_tool_config({"workspace": WS, "codex_models": ["databricks-gpt-5"]}) + + doc = read_toml_safe(config_path) + assert 
"model_catalog_json" not in doc + assert not catalog_path.exists() + + def test_clears_stale_model_catalog_json_when_toggling_off(self, tmp_path, monkeypatch): + # Re-running `ucode configure` after dropping + # `UCODE_USE_MODEL_SERVICES=1` must not leave the static-catalog + # pointer behind — otherwise Codex still pins the cached list. + config_path = tmp_path / ".codex" / "ucode.config.toml" + backup_path = tmp_path / "codex-ucode-config.backup.toml" + catalog_path = tmp_path / "codex-model-catalog.json" + config_path.parent.mkdir(parents=True) + config_path.write_text( + 'model_catalog_json = "/tmp/old.json"\nmodel = "system.ai.gpt-5"\n', + encoding="utf-8", + ) + monkeypatch.setattr(codex, "CODEX_CONFIG_PATH", config_path) + monkeypatch.setattr(codex, "CODEX_BACKUP_PATH", backup_path) + monkeypatch.setattr(codex, "CODEX_MODEL_CATALOG_PATH", catalog_path) + monkeypatch.setattr(codex, "agent_version", lambda binary: "0.134.0") + monkeypatch.setattr(codex, "save_state", lambda state: None) + + codex.write_tool_config({"workspace": WS, "codex_models": ["databricks-gpt-5"]}) + + doc = read_toml_safe(config_path) + assert "model_catalog_json" not in doc + + +class TestManagedKeysModelCatalog: + def test_managed_keys_include_model_catalog_json(self): + # Revert must strip `model_catalog_json` out of the toml; if it + # vanishes from MANAGED_KEYS the cleanup silently regresses. 
+ assert ["model_catalog_json"] in codex.MANAGED_KEYS diff --git a/tests/test_cli.py b/tests/test_cli.py index 9809d1a..e9355b6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -196,6 +196,27 @@ def test_shows_mcp_servers_configured_by_ucode(self): assert "MCP Server:" not in result.output assert "Configured tools:" not in result.output + def test_shows_ai_gateway_discovery_when_flag_unset(self): + with patch("ucode.cli.load_state", return_value=MINIMAL_STATE): + result = runner.invoke(app, ["status"]) + + assert result.exit_code == 0, result.output + assert "Model discovery:" in result.output + assert "ai-gateway" in result.output + assert "system.ai" not in result.output + + def test_shows_model_services_discovery_when_flag_set(self): + # Surface the active discovery path so users don't have to read + # state.json to remember whether a workspace is on UC model-services. + state = {**MINIMAL_STATE, "use_model_services": True} + with patch("ucode.cli.load_state", return_value=state): + result = runner.invoke(app, ["status"]) + + assert result.exit_code == 0, result.output + assert "Model discovery:" in result.output + assert "model-services" in result.output + assert "system.ai" in result.output + def test_status_treats_available_tools_as_configured_agents(self): state = { **MINIMAL_STATE, diff --git a/tests/test_databricks.py b/tests/test_databricks.py index d3feeba..3e02707 100644 --- a/tests/test_databricks.py +++ b/tests/test_databricks.py @@ -132,6 +132,249 @@ def test_selects_opus_4_8_when_advertised(self, monkeypatch): assert models["opus"] == "databricks-claude-opus-4-8" +def _model_service(model_id: str) -> dict: + """A model-services entry whose `name` strips to `model_id`.""" + return {"name": f"model-services/{model_id}"} + + +class TestUseModelServices: + def test_off_by_default(self, monkeypatch): + monkeypatch.delenv("UCODE_USE_MODEL_SERVICES", raising=False) + assert db_mod.use_model_services() is False + + def 
test_truthy_values_enable(self, monkeypatch): + for value in ("1", "true", "TRUE", "yes", "on"): + monkeypatch.setenv("UCODE_USE_MODEL_SERVICES", value) + assert db_mod.use_model_services() is True + + def test_falsey_values_disable(self, monkeypatch): + # A non-empty, non-truthy value explicitly disables — even over a + # persisted default of True. + for value in ("0", "false", "no"): + monkeypatch.setenv("UCODE_USE_MODEL_SERVICES", value) + assert db_mod.use_model_services(default=True) is False + + def test_unset_falls_back_to_default(self, monkeypatch): + # Sticky behavior: when the env var is unset (or blank), the persisted + # default decides. + monkeypatch.delenv("UCODE_USE_MODEL_SERVICES", raising=False) + assert db_mod.use_model_services(default=True) is True + assert db_mod.use_model_services(default=False) is False + monkeypatch.setenv("UCODE_USE_MODEL_SERVICES", "") + assert db_mod.use_model_services(default=True) is True + + def test_env_var_overrides_default(self, monkeypatch): + monkeypatch.setenv("UCODE_USE_MODEL_SERVICES", "1") + assert db_mod.use_model_services(default=False) is True + + +class TestDiscoverModelServices: + def test_buckets_families_by_name(self, monkeypatch): + payload = { + "model_services": [ + _model_service("system.ai.claude-opus-4-7"), + _model_service("system.ai.claude-opus-4-8"), + _model_service("system.ai.claude-sonnet-4-6"), + _model_service("system.ai.gpt-5"), + _model_service("system.ai.gemini-2-5-flash"), + _model_service("system.ai.gemini-3-5-flash"), + _model_service("system.ai.llama-4-maverick"), + ] + } + monkeypatch.setattr( + db_mod, "_http_get_json", lambda url, token, timeout=10: (payload, None) + ) + + claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token") + + assert reason is None + # Newest opus wins; sonnet bucketed; haiku absent. 
+ assert claude == { + "opus": "system.ai.claude-opus-4-8", + "sonnet": "system.ai.claude-sonnet-4-6", + } + assert codex == ["system.ai.gpt-5"] + # Gemini ordered newest-first via the shared sort key. + assert gemini[0] == "system.ai.gemini-3-5-flash" + # llama is not bucketed into any of the three families. + assert "system.ai.llama-4-maverick" not in codex + gemini + + def test_paginates_via_next_page_token(self, monkeypatch): + pages = { + None: { + "model_services": [_model_service("system.ai.gpt-5")], + "next_page_token": "tok2", + }, + "tok2": { + "model_services": [_model_service("system.ai.claude-opus-4-8")], + }, + } + + def fake_get(url, token, timeout=10): + token_param = None + if "page_token=" in url: + token_param = url.split("page_token=")[1].split("&")[0] + return pages[token_param], None + + monkeypatch.setattr(db_mod, "_http_get_json", fake_get) + + claude, codex, _, reason = db_mod.discover_model_services(WS, "token") + + assert reason is None + assert codex == ["system.ai.gpt-5"] + assert claude == {"opus": "system.ai.claude-opus-4-8"} + + def test_http_failure_returns_reason(self, monkeypatch): + monkeypatch.setattr( + db_mod, "_http_get_json", lambda url, token, timeout=10: (None, "HTTP 500 Server Error") + ) + + claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token") + + assert (claude, codex, gemini) == ({}, [], []) + assert reason == "HTTP 500 Server Error" + + def test_no_matching_families_reports_sample(self, monkeypatch): + payload = {"model_services": [_model_service("system.ai.llama-4-maverick")]} + monkeypatch.setattr( + db_mod, "_http_get_json", lambda url, token, timeout=10: (payload, None) + ) + + claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token") + + assert (claude, codex, gemini) == ({}, [], []) + assert reason is not None and "llama-4-maverick" in reason + + def test_ignores_non_system_ai_schemas(self, monkeypatch): + # The metastore listing returns services from every schema; only + 
+ # system.ai.* foundation models should be picked up. + payload = { + "model_services": [ + _model_service("system.ai.gpt-5"), + _model_service("main.svenwb.gpt-5-5"), + _model_service("temp.erni.claude-opus-4-8"), + _model_service("dnasi_agent_cuj.default.dnasi-gpt55-test"), + ] + } + monkeypatch.setattr( + db_mod, "_http_get_json", lambda url, token, timeout=10: (payload, None) + ) + + claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token") + + assert reason is None + assert codex == ["system.ai.gpt-5"] + assert claude == {} # temp.erni.claude-* must not be bucketed + assert gemini == [] + + def test_retries_page_before_giving_up(self, monkeypatch): + payload = {"model_services": [_model_service("system.ai.gpt-5")]} + calls = {"n": 0} + + def flaky_get(url, token, timeout=10): + calls["n"] += 1 + if calls["n"] < 3: + return None, "HTTP 504 Gateway Timeout" + return payload, None + + monkeypatch.setattr(db_mod, "_http_get_json", flaky_get) + + ids, reason = db_mod.list_model_services(WS, "token") + + assert reason is None + assert ids == ["system.ai.gpt-5"] + assert calls["n"] == 3 # two failures, third succeeds + + def test_null_model_services_field_does_not_crash(self, monkeypatch): + # Defensive: UC list endpoints sometimes serialize empty buckets as `null`. + payload = {"model_services": None} + monkeypatch.setattr( + db_mod, "_http_get_json", lambda url, token, timeout=10: (payload, None) + ) + + ids, reason = db_mod.list_model_services(WS, "token") + + assert ids == [] + assert reason == "no `system.ai.*` model services found" + + def test_partial_pagination_failure_propagates_reason(self, monkeypatch): + # Surface the failure reason when a page still fails after retries. 
+ pages = [ + ( + { + "model_services": [_model_service("system.ai.gpt-5")], + "next_page_token": "tok2", + }, + None, + ), + (None, "HTTP 504 Gateway Timeout"), + ] + calls = {"i": 0} + + def fake_get(url, token, timeout=10): + idx = min(calls["i"], len(pages) - 1) + calls["i"] += 1 + return pages[idx] + + # Disable per-page retries so the second-page failure isn't masked. + monkeypatch.setattr(db_mod, "_MODEL_SERVICES_PAGE_RETRIES", 1) + monkeypatch.setattr(db_mod, "_http_get_json", fake_get) + + ids, reason = db_mod.list_model_services(WS, "token") + + assert ids == ["system.ai.gpt-5"] # still got the first page + assert reason == "HTTP 504 Gateway Timeout" # but caller is warned + + def test_empty_listing_returns_one_reason_regardless_of_user_services(self, monkeypatch): + for payload in ( + {"model_services": []}, + { + "model_services": [ + _model_service("main.svenwb.my-gpt"), + _model_service("temp.erni.claude-opus-4-8"), + ] + }, + ): + monkeypatch.setattr( + db_mod, "_http_get_json", lambda url, token, timeout=10, p=payload: (p, None) + ) + + ids, reason = db_mod.list_model_services(WS, "token") + + assert ids == [] + assert reason == "no `system.ai.*` model services found" + + def test_partial_truncation_exposed_via_discover_too(self, monkeypatch): + # End-to-end: discover_model_services preserves the truncation reason + # so the CLI layer can warn about partial results. 
+ pages = [ + ( + { + "model_services": [_model_service("system.ai.gpt-5")], + "next_page_token": "tok2", + }, + None, + ), + (None, "HTTP 504 Gateway Timeout"), + ] + calls = {"i": 0} + + def fake_get(url, token, timeout=10): + idx = min(calls["i"], len(pages) - 1) + calls["i"] += 1 + return pages[idx] + + monkeypatch.setattr(db_mod, "_MODEL_SERVICES_PAGE_RETRIES", 1) + monkeypatch.setattr(db_mod, "_http_get_json", fake_get) + + claude, codex, gemini, reason = db_mod.discover_model_services(WS, "token") + + assert codex == ["system.ai.gpt-5"] + assert claude == {} + assert gemini == [] + assert reason == "HTTP 504 Gateway Timeout" + + def _foundation_models_payload(names): return { "endpoints": [ diff --git a/tests/test_usage.py b/tests/test_usage.py index d3c36bc..4f759be 100644 --- a/tests/test_usage.py +++ b/tests/test_usage.py @@ -206,6 +206,24 @@ def test_only_databricks_prefix_stripped_for_unknown_tool(self): result = simplify_model_name("opencode", "databricks-claude-sonnet-4") assert result == "claude-sonnet-4" + def test_strips_system_ai_prefix_for_model_services(self): + # Model-services-form ids use a `system.ai.` prefix instead of + # `databricks-`; the simplified label must match the AI Gateway form + # so usage rows stay consistent across discovery paths. + assert simplify_model_name("claude", "system.ai.claude-sonnet-4") == "sonnet-4" + assert simplify_model_name("codex", "system.ai.gpt-5-5") == "5-5" + assert simplify_model_name("gemini", "system.ai.gemini-3-5-flash") == "3-5-flash" + + def test_system_ai_only_stripped_once(self): + # Defensive: a name that *starts with* `system.ai.` and embeds + # `databricks-` further in is not real, but we want to be sure + # we don't double-strip across the two family prefixes. Only the + # leading prefix is removed; the embedded one is left alone. 
+ assert ( + simplify_model_name("claude", "system.ai.databricks-claude-sonnet-4") + == "databricks-claude-sonnet-4" + ) + class TestExtractModelNames: def test_single_model(self):