Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/ucode/agents/claude.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,11 @@ def _resolve_web_search_model(state: dict) -> str | None:


WEB_SEARCH_MCP_NAME = "web_search"
_CLAUDE_MODEL_RE = re.compile(r"^databricks-claude-(opus|sonnet)-(\d+)-(\d+)(.*)$")
# Matches both the AI Gateway form (`databricks-claude-opus-4-8`) and the UC
# model-services form (`system.ai.claude-opus-4-8`).
_CLAUDE_MODEL_RE = re.compile(
r"^(?:system\.ai\.)?(?:databricks-)?claude-(opus|sonnet)-(\d+)-(\d+)(.*)$"
)

# Env keys the MLflow Stop hook reads to route traces. Written into the
# settings `env` block alongside the hook itself.
Expand Down
136 changes: 133 additions & 3 deletions src/ucode/agents/codex.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import json
import os
import re
from pathlib import Path
Expand All @@ -13,6 +14,7 @@
backup_existing_file,
deep_merge_dict,
read_toml_safe,
write_text_file,
write_toml_file,
)
from ucode.databricks import (
Expand All @@ -29,6 +31,14 @@
CODEX_BACKUP_PATH = APP_DIR / "codex-ucode-config.backup.toml"
LEGACY_CODEX_CONFIG_PATH = CODEX_CONFIG_DIR / "config.toml"
LEGACY_CODEX_BACKUP_PATH = APP_DIR / "codex-config.backup.toml"
# Static model catalog written when the workspace uses UC model-services
# discovery. Pointing Codex at this file via `model_catalog_json` switches it
# to `StaticModelsManager`, which bypasses the `GET /v1/models` listing the
# AI Gateway currently rejects for `system.ai.*` (it requires a
# `Databricks-Model-Provider-Service` header that Codex doesn't send). Schema
# documented at openai/codex#14757; ucode emits the minimum required fields
# and lets Codex fill in the rest from `model_info_from_slug`-style defaults.
CODEX_MODEL_CATALOG_PATH = APP_DIR / "codex-model-catalog.json"
CODEX_MODEL_PROVIDER_NAME = "ucode-databricks"
MINIMUM_CODEX_VERSION = (0, 134, 0)
MINIMUM_CODEX_VERSION_TEXT = "0.134.0"
Expand All @@ -45,6 +55,7 @@
# Key paths ucode claims in the Codex config. Each inner list is one path,
# outermost key first (e.g. `model_providers` -> CODEX_MODEL_PROVIDER_NAME).
# The list is handed to `mark_tool_managed(state, "codex", MANAGED_KEYS)` by
# `write_tool_config`, so any new key the overlay writes (such as
# `model_catalog_json`) needs an entry here as well.
# NOTE(review): presumably these paths drive what gets stripped on revert —
# confirm against `mark_tool_managed` and its consumers.
MANAGED_KEYS: list[list[str]] = [
["model_provider"],
["model"],
["model_catalog_json"],
["model_providers", CODEX_MODEL_PROVIDER_NAME],
["model_providers", CODEX_MODEL_PROVIDER_NAME, "http_headers"],
]
Expand Down Expand Up @@ -120,12 +131,111 @@ def _provider_block(workspace: str, databricks_profile: str | None) -> dict:
}


def _model_catalog_entry(slug: str) -> dict:
"""Minimum-viable Codex `ModelInfo` for a UC model-services slug.

Codex's `model_catalog_json` deserializer requires every non-`#[serde(default)]`
field to be present (Option-typed fields accept `null`). We surface the
minimum required keys plus a few optional ones (`context_window`,
`web_search_tool_type`, `input_modalities`) chosen to match Codex's own
fallback defaults so behaviour matches what users would have got from the
`GET /v1/models` listing.

Schema reference: openai/codex protocol/src/openai_models.rs:257-315 and
issue openai/codex#14757 for the field-by-field contract.
"""
return {
"slug": slug,
"display_name": slug,
"description": None,
# Empty list keeps the picker quiet without claiming reasoning support.
"supported_reasoning_levels": [],
"shell_type": "default",
"visibility": "list",
"supported_in_api": True,
# Identical priority across entries — the picker breaks ties by slug
# which matches the alphabetic order users already see today.
"priority": 10,
"availability_nux": None,
"upgrade": None,
# Empty `base_instructions` leaves the bundled system prompt in place
# (Codex falls back to its default when the field is empty after
# personality substitution; see client.rs reasoning-field gating notes).
"base_instructions": "",
# Conservative defaults: don't claim reasoning summaries or verbosity
# support, since the gateway-fronted models may not implement either.
# Users can still send the request; this just keeps Codex from
# serializing fields the model can't honour.
"supports_reasoning_summaries": False,
"support_verbosity": False,
"default_verbosity": None,
# `freeform` enables apply_patch with the lark grammar that GPT-5
# variants are trained on. Setting this to null would silently drop
# apply_patch from the tool list, which is a worse UX than a runtime
# error if a particular model doesn't support it.
"apply_patch_tool_type": "freeform",
"truncation_policy": {"mode": "bytes", "limit": 10000},
"supports_parallel_tool_calls": True,
"experimental_supported_tools": [],
}


def build_model_catalog(codex_models: list[str] | None) -> dict | None:
    """Wrap the discovered slugs in a `{"models": [...]}` catalog document.

    Returns None when `codex_models` is None or empty: Codex rejects empty
    catalogs at startup, so the caller skips writing the file entirely in
    that case.
    """
    if codex_models:
        return {"models": list(map(_model_catalog_entry, codex_models))}
    return None


def _write_model_catalog_file(state: dict) -> Path | None:
    """Materialise the static catalog when this workspace uses model-services.

    Args:
        state: Shared ucode state. Reads `use_model_services` (truthy means
            the workspace is on UC model-services discovery) and
            `codex_models` (the discovered model slugs).

    Returns:
        The catalog path, so the caller can wire it into the toml overlay,
        when a catalog was written. None when no catalog applies: either the
        workspace is on the AI-gateway path, or model-services discovery
        produced no models. In both None cases any catalog left over from a
        previous configure run is removed, so no orphaned file stays behind
        once the config stops referencing it.
    """
    catalog = (
        build_model_catalog(state.get("codex_models"))
        if state.get("use_model_services")
        else None
    )
    if catalog is None:
        # NOTE(review): this unlink goes straight to the filesystem rather
        # than through a config_io helper — confirm whether dry-run mode
        # should suppress it.
        try:
            CODEX_MODEL_CATALOG_PATH.unlink()
        except FileNotFoundError:
            # Nothing stale to clean up.
            pass
        except OSError:
            # Non-fatal — Codex just re-fetches via /v1/models when
            # `model_catalog_json` is unset. Leave the stale file alone.
            pass
        return None
    write_text_file(CODEX_MODEL_CATALOG_PATH, json.dumps(catalog, indent=2))
    return CODEX_MODEL_CATALOG_PATH


def revert_model_catalog_file() -> bool:
    """Remove the static Codex model catalog, if one is present.

    Safe to call repeatedly. Returns True only when this call actually
    deleted the file; returns False when there was nothing to delete or the
    deletion failed with an OSError.
    """
    removed = False
    if CODEX_MODEL_CATALOG_PATH.exists():
        try:
            CODEX_MODEL_CATALOG_PATH.unlink()
        except OSError:
            # Best-effort cleanup: report "not removed" instead of raising.
            removed = False
        else:
            removed = True
    return removed


def render_overlay(
workspace: str, model: str | None = None, databricks_profile: str | None = None
workspace: str,
model: str | None = None,
databricks_profile: str | None = None,
model_catalog_path: Path | None = None,
) -> dict:
overlay: dict = {"model_provider": CODEX_MODEL_PROVIDER_NAME}
if model:
overlay["model"] = model
if model_catalog_path is not None:
overlay["model_catalog_json"] = str(model_catalog_path)
overlay["model_providers"] = {
CODEX_MODEL_PROVIDER_NAME: _provider_block(workspace, databricks_profile),
}
Expand Down Expand Up @@ -255,6 +365,10 @@ def _openai_model_id(model: str | None) -> str | None:


def _codex_model_id(model: str | None) -> str | None:
    """Return the model id Codex should send for `model`.

    Two kinds of id pass through unchanged: UC model-services ids
    (`system.ai.*`), which route by name through the gateway and so must be
    sent verbatim, and ids listed in CODEX_OPENAI_ID_INCOMPATIBLE_MODELS.
    Everything else is rewritten by `_openai_model_id`.
    """
    is_model_services_id = bool(model) and model.startswith("system.ai.")
    # Short-circuit order matters: the prefix check is evaluated first, and
    # the incompatibility lookup only runs when it does not apply.
    if is_model_services_id or model in CODEX_OPENAI_ID_INCOMPATIBLE_MODELS:
        return model
    return _openai_model_id(model)
Expand All @@ -263,7 +377,12 @@ def _codex_model_id(model: str | None) -> str | None:
def _parse_gpt(model: str | None) -> tuple[int, int | None, int | None, str] | None:
if not model:
return None
match = _GPT_RE.fullmatch(model.split("/")[-1])
# Strip the UC model-services prefix so `system.ai.gpt-5` parses for version
# selection; the original id is preserved by callers that need it verbatim.
tail = model.split("/")[-1]
if tail.startswith("system.ai."):
tail = tail[len("system.ai.") :]
match = _GPT_RE.fullmatch(tail)
if not match:
return None
major, minor, patch, suffix = match.groups()
Expand Down Expand Up @@ -296,8 +415,19 @@ def write_tool_config(state: dict, model: str | None = None) -> dict:

_remove_legacy_ucode_profile()
backup_existing_file(CODEX_CONFIG_PATH, CODEX_BACKUP_PATH)
overlay = render_overlay(workspace, chosen_model, databricks_profile)
# Static catalog written iff the workspace is on UC model-services. When
# set, Codex switches to StaticModelsManager and never calls /v1/models —
# which is currently rejected by the AI Gateway for `system.ai.*` ids.
catalog_path = _write_model_catalog_file(state)
overlay = render_overlay(
workspace, chosen_model, databricks_profile, model_catalog_path=catalog_path
)
doc = read_toml_safe(CODEX_CONFIG_PATH)
# Strip a stale `model_catalog_json` if we're no longer in model-services
# mode — `deep_merge_dict` only adds/overwrites keys, it won't clear one
# that the new overlay omits.
if catalog_path is None:
doc.pop("model_catalog_json", None)
deep_merge_dict(doc, overlay)
write_toml_file(CODEX_CONFIG_PATH, doc)
state = mark_tool_managed(state, "codex", MANAGED_KEYS)
Expand Down
59 changes: 47 additions & 12 deletions src/ucode/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,18 @@
from ucode.agents import (
launch as launch_agent,
)
from ucode.agents.codex import revert_legacy_shared_config
from ucode.agents.codex import (
revert_legacy_shared_config,
revert_model_catalog_file,
)
from ucode.agents.pi import PI_SETTINGS_BACKUP_PATH, PI_SETTINGS_PATH
from ucode.config_io import restore_file, set_dry_run
from ucode.databricks import (
build_shared_base_urls,
discover_claude_models,
discover_codex_models,
discover_gemini_models,
discover_model_services,
ensure_ai_gateway_v2,
ensure_databricks_auth,
find_profile_name_for_host,
Expand All @@ -41,6 +45,7 @@
install_databricks_cli,
normalize_workspace_url,
run_databricks_login,
use_model_services,
)
from ucode.mcp import (
MCP_CLIENTS,
Expand Down Expand Up @@ -160,7 +165,13 @@ def configure_shared_state(
don't error out. If ``None``, we resolve it from the host after login.
"""
workspace = normalize_workspace_url(workspace)
previous_workspace = load_state().get("workspace")
prior_state = load_state()
previous_workspace = prior_state.get("workspace")
# The flag is sticky: an explicit env var wins, otherwise fall back to what
# was persisted when the workspace was configured. Without this, every
# launch re-runs discovery and a missing env var would silently revert a
# model-services workspace to the databricks-* gateway names.
model_services = use_model_services(default=bool(prior_state.get("use_model_services")))
fetch_all = tools is None
if force_login:
run_databricks_login(workspace, profile)
Expand All @@ -184,19 +195,29 @@ def configure_shared_state(
claude_reason: str | None = None
gemini_reason: str | None = None
codex_reason: str | None = None
with spinner("Fetching available models..."):
claude_models = {}
gemini_models = []
codex_models = []
if model_services:
# Opt-in: one UC model-services call yields all families as
# `system.ai.<model-name>` ids, bucketed by name. The single reason is
# shared across the families that were requested.
with spinner("Fetching available models (model services)..."):
ms_claude, ms_codex, ms_gemini, ms_reason = discover_model_services(workspace, token)
if want_claude:
claude_models, claude_reason = discover_claude_models(workspace, token)
else:
claude_models = {}
claude_models, claude_reason = ms_claude, ms_reason
if want_gemini:
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
else:
gemini_models = []
gemini_models, gemini_reason = ms_gemini, ms_reason
if want_codex:
codex_models, codex_reason = discover_codex_models(workspace, token)
else:
codex_models = []
codex_models, codex_reason = ms_codex, ms_reason
else:
with spinner("Fetching available models..."):
if want_claude:
claude_models, claude_reason = discover_claude_models(workspace, token)
if want_gemini:
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
if want_codex:
codex_models, codex_reason = discover_codex_models(workspace, token)
opencode_models: dict[str, list[str]] = {}
if claude_models:
opencode_models["anthropic"] = list(claude_models.values())
Expand All @@ -210,6 +231,9 @@ def configure_shared_state(
state["profile"] = profile
else:
state.pop("profile", None)
# Persist the resolved flag so subsequent launches stay on the same
# discovery path without the env var being re-exported.
state["use_model_services"] = model_services
state["base_urls"] = build_shared_base_urls(workspace)
if want_claude:
state["claude_models"] = claude_models
Expand Down Expand Up @@ -371,6 +395,12 @@ def status() -> int:
profile = state.get("profile")
if profile:
print_kv("CLI profile", profile)
print_kv(
"Model discovery",
"model-services (system.ai.*)"
if state.get("use_model_services")
else "ai-gateway (databricks-*)",
)

print_heading("Coding Agents")
for tool, spec in TOOL_SPECS.items():
Expand Down Expand Up @@ -444,6 +474,9 @@ def revert() -> int:
# Older Codex (< 0.134.0) had ucode edit the shared ~/.codex/config.toml in
# place; restoring the per-profile file above does not undo that.
legacy_codex_stripped = revert_legacy_shared_config()
# The static model catalog (only written when the workspace uses UC
# model-services) lives outside the toml backup, so clean it up here.
codex_catalog_removed = revert_model_catalog_file()
clear_state()

print_heading("Revert")
Expand All @@ -452,6 +485,8 @@ def revert() -> int:
print_kv(f"{spec['display']} config", "restored" if results[tool] else "unchanged")
if legacy_codex_stripped:
print_kv("Codex shared config", "ucode entries removed")
if codex_catalog_removed:
print_kv("Codex model catalog", "removed")
print_kv("Pi settings", "restored" if pi_settings_restored else "unchanged")
for client, spec in MCP_CLIENTS.items():
print_kv(
Expand Down
Loading
Loading