diff --git a/packages/uipath-platform/src/uipath/platform/chat/_llm_gateway_service.py b/packages/uipath-platform/src/uipath/platform/chat/_llm_gateway_service.py index ffe0bff99..bc89fd82a 100644 --- a/packages/uipath-platform/src/uipath/platform/chat/_llm_gateway_service.py +++ b/packages/uipath-platform/src/uipath/platform/chat/_llm_gateway_service.py @@ -401,7 +401,7 @@ async def chat_completions( presence_penalty: float = 0, top_p: float | None = 1, top_k: int | None = None, - tools: list[ToolDefinition] | None = None, + tools: list[ToolDefinition | dict[str, Any]] | None = None, tool_choice: ToolChoice | None = None, response_format: dict[str, Any] | type[BaseModel] | None = None, api_version: str = NORMALIZED_API_VERSION, @@ -583,10 +583,15 @@ class Country(BaseModel): # Use provided dictionary format directly request_body["response_format"] = response_format - # Add tools if provided - convert to UiPath format + # Add tools if provided. A tool already in UiPath wire format (a dict) is + # passed through unchanged so callers can supply an arbitrary JSON schema + # for the parameters; ToolDefinition objects are converted as before. if tools: request_body["tools"] = [ - self._convert_tool_to_uipath_format(tool) for tool in tools + tool + if isinstance(tool, dict) + else self._convert_tool_to_uipath_format(tool) + for tool in tools ] # Handle tool_choice diff --git a/packages/uipath-platform/tests/services/test_uipath_llm_integration.py b/packages/uipath-platform/tests/services/test_uipath_llm_integration.py index 124ccad8b..9e2292c60 100644 --- a/packages/uipath-platform/tests/services/test_uipath_llm_integration.py +++ b/packages/uipath-platform/tests/services/test_uipath_llm_integration.py @@ -7,6 +7,7 @@ from uipath.platform.chat import ( AutoToolChoice, ChatModels, + RequiredToolChoice, SpecificToolChoice, ToolDefinition, ToolFunctionDefinition, @@ -369,6 +370,87 @@ async def test_tool_call_required_mocked(self, mock_request, llm_service): assert result.choices[0].message.tool_calls[0].arguments["name"] == "John" assert result.choices[0].message.tool_calls[0].arguments["password"] == "1234" + @pytest.mark.asyncio + @patch.object(UiPathLlmChatService, "request_async") + async def test_raw_dict_tool_passthrough_mocked(self, mock_request, llm_service): + """A tool supplied as a raw dict is sent unchanged, preserving nested schema. + + ToolDefinition's converter only emits flat properties, so callers that need + an arbitrary nested JSON schema (e.g. the eval mockers) pass the tool as a + dict already in UiPath wire format. It must reach the gateway verbatim. + """ + mock_response = MagicMock() + mock_response.json.return_value = { + "id": "chatcmpl-raw", + "object": "chat.completion", + "created": 1677858242, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_raw", + "name": "submit_tool_response", + "arguments": {"response": {"items": [{"sku": "A1"}]}}, + } + ], + }, + "finish_reason": "tool_calls", + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + "cache_read_input_tokens": None, + }, + } + mock_request.return_value = mock_response + + nested_tool = { + "name": "submit_tool_response", + "description": "Return the simulated response matching the schema.", + "parameters": { + "type": "object", + "properties": { + "response": { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": { + "type": "object", + "properties": {"sku": {"type": "string"}}, + }, + } + }, + } + }, + "required": ["response"], + }, + } + + result = await llm_service.chat_completions( + messages=[{"role": "user", "content": "go"}], + model=ChatModels.gpt_4_1_mini_2025_04_14, + tools=[nested_tool], + tool_choice=RequiredToolChoice(), + ) + + mock_request.assert_called_once() + _, kwargs = mock_request.call_args + body = kwargs["json"] + # The dict tool is forwarded byte-for-byte, nested array schema intact. + assert body["tools"] == [nested_tool] + assert body["tool_choice"] == {"type": "required"} + assert result.choices[0].message.tool_calls[0].arguments == { + "response": {"items": [{"sku": "A1"}]} + } + @pytest.mark.asyncio @patch.object(UiPathLlmChatService, "request_async") async def test_chat_with_conversation_history_mocked( diff --git a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py index 57a727ec1..3c4daac51 100644 --- a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py @@ -11,10 +11,12 @@ from uipath.platform import UiPath from uipath.platform.chat import UiPathLlmChatService from uipath.platform.chat._llm_gateway_service import ChatModels +from uipath.platform.chat.llm_gateway import RequiredToolChoice from .._execution_context import eval_set_run_id_context from ._mock_context import cache_manager_context from ._mocker import UiPathInputMockingError +from ._structured_output import build_response_tool, extract_response from ._types import ( InputMockingStrategy, ) @@ -105,14 +107,13 @@ async def generate_llm_input( prompt = get_input_mocking_prompt(**prompt_generation_args) - response_format = { - "type": "json_schema", - "json_schema": { - "name": "agent_input", - "strict": False, - "schema": input_schema, - }, - } + # Request structured output via function calling so it works across all + # model providers (OpenAI, Claude/Bedrock, Gemini); response_format is only + # honored for OpenAI models on the normalized gateway. + response_tool = build_response_tool( + input_schema, + description="Return the simulated agent input matching the required schema.", + ) model_parameters = mocking_strategy.model if mocking_strategy else None completion_kwargs = ( @@ -128,7 +129,7 @@ async def generate_llm_input( if cache_manager is not None: cache_key_data = { - "response_format": response_format, + "response_tool": response_tool, "completion_kwargs": completion_kwargs, "prompt_generation_args": prompt_generation_args, } @@ -144,12 +145,12 @@ async def generate_llm_input( response = await llm.chat_completions( [{"role": "user", "content": prompt}], - response_format=response_format, + tools=[response_tool], + tool_choice=RequiredToolChoice(), **completion_kwargs, ) - generated_input_str = response.choices[0].message.content - result = json.loads(generated_input_str) + result = extract_response(response) if cache_manager is not None: cache_manager.set( @@ -160,10 +161,6 @@ async def generate_llm_input( ) return result - except json.JSONDecodeError as e: - raise UiPathInputMockingError( - f"Failed to parse LLM response as JSON: {str(e)}" - ) from e except UiPathInputMockingError: raise except Exception as e: diff --git a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py index d1fd2a1c9..ce932da11 100644 --- a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py @@ -11,6 +11,7 @@ from uipath.platform import UiPath from uipath.platform.chat import UiPathLlmChatService from uipath.platform.chat._llm_gateway_service import ChatModels, _cleanup_schema +from uipath.platform.chat.llm_gateway import RequiredToolChoice from .._execution_context import ( eval_set_run_id_context, @@ -28,6 +29,7 @@ UiPathMockResponseGenerationError, UiPathNoMockFoundError, ) +from ._structured_output import build_response_tool, extract_response from ._types import ( ExampleCall, LLMMockingStrategy, @@ -125,14 +127,16 @@ async def response( "output_schema", TypeAdapter(return_type).json_schema() ) - response_format = { - "type": "json_schema", - "json_schema": { - "name": "OutputSchema", - "strict": False, - "schema": _cleanup_schema(output_schema), - }, - } + # Request structured output via function calling so it works across + # all model providers (OpenAI, Claude/Bedrock, Gemini); response_format + # is only honored for OpenAI models on the normalized gateway. + response_tool = build_response_tool( + _cleanup_schema(output_schema), + description=( + "Return the simulated response for tool " + f"'{function_name}' matching the required schema." + ), + ) try: # Safely pull examples from params. example_calls = params.get("example_calls", []) @@ -197,7 +201,7 @@ async def response( formatted_prompt = PROMPT.format(**prompt_generation_args) cache_key_data = { - "response_format": response_format, + "response_tool": response_tool, "completion_kwargs": completion_kwargs, "prompt_generation_args": prompt_generation_args, } @@ -220,10 +224,11 @@ async def response( "content": formatted_prompt, }, ], - response_format=response_format, + tools=[response_tool], + tool_choice=RequiredToolChoice(), **completion_kwargs, ) - result = json.loads(response.choices[0].message.content) + result = extract_response(response) if cache_manager is not None: cache_manager.set( @@ -235,7 +240,7 @@ async def response( return result except Exception as e: - raise UiPathMockResponseGenerationError() from e + raise UiPathMockResponseGenerationError(str(e)) from e else: raise UiPathNoMockFoundError(f"Method '{function_name}' is not simulated.") diff --git a/packages/uipath/src/uipath/eval/mocks/_structured_output.py b/packages/uipath/src/uipath/eval/mocks/_structured_output.py new file mode 100644 index 000000000..424935190 --- /dev/null +++ b/packages/uipath/src/uipath/eval/mocks/_structured_output.py @@ -0,0 +1,70 @@ +"""Provider-agnostic structured output via LLM function calling. + +The normalized LLM Gateway honors OpenAI-style ``response_format`` (json_schema) +only for OpenAI models. Non-OpenAI providers (Anthropic/Claude via Bedrock, +Gemini) return such requests with ``choices[0].message.content`` empty/None, +which breaks JSON parsing. Function calling is honored across all providers, so +the mockers request structured output as a forced tool call and read the result +from the tool call's parsed arguments. +""" + +from typing import Any + +RESPONSE_TOOL_NAME = "submit_tool_response" +RESPONSE_KEY = "response" + + +def build_response_tool(schema: dict[str, Any], description: str) -> dict[str, Any]: + """Build a normalized-API function tool that wraps ``schema`` under ``response``. + + Tool-call arguments are always a JSON object, so an arbitrary output schema + (which may be a scalar, array, or object) is nested under a single + ``response`` property and unwrapped after the call. + + Schemas from nested Pydantic models carry root ``$defs`` referenced by + ``$ref`` values like ``#/$defs/Item``. Those ``$ref`` paths resolve from the + parameters root, so ``$defs`` is hoisted there instead of being buried under + ``response`` (which would leave the references dangling). + """ + response_schema = dict(schema) + parameters: dict[str, Any] = { + "type": "object", + "properties": {RESPONSE_KEY: response_schema}, + "required": [RESPONSE_KEY], + } + defs = response_schema.pop("$defs", None) + if defs is not None: + parameters["$defs"] = defs + + return { + "name": RESPONSE_TOOL_NAME, + "description": description, + "parameters": parameters, + } + + +def extract_response(response: Any) -> Any: + """Extract the wrapped value from the forced tool call. + + Raises: + ValueError: if the response carries no usable tool call or is missing the + wrapped ``response`` key. + """ + choices = getattr(response, "choices", None) + if not choices: + raise ValueError("LLM response contained no choices") + + message = choices[0].message + tool_calls = getattr(message, "tool_calls", None) + if not tool_calls: + raise ValueError( + f"LLM response contained no tool calls (content={message.content!r})" + ) + + arguments = tool_calls[0].arguments + if RESPONSE_KEY not in arguments: + raise ValueError( + f"Tool call arguments missing '{RESPONSE_KEY}' key: {arguments}" + ) + + return arguments[RESPONSE_KEY] diff --git a/packages/uipath/tests/cli/eval/mocks/test_input_mocker.py b/packages/uipath/tests/cli/eval/mocks/test_input_mocker.py index 72b3765df..181a14b6a 100644 --- a/packages/uipath/tests/cli/eval/mocks/test_input_mocker.py +++ b/packages/uipath/tests/cli/eval/mocks/test_input_mocker.py @@ -83,10 +83,18 @@ async def test_generate_llm_input_with_model_settings( "index": 0, "message": { "role": "assistant", - "content": '{"query": "Calculate 5 times 7"}', - "tool_calls": None, + "content": None, + "tool_calls": [ + { + "id": "call_1", + "name": "submit_tool_response", + "arguments": { + "response": {"query": "Calculate 5 times 7"} + }, + } + ], }, - "finish_reason": "stop", + "finish_reason": "tool_calls", } ], "usage": { @@ -112,3 +120,15 @@ async def test_generate_llm_input_with_model_settings( assert len(chat_completion_requests) == 1, ( "Expected exactly one chat completion request" ) + + # Structured output is requested via function calling (provider-agnostic), + # not via response_format which the gateway only honors for OpenAI models. + import json + + body = json.loads(chat_completion_requests[0].content.decode("utf-8")) + assert "response_format" not in body + assert body["tool_choice"] == {"type": "required"} + tools = body["tools"] + assert len(tools) == 1 + assert tools[0]["name"] == "submit_tool_response" + assert tools[0]["parameters"]["properties"]["response"] == input_schema diff --git a/packages/uipath/tests/cli/eval/mocks/test_input_mocker_span.py b/packages/uipath/tests/cli/eval/mocks/test_input_mocker_span.py index 19a432fef..aeffa2d1e 100644 --- a/packages/uipath/tests/cli/eval/mocks/test_input_mocker_span.py +++ b/packages/uipath/tests/cli/eval/mocks/test_input_mocker_span.py @@ -57,10 +57,21 @@ async def test_simulate_input_span_attributes(httpx_mock: HTTPXMock, monkeypatch "index": 0, "message": { "role": "assistant", - "content": '{"name": "Alice", "greeting_style": "formal"}', - "tool_calls": None, + "content": None, + "tool_calls": [ + { + "id": "call_1", + "name": "submit_tool_response", + "arguments": { + "response": { + "name": "Alice", + "greeting_style": "formal", + } + }, + } + ], }, - "finish_reason": "stop", + "finish_reason": "tool_calls", } ], "usage": { @@ -199,10 +210,17 @@ async def test_simulate_input_span_on_error(httpx_mock: HTTPXMock, monkeypatch): "index": 0, "message": { "role": "assistant", - "content": "invalid json{{{", # Invalid JSON - "tool_calls": None, + # Malformed: tool call is missing the wrapped "response" key + "content": None, + "tool_calls": [ + { + "id": "call_1", + "name": "submit_tool_response", + "arguments": {}, + } + ], }, - "finish_reason": "stop", + "finish_reason": "tool_calls", } ], "usage": { diff --git a/packages/uipath/tests/cli/eval/mocks/test_mocks.py b/packages/uipath/tests/cli/eval/mocks/test_mocks.py index c4bc26ee3..521871d5f 100644 --- a/packages/uipath/tests/cli/eval/mocks/test_mocks.py +++ b/packages/uipath/tests/cli/eval/mocks/test_mocks.py @@ -569,11 +569,17 @@ def foofoo(*args, **kwargs): { "index": 0, "message": { - "role": "ai", - "content": '"bar1"', - "tool_calls": None, + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "name": "submit_tool_response", + "arguments": {"response": "bar1"}, + } + ], }, - "finish_reason": "EOS", + "finish_reason": "tool_calls", } ], "usage": { @@ -599,14 +605,13 @@ def foofoo(*args, **kwargs): mock_request = httpx_mock.get_request(method="POST") assert mock_request request = json.loads(mock_request.content.decode("utf-8")) - assert request["response_format"] == { - "type": "json_schema", - "json_schema": { - "name": "OutputSchema", - "strict": False, - "schema": {"type": "string"}, - }, - } + assert "response_format" not in request + assert request["tool_choice"] == {"type": "required"} + tools = request["tools"] + assert len(tools) == 1 + assert tools[0]["name"] == "submit_tool_response" + assert tools[0]["parameters"]["properties"]["response"] == {"type": "string"} + assert tools[0]["parameters"]["required"] == ["response"] with pytest.raises(NotImplementedError): assert foofoo() @@ -678,11 +683,17 @@ async def foofoo(*args, **kwargs): { "index": 0, "message": { - "role": "ai", - "content": '"bar1"', - "tool_calls": None, + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "name": "submit_tool_response", + "arguments": {"response": "bar1"}, + } + ], }, - "finish_reason": "EOS", + "finish_reason": "tool_calls", } ], "usage": { @@ -708,14 +719,13 @@ async def foofoo(*args, **kwargs): mock_request = httpx_mock.get_request() assert mock_request request = json.loads(mock_request.content.decode("utf-8")) - assert request["response_format"] == { - "type": "json_schema", - "json_schema": { - "name": "OutputSchema", - "strict": False, - "schema": {"type": "string"}, - }, - } + assert "response_format" not in request + assert request["tool_choice"] == {"type": "required"} + tools = request["tools"] + assert len(tools) == 1 + assert tools[0]["name"] == "submit_tool_response" + assert tools[0]["parameters"]["properties"]["response"] == {"type": "string"} + assert tools[0]["parameters"]["required"] == ["response"] with pytest.raises(NotImplementedError): assert await foofoo() @@ -786,11 +796,17 @@ def foo(*args, **kwargs) -> dict[str, Any]: { "index": 0, "message": { - "role": "ai", - "content": '{"content": "bar1"}', - "tool_calls": None, + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "name": "submit_tool_response", + "arguments": {"response": {"content": "bar1"}}, + } + ], }, - "finish_reason": "EOS", + "finish_reason": "tool_calls", } ], "usage": { @@ -815,19 +831,18 @@ def foo(*args, **kwargs) -> dict[str, Any]: mock_request = httpx_mock.get_request() assert mock_request request = json.loads(mock_request.content.decode("utf-8")) - assert request["response_format"] == { - "type": "json_schema", - "json_schema": { - "name": "OutputSchema", - "strict": False, - "schema": { - "required": ["content"], - "type": "object", - "additionalProperties": False, - "properties": {"content": {"type": "string"}}, - }, - }, + assert "response_format" not in request + assert request["tool_choice"] == {"type": "required"} + tools = request["tools"] + assert len(tools) == 1 + assert tools[0]["name"] == "submit_tool_response" + assert tools[0]["parameters"]["properties"]["response"] == { + "required": ["content"], + "type": "object", + "additionalProperties": False, + "properties": {"content": {"type": "string"}}, } + assert tools[0]["parameters"]["required"] == ["response"] @pytest.mark.asyncio @@ -887,11 +902,17 @@ async def foo(*args, **kwargs) -> dict[str, Any]: { "index": 0, "message": { - "role": "ai", - "content": '{"content": "bar1"}', - "tool_calls": None, + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "name": "submit_tool_response", + "arguments": {"response": {"content": "bar1"}}, + } + ], }, - "finish_reason": "EOS", + "finish_reason": "tool_calls", } ], "usage": { @@ -916,19 +937,136 @@ async def foo(*args, **kwargs) -> dict[str, Any]: mock_request = httpx_mock.get_request() assert mock_request request = json.loads(mock_request.content.decode("utf-8")) - assert request["response_format"] == { - "type": "json_schema", - "json_schema": { - "name": "OutputSchema", - "strict": False, - "schema": { - "required": ["content"], - "type": "object", - "additionalProperties": False, - "properties": {"content": {"type": "string"}}, - }, + assert "response_format" not in request + assert request["tool_choice"] == {"type": "required"} + tools = request["tools"] + assert len(tools) == 1 + assert tools[0]["name"] == "submit_tool_response" + assert tools[0]["parameters"]["properties"]["response"] == { + "required": ["content"], + "type": "object", + "additionalProperties": False, + "properties": {"content": {"type": "string"}}, + } + assert tools[0]["parameters"]["required"] == ["response"] + + +@pytest.mark.parametrize( + "model", + [ + "gpt-4.1-mini-2025-04-14", + "anthropic.claude-sonnet-4-5-20250929-v1:0", + "gemini-2.5-pro", + ], +) +@pytest.mark.asyncio +@pytest.mark.httpx_mock(assert_all_responses_were_requested=False) +async def test_llm_mockable_structured_output_via_tool_call( + model: str, httpx_mock: HTTPXMock, monkeypatch: MonkeyPatch +): + """Tool simulation must work for all model providers (AE-1646). + + The mocker requests structured output via function calling and reads the + result from the forced tool call's arguments, so it does not depend on the + OpenAI-only ``choices[0].message.content`` shape. Non-OpenAI providers + (Claude/Bedrock, Gemini) return structured output through ``tool_calls`` with + ``content`` set to ``None``; that must not raise. + """ + monkeypatch.setenv("UIPATH_URL", "https://example.com") + monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "1234567890") + monkeypatch.setattr(CacheManager, "get", lambda *args, **kwargs: None) + monkeypatch.setattr(CacheManager, "set", lambda *args, **kwargs: None) + + @mockable() + async def foo(*args, **kwargs) -> str: + raise NotImplementedError() + + evaluation_item: dict[str, Any] = { + "id": "evaluation-id", + "name": "Mock foo", + "inputs": {}, + "evaluationCriterias": { + "ExactMatchEvaluator": None, + }, + "mockingStrategy": { + "type": "llm", + "prompt": "response is 'bar1'", + "toolsToSimulate": [{"name": "foo"}], + "model": {"model": model}, }, } + evaluation = EvaluationItem(**evaluation_item) + assert isinstance(evaluation.mocking_strategy, LLMMockingStrategy) + httpx_mock.add_response( + url="https://example.com/agenthub_/llm/api/capabilities", + status_code=200, + json={}, + ) + httpx_mock.add_response( + url="https://example.com/orchestrator_/llm/api/capabilities", + status_code=200, + json={}, + ) + + httpx_mock.add_response( + url="https://example.com/llm/api/chat/completions" + "?api-version=2024-08-01-preview", + status_code=200, + json={ + "id": "response-id", + "object": "", + "created": 0, + "model": model, + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "name": "submit_tool_response", + "arguments": {"response": "bar1"}, + } + ], + }, + "finish_reason": "tool_calls", + } + ], + "usage": { + "prompt_tokens": 1, + "completion_tokens": 1, + "total_tokens": 2, + }, + }, + ) + + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) + + assert await foo() == "bar1" + + mock_request = httpx_mock.get_request(method="POST") + assert mock_request + request = json.loads(mock_request.content.decode("utf-8")) + # Structured output is requested via function calling, not response_format, + # so it works across all providers. + assert "response_format" not in request + assert request["tool_choice"] == {"type": "required"} + assert mock_request.headers["X-UiPath-LlmGateway-NormalizedApi-ModelName"] == model + tools = request["tools"] + assert len(tools) == 1 + assert tools[0]["name"] == "submit_tool_response" + assert tools[0]["parameters"]["properties"]["response"] == {"type": "string"} + assert tools[0]["parameters"]["required"] == ["response"] class TestUiPathMockRuntime: diff --git a/packages/uipath/tests/cli/eval/mocks/test_structured_output.py b/packages/uipath/tests/cli/eval/mocks/test_structured_output.py new file mode 100644 index 000000000..5cb0fc1fb --- /dev/null +++ b/packages/uipath/tests/cli/eval/mocks/test_structured_output.py @@ -0,0 +1,73 @@ +"""Unit tests for the provider-agnostic structured-output helpers.""" + +from types import SimpleNamespace + +import pytest + +from uipath.eval.mocks._structured_output import ( + RESPONSE_KEY, + RESPONSE_TOOL_NAME, + build_response_tool, + extract_response, +) + + +def _response(message: SimpleNamespace | None) -> SimpleNamespace: + choices = [] if message is None else [SimpleNamespace(message=message)] + return SimpleNamespace(choices=choices) + + +def test_build_response_tool_wraps_schema_under_response(): + tool = build_response_tool({"type": "string"}, description="desc") + assert tool["name"] == RESPONSE_TOOL_NAME + assert tool["description"] == "desc" + assert tool["parameters"]["properties"][RESPONSE_KEY] == {"type": "string"} + assert tool["parameters"]["required"] == [RESPONSE_KEY] + + +def test_build_response_tool_hoists_defs_to_root(): + # Nested Pydantic models emit root $defs + $ref. Wrapping the schema under + # "response" must hoist $defs to the tool-parameters root so "#/$defs/Item" + # still resolves; otherwise nested-model schemas are invalid. + item_def = {"type": "object", "properties": {"sku": {"type": "string"}}} + schema = { + "type": "object", + "properties": {"items": {"type": "array", "items": {"$ref": "#/$defs/Item"}}}, + "$defs": {"Item": item_def}, + } + + tool = build_response_tool(schema, description="d") + params = tool["parameters"] + + assert params["$defs"] == {"Item": item_def} + assert "$defs" not in params["properties"][RESPONSE_KEY] + # the caller's schema dict is not mutated + assert "$defs" in schema + + +def test_extract_response_returns_wrapped_value(): + message = SimpleNamespace( + content=None, + tool_calls=[SimpleNamespace(arguments={RESPONSE_KEY: {"a": 1}})], + ) + assert extract_response(_response(message)) == {"a": 1} + + +def test_extract_response_raises_when_no_choices(): + with pytest.raises(ValueError, match="no choices"): + extract_response(_response(None)) + + +def test_extract_response_raises_when_no_tool_calls(): + # Non-OpenAI text response without a tool call: surface a clear error. + message = SimpleNamespace(content="not a tool call", tool_calls=None) + with pytest.raises(ValueError, match="no tool calls"): + extract_response(_response(message)) + + +def test_extract_response_raises_when_response_key_missing(): + message = SimpleNamespace( + content=None, tool_calls=[SimpleNamespace(arguments={"other": 1})] + ) + with pytest.raises(ValueError, match=RESPONSE_KEY): + extract_response(_response(message))