Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ async def chat_completions(
presence_penalty: float = 0,
top_p: float | None = 1,
top_k: int | None = None,
tools: list[ToolDefinition] | None = None,
tools: list[ToolDefinition | dict[str, Any]] | None = None,
tool_choice: ToolChoice | None = None,
response_format: dict[str, Any] | type[BaseModel] | None = None,
api_version: str = NORMALIZED_API_VERSION,
Expand Down Expand Up @@ -583,10 +583,15 @@ class Country(BaseModel):
# Use provided dictionary format directly
request_body["response_format"] = response_format

# Add tools if provided - convert to UiPath format
# Add tools if provided. A tool already in UiPath wire format (a dict) is
# passed through unchanged so callers can supply an arbitrary JSON schema
# for the parameters; ToolDefinition objects are converted as before.
if tools:
request_body["tools"] = [
self._convert_tool_to_uipath_format(tool) for tool in tools
tool
if isinstance(tool, dict)
else self._convert_tool_to_uipath_format(tool)
for tool in tools
]

# Handle tool_choice
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from uipath.platform.chat import (
AutoToolChoice,
ChatModels,
RequiredToolChoice,
SpecificToolChoice,
ToolDefinition,
ToolFunctionDefinition,
Expand Down Expand Up @@ -369,6 +370,87 @@ async def test_tool_call_required_mocked(self, mock_request, llm_service):
assert result.choices[0].message.tool_calls[0].arguments["name"] == "John"
assert result.choices[0].message.tool_calls[0].arguments["password"] == "1234"

@pytest.mark.asyncio
@patch.object(UiPathLlmChatService, "request_async")
async def test_raw_dict_tool_passthrough_mocked(self, mock_request, llm_service):
    """Check that a dict-shaped tool is placed in the request body unmodified.

    ToolDefinition's converter only emits flat properties, so callers needing
    an arbitrary nested JSON schema (e.g. the eval mockers) hand over the tool
    as a dict that is already in UiPath wire format. The request body must
    carry that dict as-is, including its nested array schema.
    """
    # Canned gateway reply: a single forced tool call with nested arguments.
    gateway_payload = {
        "id": "chatcmpl-raw",
        "object": "chat.completion",
        "created": 1677858242,
        "model": "gpt-4o-mini-2024-07-18",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [
                        {
                            "id": "call_raw",
                            "name": "submit_tool_response",
                            "arguments": {"response": {"items": [{"sku": "A1"}]}},
                        }
                    ],
                },
                "finish_reason": "tool_calls",
            }
        ],
        "usage": {
            "prompt_tokens": 10,
            "completion_tokens": 5,
            "total_tokens": 15,
            "cache_read_input_tokens": None,
        },
    }
    fake_reply = MagicMock()
    fake_reply.json.return_value = gateway_payload
    mock_request.return_value = fake_reply

    # Tool whose parameters nest an array of objects inside an object.
    sku_schema = {
        "type": "object",
        "properties": {"sku": {"type": "string"}},
    }
    items_schema = {"type": "array", "items": sku_schema}
    nested_tool = {
        "name": "submit_tool_response",
        "description": "Return the simulated response matching the schema.",
        "parameters": {
            "type": "object",
            "properties": {
                "response": {
                    "type": "object",
                    "properties": {"items": items_schema},
                }
            },
            "required": ["response"],
        },
    }

    result = await llm_service.chat_completions(
        messages=[{"role": "user", "content": "go"}],
        model=ChatModels.gpt_4_1_mini_2025_04_14,
        tools=[nested_tool],
        tool_choice=RequiredToolChoice(),
    )

    mock_request.assert_called_once()
    sent_body = mock_request.call_args.kwargs["json"]

    # The dict tool must arrive with its nested array schema intact.
    assert sent_body["tools"] == [nested_tool]
    assert sent_body["tool_choice"] == {"type": "required"}

    first_call = result.choices[0].message.tool_calls[0]
    assert first_call.arguments == {"response": {"items": [{"sku": "A1"}]}}

@pytest.mark.asyncio
@patch.object(UiPathLlmChatService, "request_async")
async def test_chat_with_conversation_history_mocked(
Expand Down
29 changes: 13 additions & 16 deletions packages/uipath/src/uipath/eval/mocks/_input_mocker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@
from uipath.platform import UiPath
from uipath.platform.chat import UiPathLlmChatService
from uipath.platform.chat._llm_gateway_service import ChatModels
from uipath.platform.chat.llm_gateway import RequiredToolChoice

from .._execution_context import eval_set_run_id_context
from ._mock_context import cache_manager_context
from ._mocker import UiPathInputMockingError
from ._structured_output import build_response_tool, extract_response
from ._types import (
InputMockingStrategy,
)
Expand Down Expand Up @@ -105,14 +107,13 @@ async def generate_llm_input(

prompt = get_input_mocking_prompt(**prompt_generation_args)

response_format = {
"type": "json_schema",
"json_schema": {
"name": "agent_input",
"strict": False,
"schema": input_schema,
},
}
# Request structured output via function calling so it works across all
# model providers (OpenAI, Claude/Bedrock, Gemini); response_format is only
# honored for OpenAI models on the normalized gateway.
response_tool = build_response_tool(
input_schema,
description="Return the simulated agent input matching the required schema.",
)

model_parameters = mocking_strategy.model if mocking_strategy else None
completion_kwargs = (
Expand All @@ -128,7 +129,7 @@ async def generate_llm_input(

if cache_manager is not None:
cache_key_data = {
"response_format": response_format,
"response_tool": response_tool,
"completion_kwargs": completion_kwargs,
"prompt_generation_args": prompt_generation_args,
}
Expand All @@ -144,12 +145,12 @@ async def generate_llm_input(

response = await llm.chat_completions(
[{"role": "user", "content": prompt}],
response_format=response_format,
tools=[response_tool],
tool_choice=RequiredToolChoice(),
**completion_kwargs,
)

generated_input_str = response.choices[0].message.content
result = json.loads(generated_input_str)
result = extract_response(response)

if cache_manager is not None:
cache_manager.set(
Expand All @@ -160,10 +161,6 @@ async def generate_llm_input(
)

return result
except json.JSONDecodeError as e:
raise UiPathInputMockingError(
f"Failed to parse LLM response as JSON: {str(e)}"
) from e
except UiPathInputMockingError:
raise
except Exception as e:
Expand Down
29 changes: 17 additions & 12 deletions packages/uipath/src/uipath/eval/mocks/_llm_mocker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from uipath.platform import UiPath
from uipath.platform.chat import UiPathLlmChatService
from uipath.platform.chat._llm_gateway_service import ChatModels, _cleanup_schema
from uipath.platform.chat.llm_gateway import RequiredToolChoice

from .._execution_context import (
eval_set_run_id_context,
Expand All @@ -28,6 +29,7 @@
UiPathMockResponseGenerationError,
UiPathNoMockFoundError,
)
from ._structured_output import build_response_tool, extract_response
from ._types import (
ExampleCall,
LLMMockingStrategy,
Expand Down Expand Up @@ -125,14 +127,16 @@ async def response(
"output_schema", TypeAdapter(return_type).json_schema()
)

response_format = {
"type": "json_schema",
"json_schema": {
"name": "OutputSchema",
"strict": False,
"schema": _cleanup_schema(output_schema),
},
}
# Request structured output via function calling so it works across
# all model providers (OpenAI, Claude/Bedrock, Gemini); response_format
# is only honored for OpenAI models on the normalized gateway.
response_tool = build_response_tool(
_cleanup_schema(output_schema),
description=(
"Return the simulated response for tool "
f"'{function_name}' matching the required schema."
),
)
try:
# Safely pull examples from params.
example_calls = params.get("example_calls", [])
Expand Down Expand Up @@ -197,7 +201,7 @@ async def response(
formatted_prompt = PROMPT.format(**prompt_generation_args)

cache_key_data = {
"response_format": response_format,
"response_tool": response_tool,
"completion_kwargs": completion_kwargs,
"prompt_generation_args": prompt_generation_args,
}
Expand All @@ -220,10 +224,11 @@ async def response(
"content": formatted_prompt,
},
],
response_format=response_format,
tools=[response_tool],
tool_choice=RequiredToolChoice(),
**completion_kwargs,
)
result = json.loads(response.choices[0].message.content)
result = extract_response(response)

if cache_manager is not None:
cache_manager.set(
Expand All @@ -235,7 +240,7 @@ async def response(

return result
except Exception as e:
raise UiPathMockResponseGenerationError() from e
raise UiPathMockResponseGenerationError(str(e)) from e
else:
raise UiPathNoMockFoundError(f"Method '{function_name}' is not simulated.")

Expand Down
70 changes: 70 additions & 0 deletions packages/uipath/src/uipath/eval/mocks/_structured_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Provider-agnostic structured output via LLM function calling.

The normalized LLM Gateway honors OpenAI-style ``response_format`` (json_schema)
only for OpenAI models. Non-OpenAI providers (Anthropic/Claude via Bedrock,
Gemini) return such requests with ``choices[0].message.content`` empty/None,
which breaks JSON parsing. Function calling is honored across all providers, so
the mockers request structured output as a forced tool call and read the result
from the tool call's parsed arguments.
"""

from typing import Any

RESPONSE_TOOL_NAME = "submit_tool_response"
RESPONSE_KEY = "response"


def build_response_tool(schema: dict[str, Any], description: str) -> dict[str, Any]:
    """Build a normalized-API function tool that wraps ``schema`` under ``response``.

    Tool-call arguments are always a JSON object, so an arbitrary output schema
    (which may be a scalar, array, or object) is nested under a single
    ``response`` property and unwrapped after the call.

    Schemas with shared sub-schemas carry a root-level definitions table that
    is referenced by ``$ref`` values such as ``#/$defs/Item`` (or, with the
    older keyword, ``#/definitions/Item``). Those ``$ref`` paths resolve from
    the parameters root, so the table is hoisted there instead of being buried
    under ``response`` (which would leave the references dangling).

    Args:
        schema: JSON schema describing the value the model must return. It is
            shallow-copied; the caller's dict is not modified.
        description: Human-readable description attached to the tool.

    Returns:
        A dict with ``name``, ``description`` and ``parameters`` keys, where
        ``parameters`` is an object schema requiring a single ``response``
        property.
    """
    # Shallow copy so that popping the definitions never mutates the input.
    response_schema = dict(schema)
    parameters: dict[str, Any] = {
        "type": "object",
        "properties": {RESPONSE_KEY: response_schema},
        "required": [RESPONSE_KEY],
    }

    # Hoist both the current ("$defs") and the legacy ("definitions") keyword:
    # either one may be the target of root-relative "$ref" pointers.
    for defs_keyword in ("$defs", "definitions"):
        defs = response_schema.pop(defs_keyword, None)
        if defs is not None:
            parameters[defs_keyword] = defs

    return {
        "name": RESPONSE_TOOL_NAME,
        "description": description,
        "parameters": parameters,
    }


def extract_response(response: Any) -> Any:
    """Extract the wrapped value from the forced tool call.

    Only the first choice and its first tool call are inspected.

    Args:
        response: Chat completion result exposing ``choices``; the first
            choice's ``message`` is expected to carry ``tool_calls`` whose
            ``arguments`` have already been parsed into a dict.

    Returns:
        The value stored under the ``response`` key of the tool call arguments.

    Raises:
        ValueError: if the response carries no usable tool call (no choices,
            no tool calls, or arguments that are not a JSON object) or is
            missing the wrapped ``response`` key.
    """
    choices = getattr(response, "choices", None)
    if not choices:
        raise ValueError("LLM response contained no choices")

    message = choices[0].message
    tool_calls = getattr(message, "tool_calls", None)
    if not tool_calls:
        raise ValueError(
            f"LLM response contained no tool calls (content={message.content!r})"
        )

    arguments = tool_calls[0].arguments
    # Guard before the membership test: on a str, ``in`` is a substring check
    # that can succeed and then fail on indexing with an opaque TypeError, and
    # on None it raises TypeError outright. Both are reported as ValueError.
    if not isinstance(arguments, dict):
        raise ValueError(
            "Tool call arguments are not a JSON object "
            f"(got {type(arguments).__name__}): {arguments!r}"
        )
    if RESPONSE_KEY not in arguments:
        raise ValueError(
            f"Tool call arguments missing '{RESPONSE_KEY}' key: {arguments}"
        )

    return arguments[RESPONSE_KEY]
26 changes: 23 additions & 3 deletions packages/uipath/tests/cli/eval/mocks/test_input_mocker.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,18 @@ async def test_generate_llm_input_with_model_settings(
"index": 0,
"message": {
"role": "assistant",
"content": '{"query": "Calculate 5 times 7"}',
"tool_calls": None,
"content": None,
"tool_calls": [
{
"id": "call_1",
"name": "submit_tool_response",
"arguments": {
"response": {"query": "Calculate 5 times 7"}
},
}
],
},
"finish_reason": "stop",
"finish_reason": "tool_calls",
}
],
"usage": {
Expand All @@ -112,3 +120,15 @@ async def test_generate_llm_input_with_model_settings(
assert len(chat_completion_requests) == 1, (
"Expected exactly one chat completion request"
)

# Structured output is requested via function calling (provider-agnostic),
# not via response_format which the gateway only honors for OpenAI models.
import json

body = json.loads(chat_completion_requests[0].content.decode("utf-8"))
assert "response_format" not in body
assert body["tool_choice"] == {"type": "required"}
tools = body["tools"]
assert len(tools) == 1
assert tools[0]["name"] == "submit_tool_response"
assert tools[0]["parameters"]["properties"]["response"] == input_schema
Loading
Loading