diff --git a/packages/uipath-platform/pyproject.toml b/packages/uipath-platform/pyproject.toml
index 215882460..dfc759f4d 100644
--- a/packages/uipath-platform/pyproject.toml
+++ b/packages/uipath-platform/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uipath-platform"
-version = "0.1.59"
+version = "0.1.60"
 description = "HTTP client library for programmatic access to UiPath Platform"
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"
diff --git a/packages/uipath-platform/src/uipath/platform/chat/_llm_gateway_service.py b/packages/uipath-platform/src/uipath/platform/chat/_llm_gateway_service.py
index ffe0bff99..bc89fd82a 100644
--- a/packages/uipath-platform/src/uipath/platform/chat/_llm_gateway_service.py
+++ b/packages/uipath-platform/src/uipath/platform/chat/_llm_gateway_service.py
@@ -401,7 +401,7 @@ async def chat_completions(
         presence_penalty: float = 0,
         top_p: float | None = 1,
         top_k: int | None = None,
-        tools: list[ToolDefinition] | None = None,
+        tools: list[ToolDefinition | dict[str, Any]] | None = None,
         tool_choice: ToolChoice | None = None,
         response_format: dict[str, Any] | type[BaseModel] | None = None,
         api_version: str = NORMALIZED_API_VERSION,
@@ -583,10 +583,15 @@ class Country(BaseModel):
                 # Use provided dictionary format directly
                 request_body["response_format"] = response_format
 
-        # Add tools if provided - convert to UiPath format
+        # Add tools if provided. A tool already in UiPath wire format (a dict) is
+        # passed through unchanged so callers can supply an arbitrary JSON schema
+        # for the parameters; ToolDefinition objects are converted as before.
         if tools:
             request_body["tools"] = [
-                self._convert_tool_to_uipath_format(tool) for tool in tools
+                tool
+                if isinstance(tool, dict)
+                else self._convert_tool_to_uipath_format(tool)
+                for tool in tools
             ]
 
         # Handle tool_choice
diff --git a/packages/uipath-platform/tests/services/test_uipath_llm_integration.py b/packages/uipath-platform/tests/services/test_uipath_llm_integration.py
index 124ccad8b..9e2292c60 100644
--- a/packages/uipath-platform/tests/services/test_uipath_llm_integration.py
+++ b/packages/uipath-platform/tests/services/test_uipath_llm_integration.py
@@ -7,6 +7,7 @@
 from uipath.platform.chat import (
     AutoToolChoice,
     ChatModels,
+    RequiredToolChoice,
     SpecificToolChoice,
     ToolDefinition,
     ToolFunctionDefinition,
@@ -369,6 +370,87 @@ async def test_tool_call_required_mocked(self, mock_request, llm_service):
         assert result.choices[0].message.tool_calls[0].arguments["name"] == "John"
         assert result.choices[0].message.tool_calls[0].arguments["password"] == "1234"
 
+    @pytest.mark.asyncio
+    @patch.object(UiPathLlmChatService, "request_async")
+    async def test_raw_dict_tool_passthrough_mocked(self, mock_request, llm_service):
+        """Dict tools skip conversion and reach the gateway with nesting intact.
+
+        The ToolDefinition converter emits flat properties only, so callers that
+        need an arbitrary nested JSON schema (e.g. the eval mockers) hand over a
+        dict already in UiPath wire format; it must be forwarded unmodified.
+        """
+        # Object -> array -> object nesting, i.e. not a flat property list.
+        raw_tool = {
+            "name": "submit_tool_response",
+            "description": "Return the simulated response matching the schema.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "response": {
+                        "type": "object",
+                        "properties": {
+                            "items": {
+                                "type": "array",
+                                "items": {
+                                    "type": "object",
+                                    "properties": {"sku": {"type": "string"}},
+                                },
+                            }
+                        },
+                    }
+                },
+                "required": ["response"],
+            },
+        }
+
+        gateway_reply = MagicMock()
+        gateway_reply.json.return_value = {
+            "id": "chatcmpl-raw",
+            "object": "chat.completion",
+            "created": 1677858242,
+            "model": "gpt-4o-mini-2024-07-18",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": None,
+                        "tool_calls": [
+                            {
+                                "id": "call_raw",
+                                "name": "submit_tool_response",
+                                "arguments": {"response": {"items": [{"sku": "A1"}]}},
+                            }
+                        ],
+                    },
+                    "finish_reason": "tool_calls",
+                }
+            ],
+            "usage": {
+                "prompt_tokens": 10,
+                "completion_tokens": 5,
+                "total_tokens": 15,
+                "cache_read_input_tokens": None,
+            },
+        }
+        mock_request.return_value = gateway_reply
+
+        result = await llm_service.chat_completions(
+            messages=[{"role": "user", "content": "go"}],
+            model=ChatModels.gpt_4_1_mini_2025_04_14,
+            tools=[raw_tool],
+            tool_choice=RequiredToolChoice(),
+        )
+
+        mock_request.assert_called_once()
+        sent_body = mock_request.call_args.kwargs["json"]
+        # Equality with the input dict shows the nested array schema survived.
+        assert sent_body["tools"] == [raw_tool]
+        assert sent_body["tool_choice"] == {"type": "required"}
+
+        first_call = result.choices[0].message.tool_calls[0]
+        assert first_call.arguments == {"response": {"items": [{"sku": "A1"}]}}
+
     @pytest.mark.asyncio
     @patch.object(UiPathLlmChatService, "request_async")
     async def test_chat_with_conversation_history_mocked(
diff --git a/packages/uipath-platform/uv.lock b/packages/uipath-platform/uv.lock
index dabbd63ad..084f3efb8 100644
--- a/packages/uipath-platform/uv.lock
+++ b/packages/uipath-platform/uv.lock
@@ -1095,7 +1095,7 @@ dev = [
 
 [[package]]
 name = "uipath-platform"
-version = "0.1.59"
+version = "0.1.60"
 source = { editable = "." }
 dependencies = [
     { name = "httpx" },
diff --git a/packages/uipath/pyproject.toml b/packages/uipath/pyproject.toml
index a7a978f7c..21c22f8a8 100644
--- a/packages/uipath/pyproject.toml
+++ b/packages/uipath/pyproject.toml
@@ -1,13 +1,13 @@
 [project]
 name = "uipath"
-version = "2.10.73"
+version = "2.10.74"
 description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"
 dependencies = [
   "uipath-core>=0.5.8, <0.6.0",
   "uipath-runtime>=0.10.1, <0.11.0",
-  "uipath-platform>=0.1.59, <0.2.0",
+  "uipath-platform>=0.1.60, <0.2.0",
   "click>=8.3.1",
   "httpx>=0.28.1",
   "pyjwt>=2.10.1",
diff --git a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py
index 57a727ec1..a542fc7ad 100644
--- a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py
+++ b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py
@@ -15,6 +15,7 @@
 from .._execution_context import eval_set_run_id_context
 from ._mock_context import cache_manager_context
 from ._mocker import UiPathInputMockingError
+from ._structured_output import generate_structured_output
 from ._types import (
     InputMockingStrategy,
 )
@@ -105,15 +106,6 @@ async def generate_llm_input(
 
         prompt = get_input_mocking_prompt(**prompt_generation_args)
 
-        response_format = {
-            "type": "json_schema",
-            "json_schema": {
-                "name": "agent_input",
-                "strict": False,
-                "schema": input_schema,
-            },
-        }
-
         model_parameters = mocking_strategy.model if mocking_strategy else None
         completion_kwargs = (
             model_parameters.model_dump(by_alias=False, exclude_none=True)
@@ -128,7 +120,7 @@ async def generate_llm_input(
 
         if cache_manager is not None:
             cache_key_data = {
-                "response_format": response_format,
+                "input_schema": input_schema,
                 "completion_kwargs": completion_kwargs,
                 "prompt_generation_args": prompt_generation_args,
             }
@@ -142,15 +134,15 @@ async def generate_llm_input(
             if cached_response is not None:
                 return cached_response
 
-        response = await llm.chat_completions(
+        result = await generate_structured_output(
+            llm,
             [{"role": "user", "content": prompt}],
-            response_format=response_format,
-            **completion_kwargs,
+            schema=input_schema,
+            response_format_name="agent_input",
+            description="Return the simulated agent input matching the required schema.",
+            completion_kwargs=completion_kwargs,
         )
 
-        generated_input_str = response.choices[0].message.content
-        result = json.loads(generated_input_str)
-
         if cache_manager is not None:
             cache_manager.set(
                 mocker_type="input_mocker",
@@ -160,10 +152,6 @@ async def generate_llm_input(
             )
 
         return result
-    except json.JSONDecodeError as e:
-        raise UiPathInputMockingError(
-            f"Failed to parse LLM response as JSON: {str(e)}"
-        ) from e
     except UiPathInputMockingError:
         raise
     except Exception as e:
diff --git a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py
index d1fd2a1c9..a9ab7005e 100644
--- a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py
+++ b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py
@@ -28,6 +28,7 @@
     UiPathMockResponseGenerationError,
     UiPathNoMockFoundError,
 )
+from ._structured_output import generate_structured_output
 from ._types import (
     ExampleCall,
     LLMMockingStrategy,
@@ -125,14 +126,7 @@ async def response(
                 "output_schema", TypeAdapter(return_type).json_schema()
             )
 
-            response_format = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": "OutputSchema",
-                    "strict": False,
-                    "schema": _cleanup_schema(output_schema),
-                },
-            }
+            cleaned_schema = _cleanup_schema(output_schema)
             try:
                 # Safely pull examples from params.
                 example_calls = params.get("example_calls", [])
@@ -197,7 +191,7 @@ async def response(
                 formatted_prompt = PROMPT.format(**prompt_generation_args)
 
                 cache_key_data = {
-                    "response_format": response_format,
+                    "output_schema": cleaned_schema,
                     "completion_kwargs": completion_kwargs,
                     "prompt_generation_args": prompt_generation_args,
                 }
@@ -213,17 +207,17 @@ async def response(
                     if cached_response is not None:
                         return cached_response
 
-                response = await llm.chat_completions(
-                    [
-                        {
-                            "role": "user",
-                            "content": formatted_prompt,
-                        },
-                    ],
-                    response_format=response_format,
-                    **completion_kwargs,
+                result = await generate_structured_output(
+                    llm,
+                    [{"role": "user", "content": formatted_prompt}],
+                    schema=cleaned_schema,
+                    response_format_name="OutputSchema",
+                    description=(
+                        "Return the simulated response for tool "
+                        f"'{function_name}' matching the required schema."
+                    ),
+                    completion_kwargs=completion_kwargs,
                 )
-                result = json.loads(response.choices[0].message.content)
 
                 if cache_manager is not None:
                     cache_manager.set(
@@ -235,7 +229,7 @@ async def response(
 
                 return result
             except Exception as e:
-                raise UiPathMockResponseGenerationError() from e
+                raise UiPathMockResponseGenerationError(str(e)) from e
         else:
             raise UiPathNoMockFoundError(f"Method '{function_name}' is not simulated.")
 
diff --git a/packages/uipath/src/uipath/eval/mocks/_structured_output.py b/packages/uipath/src/uipath/eval/mocks/_structured_output.py
new file mode 100644
index 000000000..1f67565b9
--- /dev/null
+++ b/packages/uipath/src/uipath/eval/mocks/_structured_output.py
@@ -0,0 +1,168 @@
+"""Provider-agnostic structured output for the eval mockers.
+
+The normalized LLM Gateway honors OpenAI-style ``response_format`` (json_schema)
+only for OpenAI models — and does so reliably, including native ``$defs``
+support. Non-OpenAI providers (Anthropic/Claude via Bedrock, Gemini) answer such
+requests with ``choices[0].message.content`` empty/None, which breaks JSON
+parsing. Function calling is honored across providers but is less reliable for
+OpenAI on some schemas, so it is used only as a fallback: prefer
+``response_format`` and fall back to a forced tool call when the content comes
+back empty.
+"""
+
+import json
+import logging
+from typing import Any
+
+from uipath.platform.chat.llm_gateway import RequiredToolChoice
+
+RESPONSE_TOOL_NAME = "submit_tool_response"
+RESPONSE_KEY = "response"
+_DEFS_PREFIX = "#/$defs/"
+
+logger = logging.getLogger(__name__)
+
+
+def _inline_defs(schema: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
+    """Inline ``$defs``/``$ref`` into a self-contained schema.
+
+    Nested Pydantic models and enums emit root ``$defs`` referenced by ``$ref``.
+    The normalized gateway accepts those in ``response_format`` but not inside a
+    tool's ``parameters``, so they are inlined here. Keywords written next to a
+    ``$ref`` (``description``, ``default``, ...) are merged over the inlined
+    target. A ``$ref`` whose target is already on the current resolution path
+    (a cycle) is left untouched; its definition is returned, itself inlined as
+    far as possible, so the caller can keep the reference resolvable.
+
+    Returns:
+        A tuple of (inlined schema, leftover ``$defs`` needed for cyclic refs).
+    """
+    defs = schema.get("$defs", {})
+    leftover: dict[str, Any] = {}
+
+    def resolve(node: Any, active: frozenset[str]) -> Any:
+        if isinstance(node, list):
+            return [resolve(item, active) for item in node]
+        if not isinstance(node, dict):
+            return node
+        ref = node.get("$ref")
+        if not (isinstance(ref, str) and ref.startswith(_DEFS_PREFIX)):
+            return {k: resolve(v, active) for k, v in node.items() if k != "$defs"}
+        name = ref[len(_DEFS_PREFIX) :]
+        if name in defs and name not in active:
+            target = resolve(defs[name], active | {name})
+            extras = {k: resolve(v, active) for k, v in node.items() if k != "$ref"}
+            # Sibling keywords override the shared definition for this use site.
+            return {**target, **extras} if isinstance(target, dict) else target
+        if name in defs and name not in leftover:
+            # Cyclic ref: publish the definition with its non-cyclic refs inlined.
+            # The placeholder stops re-entry while that definition is resolved.
+            leftover[name] = {}
+            leftover[name] = resolve(defs[name], frozenset({name}))
+        return dict(node)
+
+    root = {key: value for key, value in schema.items() if key != "$defs"}
+    inlined = resolve(root, frozenset())
+    return inlined, leftover
+
+
+def build_response_tool(schema: dict[str, Any], description: str) -> dict[str, Any]:
+    """Create the normalized-API function tool carrying ``schema`` as ``response``.
+
+    The arguments of a tool call are always a JSON object, whereas the output
+    schema may describe a scalar, an array, or an object. The schema is therefore
+    nested under one required ``response`` property and unwrapped after the
+    call. ``$defs``/``$ref`` are inlined because the gateway requires tool
+    parameters to be self-contained (unlike ``response_format``); definitions
+    that had to be kept for cyclic references are hoisted to a ``$defs`` entry
+    of the parameters.
+    """
+    wrapped_schema, cyclic_defs = _inline_defs(schema)
+    hoisted_defs = {"$defs": cyclic_defs} if cyclic_defs else {}
+    return {
+        "name": RESPONSE_TOOL_NAME,
+        "description": description,
+        "parameters": {
+            "type": "object",
+            "properties": {RESPONSE_KEY: wrapped_schema},
+            "required": [RESPONSE_KEY],
+            **hoisted_defs,
+        },
+    }
+
+
+def extract_response(response: Any) -> Any:
+    """Extract the wrapped value from the forced tool call.
+
+    Raises:
+        ValueError: if the response has no choices or tool calls, or the first
+            tool call's arguments are not a mapping with the ``response`` key.
+    """
+    choices = getattr(response, "choices", None)
+    if not choices:
+        raise ValueError("LLM response contained no choices")
+
+    message = choices[0].message
+    tool_calls = getattr(message, "tool_calls", None)
+    if not tool_calls:
+        raise ValueError(
+            f"LLM response contained no tool calls (content={message.content!r})"
+        )
+
+    arguments = tool_calls[0].arguments
+    try:
+        return arguments[RESPONSE_KEY]
+    except (KeyError, TypeError) as e:  # key absent, or arguments not a mapping
+        raise ValueError(
+            f"Tool call arguments missing '{RESPONSE_KEY}' key: {arguments}"
+        ) from e
+
+
+async def generate_structured_output(
+    llm: Any,
+    messages: list[dict[str, str]],
+    *,
+    schema: dict[str, Any],
+    response_format_name: str,
+    description: str,
+    completion_kwargs: dict[str, Any],
+) -> Any:
+    """Generate structured output that works across all model providers.
+
+    Prefers ``response_format`` (json_schema) — honored reliably by OpenAI with
+    native ``$defs`` support. When that attempt raises, yields empty content (the
+    non-OpenAI failure mode, e.g. Claude/Bedrock) or yields content that is not
+    valid JSON, falls back to a forced tool call, which is honored across
+    providers.
+    """
+    response_format = {
+        "type": "json_schema",
+        "json_schema": {
+            "name": response_format_name,
+            "strict": False,
+            "schema": schema,
+        },
+    }
+
+    try:
+        rf_response = await llm.chat_completions(
+            messages, response_format=response_format, **completion_kwargs
+        )
+        choices = getattr(rf_response, "choices", None)
+        content = choices[0].message.content if choices else None
+        # Blank content is as unusable as None; parse only when there is text.
+        if content and content.strip():
+            return json.loads(content)
+    except Exception as e:
+        # A rejected request and unparseable content are handled alike: the
+        # forced tool call below is the provider-independent way to get JSON.
+        logger.info("response_format path failed, falling back to tools: %s", e)
+
+    tool = build_response_tool(schema, description)
+    tc_response = await llm.chat_completions(
+        messages,
+        tools=[tool],
+        tool_choice=RequiredToolChoice(),
+        **completion_kwargs,
+    )
+    return extract_response(tc_response)
diff --git a/packages/uipath/tests/cli/eval/mocks/test_input_mocker.py b/packages/uipath/tests/cli/eval/mocks/test_input_mocker.py
index 72b3765df..a8a8a64ec 100644
--- a/packages/uipath/tests/cli/eval/mocks/test_input_mocker.py
+++ b/packages/uipath/tests/cli/eval/mocks/test_input_mocker.py
@@ -112,3 +112,10 @@ async def test_generate_llm_input_with_model_settings(
     assert len(chat_completion_requests) == 1, (
         "Expected exactly one chat completion request"
     )
+
+    # OpenAI returns content via response_format; no tool-call fallback needed.
+    import json
+
+    body = json.loads(chat_completion_requests[0].content.decode("utf-8"))
+    assert "response_format" in body
+    assert "tools" not in body
diff --git a/packages/uipath/tests/cli/eval/mocks/test_mocks.py b/packages/uipath/tests/cli/eval/mocks/test_mocks.py
index c4bc26ee3..ab85deb96 100644
--- a/packages/uipath/tests/cli/eval/mocks/test_mocks.py
+++ b/packages/uipath/tests/cli/eval/mocks/test_mocks.py
@@ -610,12 +610,14 @@ def foofoo(*args, **kwargs):
 
     with pytest.raises(NotImplementedError):
         assert foofoo()
-    httpx_mock.add_response(
-        url="https://example.com/llm/api/chat/completions"
-        "?api-version=2024-08-01-preview",
-        status_code=200,
-        json={},
-    )
+    # Two empty responses: the response_format attempt and the tool-call fallback.
+    for _ in range(2):
+        httpx_mock.add_response(
+            url="https://example.com/llm/api/chat/completions"
+            "?api-version=2024-08-01-preview",
+            status_code=200,
+            json={},
+        )
     with pytest.raises(UiPathMockResponseGenerationError):
         assert foo()
 
@@ -720,12 +722,14 @@ async def foofoo(*args, **kwargs):
     with pytest.raises(NotImplementedError):
         assert await foofoo()
 
-    httpx_mock.add_response(
-        url="https://example.com/llm/api/chat/completions"
-        "?api-version=2024-08-01-preview",
-        status_code=200,
-        json={},
-    )
+    # Two empty responses: the response_format attempt and the tool-call fallback.
+    for _ in range(2):
+        httpx_mock.add_response(
+            url="https://example.com/llm/api/chat/completions"
+            "?api-version=2024-08-01-preview",
+            status_code=200,
+            json={},
+        )
     with pytest.raises(UiPathMockResponseGenerationError):
         assert await foo()
 
@@ -931,6 +935,116 @@ async def foo(*args, **kwargs) -> dict[str, Any]:
     }
 
 
+@pytest.mark.asyncio
+@pytest.mark.httpx_mock(assert_all_responses_were_requested=False)
+async def test_llm_mockable_falls_back_to_tool_call_for_non_openai(
+    httpx_mock: HTTPXMock, monkeypatch: MonkeyPatch
+):
+    """Simulated tools still work when the provider is not OpenAI (AE-1646).
+
+    Claude/Bedrock and Gemini answer ``response_format`` requests with empty
+    ``content``. The mocker is then expected to retry with function calling and
+    take the result from the arguments of the forced tool call.
+    """
+    model_id = "anthropic.claude-sonnet-4-5-20250929-v1:0"
+    completions_url = (
+        "https://example.com/llm/api/chat/completions"
+        "?api-version=2024-08-01-preview"
+    )
+
+    monkeypatch.setenv("UIPATH_URL", "https://example.com")
+    monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "1234567890")
+    # Stub the cache: lookups always miss and writes are dropped.
+    monkeypatch.setattr(CacheManager, "get", lambda *args, **kwargs: None)
+    monkeypatch.setattr(CacheManager, "set", lambda *args, **kwargs: None)
+
+    @mockable()
+    async def foo(*args, **kwargs) -> str:
+        raise NotImplementedError()
+
+    # The strategy pins a non-OpenAI (Anthropic) model.
+    raw_item: dict[str, Any] = {
+        "id": "evaluation-id",
+        "name": "Mock foo",
+        "inputs": {},
+        "evaluationCriterias": {"ExactMatchEvaluator": None},
+        "mockingStrategy": {
+            "type": "llm",
+            "prompt": "response is 'bar1'",
+            "toolsToSimulate": [{"name": "foo"}],
+            "model": {"model": model_id},
+        },
+    }
+    evaluation = EvaluationItem(**raw_item)
+    assert isinstance(evaluation.mocking_strategy, LLMMockingStrategy)
+
+    # Both capability endpoints answer with an empty payload.
+    for capabilities_host in ("agenthub_", "orchestrator_"):
+        httpx_mock.add_response(
+            url=f"https://example.com/{capabilities_host}/llm/api/capabilities",
+            status_code=200,
+            json={},
+        )
+
+    def _reply(message: dict[str, Any]) -> dict[str, Any]:
+        return {
+            "id": "response-id",
+            "object": "",
+            "created": 0,
+            "model": model_id,
+            "choices": [{"index": 0, "message": message, "finish_reason": "stop"}],
+            "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2},
+        }
+
+    empty_message = {"role": "assistant", "content": None, "tool_calls": None}
+    tool_call_message = {
+        "role": "assistant",
+        "content": None,
+        "tool_calls": [
+            {
+                "id": "call_1",
+                "name": "submit_tool_response",
+                "arguments": {"response": "bar1"},
+            }
+        ],
+    }
+
+    # Queued in order: the response_format attempt comes back empty (the
+    # non-OpenAI failure), then the function-calling retry carries the result.
+    for message in (empty_message, tool_call_message):
+        httpx_mock.add_response(
+            url=completions_url, status_code=200, json=_reply(message)
+        )
+
+    set_execution_context(
+        MockingContext(
+            strategy=evaluation.mocking_strategy,
+            name=evaluation.name,
+            inputs=evaluation.inputs,
+        ),
+        _mock_span_collector,
+        "test-execution-id",
+    )
+
+    assert await foo() == "bar1"
+
+    sent_bodies = [
+        json.loads(request.content.decode("utf-8"))
+        for request in httpx_mock.get_requests()
+        if "chat/completions" in str(request.url)
+    ]
+    assert len(sent_bodies) == 2
+
+    structured_attempt, tool_retry = sent_bodies
+    # The first attempt relies on response_format alone.
+    assert "response_format" in structured_attempt
+    assert "tools" not in structured_attempt
+    # The retry forces the wrapper tool and drops response_format.
+    assert tool_retry["tool_choice"] == {"type": "required"}
+    assert tool_retry["tools"][0]["name"] == "submit_tool_response"
+    assert "response_format" not in tool_retry
+
+
 class TestUiPathMockRuntime:
     """Tests for UiPathMockRuntime execute/stream/get_schema paths."""
 
diff --git a/packages/uipath/tests/cli/eval/mocks/test_structured_output.py b/packages/uipath/tests/cli/eval/mocks/test_structured_output.py
new file mode 100644
index 000000000..a19e2605e
--- /dev/null
+++ b/packages/uipath/tests/cli/eval/mocks/test_structured_output.py
@@ -0,0 +1,195 @@
+"""Unit tests for the provider-agnostic structured-output helpers."""
+
+import json
+from types import SimpleNamespace
+from typing import Any
+
+import pytest
+
+from uipath.eval.mocks._structured_output import (
+    RESPONSE_KEY,
+    RESPONSE_TOOL_NAME,
+    build_response_tool,
+    extract_response,
+    generate_structured_output,
+)
+
+
+def _response(message: SimpleNamespace | None) -> SimpleNamespace:
+    wrapped = [SimpleNamespace(message=message)] if message is not None else []
+    return SimpleNamespace(choices=wrapped)
+
+
+class _FakeLLM:
+    """Scripted LLM double: logs each call's kwargs, replays outcomes in order."""
+
+    def __init__(self, responses: list[Any]):
+        self.calls: list[dict[str, Any]] = []
+        self._responses = [*responses]
+
+    async def chat_completions(self, messages: Any, **kwargs: Any) -> Any:
+        self.calls.append(kwargs)
+        outcome = self._responses.pop(0)
+        if not isinstance(outcome, Exception):
+            return outcome
+        raise outcome  # queued exceptions simulate a failing call
+
+
+def test_build_response_tool_wraps_schema_under_response():
+    built = build_response_tool({"type": "string"}, description="desc")
+    wrapper = built["parameters"]
+    assert (built["name"], built["description"]) == (RESPONSE_TOOL_NAME, "desc")
+    assert wrapper["properties"][RESPONSE_KEY] == {"type": "string"}
+    assert wrapper["required"] == [RESPONSE_KEY]
+
+
+def test_build_response_tool_inlines_refs_into_self_contained_schema():
+    """Resolvable ``$ref``s are replaced by their definitions in tool parameters.
+
+    Nested Pydantic models / enums emit ``$defs`` + ``$ref``; the normalized
+    gateway accepts them in response_format but NOT in a tool's parameters.
+    """
+    definitions = {
+        "Operator": {"enum": ["+", "-", "*", "/"], "type": "string"},
+        "Item": {"type": "object", "properties": {"sku": {"type": "string"}}},
+    }
+    schema = {
+        "type": "object",
+        "properties": {
+            "operator": {"$ref": "#/$defs/Operator"},
+            "items": {"type": "array", "items": {"$ref": "#/$defs/Item"}},
+        },
+        "required": ["operator"],
+        "$defs": definitions,
+    }
+
+    params = build_response_tool(schema, description="d")["parameters"]
+    serialized = json.dumps(params)
+    assert not any(token in serialized for token in ("$ref", "$defs"))
+
+    inlined = params["properties"][RESPONSE_KEY]["properties"]
+    assert inlined["operator"] == definitions["Operator"]
+    assert inlined["items"]["items"] == definitions["Item"]
+    assert "$defs" in schema  # the caller's schema must not be mutated
+
+
+def test_build_response_tool_keeps_defs_for_cyclic_refs():
+    """A self-referential schema keeps its ``$ref`` and a hoisted ``$defs``.
+
+    Such a reference cannot be fully inlined without looping forever.
+    """
+    schema = {
+        "type": "object",
+        "properties": {"root": {"$ref": "#/$defs/Node"}},
+        "$defs": {
+            "Node": {
+                "type": "object",
+                "properties": {"child": {"$ref": "#/$defs/Node"}},
+            }
+        },
+    }
+
+    params = build_response_tool(schema, description="d")["parameters"]
+
+    assert "$defs" in params and "$ref" in json.dumps(params)
+    assert "$defs" in schema  # the caller's schema dict is not mutated
+
+
+def test_extract_response_returns_wrapped_value():
+    """The value stored under the response key is returned as-is."""
+    payload = {"a": 1}
+    forced_call = SimpleNamespace(arguments={RESPONSE_KEY: payload})
+    reply = _response(SimpleNamespace(content=None, tool_calls=[forced_call]))
+    assert extract_response(reply) == {"a": 1}
+
+
+def test_extract_response_raises_when_no_choices():
+    with pytest.raises(ValueError, match="no choices"):  # empty ``choices`` list
+        extract_response(_response(message=None))
+
+
+def test_extract_response_raises_when_no_tool_calls():
+    """A plain-text reply with no tool call is reported as a ValueError."""
+    text_only = _response(SimpleNamespace(content="not a tool call", tool_calls=None))
+    with pytest.raises(ValueError, match="no tool calls"):
+        extract_response(text_only)
+
+
+def test_extract_response_raises_when_response_key_missing():
+    """Arguments lacking the response key are rejected, naming the key."""
+    stray_call = SimpleNamespace(arguments={"other": 1})
+    reply = _response(SimpleNamespace(content=None, tool_calls=[stray_call]))
+    with pytest.raises(ValueError, match=RESPONSE_KEY):
+        extract_response(reply)
+
+
+@pytest.mark.asyncio
+async def test_generate_structured_output_prefers_response_format_content():
+    """JSON content from the response_format call is returned without a retry."""
+    reply = _response(SimpleNamespace(content='{"a": 1}', tool_calls=None))
+    llm = _FakeLLM([reply])
+    result = await generate_structured_output(
+        llm,
+        [{"role": "user", "content": "x"}],
+        schema={"type": "object"},
+        response_format_name="OutputSchema",
+        description="d",
+        completion_kwargs={},
+    )
+    assert result == {"a": 1}
+    assert len(llm.calls) == 1
+    assert llm.calls[0].keys() & {"response_format", "tools"} == {"response_format"}
+
+
+@pytest.mark.asyncio
+async def test_generate_structured_output_falls_back_on_empty_content():
+    """Empty response_format content triggers a forced-tool retry.
+
+    This is the non-OpenAI case: the first reply carries no content.
+    """
+    empty_reply = _response(SimpleNamespace(content=None, tool_calls=None))
+    forced_call = SimpleNamespace(arguments={RESPONSE_KEY: {"a": 1}})
+    tool_reply = _response(SimpleNamespace(content=None, tool_calls=[forced_call]))
+    llm = _FakeLLM([empty_reply, tool_reply])
+
+    result = await generate_structured_output(
+        llm,
+        [{"role": "user", "content": "x"}],
+        schema={"type": "object"},
+        response_format_name="OutputSchema",
+        description="d",
+        completion_kwargs={},
+    )
+
+    assert result == {"a": 1}
+    assert len(llm.calls) == 2
+    first_call, retry_call = llm.calls
+    assert "response_format" in first_call
+    assert "tools" in retry_call
+    assert "tool_choice" in retry_call
+
+
+@pytest.mark.asyncio
+async def test_generate_structured_output_falls_back_when_response_format_raises():
+    """A raising response_format call is answered via the tool fallback.
+
+    Covers a provider that rejects response_format outright.
+    """
+    forced_call = SimpleNamespace(arguments={RESPONSE_KEY: "ok"})
+    llm = _FakeLLM(
+        [
+            RuntimeError("response_format unsupported"),
+            _response(SimpleNamespace(content=None, tool_calls=[forced_call])),
+        ]
+    )
+
+    result = await generate_structured_output(
+        llm,
+        [{"role": "user", "content": "x"}],
+        schema={"type": "string"},
+        response_format_name="OutputSchema",
+        description="d",
+        completion_kwargs={},
+    )
+    assert result == "ok"
+    assert len(llm.calls) == 2
diff --git a/packages/uipath/uv.lock b/packages/uipath/uv.lock
index 3cf75bd40..365ed4d35 100644
--- a/packages/uipath/uv.lock
+++ b/packages/uipath/uv.lock
@@ -2552,7 +2552,7 @@ wheels = [
 
 [[package]]
 name = "uipath"
-version = "2.10.73"
+version = "2.10.74"
 source = { editable = "." }
 dependencies = [
     { name = "applicationinsights" },
@@ -2691,7 +2691,7 @@ dev = [
 
 [[package]]
 name = "uipath-platform"
-version = "0.1.59"
+version = "0.1.60"
 source = { editable = "../uipath-platform" }
 dependencies = [
     { name = "httpx" },