From 66e12934b043395e0b8f3de37bf10076976c5e5a Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 10:24:24 +0100
Subject: [PATCH 1/8] test(openai-agents): Replace mocks with httpx in
 non-error single-response tests

---
 .../openai_agents/test_openai_agents.py       | 313 +++++++++++-------
 1 file changed, 185 insertions(+), 128 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 1390455317..5e077183dc 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -5,16 +5,17 @@
 import os
 import json
 import logging
+import httpx
 
 import sentry_sdk
 from sentry_sdk import start_span
-from sentry_sdk.consts import SPANDATA
+from sentry_sdk.consts import SPANDATA, OP
 from sentry_sdk.integrations.logging import LoggingIntegration
 from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
 from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize
-from sentry_sdk.utils import parse_version, package_version
+from sentry_sdk.utils import parse_version
 
-from openai import AsyncOpenAI
+from openai import AsyncOpenAI, InternalServerError
 from agents.models.openai_responses import OpenAIResponsesModel
 
 from unittest import mock
@@ -37,8 +38,6 @@
 from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError
 from agents.version import __version__ as OPENAI_AGENTS_VERSION
 
-OPENAI_VERSION = package_version("openai")
-
 from openai.types.responses import (
     ResponseCreatedEvent,
     ResponseTextDeltaEvent,
@@ -237,8 +236,9 @@ def mock_usage():
 
 
 @pytest.fixture
-def mock_model_response(mock_usage):
-    return ModelResponse(
+def mock_model_response():
+    return Response(
+        id="resp_123",
         output=[
             ResponseOutputMessage(
                 id="msg_123",
@@ -254,8 +254,23 @@ def mock_model_response(mock_usage):
                 role="assistant",
             )
         ],
-        usage=mock_usage,
-        response_id="resp_123",
+        parallel_tool_calls=False,
+        tool_choice="none",
+        tools=[],
+        created_at=10000000,
+        model="gpt-4",
+        object="response",
+        usage=ResponseUsage(
+            input_tokens=20,
+            input_tokens_details=InputTokensDetails(
+                cached_tokens=5,
+            ),
+            output_tokens=10,
+            output_tokens_details=OutputTokensDetails(
+                reasoning_tokens=8,
+            ),
+            total_tokens=30,
+        ),
     )
 
 
@@ -297,51 +312,60 @@ def inner(instructions):
 
 
 @pytest.fixture
-def test_agent_custom_model():
-    """Create a real Agent instance for testing."""
-    return Agent(
-        name="test_agent_custom_model",
-        instructions="You are a helpful test assistant.",
-        # the model could be agents.OpenAIChatCompletionsModel()
-        model="my-custom-model",
-        model_settings=ModelSettings(
-            max_tokens=100,
-            temperature=0.7,
-            top_p=1.0,
-            presence_penalty=0.0,
-            frequency_penalty=0.0,
-        ),
-    )
+def get_model_response():
+    def inner(response_content):
+        model_request = httpx.Request(
+            "POST",
+            "/responses",
+        )
+
+        response = httpx.Response(
+            200,
+            request=model_request,
+            content=json.dumps(response_content.model_dump()).encode("utf-8"),
+        )
+
+        return response
+
+    return inner
 
 
 @pytest.mark.asyncio
 async def test_agent_invocation_span_no_pii(
-    sentry_init, capture_events, test_agent, mock_model_response
+    sentry_init, capture_events, test_agent, mock_model_response, get_model_response
 ):
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=False,
-            )
+    response = get_model_response(mock_model_response)
 
-            events = capture_events()
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=False,
+        )
 
-            result = await agents.Runner.run(
-                test_agent, "Test input", run_config=test_run_config
-            )
+        events = capture_events()
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        result = await agents.Runner.run(
+            agent, "Test input", run_config=test_run_config
+        )
+
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span, ai_client_span = spans
+    invoke_agent_span = next(
+        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    )
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     assert transaction["transaction"] == "test_agent workflow"
     assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
@@ -454,33 +478,38 @@ async def test_agent_invocation_span(
     instructions,
     input,
     request,
+    get_model_response,
 ):
     """
     Test that the integration creates spans for agent invocations.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent_with_instructions(instructions).clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    response = get_model_response(mock_model_response)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = await agents.Runner.run(
-                test_agent_with_instructions(instructions),
-                input,
-                run_config=test_run_config,
-            )
+        result = await agents.Runner.run(
+            agent,
+            input,
+            run_config=test_run_config,
+        )
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -605,35 +634,56 @@ async def test_agent_invocation_span(
 
 @pytest.mark.asyncio
 async def test_client_span_custom_model(
-    sentry_init, capture_events, test_agent_custom_model, mock_model_response
+    sentry_init,
+    capture_events,
+    mock_model_response,
+    get_model_response,
 ):
     """
     Test that the integration uses the correct model name if a custom model is used.
     """
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="my-custom-model", openai_client=client)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
+    agent = Agent(
+        name="test_agent_custom_model",
+        instructions="You are a helpful test assistant.",
+        # the model could be agents.OpenAIChatCompletionsModel()
+        model=model,
+        model_settings=ModelSettings(
+            max_tokens=100,
+            temperature=0.7,
+            top_p=1.0,
+            presence_penalty=0.0,
+            frequency_penalty=0.0,
+        ),
+    )
 
-            events = capture_events()
+    response = get_model_response(mock_model_response)
 
-            result = await agents.Runner.run(
-                test_agent_custom_model, "Test input", run_config=test_run_config
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+        )
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        events = capture_events()
+
+        result = await agents.Runner.run(
+            agent, "Test input", run_config=test_run_config
+        )
+
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
-    _, ai_client_span = spans
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     assert ai_client_span["description"] == "chat my-custom-model"
     assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model"
@@ -644,35 +694,41 @@ def test_agent_invocation_span_sync_no_pii(
     capture_events,
     test_agent,
     mock_model_response,
+    get_model_response,
 ):
     """
     Test that the integration creates spans for agent invocations.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    response = get_model_response(mock_model_response)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=False,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=False,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = agents.Runner.run_sync(
-                test_agent, "Test input", run_config=test_run_config
-            )
+        result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config)
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span, ai_client_span = spans
+    invoke_agent_span = next(
+        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    )
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     assert transaction["transaction"] == "test_agent workflow"
     assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
@@ -781,33 +837,38 @@ def test_agent_invocation_span_sync(
     instructions,
     input,
     request,
+    get_model_response,
 ):
     """
     Test that the integration creates spans for agent invocations.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent_with_instructions(instructions).clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    response = get_model_response(mock_model_response)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = agents.Runner.run_sync(
-                test_agent_with_instructions(instructions),
-                input,
-                run_config=test_run_config,
-            )
+        result = agents.Runner.run_sync(
+            agent,
+            input,
+            run_config=test_run_config,
+        )
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -1258,22 +1319,18 @@ def simple_test_tool(message: str) -> str:
     assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5
     assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
     assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15
-
-    tool_call = {
-        "arguments": '{"message": "hello"}',
-        "call_id": "call_123",
-        "name": "simple_test_tool",
-        "type": "function_call",
-        "id": "call_123",
-        "status": None,
-    }
-
-    if OPENAI_VERSION >= (2, 25, 0):
-        tool_call["namespace"] = None
-
-    assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [
-        tool_call
-    ]
+    assert ai_client_span1["data"]["gen_ai.response.tool_calls"] == safe_serialize(
+        [
+            {
+                "arguments": '{"message": "hello"}',
+                "call_id": "call_123",
+                "name": "simple_test_tool",
+                "type": "function_call",
+                "id": "call_123",
+                "status": None,
+            }
+        ]
+    )
 
     assert tool_span["description"] == "execute_tool simple_test_tool"
     assert tool_span["data"]["gen_ai.agent.name"] == "test_agent"

From c773204eeeb22284b957e2b534ae3bc94050738d Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 10:39:22 +0100
Subject: [PATCH 2/8] add more tests

---
 .../openai_agents/test_openai_agents.py       | 132 +++++++++++-------
 1 file changed, 78 insertions(+), 54 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 5e077183dc..ac4980f612 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -2069,34 +2069,38 @@ async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_
 
 @pytest.mark.asyncio
 async def test_multiple_agents_asyncio(
-    sentry_init, capture_events, test_agent, mock_model_response
+    sentry_init, capture_events, test_agent, mock_model_response, get_model_response
 ):
     """
     Test that multiple agents can be run at the same time in asyncio tasks
     without interfering with each other.
     """
+    client = AsyncOpenAI(api_key="z")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    response = get_model_response(mock_model_response)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            async def run():
-                await agents.Runner.run(
-                    starting_agent=test_agent,
-                    input="Test input",
-                    run_config=test_run_config,
-                )
+        async def run():
+            await agents.Runner.run(
+                starting_agent=agent,
+                input="Test input",
+                run_config=test_run_config,
+            )
 
-            await asyncio.gather(*[run() for _ in range(3)])
+        await asyncio.gather(*[run() for _ in range(3)])
 
     assert len(events) == 3
     txn1, txn2, txn3 = events
@@ -2973,37 +2977,45 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
 )
 @pytest.mark.asyncio
 async def test_conversation_id_on_all_spans(
-    sentry_init, capture_events, test_agent, mock_model_response
+    sentry_init, capture_events, test_agent, mock_model_response, get_model_response
 ):
     """
     Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run().
     """
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    client = AsyncOpenAI(api_key="z")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
+    response = get_model_response(mock_model_response)
 
-            events = capture_events()
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+        )
 
-            result = await agents.Runner.run(
-                test_agent,
-                "Test input",
-                run_config=test_run_config,
-                conversation_id="conv_test_123",
-            )
+        events = capture_events()
 
-            assert result is not None
+        result = await agents.Runner.run(
+            agent,
+            "Test input",
+            run_config=test_run_config,
+            conversation_id="conv_test_123",
+        )
+
+        assert result is not None
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span, ai_client_span = spans
+    invoke_agent_span = next(
+        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    )
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     # Verify workflow span (transaction) has conversation_id
     assert (
@@ -3120,35 +3132,47 @@ def simple_tool(message: str) -> str:
 )
 @pytest.mark.asyncio
 async def test_no_conversation_id_when_not_provided(
-    sentry_init, capture_events, test_agent, mock_model_response
+    sentry_init,
+    capture_events,
+    test_agent,
+    mock_model_response,
+    get_model_response,
 ):
     """
     Test that gen_ai.conversation.id is not set when not passed to Runner.run().
     """
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    client = AsyncOpenAI(api_key="z")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
+    response = get_model_response(mock_model_response)
 
-            events = capture_events()
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+        )
 
-            # Don't pass conversation_id
-            result = await agents.Runner.run(
-                test_agent, "Test input", run_config=test_run_config
-            )
+        events = capture_events()
 
-            assert result is not None
+        # Don't pass conversation_id
+        result = await agents.Runner.run(
+            agent, "Test input", run_config=test_run_config
+        )
+
+        assert result is not None
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span, ai_client_span = spans
+    invoke_agent_span = next(
+        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    )
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     # Verify conversation_id is NOT set on any spans
     assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get(

From 57c2a6eef3ca9220e7e7a1721ff79d505994c513 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 10:45:35 +0100
Subject: [PATCH 3/8] revert unintended tool test change

---
 .../openai_agents/test_openai_agents.py       | 34 +++++++++++--------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index ac4980f612..1b2dbaf941 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -13,9 +13,9 @@
 from sentry_sdk.integrations.logging import LoggingIntegration
 from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
 from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize
-from sentry_sdk.utils import parse_version
+from sentry_sdk.utils import parse_version, package_version
 
-from openai import AsyncOpenAI, InternalServerError
+from openai import AsyncOpenAI
 from agents.models.openai_responses import OpenAIResponsesModel
 
 from unittest import mock
@@ -38,6 +38,8 @@
 from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError
 from agents.version import __version__ as OPENAI_AGENTS_VERSION
 
+OPENAI_VERSION = package_version("openai")
+
 from openai.types.responses import (
     ResponseCreatedEvent,
     ResponseTextDeltaEvent,
@@ -1319,18 +1321,22 @@ def simple_test_tool(message: str) -> str:
     assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5
     assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
     assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15
-    assert ai_client_span1["data"]["gen_ai.response.tool_calls"] == safe_serialize(
-        [
-            {
-                "arguments": '{"message": "hello"}',
-                "call_id": "call_123",
-                "name": "simple_test_tool",
-                "type": "function_call",
-                "id": "call_123",
-                "status": None,
-            }
-        ]
-    )
+
+    tool_call = {
+        "arguments": '{"message": "hello"}',
+        "call_id": "call_123",
+        "name": "simple_test_tool",
+        "type": "function_call",
+        "id": "call_123",
+        "status": None,
+    }
+
+    if OPENAI_VERSION >= (2, 25, 0):
+        tool_call["namespace"] = None
+
+    assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [
+        tool_call
+    ]
 
     assert tool_span["description"] == "execute_tool simple_test_tool"
     assert tool_span["data"]["gen_ai.agent.name"] == "test_agent"

From 5b6163dc8b06406a5df3752aa88b8c806c9fe4a3 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 10:53:05 +0100
Subject: [PATCH 4/8] revert fixture removal

---
 .../openai_agents/test_openai_agents.py       | 34 +++++++++++--------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 1b2dbaf941..c9ab37b58d 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -313,6 +313,24 @@ def inner(instructions):
     return inner
 
 
+@pytest.fixture
+def test_agent_custom_model():
+    """Create a real Agent instance for testing."""
+    return Agent(
+        name="test_agent_custom_model",
+        instructions="You are a helpful test assistant.",
+        # the model could be agents.OpenAIChatCompletionsModel()
+        model="my-custom-model",
+        model_settings=ModelSettings(
+            max_tokens=100,
+            temperature=0.7,
+            top_p=1.0,
+            presence_penalty=0.0,
+            frequency_penalty=0.0,
+        ),
+    )
+
+
 @pytest.fixture
 def get_model_response():
     def inner(response_content):
@@ -638,6 +656,7 @@ async def test_agent_invocation_span(
 async def test_client_span_custom_model(
     sentry_init,
     capture_events,
+    test_agent_custom_model,
     mock_model_response,
     get_model_response,
 ):
@@ -647,20 +666,7 @@ async def test_client_span_custom_model(
 
     client = AsyncOpenAI(api_key="test-key")
     model = OpenAIResponsesModel(model="my-custom-model", openai_client=client)
-
-    agent = Agent(
-        name="test_agent_custom_model",
-        instructions="You are a helpful test assistant.",
-        # the model could be agents.OpenAIChatCompletionsModel()
-        model=model,
-        model_settings=ModelSettings(
-            max_tokens=100,
-            temperature=0.7,
-            top_p=1.0,
-            presence_penalty=0.0,
-            frequency_penalty=0.0,
-        ),
-    )
+    agent = test_agent_custom_model.clone(model=model)
 
     response = get_model_response(mock_model_response)
 

From 2cc4825f3c92b2b01258ec79397c3cb38c31f80c Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 10:54:38 +0100
Subject: [PATCH 5/8] use test-key

---
 tests/integrations/openai_agents/test_openai_agents.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index c9ab37b58d..a3b18a735e 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -2087,7 +2087,7 @@ async def test_multiple_agents_asyncio(
     Test that multiple agents can be run at the same time in asyncio tasks
     without interfering with each other.
     """
-    client = AsyncOpenAI(api_key="z")
+    client = AsyncOpenAI(api_key="test-key")
     model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
     agent = test_agent.clone(model=model)
 
@@ -2995,7 +2995,7 @@ async def test_conversation_id_on_all_spans(
     Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run().
     """
 
-    client = AsyncOpenAI(api_key="z")
+    client = AsyncOpenAI(api_key="test-key")
     model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
     agent = test_agent.clone(model=model)
 
@@ -3154,7 +3154,7 @@ async def test_no_conversation_id_when_not_provided(
     Test that gen_ai.conversation.id is not set when not passed to Runner.run().
     """
 
-    client = AsyncOpenAI(api_key="z")
+    client = AsyncOpenAI(api_key="test-key")
     model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
     agent = test_agent.clone(model=model)
 

From 2378fd987064fc40b8bf9dfe393578d6c0dceff1 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 11:28:36 +0100
Subject: [PATCH 6/8] more tests

---
 .../openai_agents/test_openai_agents.py       | 347 +++++++++++-------
 1 file changed, 205 insertions(+), 142 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index a3b18a735e..c47148bc89 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -2274,56 +2274,81 @@ def failing_tool(message: str) -> str:
 
 @pytest.mark.asyncio
 async def test_invoke_agent_span_includes_usage_data(
-    sentry_init, capture_events, test_agent, mock_usage
+    sentry_init,
+    capture_events,
+    test_agent,
+    get_model_response,
 ):
     """
     Test that invoke_agent spans include aggregated usage data from context_wrapper.
     This verifies the new functionality added to track token usage in invoke_agent spans.
     """
+    client = AsyncOpenAI(api_key="z")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            # Create a response with usage data
-            response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_123",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="Response with usage",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=mock_usage,
-                response_id="resp_123",
-            )
-            mock_get_response.return_value = response
+    response = get_model_response(
+        Response(
+            id="resp_123",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_123",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="Response with usage",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=20,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=30,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = await agents.Runner.run(
-                test_agent, "Test input", run_config=test_run_config
-            )
+        result = await agents.Runner.run(
+            agent, "Test input", run_config=test_run_config
+        )
 
-            assert result is not None
+        assert result is not None
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span, ai_client_span = spans
+    invoke_agent_span = next(
+        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    )
 
     # Verify invoke_agent span has usage data from context_wrapper
     assert invoke_agent_span["description"] == "invoke_agent test_agent"
@@ -2331,7 +2356,6 @@ async def test_invoke_agent_span_includes_usage_data(
     assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"]
     assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"]
 
-    # The usage should match the mock_usage values (aggregated across all calls)
     assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10
     assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20
     assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30
@@ -2341,23 +2365,23 @@ async def test_invoke_agent_span_includes_usage_data(
 
 @pytest.mark.asyncio
 async def test_ai_client_span_includes_response_model(
-    sentry_init, capture_events, test_agent
+    sentry_init,
+    capture_events,
+    test_agent,
+    get_model_response,
 ):
     """
     Test that ai_client spans (gen_ai.chat) include the response model from the actual API response.
     This verifies we capture the actual model used (which may differ from the requested model).
     """
+    client = AsyncOpenAI(api_key="z")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        # Mock the _fetch_response method to return a response with a model field
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel._fetch_response"
-        ) as mock_fetch_response:
-            # Create a mock OpenAI Response object with a specific model version
-            mock_response = MagicMock()
-            mock_response.model = "gpt-4.1-2025-04-14"  # The actual response model
-            mock_response.id = "resp_123"
-            mock_response.output = [
+    response = get_model_response(
+        Response(
+            id="resp_123",
+            output=[
                 ResponseOutputMessage(
                     id="msg_123",
                     type="message",
@@ -2371,37 +2395,49 @@ async def test_ai_client_span_includes_response_model(
                     ],
                     role="assistant",
                 )
-            ]
-            mock_response.usage = MagicMock()
-            mock_response.usage.input_tokens = 10
-            mock_response.usage.output_tokens = 20
-            mock_response.usage.total_tokens = 30
-            mock_response.usage.input_tokens_details = InputTokensDetails(
-                cached_tokens=0
-            )
-            mock_response.usage.output_tokens_details = OutputTokensDetails(
-                reasoning_tokens=5
-            )
-
-            mock_fetch_response.return_value = mock_response
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=20,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=30,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = await agents.Runner.run(
-                test_agent, "Test input", run_config=test_run_config
-            )
+        result = await agents.Runner.run(
+            agent, "Test input", run_config=test_run_config
+        )
 
-            assert result is not None
+        assert result is not None
 
     (transaction,) = events
     spans = transaction["spans"]
-    _, ai_client_span = spans
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     # Verify ai_client span has response model from API response
     assert ai_client_span["description"] == "chat gpt-4"
@@ -2411,29 +2447,28 @@ async def test_ai_client_span_includes_response_model(
 
 @pytest.mark.asyncio
 async def test_ai_client_span_response_model_with_chat_completions(
-    sentry_init, capture_events
+    sentry_init,
+    capture_events,
+    get_model_response,
 ):
     """
     Test that response model is captured when using ChatCompletions API (not Responses API).
     This ensures our implementation works with different OpenAI model types.
     """
     # Create agent that uses ChatCompletions model
+    client = AsyncOpenAI(api_key="z")
+    model = OpenAIResponsesModel(model="gpt-4o-mini", openai_client=client)
+
     agent = Agent(
         name="chat_completions_agent",
         instructions="Test agent using ChatCompletions",
-        model="gpt-4o-mini",
+        model=model,
     )
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        # Mock the _fetch_response method
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel._fetch_response"
-        ) as mock_fetch_response:
-            # Create a mock Response object
-            mock_response = MagicMock()
-            mock_response.model = "gpt-4o-mini-2024-07-18"
-            mock_response.id = "resp_123"
-            mock_response.output = [
+    response = get_model_response(
+        Response(
+            id="resp_123",
+            output=[
                 ResponseOutputMessage(
                     id="msg_123",
                     type="message",
@@ -2447,36 +2482,48 @@ async def test_ai_client_span_response_model_with_chat_completions(
                     ],
                     role="assistant",
                 )
-            ]
-            mock_response.usage = MagicMock()
-            mock_response.usage.input_tokens = 15
-            mock_response.usage.output_tokens = 25
-            mock_response.usage.total_tokens = 40
-            mock_response.usage.input_tokens_details = InputTokensDetails(
-                cached_tokens=0
-            )
-            mock_response.usage.output_tokens_details = OutputTokensDetails(
-                reasoning_tokens=0
-            )
-
-            mock_fetch_response.return_value = mock_response
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4o-mini-2024-07-18",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=15,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=25,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=40,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = await agents.Runner.run(
-                agent, "Test input", run_config=test_run_config
-            )
+        result = await agents.Runner.run(
+            agent, "Test input", run_config=test_run_config
+        )
 
-            assert result is not None
+        assert result is not None
 
     (transaction,) = events
     spans = transaction["spans"]
-    _, ai_client_span = spans
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     # Verify response model from API response is captured
     assert "gen_ai.response.model" in ai_client_span["data"]
@@ -2657,21 +2704,22 @@ async def test_response_model_not_set_when_unavailable(
 
 @pytest.mark.asyncio
 async def test_invoke_agent_span_includes_response_model(
-    sentry_init, capture_events, test_agent
+    sentry_init,
+    capture_events,
+    test_agent,
+    get_model_response,
 ):
     """
     Test that invoke_agent spans include the response model from the API response.
     """
+    client = AsyncOpenAI(api_key="z")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel._fetch_response"
-        ) as mock_fetch_response:
-            # Create a mock OpenAI Response object with a specific model version
-            mock_response = MagicMock()
-            mock_response.model = "gpt-4.1-2025-04-14"  # The actual response model
-            mock_response.id = "resp_123"
-            mock_response.output = [
+    response = get_model_response(
+        Response(
+            id="resp_123",
+            output=[
                 ResponseOutputMessage(
                     id="msg_123",
                     type="message",
@@ -2685,37 +2733,52 @@ async def test_invoke_agent_span_includes_response_model(
                     ],
                     role="assistant",
                 )
-            ]
-            mock_response.usage = MagicMock()
-            mock_response.usage.input_tokens = 10
-            mock_response.usage.output_tokens = 20
-            mock_response.usage.total_tokens = 30
-            mock_response.usage.input_tokens_details = InputTokensDetails(
-                cached_tokens=0
-            )
-            mock_response.usage.output_tokens_details = OutputTokensDetails(
-                reasoning_tokens=5
-            )
-
-            mock_fetch_response.return_value = mock_response
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=20,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=30,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = await agents.Runner.run(
-                test_agent, "Test input", run_config=test_run_config
-            )
+        result = await agents.Runner.run(
+            agent, "Test input", run_config=test_run_config
+        )
 
-            assert result is not None
+        assert result is not None
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span, ai_client_span = spans
+    invoke_agent_span = next(
+        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    )
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     # Verify invoke_agent span has response model from API
     assert invoke_agent_span["description"] == "invoke_agent test_agent"

From 1a1ab90d79dd259f5b04d55124ba6eec843c0c5f Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 13:27:57 +0100
Subject: [PATCH 7/8] test(openai-agents): Replace mock with httpx in hosted MCP tool propagation headers test

---
 .../openai_agents/test_openai_agents.py       | 32 +++++++++++--------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index c47148bc89..ef9461ff9f 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -1492,7 +1492,9 @@ async def test_hosted_mcp_tool_propagation_header_streamed(sentry_init, test_age
 
 
 @pytest.mark.asyncio
-async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent):
+async def test_hosted_mcp_tool_propagation_headers(
+    sentry_init, test_agent, get_model_response
+):
     """
     Test responses API is given trace propagation headers with HostedMCPTool.
     """
@@ -1508,9 +1510,7 @@ async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent):
         },
     )
 
-    client = AsyncOpenAI(api_key="z")
-    client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE)
-
+    client = AsyncOpenAI(api_key="test-key")
     model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
 
     agent_with_tool = test_agent.clone(
@@ -1524,11 +1524,13 @@ async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent):
         release="d08ebdb9309e1b004c6f52202de58a09c2268e42",
     )
 
+    response = get_model_response(EXAMPLE_RESPONSE)
+
     with patch.object(
-        model._client.responses,
-        "create",
-        wraps=model._client.responses.create,
-    ) as create, mock.patch(
+        agent_with_tool.model._client._client,
+        "send",
+        return_value=response,
+    ) as send, mock.patch(
         "sentry_sdk.tracing_utils.Random.randrange", return_value=500000
     ):
         with sentry_sdk.start_transaction(
@@ -1542,13 +1544,17 @@ async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent):
                 run_config=test_run_config,
             )
 
-            ai_client_span = transaction._span_recorder.spans[-1]
+            ai_client_span = next(
+                span
+                for span in transaction._span_recorder.spans
+                if span.op == OP.GEN_AI_CHAT
+            )
 
-        args, kwargs = create.call_args
+        args, kwargs = send.call_args
 
-        assert "tools" in kwargs
-        assert len(kwargs["tools"]) == 1
-        hosted_mcp_tool = kwargs["tools"][0]
+        request = args[0]
+        body = json.loads(request.content.decode("utf-8"))
+        hosted_mcp_tool = body["tools"][0]
 
         assert hosted_mcp_tool["headers"][
             "sentry-trace"

From 0d13b60aa461a7937cdbc53ac61ff44fedcd97d8 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 13:30:50 +0100
Subject: [PATCH 8/8] test(openai-agents): Set usage to None in example response

---
 .../integrations/openai_agents/test_openai_agents.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index ef9461ff9f..a4b92b17c1 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -262,17 +262,7 @@ def mock_model_response():
         created_at=10000000,
         model="gpt-4",
         object="response",
-        usage=ResponseUsage(
-            input_tokens=20,
-            input_tokens_details=InputTokensDetails(
-                cached_tokens=5,
-            ),
-            output_tokens=10,
-            output_tokens_details=OutputTokensDetails(
-                reasoning_tokens=8,
-            ),
-            total_tokens=30,
-        ),
+        usage=None,
     )