From 66e12934b043395e0b8f3de37bf10076976c5e5a Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 10:24:24 +0100 Subject: [PATCH 1/8] test(openai-agents): Replace mocks with httpx in non-error single-response tests --- .../openai_agents/test_openai_agents.py | 313 +++++++++++------- 1 file changed, 185 insertions(+), 128 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 1390455317..5e077183dc 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -5,16 +5,17 @@ import os import json import logging +import httpx import sentry_sdk from sentry_sdk import start_span -from sentry_sdk.consts import SPANDATA +from sentry_sdk.consts import SPANDATA, OP from sentry_sdk.integrations.logging import LoggingIntegration from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize -from sentry_sdk.utils import parse_version, package_version +from sentry_sdk.utils import parse_version -from openai import AsyncOpenAI +from openai import AsyncOpenAI, InternalServerError from agents.models.openai_responses import OpenAIResponsesModel from unittest import mock @@ -37,8 +38,6 @@ from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError from agents.version import __version__ as OPENAI_AGENTS_VERSION -OPENAI_VERSION = package_version("openai") - from openai.types.responses import ( ResponseCreatedEvent, ResponseTextDeltaEvent, @@ -237,8 +236,9 @@ def mock_usage(): @pytest.fixture -def mock_model_response(mock_usage): - return ModelResponse( +def mock_model_response(): + return Response( + id="resp_123", output=[ ResponseOutputMessage( id="msg_123", @@ -254,8 +254,23 @@ def mock_model_response(mock_usage): role="assistant", ) ], - usage=mock_usage, - response_id="resp_123", + 
parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4", + object="response", + usage=ResponseUsage( + input_tokens=20, + input_tokens_details=InputTokensDetails( + cached_tokens=5, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=8, + ), + total_tokens=30, + ), ) @@ -297,51 +312,60 @@ def inner(instructions): @pytest.fixture -def test_agent_custom_model(): - """Create a real Agent instance for testing.""" - return Agent( - name="test_agent_custom_model", - instructions="You are a helpful test assistant.", - # the model could be agents.OpenAIChatCompletionsModel() - model="my-custom-model", - model_settings=ModelSettings( - max_tokens=100, - temperature=0.7, - top_p=1.0, - presence_penalty=0.0, - frequency_penalty=0.0, - ), - ) +def get_model_response(): + def inner(response_content): + model_request = httpx.Request( + "POST", + "/responses", + ) + + response = httpx.Response( + 200, + request=model_request, + content=json.dumps(response_content.model_dump()).encode("utf-8"), + ) + + return response + + return inner @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( - sentry_init, capture_events, test_agent, mock_model_response + sentry_init, capture_events, test_agent, mock_model_response, get_model_response ): - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, - ) + response = get_model_response(mock_model_response) - events = capture_events() + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as 
_: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + ) - result = await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + events = capture_events() - assert result is not None - assert result.final_output == "Hello, how can I help you?" + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -454,33 +478,38 @@ async def test_agent_invocation_span( instructions, input, request, + get_model_response, ): """ Test that the integration creates spans for agent invocations. 
""" + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent_with_instructions(instructions).clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + response = get_model_response(mock_model_response) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - result = await agents.Runner.run( - test_agent_with_instructions(instructions), - input, - run_config=test_run_config, - ) + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" (transaction,) = events spans = transaction["spans"] @@ -605,35 +634,56 @@ async def test_agent_invocation_span( @pytest.mark.asyncio async def test_client_span_custom_model( - sentry_init, capture_events, test_agent_custom_model, mock_model_response + sentry_init, + capture_events, + mock_model_response, + get_model_response, ): """ Test that the integration uses the correct model name if a custom model is used. 
""" - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="my-custom-model", openai_client=client) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + agent = Agent( + name="test_agent_custom_model", + instructions="You are a helpful test assistant.", + # the model could be agents.OpenAIChatCompletionsModel() + model=model, + model_settings=ModelSettings( + max_tokens=100, + temperature=0.7, + top_p=1.0, + presence_penalty=0.0, + frequency_penalty=0.0, + ), + ) - events = capture_events() + response = get_model_response(mock_model_response) - result = await agents.Runner.run( - test_agent_custom_model, "Test input", run_config=test_run_config - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + events = capture_events() + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" (transaction,) = events spans = transaction["spans"] - _, ai_client_span = spans + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) assert ai_client_span["description"] == "chat my-custom-model" assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" @@ -644,35 +694,41 @@ def test_agent_invocation_span_sync_no_pii( capture_events, test_agent, mock_model_response, + get_model_response, ): """ Test that the integration creates spans for agent invocations. 
""" + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + response = get_model_response(mock_model_response) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + ) - events = capture_events() + events = capture_events() - result = agents.Runner.run_sync( - test_agent, "Test input", run_config=test_run_config - ) + result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -781,33 +837,38 @@ def test_agent_invocation_span_sync( instructions, input, request, + get_model_response, ): """ Test that the integration creates spans for agent invocations. 
""" + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent_with_instructions(instructions).clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + response = get_model_response(mock_model_response) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - result = agents.Runner.run_sync( - test_agent_with_instructions(instructions), - input, - run_config=test_run_config, - ) + result = agents.Runner.run_sync( + agent, + input, + run_config=test_run_config, + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
(transaction,) = events spans = transaction["spans"] @@ -1258,22 +1319,18 @@ def simple_test_tool(message: str) -> str: assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ - tool_call - ] + assert ai_client_span1["data"]["gen_ai.response.tool_calls"] == safe_serialize( + [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + ] + ) assert tool_span["description"] == "execute_tool simple_test_tool" assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" From c773204eeeb22284b957e2b534ae3bc94050738d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 10:39:22 +0100 Subject: [PATCH 2/8] add more tests --- .../openai_agents/test_openai_agents.py | 132 +++++++++++------- 1 file changed, 78 insertions(+), 54 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 5e077183dc..ac4980f612 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -2069,34 +2069,38 @@ async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_ @pytest.mark.asyncio async def test_multiple_agents_asyncio( - sentry_init, capture_events, test_agent, mock_model_response + sentry_init, capture_events, test_agent, mock_model_response, get_model_response ): """ Test that multiple agents can be run 
at the same time in asyncio tasks without interfering with each other. """ + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + response = get_model_response(mock_model_response) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - events = capture_events() + events = capture_events() - async def run(): - await agents.Runner.run( - starting_agent=test_agent, - input="Test input", - run_config=test_run_config, - ) + async def run(): + await agents.Runner.run( + starting_agent=agent, + input="Test input", + run_config=test_run_config, + ) - await asyncio.gather(*[run() for _ in range(3)]) + await asyncio.gather(*[run() for _ in range(3)]) assert len(events) == 3 txn1, txn2, txn3 = events @@ -2973,37 +2977,45 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent): ) @pytest.mark.asyncio async def test_conversation_id_on_all_spans( - sentry_init, capture_events, test_agent, mock_model_response + sentry_init, capture_events, test_agent, mock_model_response, get_model_response ): """ Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run(). 
""" - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + response = get_model_response(mock_model_response) - events = capture_events() + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - result = await agents.Runner.run( - test_agent, - "Test input", - run_config=test_run_config, - conversation_id="conv_test_123", - ) + events = capture_events() - assert result is not None + result = await agents.Runner.run( + agent, + "Test input", + run_config=test_run_config, + conversation_id="conv_test_123", + ) + + assert result is not None (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify workflow span (transaction) has conversation_id assert ( @@ -3120,35 +3132,47 @@ def simple_tool(message: str) -> str: ) @pytest.mark.asyncio async def test_no_conversation_id_when_not_provided( - sentry_init, capture_events, test_agent, mock_model_response + sentry_init, + capture_events, + test_agent, + mock_model_response, + get_model_response, ): """ Test that gen_ai.conversation.id is not set when not passed to Runner.run(). 
""" - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + response = get_model_response(mock_model_response) - events = capture_events() + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - # Don't pass conversation_id - result = await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + events = capture_events() - assert result is not None + # Don't pass conversation_id + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify conversation_id is NOT set on any spans assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( From 57c2a6eef3ca9220e7e7a1721ff79d505994c513 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 10:45:35 +0100 Subject: [PATCH 3/8] revert unintended tool test change --- .../openai_agents/test_openai_agents.py | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index ac4980f612..1b2dbaf941 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ 
b/tests/integrations/openai_agents/test_openai_agents.py @@ -13,9 +13,9 @@ from sentry_sdk.integrations.logging import LoggingIntegration from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize -from sentry_sdk.utils import parse_version +from sentry_sdk.utils import parse_version, package_version -from openai import AsyncOpenAI, InternalServerError +from openai import AsyncOpenAI from agents.models.openai_responses import OpenAIResponsesModel from unittest import mock @@ -38,6 +38,8 @@ from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError from agents.version import __version__ as OPENAI_AGENTS_VERSION +OPENAI_VERSION = package_version("openai") + from openai.types.responses import ( ResponseCreatedEvent, ResponseTextDeltaEvent, @@ -1319,18 +1321,22 @@ def simple_test_tool(message: str) -> str: assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - assert ai_client_span1["data"]["gen_ai.response.tool_calls"] == safe_serialize( - [ - { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - ] - ) + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call + ] assert tool_span["description"] == "execute_tool simple_test_tool" assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" From 5b6163dc8b06406a5df3752aa88b8c806c9fe4a3 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 
10:53:05 +0100 Subject: [PATCH 4/8] revert fixture removal --- .../openai_agents/test_openai_agents.py | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 1b2dbaf941..c9ab37b58d 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -313,6 +313,24 @@ def inner(instructions): return inner +@pytest.fixture +def test_agent_custom_model(): + """Create a real Agent instance for testing.""" + return Agent( + name="test_agent_custom_model", + instructions="You are a helpful test assistant.", + # the model could be agents.OpenAIChatCompletionsModel() + model="my-custom-model", + model_settings=ModelSettings( + max_tokens=100, + temperature=0.7, + top_p=1.0, + presence_penalty=0.0, + frequency_penalty=0.0, + ), + ) + + @pytest.fixture def get_model_response(): def inner(response_content): @@ -638,6 +656,7 @@ async def test_agent_invocation_span( async def test_client_span_custom_model( sentry_init, capture_events, + test_agent_custom_model, mock_model_response, get_model_response, ): @@ -647,20 +666,7 @@ async def test_client_span_custom_model( client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="my-custom-model", openai_client=client) - - agent = Agent( - name="test_agent_custom_model", - instructions="You are a helpful test assistant.", - # the model could be agents.OpenAIChatCompletionsModel() - model=model, - model_settings=ModelSettings( - max_tokens=100, - temperature=0.7, - top_p=1.0, - presence_penalty=0.0, - frequency_penalty=0.0, - ), - ) + agent = test_agent_custom_model.clone(model=model) response = get_model_response(mock_model_response) From 2cc4825f3c92b2b01258ec79397c3cb38c31f80c Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 10:54:38 +0100 Subject: [PATCH 5/8] use test-key --- 
tests/integrations/openai_agents/test_openai_agents.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index c9ab37b58d..a3b18a735e 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -2087,7 +2087,7 @@ async def test_multiple_agents_asyncio( Test that multiple agents can be run at the same time in asyncio tasks without interfering with each other. """ - client = AsyncOpenAI(api_key="z") + client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) @@ -2995,7 +2995,7 @@ async def test_conversation_id_on_all_spans( Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run(). """ - client = AsyncOpenAI(api_key="z") + client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) @@ -3154,7 +3154,7 @@ async def test_no_conversation_id_when_not_provided( Test that gen_ai.conversation.id is not set when not passed to Runner.run(). 
""" - client = AsyncOpenAI(api_key="z") + client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) From 2378fd987064fc40b8bf9dfe393578d6c0dceff1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 11:28:36 +0100 Subject: [PATCH 6/8] more tests --- .../openai_agents/test_openai_agents.py | 347 +++++++++++------- 1 file changed, 205 insertions(+), 142 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index a3b18a735e..c47148bc89 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -2274,56 +2274,81 @@ def failing_tool(message: str) -> str: @pytest.mark.asyncio async def test_invoke_agent_span_includes_usage_data( - sentry_init, capture_events, test_agent, mock_usage + sentry_init, + capture_events, + test_agent, + get_model_response, ): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. 
""" + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a response with usage data - response = ModelResponse( - output=[ - ResponseOutputMessage( - id="msg_123", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Response with usage", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - usage=mock_usage, - response_id="resp_123", - ) - mock_get_response.return_value = response + response = get_model_response( + Response( + id="resp_123", + output=[ + ResponseOutputMessage( + id="msg_123", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Response with usage", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=20, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=30, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - result = await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not 
None (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) # Verify invoke_agent span has usage data from context_wrapper assert invoke_agent_span["description"] == "invoke_agent test_agent" @@ -2331,7 +2356,6 @@ async def test_invoke_agent_span_includes_usage_data( assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] - # The usage should match the mock_usage values (aggregated across all calls) assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 @@ -2341,23 +2365,23 @@ async def test_invoke_agent_span_includes_usage_data( @pytest.mark.asyncio async def test_ai_client_span_includes_response_model( - sentry_init, capture_events, test_agent + sentry_init, + capture_events, + test_agent, + get_model_response, ): """ Test that ai_client spans (gen_ai.chat) include the response model from the actual API response. This verifies we capture the actual model used (which may differ from the requested model). 
""" + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - # Mock the _fetch_response method to return a response with a model field - with patch( - "agents.models.openai_responses.OpenAIResponsesModel._fetch_response" - ) as mock_fetch_response: - # Create a mock OpenAI Response object with a specific model version - mock_response = MagicMock() - mock_response.model = "gpt-4.1-2025-04-14" # The actual response model - mock_response.id = "resp_123" - mock_response.output = [ + response = get_model_response( + Response( + id="resp_123", + output=[ ResponseOutputMessage( id="msg_123", type="message", @@ -2371,37 +2395,49 @@ async def test_ai_client_span_includes_response_model( ], role="assistant", ) - ] - mock_response.usage = MagicMock() - mock_response.usage.input_tokens = 10 - mock_response.usage.output_tokens = 20 - mock_response.usage.total_tokens = 30 - mock_response.usage.input_tokens_details = InputTokensDetails( - cached_tokens=0 - ) - mock_response.usage.output_tokens_details = OutputTokensDetails( - reasoning_tokens=5 - ) - - mock_fetch_response.return_value = mock_response + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=20, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=30, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = 
capture_events() - result = await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None (transaction,) = events spans = transaction["spans"] - _, ai_client_span = spans + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify ai_client span has response model from API response assert ai_client_span["description"] == "chat gpt-4" @@ -2411,29 +2447,28 @@ async def test_ai_client_span_includes_response_model( @pytest.mark.asyncio async def test_ai_client_span_response_model_with_chat_completions( - sentry_init, capture_events + sentry_init, + capture_events, + get_model_response, ): """ Test that response model is captured when using ChatCompletions API (not Responses API). This ensures our implementation works with different OpenAI model types. """ # Create agent that uses ChatCompletions model + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4o-mini", openai_client=client) + agent = Agent( name="chat_completions_agent", instructions="Test agent using ChatCompletions", - model="gpt-4o-mini", + model=model, ) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - # Mock the _fetch_response method - with patch( - "agents.models.openai_responses.OpenAIResponsesModel._fetch_response" - ) as mock_fetch_response: - # Create a mock Response object - mock_response = MagicMock() - mock_response.model = "gpt-4o-mini-2024-07-18" - mock_response.id = "resp_123" - mock_response.output = [ + response = get_model_response( + Response( + id="resp_123", + output=[ ResponseOutputMessage( id="msg_123", type="message", @@ -2447,36 +2482,48 @@ async def test_ai_client_span_response_model_with_chat_completions( ], role="assistant", ) - ] - mock_response.usage = MagicMock() - mock_response.usage.input_tokens = 15 - mock_response.usage.output_tokens = 25 
- mock_response.usage.total_tokens = 40 - mock_response.usage.input_tokens_details = InputTokensDetails( - cached_tokens=0 - ) - mock_response.usage.output_tokens_details = OutputTokensDetails( - reasoning_tokens=0 - ) - - mock_fetch_response.return_value = mock_response + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4o-mini-2024-07-18", + object="response", + usage=ResponseUsage( + input_tokens=15, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=25, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=40, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - events = capture_events() + events = capture_events() - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None (transaction,) = events spans = transaction["spans"] - _, ai_client_span = spans + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify response model from API response is captured assert "gen_ai.response.model" in ai_client_span["data"] @@ -2657,21 +2704,22 @@ async def test_response_model_not_set_when_unavailable( @pytest.mark.asyncio async def test_invoke_agent_span_includes_response_model( - sentry_init, capture_events, test_agent + sentry_init, + capture_events, + test_agent, + get_model_response, ): """ Test that invoke_agent spans include the response model from the API response. 
""" + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel._fetch_response" - ) as mock_fetch_response: - # Create a mock OpenAI Response object with a specific model version - mock_response = MagicMock() - mock_response.model = "gpt-4.1-2025-04-14" # The actual response model - mock_response.id = "resp_123" - mock_response.output = [ + response = get_model_response( + Response( + id="resp_123", + output=[ ResponseOutputMessage( id="msg_123", type="message", @@ -2685,37 +2733,52 @@ async def test_invoke_agent_span_includes_response_model( ], role="assistant", ) - ] - mock_response.usage = MagicMock() - mock_response.usage.input_tokens = 10 - mock_response.usage.output_tokens = 20 - mock_response.usage.total_tokens = 30 - mock_response.usage.input_tokens_details = InputTokensDetails( - cached_tokens=0 - ) - mock_response.usage.output_tokens_details = OutputTokensDetails( - reasoning_tokens=5 - ) - - mock_fetch_response.return_value = mock_response + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=20, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=30, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - result = await agents.Runner.run( - test_agent, "Test 
input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify invoke_agent span has response model from API assert invoke_agent_span["description"] == "invoke_agent test_agent" From 1a1ab90d79dd259f5b04d55124ba6eec843c0c5f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 13:27:57 +0100 Subject: [PATCH 7/8] add one missing test --- .../openai_agents/test_openai_agents.py | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index c47148bc89..ef9461ff9f 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1492,7 +1492,9 @@ async def test_hosted_mcp_tool_propagation_header_streamed(sentry_init, test_age @pytest.mark.asyncio -async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent): +async def test_hosted_mcp_tool_propagation_headers( + sentry_init, test_agent, get_model_response +): """ Test responses API is given trace propagation headers with HostedMCPTool. 
""" @@ -1508,9 +1510,7 @@ async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent): }, ) - client = AsyncOpenAI(api_key="z") - client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) - + client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent_with_tool = test_agent.clone( @@ -1524,11 +1524,13 @@ async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent): release="d08ebdb9309e1b004c6f52202de58a09c2268e42", ) + response = get_model_response(EXAMPLE_RESPONSE) + with patch.object( - model._client.responses, - "create", - wraps=model._client.responses.create, - ) as create, mock.patch( + agent_with_tool.model._client._client, + "send", + return_value=response, + ) as send, mock.patch( "sentry_sdk.tracing_utils.Random.randrange", return_value=500000 ): with sentry_sdk.start_transaction( @@ -1542,13 +1544,17 @@ async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent): run_config=test_run_config, ) - ai_client_span = transaction._span_recorder.spans[-1] + ai_client_span = next( + span + for span in transaction._span_recorder.spans + if span.op == OP.GEN_AI_CHAT + ) - args, kwargs = create.call_args + args, kwargs = send.call_args - assert "tools" in kwargs - assert len(kwargs["tools"]) == 1 - hosted_mcp_tool = kwargs["tools"][0] + request = args[0] + body = json.loads(request.content.decode("utf-8")) + hosted_mcp_tool = body["tools"][0] assert hosted_mcp_tool["headers"][ "sentry-trace" From 0d13b60aa461a7937cdbc53ac61ff44fedcd97d8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 13:30:50 +0100 Subject: [PATCH 8/8] set usage to none in example --- .../integrations/openai_agents/test_openai_agents.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index ef9461ff9f..a4b92b17c1 100644 --- 
a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -262,17 +262,7 @@ def mock_model_response(): created_at=10000000, model="gpt-4", object="response", - usage=ResponseUsage( - input_tokens=20, - input_tokens_details=InputTokensDetails( - cached_tokens=5, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=8, - ), - total_tokens=30, - ), + usage=None, )