diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 1390455317..a4b92b17c1 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -5,10 +5,11 @@ import os import json import logging +import httpx import sentry_sdk from sentry_sdk import start_span -from sentry_sdk.consts import SPANDATA +from sentry_sdk.consts import SPANDATA, OP from sentry_sdk.integrations.logging import LoggingIntegration from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize @@ -237,8 +238,9 @@ def mock_usage(): @pytest.fixture -def mock_model_response(mock_usage): - return ModelResponse( +def mock_model_response(): + return Response( + id="resp_123", output=[ ResponseOutputMessage( id="msg_123", @@ -254,8 +256,13 @@ def mock_model_response(mock_usage): role="assistant", ) ], - usage=mock_usage, - response_id="resp_123", + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4", + object="response", + usage=None, ) @@ -314,34 +321,61 @@ def test_agent_custom_model(): ) +@pytest.fixture +def get_model_response(): + def inner(response_content): + model_request = httpx.Request( + "POST", + "/responses", + ) + + response = httpx.Response( + 200, + request=model_request, + content=json.dumps(response_content.model_dump()).encode("utf-8"), + ) + + return response + + return inner + + @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( - sentry_init, capture_events, test_agent, mock_model_response + sentry_init, capture_events, test_agent, mock_model_response, get_model_response ): - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, - ) + response = get_model_response(mock_model_response) - events = capture_events() + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + ) - result = await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + events = capture_events() - assert result is not None - assert result.final_output == "Hello, how can I help you?" + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -454,33 +488,38 @@ async def test_agent_invocation_span( instructions, input, request, + get_model_response, ): """ Test that the integration creates spans for agent invocations. """ + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent_with_instructions(instructions).clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + response = get_model_response(mock_model_response) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - result = await agents.Runner.run( - test_agent_with_instructions(instructions), - input, - run_config=test_run_config, - ) + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" (transaction,) = events spans = transaction["spans"] @@ -605,35 +644,44 @@ async def test_agent_invocation_span( @pytest.mark.asyncio async def test_client_span_custom_model( - sentry_init, capture_events, test_agent_custom_model, mock_model_response + sentry_init, + capture_events, + test_agent_custom_model, + mock_model_response, + get_model_response, ): """ Test that the integration uses the correct model name if a custom model is used. """ - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="my-custom-model", openai_client=client) + agent = test_agent_custom_model.clone(model=model) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + response = get_model_response(mock_model_response) - events = capture_events() + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - result = await agents.Runner.run( - test_agent_custom_model, "Test input", run_config=test_run_config - ) + events = capture_events() - assert result is not None - assert result.final_output == "Hello, how can I help you?" + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" (transaction,) = events spans = transaction["spans"] - _, ai_client_span = spans + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) assert ai_client_span["description"] == "chat my-custom-model" assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" @@ -644,35 +692,41 @@ def test_agent_invocation_span_sync_no_pii( capture_events, test_agent, mock_model_response, + get_model_response, ): """ Test that the integration creates spans for agent invocations. """ + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + response = get_model_response(mock_model_response) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + ) - events = capture_events() + events = capture_events() - result = agents.Runner.run_sync( - test_agent, "Test input", run_config=test_run_config - ) + result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -781,33 +835,38 @@ def test_agent_invocation_span_sync( instructions, input, request, + get_model_response, ): """ Test that the integration creates spans for agent invocations. """ + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent_with_instructions(instructions).clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + response = get_model_response(mock_model_response) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - result = agents.Runner.run_sync( - test_agent_with_instructions(instructions), - input, - run_config=test_run_config, - ) + result = agents.Runner.run_sync( + agent, + input, + run_config=test_run_config, + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" (transaction,) = events spans = transaction["spans"] @@ -1423,7 +1482,9 @@ async def test_hosted_mcp_tool_propagation_header_streamed(sentry_init, test_age @pytest.mark.asyncio -async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent): +async def test_hosted_mcp_tool_propagation_headers( + sentry_init, test_agent, get_model_response +): """ Test responses API is given trace propagation headers with HostedMCPTool. """ @@ -1439,9 +1500,7 @@ async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent): }, ) - client = AsyncOpenAI(api_key="z") - client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) - + client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent_with_tool = test_agent.clone( @@ -1455,11 +1514,13 @@ async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent): release="d08ebdb9309e1b004c6f52202de58a09c2268e42", ) + response = get_model_response(EXAMPLE_RESPONSE) + with patch.object( - model._client.responses, - "create", - wraps=model._client.responses.create, - ) as create, mock.patch( + agent_with_tool.model._client._client, + "send", + return_value=response, + ) as send, mock.patch( "sentry_sdk.tracing_utils.Random.randrange", return_value=500000 ): with sentry_sdk.start_transaction( @@ -1473,13 +1534,17 @@ async def test_hosted_mcp_tool_propagation_headers(sentry_init, test_agent): run_config=test_run_config, ) - ai_client_span = transaction._span_recorder.spans[-1] + ai_client_span = next( + span + for span in transaction._span_recorder.spans + if span.op == OP.GEN_AI_CHAT + ) - args, kwargs = create.call_args + args, kwargs = send.call_args - assert "tools" in kwargs - assert len(kwargs["tools"]) == 1 - hosted_mcp_tool = kwargs["tools"][0] + request = args[0] + body = json.loads(request.content.decode("utf-8")) + hosted_mcp_tool = body["tools"][0] assert hosted_mcp_tool["headers"][ "sentry-trace" @@ -2012,34 +2077,38 @@ async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_ @pytest.mark.asyncio async def test_multiple_agents_asyncio( - sentry_init, capture_events, test_agent, mock_model_response + sentry_init, capture_events, test_agent, mock_model_response, get_model_response ): """ Test that multiple agents can be run at the same time in asyncio tasks without interfering with each other. """ + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + response = get_model_response(mock_model_response) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - events = capture_events() + events = capture_events() - async def run(): - await agents.Runner.run( - starting_agent=test_agent, - input="Test input", - run_config=test_run_config, - ) + async def run(): + await agents.Runner.run( + starting_agent=agent, + input="Test input", + run_config=test_run_config, + ) - await asyncio.gather(*[run() for _ in range(3)]) + await asyncio.gather(*[run() for _ in range(3)]) assert len(events) == 3 txn1, txn2, txn3 = events @@ -2201,56 +2270,81 @@ def failing_tool(message: str) -> str: @pytest.mark.asyncio async def test_invoke_agent_span_includes_usage_data( - sentry_init, capture_events, test_agent, mock_usage + sentry_init, + capture_events, + test_agent, + get_model_response, ): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. """ + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a response with usage data - response = ModelResponse( - output=[ - ResponseOutputMessage( - id="msg_123", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Response with usage", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - usage=mock_usage, - response_id="resp_123", - ) - mock_get_response.return_value = response + response = get_model_response( + Response( + id="resp_123", + output=[ + ResponseOutputMessage( + id="msg_123", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Response with usage", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=20, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=30, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - result = await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) # Verify invoke_agent span has usage data from context_wrapper assert invoke_agent_span["description"] == "invoke_agent test_agent" @@ -2258,7 +2352,6 @@ async def test_invoke_agent_span_includes_usage_data( assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] - # The usage should match the mock_usage values (aggregated across all calls) assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 @@ -2268,23 +2361,23 @@ async def test_invoke_agent_span_includes_usage_data( @pytest.mark.asyncio async def test_ai_client_span_includes_response_model( - sentry_init, capture_events, test_agent + sentry_init, + capture_events, + test_agent, + get_model_response, ): """ Test that ai_client spans (gen_ai.chat) include the response model from the actual API response. This verifies we capture the actual model used (which may differ from the requested model). """ + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - # Mock the _fetch_response method to return a response with a model field - with patch( - "agents.models.openai_responses.OpenAIResponsesModel._fetch_response" - ) as mock_fetch_response: - # Create a mock OpenAI Response object with a specific model version - mock_response = MagicMock() - mock_response.model = "gpt-4.1-2025-04-14" # The actual response model - mock_response.id = "resp_123" - mock_response.output = [ + response = get_model_response( + Response( + id="resp_123", + output=[ ResponseOutputMessage( id="msg_123", type="message", @@ -2298,37 +2391,49 @@ async def test_ai_client_span_includes_response_model( ], role="assistant", ) - ] - mock_response.usage = MagicMock() - mock_response.usage.input_tokens = 10 - mock_response.usage.output_tokens = 20 - mock_response.usage.total_tokens = 30 - mock_response.usage.input_tokens_details = InputTokensDetails( - cached_tokens=0 - ) - mock_response.usage.output_tokens_details = OutputTokensDetails( - reasoning_tokens=5 - ) - - mock_fetch_response.return_value = mock_response + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=20, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=30, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - result = await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None (transaction,) = events spans = transaction["spans"] - _, ai_client_span = spans + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify ai_client span has response model from API response assert ai_client_span["description"] == "chat gpt-4" @@ -2338,29 +2443,28 @@ async def test_ai_client_span_includes_response_model( @pytest.mark.asyncio async def test_ai_client_span_response_model_with_chat_completions( - sentry_init, capture_events + sentry_init, + capture_events, + get_model_response, ): """ Test that response model is captured when using ChatCompletions API (not Responses API). This ensures our implementation works with different OpenAI model types. """ # Create agent that uses ChatCompletions model + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4o-mini", openai_client=client) + agent = Agent( name="chat_completions_agent", instructions="Test agent using ChatCompletions", - model="gpt-4o-mini", + model=model, ) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - # Mock the _fetch_response method - with patch( - "agents.models.openai_responses.OpenAIResponsesModel._fetch_response" - ) as mock_fetch_response: - # Create a mock Response object - mock_response = MagicMock() - mock_response.model = "gpt-4o-mini-2024-07-18" - mock_response.id = "resp_123" - mock_response.output = [ + response = get_model_response( + Response( + id="resp_123", + output=[ ResponseOutputMessage( id="msg_123", type="message", @@ -2374,36 +2478,48 @@ async def test_ai_client_span_response_model_with_chat_completions( ], role="assistant", ) - ] - mock_response.usage = MagicMock() - mock_response.usage.input_tokens = 15 - mock_response.usage.output_tokens = 25 - mock_response.usage.total_tokens = 40 - mock_response.usage.input_tokens_details = InputTokensDetails( - cached_tokens=0 - ) - mock_response.usage.output_tokens_details = OutputTokensDetails( - reasoning_tokens=0 - ) - - mock_fetch_response.return_value = mock_response + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4o-mini-2024-07-18", + object="response", + usage=ResponseUsage( + input_tokens=15, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=25, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=40, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - events = capture_events() + events = capture_events() - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None (transaction,) = events spans = transaction["spans"] - _, ai_client_span = spans + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify response model from API response is captured assert "gen_ai.response.model" in ai_client_span["data"] @@ -2584,21 +2700,22 @@ async def test_response_model_not_set_when_unavailable( @pytest.mark.asyncio async def test_invoke_agent_span_includes_response_model( - sentry_init, capture_events, test_agent + sentry_init, + capture_events, + test_agent, + get_model_response, ): """ Test that invoke_agent spans include the response model from the API response. """ + client = AsyncOpenAI(api_key="z") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel._fetch_response" - ) as mock_fetch_response: - # Create a mock OpenAI Response object with a specific model version - mock_response = MagicMock() - mock_response.model = "gpt-4.1-2025-04-14" # The actual response model - mock_response.id = "resp_123" - mock_response.output = [ + response = get_model_response( + Response( + id="resp_123", + output=[ ResponseOutputMessage( id="msg_123", type="message", @@ -2612,37 +2729,52 @@ async def test_invoke_agent_span_includes_response_model( ], role="assistant", ) - ] - mock_response.usage = MagicMock() - mock_response.usage.input_tokens = 10 - mock_response.usage.output_tokens = 20 - mock_response.usage.total_tokens = 30 - mock_response.usage.input_tokens_details = InputTokensDetails( - cached_tokens=0 - ) - mock_response.usage.output_tokens_details = OutputTokensDetails( - reasoning_tokens=5 - ) - - mock_fetch_response.return_value = mock_response + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=20, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=30, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - result = await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify invoke_agent span has response model from API assert invoke_agent_span["description"] == "invoke_agent test_agent" @@ -2916,37 +3048,45 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent): ) @pytest.mark.asyncio async def test_conversation_id_on_all_spans( - sentry_init, capture_events, test_agent, mock_model_response + sentry_init, capture_events, test_agent, mock_model_response, get_model_response ): """ Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run(). """ - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + response = get_model_response(mock_model_response) - events = capture_events() + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - result = await agents.Runner.run( - test_agent, - "Test input", - run_config=test_run_config, - conversation_id="conv_test_123", - ) + events = capture_events() - assert result is not None + result = await agents.Runner.run( + agent, + "Test input", + run_config=test_run_config, + conversation_id="conv_test_123", + ) + + assert result is not None (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify workflow span (transaction) has conversation_id assert ( @@ -3063,35 +3203,47 @@ def simple_tool(message: str) -> str: ) @pytest.mark.asyncio async def test_no_conversation_id_when_not_provided( - sentry_init, capture_events, test_agent, mock_model_response + sentry_init, + capture_events, + test_agent, + mock_model_response, + get_model_response, ): """ Test that gen_ai.conversation.id is not set when not passed to Runner.run(). """ - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - mock_get_response.return_value = mock_model_response + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + response = get_model_response(mock_model_response) - events = capture_events() + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) - # Don't pass conversation_id - result = await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + events = capture_events() - assert result is not None + # Don't pass conversation_id + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None (transaction,) = events spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify conversation_id is NOT set on any spans assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get(