diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py index c90b061b4ff..183ae6b6012 100644 --- a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py +++ b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py @@ -1024,8 +1024,10 @@ def _parse_usage_from_anthropic(self, usage: BetaUsage | BetaMessageDeltaUsage | usage_details["input_token_count"] = usage.input_tokens if usage.cache_creation_input_tokens is not None: usage_details["anthropic.cache_creation_input_tokens"] = usage.cache_creation_input_tokens # type: ignore[typeddict-unknown-key] + usage_details["cache_creation_input_token_count"] = usage.cache_creation_input_tokens if usage.cache_read_input_tokens is not None: usage_details["anthropic.cache_read_input_tokens"] = usage.cache_read_input_tokens # type: ignore[typeddict-unknown-key] + usage_details["cache_read_input_token_count"] = usage.cache_read_input_tokens return usage_details def _parse_contents_from_anthropic( diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py index abad158b8c0..254bfb51f2f 100644 --- a/python/packages/anthropic/tests/test_anthropic_client.py +++ b/python/packages/anthropic/tests/test_anthropic_client.py @@ -2354,6 +2354,8 @@ def test_parse_usage_with_cache_tokens(mock_anthropic_client: MagicMock) -> None assert result["input_token_count"] == 100 assert result["anthropic.cache_creation_input_tokens"] == 20 assert result["anthropic.cache_read_input_tokens"] == 30 + assert result["cache_creation_input_token_count"] == 20 + assert result["cache_read_input_token_count"] == 30 # Code Execution Result Tests diff --git a/python/packages/core/agent_framework/_types.py b/python/packages/core/agent_framework/_types.py index f30fc04789d..7c28891785a 100644 --- a/python/packages/core/agent_framework/_types.py +++ b/python/packages/core/agent_framework/_types.py @@ -400,12 +400,18 @@ class UsageDetails(TypedDict, total=False, extra_items=int): # type: ignore[cal input_token_count: The number of input tokens used. output_token_count: The number of output tokens generated. total_token_count: The total number of tokens (input + output). + cache_creation_input_token_count: Tokens written to a provider-managed cache. + cache_read_input_token_count: Tokens served from a provider-managed cache. + reasoning_output_token_count: Output tokens used for reasoning (chain-of-thought, extended thinking). """ input_token_count: int | None output_token_count: int | None total_token_count: int | None + cache_creation_input_token_count: int | None + cache_read_input_token_count: int | None + reasoning_output_token_count: int | None def add_usage_details(usage1: UsageDetails | None, usage2: UsageDetails | None) -> UsageDetails: diff --git a/python/packages/core/agent_framework/observability.py b/python/packages/core/agent_framework/observability.py index a36b1f6aae2..d7449176662 100644 --- a/python/packages/core/agent_framework/observability.py +++ b/python/packages/core/agent_framework/observability.py @@ -201,6 +201,9 @@ class OtelAttr(str, Enum): # Usage attributes INPUT_TOKENS = "gen_ai.usage.input_tokens" OUTPUT_TOKENS = "gen_ai.usage.output_tokens" + CACHE_CREATION_INPUT_TOKENS = "gen_ai.usage.cache_creation.input_tokens" + CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read.input_tokens" + REASONING_OUTPUT_TOKENS = "gen_ai.usage.reasoning.output_tokens" # Tool attributes TOOL_CALL_ID = "gen_ai.tool.call.id" TOOL_DESCRIPTION = "gen_ai.tool.description" @@ -328,6 +331,14 @@ def __str__(self) -> str: "length": "length", } +_USAGE_FIELD_TO_OTEL_ATTR: dict[str, str] = { + "input_token_count": OtelAttr.INPUT_TOKENS.value, + "output_token_count": OtelAttr.OUTPUT_TOKENS.value, + "cache_creation_input_token_count": OtelAttr.CACHE_CREATION_INPUT_TOKENS.value, + "cache_read_input_token_count": OtelAttr.CACHE_READ_INPUT_TOKENS.value, + "reasoning_output_token_count": OtelAttr.REASONING_OUTPUT_TOKENS.value, +} + # region Telemetry utils @@ -2378,12 +2389,16 @@ def _get_response_attributes( if model := getattr(response, "model", None): attributes[OtelAttr.RESPONSE_MODEL] = model if capture_usage and (usage := response.usage_details): - input_tokens = usage.get("input_token_count") - if input_tokens: - attributes[OtelAttr.INPUT_TOKENS] = input_tokens - output_tokens = usage.get("output_token_count") - if output_tokens: - attributes[OtelAttr.OUTPUT_TOKENS] = output_tokens + for key, value in usage.items(): + if not isinstance(value, int) or isinstance(value, bool): + continue + attr_name = _USAGE_FIELD_TO_OTEL_ATTR.get(key) + if attr_name is not None: + attributes[attr_name] = value + continue + # Fall back to prefix-based attribute for provider-specific fields + # not yet covered by the standard mapping. + attributes[f"gen_ai.usage.{key}"] = value return attributes diff --git a/python/packages/core/tests/core/test_observability.py b/python/packages/core/tests/core/test_observability.py index 46f6e2c1517..b54b80dbdda 100644 --- a/python/packages/core/tests/core/test_observability.py +++ b/python/packages/core/tests/core/test_observability.py @@ -1535,7 +1535,7 @@ def test_configure_otel_providers_explicit_console_exporters_overrides_env(monke def test_observability_settings_defaults_instrumentation_true(monkeypatch): - """ENABLE_INSTRUMENTATION unset → ObservabilitySettings defaults to True.""" + """ENABLE_INSTRUMENTATION unset -> ObservabilitySettings defaults to True.""" from agent_framework.observability import ObservabilitySettings monkeypatch.delenv("ENABLE_INSTRUMENTATION", raising=False) @@ -2154,6 +2154,29 @@ def test_get_response_attributes_with_usage(): assert result[OtelAttr.OUTPUT_TOKENS] == 50 +def test_get_response_attributes_maps_detailed_usage_to_standard_otel_attrs(): + """Test detailed usage fields use standard OTel GenAI attributes.""" + from unittest.mock import Mock + + from agent_framework.observability import OtelAttr, _get_response_attributes + + response = Mock() + response.response_id = None + response.finish_reason = None + response.raw_representation = None + response.usage_details = { + "cache_creation_input_token_count": 10, + "cache_read_input_token_count": 20, + "reasoning_output_token_count": 30, + } + + result = _get_response_attributes({}, response) + + assert result[OtelAttr.CACHE_CREATION_INPUT_TOKENS] == 10 + assert result[OtelAttr.CACHE_READ_INPUT_TOKENS] == 20 + assert result[OtelAttr.REASONING_OUTPUT_TOKENS] == 30 + + def test_get_response_attributes_capture_usage_false(): """Test _get_response_attributes skips usage when capture_usage is False.""" from unittest.mock import Mock @@ -3105,89 +3128,6 @@ async def _get() -> ChatResponse: assert agent_span.attributes[OtelAttr.OUTPUT_TOKENS] == 22 -# region Test non-ASCII character handling in JSON serialization - - -@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) -async def test_capture_messages_preserves_non_ascii_characters(mock_chat_client, span_exporter: InMemorySpanExporter): - """Test that non-ASCII characters (e.g., Japanese) are preserved in span attributes.""" - import json - - japanese_text = "こんにちは世界" # "Hello World" in Japanese - - class ClientWithJapanese(mock_chat_client): - async def _inner_get_response(self, *, messages, options, **kwargs): - return ChatResponse( - messages=[Message(role="assistant", contents=[japanese_text])], - usage_details=UsageDetails(input_token_count=5, output_token_count=10), - ) - - client = ClientWithJapanese() - messages = [Message(role="user", contents=[japanese_text])] - - span_exporter.clear() - response = await client.get_response(messages=messages, options={"model": "Test"}) - - assert response is not None - spans = span_exporter.get_finished_spans() - assert len(spans) == 1 - span = spans[0] - - # Verify input messages preserve Japanese characters - input_messages_json = span.attributes[OtelAttr.INPUT_MESSAGES] - assert japanese_text in input_messages_json - # Ensure it's not escaped to Unicode - assert "\\u" not in input_messages_json - - # Verify output messages preserve Japanese characters - output_messages_json = span.attributes[OtelAttr.OUTPUT_MESSAGES] - assert japanese_text in output_messages_json - assert "\\u" not in output_messages_json - - # Verify JSON is valid and contains the text - input_messages = json.loads(input_messages_json) - assert input_messages[0]["parts"][0]["content"] == japanese_text - output_messages = json.loads(output_messages_json) - assert output_messages[0]["parts"][0]["content"] == japanese_text - - -@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) -async def test_system_instructions_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter): - """Test that non-ASCII characters are preserved in system instructions span attribute.""" - import json - - from opentelemetry import trace - - chinese_text = "你好世界" # "Hello World" in Chinese - - tracer = trace.get_tracer("test") - span_exporter.clear() - - with tracer.start_as_current_span("test_span") as span: - _capture_messages( - span=span, - provider_name="test_provider", - messages=[Message(role="user", contents=["Test"])], - system_instructions=chinese_text, - ) - - spans = span_exporter.get_finished_spans() - assert len(spans) == 1 - span = spans[0] - - # Verify system instructions preserve Chinese characters - system_instructions_json = span.attributes[OtelAttr.SYSTEM_INSTRUCTIONS] - assert chinese_text in system_instructions_json - assert "\\u" not in system_instructions_json - - # Verify JSON is valid and contains the text - system_instructions = json.loads(system_instructions_json) - assert system_instructions[0]["content"] == chinese_text - - input_messages = json.loads(span.attributes[OtelAttr.INPUT_MESSAGES]) - assert [msg.get("role") for msg in input_messages] == ["user"] - - @pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) def test_capture_messages_with_prepared_request_info_function_call_arguments(span_exporter: InMemorySpanExporter): """Test _capture_messages handles request-info function-call arguments prepared at Content creation.""" @@ -3301,100 +3241,6 @@ def test_capture_messages_logs_only_chat_history_when_framework_instructions_are assert logged_messages[1]["parts"][0]["content"] == "Test" -@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) -async def test_tool_arguments_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter): - """Test that non-ASCII characters are preserved in tool arguments span attribute.""" - import json - - korean_text = "안녕하세요" # "Hello" in Korean - - @tool - def greet(message: str) -> str: - """Greet with a message.""" - return f"Greeted: {message}" - - span_exporter.clear() - await greet.invoke(message=korean_text) - - spans = span_exporter.get_finished_spans() - assert len(spans) == 1 - span = spans[0] - - # Verify tool arguments preserve Korean characters - tool_arguments_json = span.attributes[OtelAttr.TOOL_ARGUMENTS] - assert korean_text in tool_arguments_json - assert "\\u" not in tool_arguments_json - - # Verify JSON is valid and contains the text - tool_arguments = json.loads(tool_arguments_json) - assert tool_arguments["message"] == korean_text - - -@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) -async def test_tool_result_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter): - """Test that non-ASCII characters are preserved in tool result span attribute.""" - arabic_text = "مرحبا بالعالم" # "Hello World" in Arabic - - @tool - def echo(text: str) -> str: - """Echo the text back.""" - return text - - span_exporter.clear() - result = await echo.invoke(text=arabic_text) - - assert isinstance(result, list) - assert result[0].text == arabic_text - spans = span_exporter.get_finished_spans() - assert len(spans) == 1 - span = spans[0] - - # Verify tool result preserves Arabic characters - tool_result = span.attributes[OtelAttr.TOOL_RESULT] - assert arabic_text in tool_result - - -@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) -async def test_tool_arguments_pydantic_preserves_non_ascii_characters( - span_exporter: InMemorySpanExporter, -) -> None: - """Test that non-ASCII characters are preserved in tool arguments when using a Pydantic model.""" - import json - - from pydantic import BaseModel - - japanese_text = "こんにちは" # "Hello" in Japanese - - class Greeting(BaseModel): - message: str - - @tool - def greet_with_model(greeting: Greeting) -> str: - """Greet with a message contained in a Pydantic model.""" - # When invoked via the tool's input_model, greeting is passed as a dict - if isinstance(greeting, dict): - return f"Greeted: {greeting['message']}" - return f"Greeted: {greeting.message}" - - span_exporter.clear() - # Use the tool's input_model to properly pass the Pydantic model argument - input_model = greet_with_model.input_model - await greet_with_model.invoke(arguments=input_model(greeting=Greeting(message=japanese_text))) - - spans = span_exporter.get_finished_spans() - assert len(spans) == 1 - span = spans[0] - - # Verify tool arguments preserve Japanese characters - tool_arguments_json = span.attributes[OtelAttr.TOOL_ARGUMENTS] - assert japanese_text in tool_arguments_json - assert "\\u" not in tool_arguments_json - - # Verify JSON is valid and contains the text - tool_arguments = json.loads(tool_arguments_json) - assert tool_arguments["greeting"]["message"] == japanese_text - - # region Test merged options for instructions @@ -3917,7 +3763,7 @@ def mock_get_meter(*args, **kwargs): @tool(name="get_weather", description="Get weather for a city", approval_mode="never_require") def _get_weather(city: str) -> str: """Get weather for a city.""" - return "Sunny, 72°F" + return "Sunny, 72F" @pytest.mark.parametrize("enable_sensitive_data", [False], indirect=True) diff --git a/python/packages/openai/agent_framework_openai/_chat_client.py b/python/packages/openai/agent_framework_openai/_chat_client.py index 2d5cda9ee5d..065d34b51b5 100644 --- a/python/packages/openai/agent_framework_openai/_chat_client.py +++ b/python/packages/openai/agent_framework_openai/_chat_client.py @@ -2981,8 +2981,10 @@ def _parse_usage_from_openai(self, usage: ResponseUsage) -> UsageDetails | None: ) if usage.input_tokens_details and usage.input_tokens_details.cached_tokens: details["openai.cached_input_tokens"] = usage.input_tokens_details.cached_tokens # type: ignore[typeddict-unknown-key] + details["cache_read_input_token_count"] = usage.input_tokens_details.cached_tokens if usage.output_tokens_details and usage.output_tokens_details.reasoning_tokens: details["openai.reasoning_tokens"] = usage.output_tokens_details.reasoning_tokens # type: ignore[typeddict-unknown-key] + details["reasoning_output_token_count"] = usage.output_tokens_details.reasoning_tokens return details def _get_metadata_from_response(self, output: Any) -> dict[str, Any]: diff --git a/python/packages/openai/agent_framework_openai/_chat_completion_client.py b/python/packages/openai/agent_framework_openai/_chat_completion_client.py index 0fd14aa2ef5..bfd3aac0e11 100644 --- a/python/packages/openai/agent_framework_openai/_chat_completion_client.py +++ b/python/packages/openai/agent_framework_openai/_chat_completion_client.py @@ -767,6 +767,7 @@ def _parse_usage_from_openai(self, usage: CompletionUsage) -> UsageDetails: details["completion/audio_tokens"] = tokens # type: ignore[typeddict-unknown-key] if tokens := usage.completion_tokens_details.reasoning_tokens: details["completion/reasoning_tokens"] = tokens # type: ignore[typeddict-unknown-key] + details["reasoning_output_token_count"] = tokens if tokens := usage.completion_tokens_details.rejected_prediction_tokens: details["completion/rejected_prediction_tokens"] = tokens # type: ignore[typeddict-unknown-key] if usage.prompt_tokens_details: @@ -774,6 +775,7 @@ def _parse_usage_from_openai(self, usage: CompletionUsage) -> UsageDetails: details["prompt/audio_tokens"] = tokens # type: ignore[typeddict-unknown-key] if tokens := usage.prompt_tokens_details.cached_tokens: details["prompt/cached_tokens"] = tokens # type: ignore[typeddict-unknown-key] + details["cache_read_input_token_count"] = tokens return details def _parse_text_from_openai(self, choice: Choice | ChunkChoice) -> Content | None: diff --git a/python/packages/openai/tests/openai/test_openai_chat_client.py b/python/packages/openai/tests/openai/test_openai_chat_client.py index 9bc598d3cbe..a7e573a278a 100644 --- a/python/packages/openai/tests/openai/test_openai_chat_client.py +++ b/python/packages/openai/tests/openai/test_openai_chat_client.py @@ -3301,6 +3301,7 @@ def test_usage_details_with_cached_tokens() -> None: assert details is not None assert details["input_token_count"] == 200 assert details["openai.cached_input_tokens"] == 25 + assert details["cache_read_input_token_count"] == 25 def test_usage_details_with_reasoning_tokens() -> None: @@ -3319,6 +3320,7 @@ def test_usage_details_with_reasoning_tokens() -> None: assert details is not None assert details["output_token_count"] == 80 assert details["openai.reasoning_tokens"] == 30 + assert details["reasoning_output_token_count"] == 30 def test_get_metadata_from_response() -> None: diff --git a/python/samples/02-agents/tools/function_tool_declaration_only.py b/python/samples/02-agents/tools/function_tool_declaration_only.py index a8c4bd826eb..347a669fa91 100644 --- a/python/samples/02-agents/tools/function_tool_declaration_only.py +++ b/python/samples/02-agents/tools/function_tool_declaration_only.py @@ -69,7 +69,8 @@ async def main(): "input_token_count": 63, "output_token_count": 145, "total_token_count": 208, - "openai.reasoning_tokens": 128 + "openai.reasoning_tokens": 128, + "reasoning_output_token_count": 128 }, "additional_properties": {} }