Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -1024,8 +1024,10 @@ def _parse_usage_from_anthropic(self, usage: BetaUsage | BetaMessageDeltaUsage |
usage_details["input_token_count"] = usage.input_tokens
if usage.cache_creation_input_tokens is not None:
usage_details["anthropic.cache_creation_input_tokens"] = usage.cache_creation_input_tokens # type: ignore[typeddict-unknown-key]
usage_details["cache_creation_input_token_count"] = usage.cache_creation_input_tokens
if usage.cache_read_input_tokens is not None:
usage_details["anthropic.cache_read_input_tokens"] = usage.cache_read_input_tokens # type: ignore[typeddict-unknown-key]
usage_details["cache_read_input_token_count"] = usage.cache_read_input_tokens
return usage_details

def _parse_contents_from_anthropic(
Expand Down
2 changes: 2 additions & 0 deletions python/packages/anthropic/tests/test_anthropic_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2354,6 +2354,8 @@ def test_parse_usage_with_cache_tokens(mock_anthropic_client: MagicMock) -> None
assert result["input_token_count"] == 100
assert result["anthropic.cache_creation_input_tokens"] == 20
assert result["anthropic.cache_read_input_tokens"] == 30
assert result["cache_creation_input_token_count"] == 20
assert result["cache_read_input_token_count"] == 30


# Code Execution Result Tests
Expand Down
6 changes: 6 additions & 0 deletions python/packages/core/agent_framework/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,12 +400,18 @@ class UsageDetails(TypedDict, total=False, extra_items=int): # type: ignore[cal
input_token_count: The number of input tokens used.
output_token_count: The number of output tokens generated.
total_token_count: The total number of tokens (input + output).
cache_creation_input_token_count: Tokens written to a provider-managed cache.
cache_read_input_token_count: Tokens served from a provider-managed cache.
reasoning_output_token_count: Output tokens used for reasoning (chain-of-thought, extended thinking).

"""

input_token_count: int | None
output_token_count: int | None
total_token_count: int | None
cache_creation_input_token_count: int | None
cache_read_input_token_count: int | None
reasoning_output_token_count: int | None


def add_usage_details(usage1: UsageDetails | None, usage2: UsageDetails | None) -> UsageDetails:
Expand Down
27 changes: 21 additions & 6 deletions python/packages/core/agent_framework/observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ class OtelAttr(str, Enum):
# Usage attributes
INPUT_TOKENS = "gen_ai.usage.input_tokens"
OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
CACHE_CREATION_INPUT_TOKENS = "gen_ai.usage.cache_creation.input_tokens"
CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read.input_tokens"
REASONING_OUTPUT_TOKENS = "gen_ai.usage.reasoning.output_tokens"
Comment thread
hanhan761 marked this conversation as resolved.
# Tool attributes
TOOL_CALL_ID = "gen_ai.tool.call.id"
TOOL_DESCRIPTION = "gen_ai.tool.description"
Expand Down Expand Up @@ -328,6 +331,14 @@ def __str__(self) -> str:
"length": "length",
}

# Lookup from usage-detail key to the string value of the matching OtelAttr
# usage member. Built from (key, member) pairs so each row reads as one mapping.
_USAGE_FIELD_TO_OTEL_ATTR: dict[str, str] = {
    usage_field: otel_attr.value
    for usage_field, otel_attr in (
        ("input_token_count", OtelAttr.INPUT_TOKENS),
        ("output_token_count", OtelAttr.OUTPUT_TOKENS),
        ("cache_creation_input_token_count", OtelAttr.CACHE_CREATION_INPUT_TOKENS),
        ("cache_read_input_token_count", OtelAttr.CACHE_READ_INPUT_TOKENS),
        ("reasoning_output_token_count", OtelAttr.REASONING_OUTPUT_TOKENS),
    )
}


# region Telemetry utils

Expand Down Expand Up @@ -2378,12 +2389,16 @@ def _get_response_attributes(
if model := getattr(response, "model", None):
attributes[OtelAttr.RESPONSE_MODEL] = model
if capture_usage and (usage := response.usage_details):
input_tokens = usage.get("input_token_count")
if input_tokens:
attributes[OtelAttr.INPUT_TOKENS] = input_tokens
output_tokens = usage.get("output_token_count")
if output_tokens:
attributes[OtelAttr.OUTPUT_TOKENS] = output_tokens
for key, value in usage.items():
if not isinstance(value, int) or isinstance(value, bool):
continue
Comment thread
hanhan761 marked this conversation as resolved.
attr_name = _USAGE_FIELD_TO_OTEL_ATTR.get(key)
if attr_name is not None:
attributes[attr_name] = value
continue
# Fall back to prefix-based attribute for provider-specific fields
# not yet covered by the standard mapping.
attributes[f"gen_ai.usage.{key}"] = value
return attributes


Expand Down
204 changes: 25 additions & 179 deletions python/packages/core/tests/core/test_observability.py

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are all these tests removed?

Original file line number Diff line number Diff line change
Expand Up @@ -1535,7 +1535,7 @@ def test_configure_otel_providers_explicit_console_exporters_overrides_env(monke


def test_observability_settings_defaults_instrumentation_true(monkeypatch):
"""ENABLE_INSTRUMENTATION unset ObservabilitySettings defaults to True."""
"""ENABLE_INSTRUMENTATION unset -> ObservabilitySettings defaults to True."""
from agent_framework.observability import ObservabilitySettings

monkeypatch.delenv("ENABLE_INSTRUMENTATION", raising=False)
Expand Down Expand Up @@ -2154,6 +2154,29 @@ def test_get_response_attributes_with_usage():
assert result[OtelAttr.OUTPUT_TOKENS] == 50


def test_get_response_attributes_maps_detailed_usage_to_standard_otel_attrs():
    """Cache and reasoning usage counters are emitted under their OtelAttr usage keys."""
    from unittest.mock import Mock

    from agent_framework.observability import OtelAttr, _get_response_attributes

    # Mock keyword arguments become attributes on the created mock.
    fake_response = Mock(
        response_id=None,
        finish_reason=None,
        raw_representation=None,
        usage_details={
            "cache_creation_input_token_count": 10,
            "cache_read_input_token_count": 20,
            "reasoning_output_token_count": 30,
        },
    )

    attrs = _get_response_attributes({}, fake_response)

    expected_by_attr = {
        OtelAttr.CACHE_CREATION_INPUT_TOKENS: 10,
        OtelAttr.CACHE_READ_INPUT_TOKENS: 20,
        OtelAttr.REASONING_OUTPUT_TOKENS: 30,
    }
    for otel_attr, token_count in expected_by_attr.items():
        assert attrs[otel_attr] == token_count


def test_get_response_attributes_capture_usage_false():
"""Test _get_response_attributes skips usage when capture_usage is False."""
from unittest.mock import Mock
Expand Down Expand Up @@ -3105,89 +3128,6 @@ async def _get() -> ChatResponse:
assert agent_span.attributes[OtelAttr.OUTPUT_TOKENS] == 22


# region Test non-ASCII character handling in JSON serialization


@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_capture_messages_preserves_non_ascii_characters(mock_chat_client, span_exporter: InMemorySpanExporter):
    """Japanese text must appear verbatim (no \\u escapes) in the input/output message span attributes."""
    import json

    japanese_text = "こんにちは世界"  # "Hello World" in Japanese

    # Stub client whose reply carries the same Japanese sample text.
    class ClientWithJapanese(mock_chat_client):
        async def _inner_get_response(self, *, messages, options, **kwargs):
            reply = Message(role="assistant", contents=[japanese_text])
            usage = UsageDetails(input_token_count=5, output_token_count=10)
            return ChatResponse(messages=[reply], usage_details=usage)

    client = ClientWithJapanese()
    user_messages = [Message(role="user", contents=[japanese_text])]

    span_exporter.clear()
    response = await client.get_response(messages=user_messages, options={"model": "Test"})
    assert response is not None

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1
    recorded = finished[0].attributes

    # Raw attribute strings: text present and not escaped to \uXXXX sequences.
    raw_payloads = []
    for message_attr in (OtelAttr.INPUT_MESSAGES, OtelAttr.OUTPUT_MESSAGES):
        payload = recorded[message_attr]
        assert japanese_text in payload
        assert "\\u" not in payload
        raw_payloads.append(payload)

    # Each payload must also be valid JSON that round-trips the text.
    for payload in raw_payloads:
        decoded = json.loads(payload)
        assert decoded[0]["parts"][0]["content"] == japanese_text


@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_system_instructions_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter):
    """Chinese system instructions must appear verbatim (no \\u escapes) in the span attribute."""
    import json

    from opentelemetry import trace

    chinese_text = "你好世界"  # "Hello World" in Chinese

    test_tracer = trace.get_tracer("test")
    span_exporter.clear()

    with test_tracer.start_as_current_span("test_span") as live_span:
        _capture_messages(
            span=live_span,
            provider_name="test_provider",
            messages=[Message(role="user", contents=["Test"])],
            system_instructions=chinese_text,
        )

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1
    recorded = finished[0].attributes

    # Raw attribute string: text present and not escaped.
    raw_instructions = recorded[OtelAttr.SYSTEM_INSTRUCTIONS]
    assert chinese_text in raw_instructions
    assert "\\u" not in raw_instructions

    # The attribute is valid JSON carrying the same text.
    decoded_instructions = json.loads(raw_instructions)
    assert decoded_instructions[0]["content"] == chinese_text

    # Only the user message is expected among the captured input messages.
    roles = [entry.get("role") for entry in json.loads(recorded[OtelAttr.INPUT_MESSAGES])]
    assert roles == ["user"]


@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
def test_capture_messages_with_prepared_request_info_function_call_arguments(span_exporter: InMemorySpanExporter):
"""Test _capture_messages handles request-info function-call arguments prepared at Content creation."""
Expand Down Expand Up @@ -3301,100 +3241,6 @@ def test_capture_messages_logs_only_chat_history_when_framework_instructions_are
assert logged_messages[1]["parts"][0]["content"] == "Test"


@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_tool_arguments_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter):
    """Korean tool arguments must appear verbatim (no \\u escapes) in the tool-arguments span attribute."""
    import json

    korean_text = "안녕하세요"  # "Hello" in Korean

    @tool
    def greet(message: str) -> str:
        """Greet with a message."""
        return f"Greeted: {message}"

    span_exporter.clear()
    await greet.invoke(message=korean_text)

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1

    # Raw attribute string: text present and not escaped.
    raw_arguments = finished[0].attributes[OtelAttr.TOOL_ARGUMENTS]
    assert korean_text in raw_arguments
    assert "\\u" not in raw_arguments

    # The attribute is valid JSON carrying the same text.
    assert json.loads(raw_arguments)["message"] == korean_text


@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_tool_result_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter):
    """Arabic text returned by a tool must appear in the tool-result span attribute."""
    arabic_text = "مرحبا بالعالم"  # "Hello World" in Arabic

    @tool
    def echo(text: str) -> str:
        """Echo the text back."""
        return text

    span_exporter.clear()
    invocation_result = await echo.invoke(text=arabic_text)

    # The invocation result is a list whose first item carries the echoed text.
    assert isinstance(invocation_result, list)
    assert invocation_result[0].text == arabic_text

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1

    recorded_result = finished[0].attributes[OtelAttr.TOOL_RESULT]
    assert arabic_text in recorded_result


@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_tool_arguments_pydantic_preserves_non_ascii_characters(
    span_exporter: InMemorySpanExporter,
) -> None:
    """Japanese text nested in a Pydantic tool argument must appear verbatim (no \\u escapes) in the span attribute."""
    import json

    from pydantic import BaseModel

    japanese_text = "こんにちは"  # "Hello" in Japanese

    class Greeting(BaseModel):
        message: str

    @tool
    def greet_with_model(greeting: Greeting) -> str:
        """Greet with a message contained in a Pydantic model."""
        # When invoked via the tool's input_model, greeting is passed as a dict
        text = greeting["message"] if isinstance(greeting, dict) else greeting.message
        return f"Greeted: {text}"

    span_exporter.clear()

    # Wrap the Pydantic argument in the tool's own input model before invoking.
    payload = greet_with_model.input_model(greeting=Greeting(message=japanese_text))
    await greet_with_model.invoke(arguments=payload)

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1

    # Raw attribute string: text present and not escaped.
    raw_arguments = finished[0].attributes[OtelAttr.TOOL_ARGUMENTS]
    assert japanese_text in raw_arguments
    assert "\\u" not in raw_arguments

    # The attribute is valid JSON with the text nested under the argument name.
    decoded_arguments = json.loads(raw_arguments)
    assert decoded_arguments["greeting"]["message"] == japanese_text


# region Test merged options for instructions


Expand Down Expand Up @@ -3917,7 +3763,7 @@ def mock_get_meter(*args, **kwargs):
# Weather tool stub: decorated with @tool under the name "get_weather"; returns a
# fixed string and never reads `city`.
@tool(name="get_weather", description="Get weather for a city", approval_mode="never_require")
def _get_weather(city: str) -> str:
"""Get weather for a city."""
return "Sunny, 72°F"
# NOTE(review): the return below follows an unconditional return, so it can never
# execute. The two lines look like the before/after pair of a diff hunk; confirm
# which literal is intended and drop the other.
return "Sunny, 72F"


@pytest.mark.parametrize("enable_sensitive_data", [False], indirect=True)
Expand Down
2 changes: 2 additions & 0 deletions python/packages/openai/agent_framework_openai/_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2981,8 +2981,10 @@ def _parse_usage_from_openai(self, usage: ResponseUsage) -> UsageDetails | None:
)
if usage.input_tokens_details and usage.input_tokens_details.cached_tokens:
details["openai.cached_input_tokens"] = usage.input_tokens_details.cached_tokens # type: ignore[typeddict-unknown-key]
details["cache_read_input_token_count"] = usage.input_tokens_details.cached_tokens
if usage.output_tokens_details and usage.output_tokens_details.reasoning_tokens:
details["openai.reasoning_tokens"] = usage.output_tokens_details.reasoning_tokens # type: ignore[typeddict-unknown-key]
Comment thread
hanhan761 marked this conversation as resolved.
details["reasoning_output_token_count"] = usage.output_tokens_details.reasoning_tokens
return details

def _get_metadata_from_response(self, output: Any) -> dict[str, Any]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -767,13 +767,15 @@ def _parse_usage_from_openai(self, usage: CompletionUsage) -> UsageDetails:
details["completion/audio_tokens"] = tokens # type: ignore[typeddict-unknown-key]
if tokens := usage.completion_tokens_details.reasoning_tokens:
details["completion/reasoning_tokens"] = tokens # type: ignore[typeddict-unknown-key]
details["reasoning_output_token_count"] = tokens
Comment thread
hanhan761 marked this conversation as resolved.
if tokens := usage.completion_tokens_details.rejected_prediction_tokens:
details["completion/rejected_prediction_tokens"] = tokens # type: ignore[typeddict-unknown-key]
if usage.prompt_tokens_details:
if tokens := usage.prompt_tokens_details.audio_tokens:
details["prompt/audio_tokens"] = tokens # type: ignore[typeddict-unknown-key]
if tokens := usage.prompt_tokens_details.cached_tokens:
details["prompt/cached_tokens"] = tokens # type: ignore[typeddict-unknown-key]
details["cache_read_input_token_count"] = tokens
return details

def _parse_text_from_openai(self, choice: Choice | ChunkChoice) -> Content | None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3301,6 +3301,7 @@ def test_usage_details_with_cached_tokens() -> None:
assert details is not None
assert details["input_token_count"] == 200
assert details["openai.cached_input_tokens"] == 25
assert details["cache_read_input_token_count"] == 25


def test_usage_details_with_reasoning_tokens() -> None:
Expand All @@ -3319,6 +3320,7 @@ def test_usage_details_with_reasoning_tokens() -> None:
assert details is not None
assert details["output_token_count"] == 80
assert details["openai.reasoning_tokens"] == 30
assert details["reasoning_output_token_count"] == 30


def test_get_metadata_from_response() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ async def main():
"input_token_count": 63,
"output_token_count": 145,
"total_token_count": 208,
"openai.reasoning_tokens": 128
"openai.reasoning_tokens": 128,
"reasoning_output_token_count": 128
},
"additional_properties": {}
}
Expand Down
Loading