From 1a107d1fea7b93906ac3c62a4dc650c7b690f0a0 Mon Sep 17 00:00:00 2001
From: Abdulrahman Alfozan <alfozan111@gmail.com>
Date: Tue, 19 May 2026 23:24:23 -0700
Subject: [PATCH] Improve OpenAI Agents SDK instrumentation

---
 instrumentation/README.md                     |   2 +-
 .../.changelog/49.fixed                       |   1 +
 .../README.rst                                |  14 +-
 .../examples/zero-code/.env.example           |   2 +-
 .../examples/zero-code/requirements.txt       |   2 +-
 .../pyproject.toml                            |   2 +-
 .../genai/openai_agents/__init__.py           |   2 +
 .../genai/openai_agents/package.py            |   4 +-
 .../genai/openai_agents/span_processor.py     | 659 ++++++++++++------
 .../tests/requirements.oldest.txt             |   2 +-
 .../tests/test_conformance.py                 |  13 +-
 .../tests/test_tracer.py                      | 254 ++++++-
 .../tests/test_z_span_processor_unit.py       |  65 +-
 .../tests/test_zz_coverage_improvements.py    |  34 +-
 uv.lock                                       |   2 +-
 15 files changed, 798 insertions(+), 260 deletions(-)
 create mode 100644 instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/49.fixed

diff --git a/instrumentation/README.md b/instrumentation/README.md
index 48ee952f..cf80042a 100644
--- a/instrumentation/README.md
+++ b/instrumentation/README.md
@@ -5,6 +5,6 @@
 | [opentelemetry-instrumentation-genai-claude-agent-sdk](./opentelemetry-instrumentation-genai-claude-agent-sdk) | claude-agent-sdk >= 0.1.14 | No | development
 | [opentelemetry-instrumentation-genai-langchain](./opentelemetry-instrumentation-genai-langchain) | langchain >= 0.3.21 | No | development
 | [opentelemetry-instrumentation-genai-openai](./opentelemetry-instrumentation-genai-openai) | openai >= 1.26.0 | Yes | development
-| [opentelemetry-instrumentation-genai-openai-agents](./opentelemetry-instrumentation-genai-openai-agents) | openai-agents >= 0.3.3 | No | development
+| [opentelemetry-instrumentation-genai-openai-agents](./opentelemetry-instrumentation-genai-openai-agents) | openai-agents >= 0.17.0 | Yes | development
 | [opentelemetry-instrumentation-genai-weaviate-client](./opentelemetry-instrumentation-genai-weaviate-client) | weaviate-client >= 3.0.0,<5.0.0 | No | development
 | [opentelemetry-instrumentation-google-genai](./opentelemetry-instrumentation-google-genai) | google-genai >= 1.32.0, <3 | No | development
\ No newline at end of file
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/49.fixed b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/49.fixed
new file mode 100644
index 00000000..3c394b61
--- /dev/null
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/49.fixed
@@ -0,0 +1 @@
+Improve OpenAI Agents SDK instrumentation support for current tracing payloads.
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/README.rst b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/README.rst
index 24a4710d..a769abc2 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/README.rst
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/README.rst
@@ -21,8 +21,12 @@ Features
 
 * Generates spans for agents, tools, generations, guardrails, and handoffs using
   the OpenTelemetry GenAI semantic conventions.
-* Captures prompts, responses, tool arguments, and system instructions when content
-  capture is enabled.
+* Captures prompts, responses, tool arguments, tool results, and system
+  instructions when content capture is enabled.
+* Handles current Agents SDK response and generation payloads, including response
+  IDs, models, token usage, and string response inputs.
+* Preserves custom span attributes from Agents SDK custom spans, including
+  sandbox attributes such as ``sandbox.*`` and process exit data.
 * Publishes duration and token metrics for every operation.
 * Supports environment overrides so you can configure agent metadata or disable
   telemetry without code changes.
@@ -76,6 +80,12 @@ Configure OpenTelemetry as usual, then call :class:`OpenAIAgentsInstrumentor`.
     result = Runner.run_sync(assistant, "I'm visiting Barcelona this weekend. How should I pack?")
     print(result.final_output)
 
+The instrumentation maps Agents SDK spans to OpenTelemetry GenAI semantic
+attributes where applicable. Custom Agents SDK spans are preserved without a
+placeholder GenAI operation name, and supported ``CustomSpanData.data`` values
+are copied to OpenTelemetry attributes, including sandbox attributes such as
+``sandbox.*`` and process exit data.
+
 Configuration
 -------------
 
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/zero-code/.env.example b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/zero-code/.env.example
index bc649aeb..d78104f0 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/zero-code/.env.example
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/zero-code/.env.example
@@ -2,7 +2,7 @@
 # OTEL_GENAI_AGENT_NAME=Travel Concierge
 
 # Update this with your real OpenAI API key
-OPENAI_API_KEY=sk-YOUR_API_KEY
+OPENAI_API_KEY=
 
 # Uncomment to use Ollama instead of OpenAI
 # OPENAI_BASE_URL=http://localhost:11434/v1
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/zero-code/requirements.txt b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/zero-code/requirements.txt
index 827648b4..dfe1073c 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/zero-code/requirements.txt
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/zero-code/requirements.txt
@@ -3,5 +3,5 @@ python-dotenv~=1.0
 
 opentelemetry-sdk~=1.42.0
 opentelemetry-exporter-otlp-proto-grpc~=1.42.0
-opentelemetry-distro~=0.57b0
+opentelemetry-distro~=0.62b1
 opentelemetry-instrumentation-genai-openai-agents
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/pyproject.toml b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/pyproject.toml
index bad22251..fb69b893 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/pyproject.toml
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/pyproject.toml
@@ -33,7 +33,7 @@ dependencies = [
 
 [project.optional-dependencies]
 instruments = [
-  "openai-agents >= 0.3.3",
+  "openai-agents >= 0.17.0",
 ]
 
 [project.entry-points.opentelemetry_instrumentor]
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/__init__.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/__init__.py
index 955c3f1a..a4d8eea4 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/__init__.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/__init__.py
@@ -142,6 +142,7 @@ def _instrument(self, **kwargs) -> None:
             tracer_provider,
             schema_url=Schemas.V1_28_0.value,
         )
+        meter_provider = kwargs.get("meter_provider")
 
         system_override = kwargs.get("system") or os.getenv(
             _SYSTEM_OVERRIDE_ENV
@@ -174,6 +175,7 @@ def _instrument(self, **kwargs) -> None:
             != ContentCaptureMode.NO_CONTENT,
             content_mode=content_mode,
             metrics_enabled=metrics_enabled,
+            meter_provider=meter_provider,
             agent_name=agent_name,
             agent_id=agent_id,
             agent_description=agent_description,
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/package.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/package.py
index c5292e65..972ff60d 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/package.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/package.py
@@ -1,5 +1,5 @@
 # Copyright The OpenTelemetry Authors
 # SPDX-License-Identifier: Apache-2.0
 
-_instruments = ("openai-agents >= 0.3.3",)
-_supports_metrics = False
+_instruments = ("openai-agents >= 0.17.0",)
+_supports_metrics = True
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py
index f78552f1..7e950260 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py
@@ -19,43 +19,31 @@
 
 from __future__ import annotations
 
-import importlib
 import logging
+from collections.abc import Mapping as MappingABC
+from collections.abc import Sequence as SequenceABC
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from enum import Enum
 from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Sequence
 from urllib.parse import urlparse
 
-from opentelemetry.util.genai.utils import gen_ai_json_dumps
-
-try:
-    from agents.tracing import Span, Trace, TracingProcessor
-    from agents.tracing.span_data import (
-        AgentSpanData,
-        FunctionSpanData,
-        GenerationSpanData,
-        GuardrailSpanData,
-        HandoffSpanData,
-        ResponseSpanData,
-        SpeechSpanData,
-        TranscriptionSpanData,
-    )
-except ModuleNotFoundError:  # pragma: no cover - test stubs
-    tracing_module = importlib.import_module("agents.tracing")
-    Span = getattr(tracing_module, "Span")
-    Trace = getattr(tracing_module, "Trace")
-    TracingProcessor = getattr(tracing_module, "TracingProcessor")
-    AgentSpanData = getattr(tracing_module, "AgentSpanData", Any)  # type: ignore[assignment]
-    FunctionSpanData = getattr(tracing_module, "FunctionSpanData", Any)  # type: ignore[assignment]
-    GenerationSpanData = getattr(tracing_module, "GenerationSpanData", Any)  # type: ignore[assignment]
-    GuardrailSpanData = getattr(tracing_module, "GuardrailSpanData", Any)  # type: ignore[assignment]
-    HandoffSpanData = getattr(tracing_module, "HandoffSpanData", Any)  # type: ignore[assignment]
-    ResponseSpanData = getattr(tracing_module, "ResponseSpanData", Any)  # type: ignore[assignment]
-    SpeechSpanData = getattr(tracing_module, "SpeechSpanData", Any)  # type: ignore[assignment]
-    TranscriptionSpanData = getattr(
-        tracing_module, "TranscriptionSpanData", Any
-    )  # type: ignore[assignment]
+from agents.tracing import Span, Trace, TracingProcessor
+from agents.tracing.span_data import (
+    AgentSpanData,
+    CustomSpanData,
+    FunctionSpanData,
+    GenerationSpanData,
+    GuardrailSpanData,
+    HandoffSpanData,
+    MCPListToolsSpanData,
+    ResponseSpanData,
+    SpeechGroupSpanData,
+    SpeechSpanData,
+    TaskSpanData,
+    TranscriptionSpanData,
+    TurnSpanData,
+)
 
 from opentelemetry.context import attach, detach
 from opentelemetry.metrics import Histogram, get_meter
@@ -73,6 +61,11 @@
     Tracer,
     set_span_in_context,
 )
+from opentelemetry.util.genai.instruments import (
+    create_duration_histogram,
+    create_token_histogram,
+)
+from opentelemetry.util.genai.utils import gen_ai_json_dumps
 from opentelemetry.util.types import AttributeValue
 
 # Import all semantic convention constants
@@ -122,7 +115,6 @@ class GenAIOperationName:
     TRANSCRIPTION = "transcription"
     SPEECH = "speech_generation"
     GUARDRAIL = "guardrail_check"
-    HANDOFF = "agent_handoff"
     RESPONSE = "response"  # internal aggregator in current processor
 
     CLASS_FALLBACK = {
@@ -240,13 +232,22 @@ def _attr(name: str, fallback: str) -> str:
 GEN_AI_ORCHESTRATOR_AGENT_DEFINITIONS = "gen_ai.orchestrator.agent.definitions"
 GEN_AI_GUARDRAIL_NAME = "gen_ai.guardrail.name"
 GEN_AI_GUARDRAIL_TRIGGERED = "gen_ai.guardrail.triggered"
-GEN_AI_HANDOFF_FROM_AGENT = "gen_ai.handoff.from_agent"
-GEN_AI_HANDOFF_TO_AGENT = "gen_ai.handoff.to_agent"
 GEN_AI_EMBEDDINGS_DIMENSION_COUNT = "gen_ai.embeddings.dimension.count"
 GEN_AI_TOKEN_TYPE = _attr("GEN_AI_TOKEN_TYPE", "gen_ai.token.type")
 
+_DEFAULT_FINISH_REASON = "unknown"
+
 # ---- Normalization utilities (embedded from utils.py) ----
 
+_CUSTOM_ATTRIBUTE_RESERVED_PREFIXES = (
+    "gen_ai.",
+    "otel.",
+    "server.",
+    "telemetry.",
+)
+_CUSTOM_ATTRIBUTE_RESERVED_KEYS = {"span.kind"}
+_CUSTOM_ATTRIBUTE_VALUE_LENGTH_LIMIT = 8192
+
 
 def normalize_provider(provider: Optional[str]) -> Optional[str]:
     """Normalize provider name to spec-compliant value."""
@@ -304,8 +305,6 @@ def normalize_output_type(output_type: Optional[str]) -> str:
 
 logger = logging.getLogger(__name__)
 
-GEN_AI_SYSTEM_KEY = getattr(GenAIAttributes, "GEN_AI_SYSTEM", "gen_ai.system")
-
 
 class ContentCaptureMode(Enum):
     """Controls whether sensitive content is recorded on spans, events, or both."""
@@ -341,19 +340,6 @@ class ContentPayload:
     tool_result: Any = None
 
 
-def _is_instance_of(value: Any, classes: Any) -> bool:
-    """Safe isinstance that tolerates typing.Any placeholders."""
-    if not isinstance(classes, tuple):
-        classes = (classes,)
-    for cls in classes:
-        try:
-            if isinstance(value, cls):
-                return True
-        except TypeError:
-            continue
-    return False
-
-
 def _infer_server_attributes(base_url: Optional[str]) -> dict[str, Any]:
     """Return server.address / server.port attributes if base_url provided."""
     out: dict[str, Any] = {}
@@ -378,19 +364,140 @@ def safe_json_dumps(obj: Any) -> str:
         return str(obj)
 
 
+def _truncate_custom_attribute_value(value: str) -> str:
+    """Limit custom attribute strings so arbitrary span data does not explode spans."""
+    if len(value) <= _CUSTOM_ATTRIBUTE_VALUE_LENGTH_LIMIT:
+        return value
+    return value[: _CUSTOM_ATTRIBUTE_VALUE_LENGTH_LIMIT - 3] + "..."
+
+
+def _is_reserved_custom_attribute_key(key: str) -> bool:
+    """Return True when custom span data should not overwrite SDK/OTel attrs."""
+    return key in _CUSTOM_ATTRIBUTE_RESERVED_KEYS or key.startswith(
+        _CUSTOM_ATTRIBUTE_RESERVED_PREFIXES
+    )
+
+
+def _jsonable_custom_attribute_value(value: Any) -> Any:
+    """Normalize values before serializing arbitrary custom span data."""
+    if isinstance(value, Enum):
+        return value.value
+    if isinstance(value, MappingABC):
+        return {
+            str(key): _jsonable_custom_attribute_value(item)
+            for key, item in value.items()
+        }
+    if isinstance(value, (list, tuple, set)):
+        return [_jsonable_custom_attribute_value(item) for item in value]
+    return value
+
+
+def _coerce_custom_scalar_attribute_value(
+    value: Any,
+) -> str | bool | int | float | None:
+    """Return primitive OTel attribute values from custom span data."""
+    if value is None:
+        return None
+    if isinstance(value, Enum):
+        return _truncate_custom_attribute_value(str(value.value))
+    if isinstance(value, str):
+        return _truncate_custom_attribute_value(value)
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, int):
+        return value
+    if isinstance(value, float):
+        return value
+    return None
+
+
+def _is_homogeneous_custom_attribute_sequence(
+    values: list[str | bool | int | float],
+) -> bool:
+    """Return True when values can be emitted as an OTel array attribute."""
+    if all(isinstance(item, str) for item in values):
+        return True
+    if all(isinstance(item, bool) for item in values):
+        return True
+    if all(
+        isinstance(item, int) and not isinstance(item, bool) for item in values
+    ):
+        return True
+    return all(isinstance(item, float) for item in values)
+
+
+def _coerce_custom_attribute_value(value: Any) -> AttributeValue | None:
+    """Convert arbitrary custom span data into valid OTel attribute values."""
+    if value is None:
+        return None
+
+    scalar_value = _coerce_custom_scalar_attribute_value(value)
+    if scalar_value is not None:
+        return scalar_value
+
+    if isinstance(value, SequenceABC) and not isinstance(
+        value, (str, bytes, bytearray)
+    ):
+        scalar_values: list[str | bool | int | float] = []
+        for item in value:
+            scalar_value = _coerce_custom_scalar_attribute_value(item)
+            if scalar_value is None:
+                break
+            scalar_values.append(scalar_value)
+        else:
+            if scalar_values and _is_homogeneous_custom_attribute_sequence(
+                scalar_values
+            ):
+                return scalar_values
+
+    serialized = safe_json_dumps(_jsonable_custom_attribute_value(value))
+    return _truncate_custom_attribute_value(serialized)
+
+
 def _as_utc_nano(dt: datetime) -> int:
     """Convert datetime to UTC nanoseconds timestamp."""
     return int(dt.astimezone(timezone.utc).timestamp() * 1_000_000_000)
 
 
-def _get_span_status(span: Span[Any]) -> Status:
+def _get_span_status(span: Span[Any]) -> Optional[Status]:
     """Get OpenTelemetry span status from agent span."""
     if error := getattr(span, "error", None):
         return Status(
             status_code=StatusCode.ERROR,
             description=f"{error.get('message', '')}: {error.get('data', '')}",
         )
-    return Status(StatusCode.OK)
+    return None
+
+
+def _get_finish_reason(value: Any) -> Optional[str]:
+    """Return a finish reason from a dict/object when one is available."""
+    if isinstance(value, dict):
+        finish_reason = value.get("finish_reason") or value.get("stop_reason")
+    else:
+        finish_reason = getattr(value, "finish_reason", None) or getattr(
+            value, "stop_reason", None
+        )
+    if finish_reason:
+        return (
+            finish_reason
+            if isinstance(finish_reason, str)
+            else str(finish_reason)
+        )
+    return None
+
+
+def _get_finish_reasons_from_sequence(
+    values: Sequence[Any] | None,
+) -> list[str]:
+    """Collect finish reasons from a response/generation output sequence."""
+    if not values:
+        return []
+    finish_reasons: list[str] = []
+    for value in values:
+        finish_reason = _get_finish_reason(value)
+        if finish_reason:
+            finish_reasons.append(finish_reason)
+    return finish_reasons
 
 
 def get_span_name(
@@ -420,9 +527,6 @@ def get_span_name(
     if operation_name == GenAIOperationName.EXECUTE_TOOL:
         return f"{base_name} {tool_name}" if tool_name else base_name
 
-    if operation_name == GenAIOperationName.HANDOFF:
-        return f"{base_name} {agent_name}" if agent_name else base_name
-
     return base_name
 
 
@@ -443,6 +547,7 @@ def __init__(
         server_address: Optional[str] = None,
         server_port: Optional[int] = None,
         metrics_enabled: bool = True,
+        meter_provider: Any = None,
         agent_name_default: Optional[str] = None,
         agent_id_default: Optional[str] = None,
         agent_description_default: Optional[str] = None,
@@ -517,6 +622,7 @@ def __init__(
 
         # Metrics configuration
         self._metrics_enabled = metrics_enabled
+        self._meter_provider = meter_provider
         self._meter = None
         self._duration_histogram: Optional[Histogram] = None
         self._token_usage_histogram: Optional[Histogram] = None
@@ -535,22 +641,12 @@ def _get_server_attributes(self) -> dict[str, Any]:
     def _init_metrics(self):
         """Initialize metric instruments."""
         self._meter = get_meter(
-            "opentelemetry.instrumentation.genai.openai_agents", "0.1.0"
-        )
-
-        # Operation duration histogram
-        self._duration_histogram = self._meter.create_histogram(
-            name="gen_ai.client.operation.duration",
-            description="GenAI operation duration",
-            unit="s",
-        )
-
-        # Token usage histogram
-        self._token_usage_histogram = self._meter.create_histogram(
-            name="gen_ai.client.token.usage",
-            description="Number of input and output tokens used",
-            unit="{token}",
+            "opentelemetry.instrumentation.genai.openai_agents",
+            "0.1.0",
+            meter_provider=self._meter_provider,
         )
+        self._duration_histogram = create_duration_histogram(self._meter)
+        self._token_usage_histogram = create_token_histogram(self._meter)
 
     def _record_metrics(
         self, span: Span[Any], attributes: dict[str, AttributeValue]
@@ -567,8 +663,12 @@ def _record_metrics(
             duration = None
             if hasattr(span, "started_at") and hasattr(span, "ended_at"):
                 try:
-                    start = datetime.fromisoformat(span.started_at)
-                    end = datetime.fromisoformat(span.ended_at)
+                    start = datetime.fromisoformat(
+                        span.started_at.replace("Z", "+00:00")
+                    )
+                    end = datetime.fromisoformat(
+                        span.ended_at.replace("Z", "+00:00")
+                    )
                     duration = (end - start).total_seconds()
                 except Exception:
                     pass
@@ -602,7 +702,9 @@ def _record_metrics(
 
             # Record duration
             if duration is not None and self._duration_histogram is not None:
-                self._duration_histogram.record(duration, metric_attrs)
+                self._duration_histogram.record(
+                    duration, attributes=metric_attrs
+                )
 
             # Record token usage
             if self._token_usage_histogram:
@@ -611,7 +713,7 @@ def _record_metrics(
                     token_attrs = dict(metric_attrs)
                     token_attrs[GEN_AI_TOKEN_TYPE] = "input"
                     self._token_usage_histogram.record(
-                        input_tokens, token_attrs
+                        input_tokens, attributes=token_attrs
                     )
 
                 output_tokens = attributes.get(GEN_AI_USAGE_OUTPUT_TOKENS)
@@ -619,7 +721,7 @@ def _record_metrics(
                     token_attrs = dict(metric_attrs)
                     token_attrs[GEN_AI_TOKEN_TYPE] = "output"
                     self._token_usage_histogram.record(
-                        output_tokens, token_attrs
+                        output_tokens, attributes=token_attrs
                     )
 
         except Exception as e:
@@ -707,19 +809,33 @@ def _normalize_to_text_parts(self, content: Any) -> list[dict[str, str]]:
 
     def _redacted_text_parts(self) -> list[dict[str, str]]:
         """Return a single redacted text part for system instructions."""
-        return [{"type": "text", "content": "readacted"}]
+        return [{"type": "text", "content": "redacted"}]
 
     def _normalize_messages_to_role_parts(
-        self, messages: Sequence[Any] | None
+        self, messages: Sequence[Any] | str | None
     ) -> list[dict[str, Any]]:
         """Normalize input messages to enforced role+parts schema.
 
         Each message becomes: {"role": <role>, "parts": [ {"type": ..., ...} ]}
         Redaction: when include_sensitive_data is False, replace text content,
-        tool_call arguments, and tool_call_response result with "readacted".
+        tool_call arguments, and tool_call_response result with "redacted".
         """
         if not messages:
             return []
+        if isinstance(messages, str):
+            return [
+                {
+                    "role": "user",
+                    "parts": [
+                        {
+                            "type": "text",
+                            "content": "redacted"
+                            if not self.include_sensitive_data
+                            else messages,
+                        }
+                    ],
+                }
+            ]
         normalized: list[dict[str, Any]] = []
         for m in messages:
             if not isinstance(m, dict):
@@ -730,7 +846,7 @@ def _normalize_messages_to_role_parts(
                         "parts": [
                             {
                                 "type": "text",
-                                "content": "readacted"
+                                "content": "redacted"
                                 if not self.include_sensitive_data
                                 else str(m),
                             }
@@ -751,7 +867,7 @@ def _normalize_messages_to_role_parts(
                         if ptype == "text":
                             txt = p.get("content") or p.get("text")
                             newp["content"] = (
-                                "readacted"
+                                "redacted"
                                 if not self.include_sensitive_data
                                 else (txt if isinstance(txt, str) else str(p))
                             )
@@ -760,7 +876,7 @@ def _normalize_messages_to_role_parts(
                             newp["name"] = p.get("name")
                             args = p.get("arguments")
                             newp["arguments"] = (
-                                "readacted"
+                                "redacted"
                                 if not self.include_sensitive_data
                                 else args
                             )
@@ -768,13 +884,13 @@ def _normalize_messages_to_role_parts(
                             newp["id"] = p.get("id") or m.get("tool_call_id")
                             result = p.get("result") or p.get("content")
                             newp["result"] = (
-                                "readacted"
+                                "redacted"
                                 if not self.include_sensitive_data
                                 else result
                             )
                         else:
                             newp["content"] = (
-                                "readacted"
+                                "redacted"
                                 if not self.include_sensitive_data
                                 else str(p)
                             )
@@ -783,7 +899,7 @@ def _normalize_messages_to_role_parts(
                         parts.append(
                             {
                                 "type": "text",
-                                "content": "readacted"
+                                "content": "redacted"
                                 if not self.include_sensitive_data
                                 else str(p),
                             }
@@ -795,7 +911,7 @@ def _normalize_messages_to_role_parts(
                 parts.append(
                     {
                         "type": "text",
-                        "content": "readacted"
+                        "content": "redacted"
                         if not self.include_sensitive_data
                         else content,
                     }
@@ -809,7 +925,7 @@ def _normalize_messages_to_role_parts(
                             parts.append(
                                 {
                                     "type": "text",
-                                    "content": "readacted"
+                                    "content": "redacted"
                                     if not self.include_sensitive_data
                                     else (
                                         txt
@@ -823,7 +939,7 @@ def _normalize_messages_to_role_parts(
                             parts.append(
                                 {
                                     "type": "text",
-                                    "content": "readacted"
+                                    "content": "redacted"
                                     if not self.include_sensitive_data
                                     else str(item),
                                 }
@@ -832,7 +948,7 @@ def _normalize_messages_to_role_parts(
                         parts.append(
                             {
                                 "type": "text",
-                                "content": "readacted"
+                                "content": "redacted"
                                 if not self.include_sensitive_data
                                 else str(item),
                             }
@@ -852,7 +968,7 @@ def _normalize_messages_to_role_parts(
                         p["name"] = fn.get("name")
                         args = fn.get("arguments")
                         p["arguments"] = (
-                            "readacted"
+                            "redacted"
                             if not self.include_sensitive_data
                             else args
                         )
@@ -864,7 +980,7 @@ def _normalize_messages_to_role_parts(
                 p["id"] = m.get("tool_call_id") or m.get("id")
                 result = m.get("result") or m.get("content")
                 p["result"] = (
-                    "readacted" if not self.include_sensitive_data else result
+                    "redacted" if not self.include_sensitive_data else result
                 )
                 parts.append(p)
 
@@ -899,7 +1015,7 @@ def _normalize_output_messages_to_role_parts(
                     {
                         "type": "text",
                         "content": (
-                            "readacted"
+                            "redacted"
                             if not self.include_sensitive_data
                             else output_text
                         ),
@@ -916,7 +1032,7 @@ def _normalize_output_messages_to_role_parts(
                                 {
                                     "type": "text",
                                     "content": (
-                                        "readacted"
+                                        "redacted"
                                         if not self.include_sensitive_data
                                         else txt
                                     ),
@@ -928,16 +1044,14 @@ def _normalize_output_messages_to_role_parts(
                                 {
                                     "type": "text",
                                     "content": (
-                                        "readacted"
+                                        "redacted"
                                         if not self.include_sensitive_data
                                         else str(item)
                                     ),
                                 }
                             )
-                        # Capture finish_reason from parts when present
-                        fr = getattr(item, "finish_reason", None)
-                        if isinstance(fr, str) and not finish_reason:
-                            finish_reason = fr
+                        if not finish_reason:
+                            finish_reason = _get_finish_reason(item)
 
         # Generation span: use span_data.output
         if not parts:
@@ -952,7 +1066,7 @@ def _normalize_output_messages_to_role_parts(
                                     {
                                         "type": "text",
                                         "content": (
-                                            "readacted"
+                                            "redacted"
                                             if not self.include_sensitive_data
                                             else txt
                                         ),
@@ -965,7 +1079,7 @@ def _normalize_output_messages_to_role_parts(
                                 {
                                     "type": "text",
                                     "content": (
-                                        "readacted"
+                                        "redacted"
                                         if not self.include_sensitive_data
                                         else item["content"]
                                     ),
@@ -976,22 +1090,20 @@ def _normalize_output_messages_to_role_parts(
                                 {
                                     "type": "text",
                                     "content": (
-                                        "readacted"
+                                        "redacted"
                                         if not self.include_sensitive_data
                                         else str(item)
                                     ),
                                 }
                             )
-                        if not finish_reason and isinstance(
-                            item.get("finish_reason"), str
-                        ):
-                            finish_reason = item.get("finish_reason")
+                        if not finish_reason:
+                            finish_reason = _get_finish_reason(item)
                     elif isinstance(item, str):
                         parts.append(
                             {
                                 "type": "text",
                                 "content": (
-                                    "readacted"
+                                    "redacted"
                                     if not self.include_sensitive_data
                                     else item
                                 ),
@@ -1002,7 +1114,7 @@ def _normalize_output_messages_to_role_parts(
                             {
                                 "type": "text",
                                 "content": (
-                                    "readacted"
+                                    "redacted"
                                     if not self.include_sensitive_data
                                     else str(item)
                                 ),
@@ -1011,10 +1123,9 @@ def _normalize_output_messages_to_role_parts(
 
         # Build assistant message
         msg: dict[str, Any] = {"role": "assistant", "parts": parts}
-        if finish_reason:
-            msg["finish_reason"] = finish_reason
         # Only include if there is content
         if parts:
+            msg["finish_reason"] = finish_reason or _DEFAULT_FINISH_REASON
             messages.append(msg)
         return messages
 
@@ -1035,10 +1146,10 @@ def _build_content_payload(self, span: Span[Any]) -> ContentPayload:
         )
         capture_tools = self._content_mode.capture_in_span or (
             self._content_mode.capture_in_event
-            and _is_instance_of(span_data, FunctionSpanData)
+            and isinstance(span_data, FunctionSpanData)
         )
 
-        if _is_instance_of(span_data, GenerationSpanData):
+        if isinstance(span_data, GenerationSpanData):
             span_input = getattr(span_data, "input", None)
             if capture_messages and span_input:
                 payload.input_messages = (
@@ -1058,7 +1169,7 @@ def _build_content_payload(self, span: Span[Any]) -> ContentPayload:
                 if normalized_out:
                     payload.output_messages = normalized_out
 
-        elif _is_instance_of(span_data, ResponseSpanData):
+        elif isinstance(span_data, ResponseSpanData):
             span_input = getattr(span_data, "input", None)
             response_obj = getattr(span_data, "response", None)
             if capture_messages and span_input:
@@ -1085,7 +1196,7 @@ def _build_content_payload(self, span: Span[Any]) -> ContentPayload:
                 if normalized_out:
                     payload.output_messages = normalized_out
 
-        elif _is_instance_of(span_data, FunctionSpanData) and capture_tools:
+        elif isinstance(span_data, FunctionSpanData) and capture_tools:
 
             def _serialize_tool_value(value: Any) -> Optional[str]:
                 if value is None:
@@ -1157,20 +1268,22 @@ def _update_agent_aggregate(
 
     def _infer_output_type(self, span_data: Any) -> str:
         """Infer gen_ai.output.type for multiple span kinds."""
-        if _is_instance_of(span_data, FunctionSpanData):
+        if isinstance(span_data, FunctionSpanData):
             # Tool results are typically JSON
             return GenAIOutputType.JSON
-        if _is_instance_of(span_data, TranscriptionSpanData):
+        if isinstance(span_data, MCPListToolsSpanData):
+            return GenAIOutputType.JSON
+        if isinstance(span_data, TranscriptionSpanData):
             return GenAIOutputType.TEXT
-        if _is_instance_of(span_data, SpeechSpanData):
+        if isinstance(span_data, (SpeechGroupSpanData, SpeechSpanData)):
             return GenAIOutputType.SPEECH
-        if _is_instance_of(span_data, GuardrailSpanData):
+        if isinstance(span_data, GuardrailSpanData):
             return GenAIOutputType.TEXT
-        if _is_instance_of(span_data, HandoffSpanData):
+        if isinstance(span_data, HandoffSpanData):
             return GenAIOutputType.TEXT
 
         # Check for embeddings operation
-        if _is_instance_of(span_data, GenerationSpanData):
+        if isinstance(span_data, GenerationSpanData):
             if hasattr(span_data, "embedding_dimension"):
                 return (
                     GenAIOutputType.TEXT
@@ -1272,9 +1385,9 @@ def _sanitize_usage_payload(usage: Any) -> None:
 
     def _get_span_kind(self, span_data: Any) -> SpanKind:
         """Determine appropriate span kind based on span data type."""
-        if _is_instance_of(span_data, FunctionSpanData):
+        if isinstance(span_data, FunctionSpanData):
             return SpanKind.INTERNAL  # Tool execution is internal
-        if _is_instance_of(
+        if isinstance(
             span_data,
             (
                 GenerationSpanData,
@@ -1284,19 +1397,72 @@ def _get_span_kind(self, span_data: Any) -> SpanKind:
             ),
         ):
             return SpanKind.CLIENT  # API calls to model providers
-        if _is_instance_of(span_data, AgentSpanData):
+        if isinstance(span_data, AgentSpanData):
             return SpanKind.CLIENT
-        if _is_instance_of(span_data, (GuardrailSpanData, HandoffSpanData)):
+        if isinstance(
+            span_data,
+            (
+                GuardrailSpanData,
+                HandoffSpanData,
+                MCPListToolsSpanData,
+                SpeechGroupSpanData,
+                TaskSpanData,
+                TurnSpanData,
+            ),
+        ):
             return SpanKind.INTERNAL  # Agent operations are internal
         return SpanKind.INTERNAL
 
+    def _get_agent_name_for_span(self, span_data: Any) -> Optional[str]:
+        """Return the best available agent label for span naming."""
+        if self.agent_name:
+            return self.agent_name
+        if isinstance(span_data, AgentSpanData):
+            return getattr(span_data, "name", None)
+        if isinstance(span_data, TaskSpanData):
+            return getattr(span_data, "name", None)
+        if isinstance(span_data, TurnSpanData):
+            return getattr(span_data, "agent_name", None)
+        return self._agent_name_default
+
+    def _get_tool_name_for_span(self, span_data: Any) -> Optional[str]:
+        """Return the best available tool label for span naming."""
+        if isinstance(span_data, FunctionSpanData):
+            return getattr(span_data, "name", None)
+        if isinstance(span_data, MCPListToolsSpanData):
+            return "list_tools"
+        return None
+
+    def _get_span_display_name(
+        self,
+        span_data: Any,
+        operation_name: Optional[str],
+        model: Optional[str],
+    ) -> str:
+        """Return the OTel span name for known and custom Agents spans."""
+        if operation_name:
+            return get_span_name(
+                operation_name,
+                model,
+                self._get_agent_name_for_span(span_data),
+                self._get_tool_name_for_span(span_data),
+            )
+
+        name = getattr(span_data, "name", None)
+        if isinstance(name, str) and name:
+            return name
+
+        span_type = getattr(span_data, "type", None)
+        if isinstance(span_type, str) and span_type:
+            return span_type
+
+        return "span"
+
     def on_trace_start(self, trace: Trace) -> None:
         """Create root span when trace starts."""
         if self._tracer:
             attributes = {
                 GEN_AI_PROVIDER_NAME: self.system_name,
-                GEN_AI_SYSTEM_KEY: self.system_name,
-                GEN_AI_OPERATION_NAME: GenAIOperationName.INVOKE_AGENT,
             }
             # Legacy emission removed
 
@@ -1319,8 +1485,6 @@ def on_trace_start(self, trace: Trace) -> None:
     def on_trace_end(self, trace: Trace) -> None:
         """End root span when trace ends."""
         if root_span := self._root_spans.pop(trace.trace_id, None):
-            if root_span.is_recording():
-                root_span.set_status(Status(StatusCode.OK))
             root_span.end()
         self._cleanup_spans_for_trace(trace.trace_id)
 
@@ -1331,7 +1495,7 @@ def on_span_start(self, span: Span[Any]) -> None:
 
         self._span_parents[span.span_id] = span.parent_id
         if (
-            _is_instance_of(span.span_data, AgentSpanData)
+            isinstance(span.span_data, AgentSpanData)
             and span.span_id not in self._agent_content
         ):
             self._agent_content[span.span_id] = {
@@ -1355,27 +1519,16 @@ def on_span_start(self, span: Span[Any]) -> None:
             response_obj = getattr(span.span_data, "response", None)
             model = getattr(response_obj, "model", None)
 
-        # Use configured agent name or get from span data
-        agent_name = self.agent_name
-        if not agent_name and _is_instance_of(span.span_data, AgentSpanData):
-            agent_name = getattr(span.span_data, "name", None)
-        if not agent_name:
-            agent_name = self._agent_name_default
-
-        tool_name = (
-            getattr(span.span_data, "name", None)
-            if _is_instance_of(span.span_data, FunctionSpanData)
-            else None
-        )
-
         # Generate spec-compliant span name
-        span_name = get_span_name(operation_name, model, agent_name, tool_name)
+        span_name = self._get_span_display_name(
+            span.span_data, operation_name, model
+        )
 
         attributes = {
             GEN_AI_PROVIDER_NAME: self.system_name,
-            GEN_AI_SYSTEM_KEY: self.system_name,
-            GEN_AI_OPERATION_NAME: operation_name,
         }
+        if operation_name:
+            attributes[GEN_AI_OPERATION_NAME] = operation_name
         # Legacy emission removed
 
         # Add configured agent and server attributes
@@ -1410,7 +1563,7 @@ def on_span_end(self, span: Span[Any]) -> None:
         self._update_agent_aggregate(span, payload)
         agent_content = (
             self._agent_content.get(span.span_id)
-            if _is_instance_of(span.span_data, AgentSpanData)
+            if isinstance(span.span_data, AgentSpanData)
             else None
         )
 
@@ -1432,7 +1585,7 @@ def on_span_end(self, span: Span[Any]) -> None:
                     span.span_id,
                     e,
                 )
-            if _is_instance_of(span.span_data, AgentSpanData):
+            if isinstance(span.span_data, AgentSpanData):
                 self._agent_content.pop(span.span_id, None)
             self._span_parents.pop(span.span_id, None)
             return
@@ -1450,7 +1603,7 @@ def on_span_end(self, span: Span[Any]) -> None:
                 otel_span.set_attribute(key, value)
                 attributes[key] = value
 
-            if _is_instance_of(
+            if isinstance(
                 span.span_data, (GenerationSpanData, ResponseSpanData)
             ):
                 operation_name = attributes.get(GEN_AI_OPERATION_NAME)
@@ -1474,7 +1627,9 @@ def on_span_end(self, span: Span[Any]) -> None:
 
             # Emit operation details event if configured
             # Set error status if applicable
-            otel_span.set_status(status=_get_span_status(span))
+            status = _get_span_status(span)
+            if status is not None:
+                otel_span.set_status(status=status)
             if getattr(span, "error", None):
                 err_obj = span.error
                 err_type = err_obj.get("type") or err_obj.get("name")
@@ -1492,7 +1647,7 @@ def on_span_end(self, span: Span[Any]) -> None:
             otel_span.set_status(Status(StatusCode.ERROR, str(e)))
             otel_span.end()
         finally:
-            if _is_instance_of(span.span_data, AgentSpanData):
+            if isinstance(span.span_data, AgentSpanData):
                 self._agent_content.pop(span.span_id, None)
             self._span_parents.pop(span.span_id, None)
 
@@ -1520,9 +1675,9 @@ def force_flush(self) -> None:
         """Force flush (no-op for this processor)."""
         pass
 
-    def _get_operation_name(self, span_data: Any) -> str:
+    def _get_operation_name(self, span_data: Any) -> Optional[str]:
         """Determine operation name from span data type."""
-        if _is_instance_of(span_data, GenerationSpanData):
+        if isinstance(span_data, GenerationSpanData):
             # Check if it's embeddings
             if hasattr(span_data, "embedding_dimension"):
                 return GenAIOperationName.EMBEDDINGS
@@ -1532,23 +1687,21 @@ def _get_operation_name(self, span_data: Any) -> str:
                 if isinstance(first_input, dict) and "role" in first_input:
                     return GenAIOperationName.CHAT
             return GenAIOperationName.TEXT_COMPLETION
-        if _is_instance_of(span_data, AgentSpanData):
+        if isinstance(span_data, AgentSpanData):
             # The OpenAI Agents SDK AgentSpanData has no "operation" field;
             # agent spans always represent invoke_agent.
             return GenAIOperationName.INVOKE_AGENT
-        if _is_instance_of(span_data, FunctionSpanData):
+        if isinstance(span_data, FunctionSpanData):
             return GenAIOperationName.EXECUTE_TOOL
-        if _is_instance_of(span_data, ResponseSpanData):
+        if isinstance(span_data, ResponseSpanData):
             return GenAIOperationName.CHAT  # Response typically from chat
-        if _is_instance_of(span_data, TranscriptionSpanData):
+        if isinstance(span_data, TranscriptionSpanData):
             return GenAIOperationName.TRANSCRIPTION
-        if _is_instance_of(span_data, SpeechSpanData):
+        if isinstance(span_data, SpeechSpanData):
             return GenAIOperationName.SPEECH
-        if _is_instance_of(span_data, GuardrailSpanData):
+        if isinstance(span_data, GuardrailSpanData):
             return GenAIOperationName.GUARDRAIL
-        if _is_instance_of(span_data, HandoffSpanData):
-            return GenAIOperationName.HANDOFF
-        return "unknown"
+        return None
 
     def _extract_genai_attributes(
         self,
@@ -1561,7 +1714,6 @@ def _extract_genai_attributes(
 
         # Base attributes
         yield GEN_AI_PROVIDER_NAME, self.system_name
-        yield GEN_AI_SYSTEM_KEY, self.system_name
         # Legacy emission removed
 
         # Add configured agent attributes (always include when set)
@@ -1582,32 +1734,72 @@ def _extract_genai_attributes(
             yield key, value
 
         # Process different span types
-        if _is_instance_of(span_data, GenerationSpanData):
+        if isinstance(span_data, GenerationSpanData):
             yield from self._get_attributes_from_generation_span_data(
                 span_data, payload
             )
-        elif _is_instance_of(span_data, AgentSpanData):
+        elif isinstance(span_data, AgentSpanData):
             yield from self._get_attributes_from_agent_span_data(
                 span_data, agent_content
             )
-        elif _is_instance_of(span_data, FunctionSpanData):
+        elif isinstance(span_data, TaskSpanData):
+            yield from self._get_attributes_from_task_span_data(span_data)
+        elif isinstance(span_data, TurnSpanData):
+            yield from self._get_attributes_from_turn_span_data(span_data)
+        elif isinstance(span_data, CustomSpanData):
+            yield from self._get_attributes_from_custom_span_data(span_data)
+        elif isinstance(span_data, FunctionSpanData):
             yield from self._get_attributes_from_function_span_data(
-                span_data, payload
+                span_data, payload, span.span_id
             )
-        elif _is_instance_of(span_data, ResponseSpanData):
+        elif isinstance(span_data, MCPListToolsSpanData):
+            yield from self._get_attributes_from_mcp_tools_span_data(span_data)
+        elif isinstance(span_data, ResponseSpanData):
             yield from self._get_attributes_from_response_span_data(
                 span_data, payload
             )
-        elif _is_instance_of(span_data, TranscriptionSpanData):
+        elif isinstance(span_data, TranscriptionSpanData):
             yield from self._get_attributes_from_transcription_span_data(
                 span_data
             )
-        elif _is_instance_of(span_data, SpeechSpanData):
+        elif isinstance(span_data, SpeechSpanData):
             yield from self._get_attributes_from_speech_span_data(span_data)
-        elif _is_instance_of(span_data, GuardrailSpanData):
+        elif isinstance(span_data, GuardrailSpanData):
             yield from self._get_attributes_from_guardrail_span_data(span_data)
-        elif _is_instance_of(span_data, HandoffSpanData):
+        elif isinstance(span_data, HandoffSpanData):
             yield from self._get_attributes_from_handoff_span_data(span_data)
+        elif isinstance(span_data, SpeechGroupSpanData):
+            yield from self._get_attributes_from_speech_group_span_data(
+                span_data
+            )
+
+    def _get_usage_attributes(
+        self, usage: Any
+    ) -> Iterator[tuple[str, AttributeValue]]:
+        """Extract token usage attributes from dict or object payloads."""
+        if not usage:
+            return
+
+        self._sanitize_usage_payload(usage)
+        if isinstance(usage, dict):
+            input_tokens = usage.get("prompt_tokens") or usage.get(
+                "input_tokens"
+            )
+            output_tokens = usage.get("completion_tokens") or usage.get(
+                "output_tokens"
+            )
+        else:
+            input_tokens = getattr(usage, "input_tokens", None)
+            if input_tokens is None:
+                input_tokens = getattr(usage, "prompt_tokens", None)
+            output_tokens = getattr(usage, "output_tokens", None)
+            if output_tokens is None:
+                output_tokens = getattr(usage, "completion_tokens", None)
+
+        if input_tokens is not None:
+            yield GEN_AI_USAGE_INPUT_TOKENS, input_tokens
+        if output_tokens is not None:
+            yield GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens
 
     def _get_attributes_from_generation_span_data(
         self, span_data: GenerationSpanData, payload: ContentPayload
@@ -1632,17 +1824,12 @@ def _get_attributes_from_generation_span_data(
         if hasattr(span_data, "data_source_id"):
             yield GEN_AI_DATA_SOURCE_ID, span_data.data_source_id
 
-        finish_reasons: list[Any] = []
-        if span_data.output:
-            for part in span_data.output:
-                if isinstance(part, dict):
-                    fr = part.get("finish_reason") or part.get("stop_reason")
-                else:
-                    fr = getattr(part, "finish_reason", None)
-                if fr:
-                    finish_reasons.append(
-                        fr if isinstance(fr, str) else str(fr)
-                    )
+        finish_reasons = _get_finish_reasons_from_sequence(span_data.output)
+        if (
+            not finish_reasons
+            and operation_name != GenAIOperationName.EMBEDDINGS
+        ):
+            finish_reasons = [_DEFAULT_FINISH_REASON]
         if finish_reasons:
             yield GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons
 
@@ -1815,7 +2002,7 @@ def _is_placeholder_message(self, message: Any) -> bool:
             if (
                 not isinstance(part, dict)
                 or part.get("type") != "text"
-                or part.get("content") != "readacted"
+                or part.get("content") != "redacted"
             ):
                 return False
         return True
@@ -1924,8 +2111,58 @@ def _get_attributes_from_agent_span_data(
             normalize_output_type(self._infer_output_type(span_data)),
         )
 
+    def _get_attributes_from_task_span_data(
+        self, span_data: TaskSpanData
+    ) -> Iterator[tuple[str, AttributeValue]]:
+        """Extract attributes from an Agents SDK task span."""
+        yield from self._get_usage_attributes(
+            getattr(span_data, "usage", None)
+        )
+        yield (
+            GEN_AI_OUTPUT_TYPE,
+            normalize_output_type(self._infer_output_type(span_data)),
+        )
+
+    def _get_attributes_from_turn_span_data(
+        self, span_data: TurnSpanData
+    ) -> Iterator[tuple[str, AttributeValue]]:
+        """Extract attributes from an Agents SDK turn span."""
+        agent_name = getattr(span_data, "agent_name", None)
+        if agent_name:
+            yield GEN_AI_AGENT_NAME, agent_name
+        yield from self._get_usage_attributes(
+            getattr(span_data, "usage", None)
+        )
+        yield (
+            GEN_AI_OUTPUT_TYPE,
+            normalize_output_type(self._infer_output_type(span_data)),
+        )
+
+    def _get_attributes_from_custom_span_data(
+        self, span_data: CustomSpanData
+    ) -> Iterator[tuple[str, AttributeValue]]:
+        """Extract attributes from custom Agents SDK span data."""
+        custom_data = getattr(span_data, "data", None)
+        if isinstance(custom_data, MappingABC):
+            for key, value in custom_data.items():
+                if not isinstance(key, str) or not key:
+                    continue
+                if _is_reserved_custom_attribute_key(key):
+                    continue
+                attr_value = _coerce_custom_attribute_value(value)
+                if attr_value is not None:
+                    yield key, attr_value
+
+        yield (
+            GEN_AI_OUTPUT_TYPE,
+            normalize_output_type(self._infer_output_type(span_data)),
+        )
+
     def _get_attributes_from_function_span_data(
-        self, span_data: FunctionSpanData, payload: ContentPayload
+        self,
+        span_data: FunctionSpanData,
+        payload: ContentPayload,
+        span_id: Optional[str] = None,
     ) -> Iterator[tuple[str, AttributeValue]]:
         """Extract attributes from function/tool span."""
         yield GEN_AI_OPERATION_NAME, GenAIOperationName.EXECUTE_TOOL
@@ -1939,8 +2176,9 @@ def _get_attributes_from_function_span_data(
             tool_type = span_data.tool_type
         yield GEN_AI_TOOL_TYPE, validate_tool_type(tool_type)
 
-        if hasattr(span_data, "call_id") and span_data.call_id:
-            yield GEN_AI_TOOL_CALL_ID, span_data.call_id
+        call_id = getattr(span_data, "call_id", None) or span_id
+        if call_id:
+            yield GEN_AI_TOOL_CALL_ID, call_id
         if hasattr(span_data, "description") and span_data.description:
             yield GEN_AI_TOOL_DESCRIPTION, span_data.description
 
@@ -1973,6 +2211,30 @@ def _get_attributes_from_function_span_data(
             normalize_output_type(self._infer_output_type(span_data)),
         )
 
+    def _get_attributes_from_mcp_tools_span_data(
+        self, span_data: MCPListToolsSpanData
+    ) -> Iterator[tuple[str, AttributeValue]]:
+        """Extract attributes from MCP list-tools spans."""
+        yield GEN_AI_TOOL_NAME, "list_tools"
+        yield GEN_AI_TOOL_TYPE, GenAIToolType.EXTENSION
+
+        server = getattr(span_data, "server", None)
+        if server:
+            yield GEN_AI_DATA_SOURCE_ID, server
+
+        result = getattr(span_data, "result", None)
+        if (
+            result is not None
+            and self.include_sensitive_data
+            and self._content_mode.capture_in_span
+        ):
+            yield GEN_AI_TOOL_CALL_RESULT, safe_json_dumps(result)
+
+        yield (
+            GEN_AI_OUTPUT_TYPE,
+            normalize_output_type(self._infer_output_type(span_data)),
+        )
+
     def _get_attributes_from_response_span_data(
         self, span_data: ResponseSpanData, payload: ContentPayload
     ) -> Iterator[tuple[str, AttributeValue]]:
@@ -1994,20 +2256,10 @@ def _get_attributes_from_response_span_data(
                     yield GEN_AI_REQUEST_MODEL, span_data.response.model
 
             # Finish reasons
-            finish_reasons = []
-            if (
-                hasattr(span_data.response, "output")
-                and span_data.response.output
-            ):
-                for part in span_data.response.output:
-                    if isinstance(part, dict):
-                        fr = part.get("finish_reason") or part.get(
-                            "stop_reason"
-                        )
-                    else:
-                        fr = getattr(part, "finish_reason", None)
-                    if fr:
-                        finish_reasons.append(fr)
+            response_output = getattr(span_data.response, "output", None)
+            finish_reasons = _get_finish_reasons_from_sequence(response_output)
+            if not finish_reasons:
+                finish_reasons = [_DEFAULT_FINISH_REASON]
             if finish_reasons:
                 yield GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons
 
@@ -2160,14 +2412,15 @@ def _get_attributes_from_handoff_span_data(
         self, span_data: HandoffSpanData
     ) -> Iterator[tuple[str, AttributeValue]]:
         """Extract attributes from handoff span."""
-        yield GEN_AI_OPERATION_NAME, GenAIOperationName.HANDOFF
-
-        if span_data.from_agent:
-            yield GEN_AI_HANDOFF_FROM_AGENT, span_data.from_agent
-
-        if span_data.to_agent:
-            yield GEN_AI_HANDOFF_TO_AGENT, span_data.to_agent
+        yield (
+            GEN_AI_OUTPUT_TYPE,
+            normalize_output_type(self._infer_output_type(span_data)),
+        )
 
+    def _get_attributes_from_speech_group_span_data(
+        self, span_data: SpeechGroupSpanData
+    ) -> Iterator[tuple[str, AttributeValue]]:
+        """Extract attributes from a speech group span."""
         yield (
             GEN_AI_OUTPUT_TYPE,
             normalize_output_type(self._infer_output_type(span_data)),
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/requirements.oldest.txt b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/requirements.oldest.txt
index 6e69dd5c..21942e43 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/requirements.oldest.txt
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/requirements.oldest.txt
@@ -15,7 +15,7 @@
 # This variant of the requirements aims to test the system using
 # the oldest supported version of external dependencies.
 
-openai-agents==0.3.3
+openai-agents==0.17.0
 pydantic>=2.10,<3
 httpx==0.27.2
 Deprecated==1.2.14
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_conformance.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_conformance.py
index 6a38496b..1a26e1e3 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_conformance.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_conformance.py
@@ -25,18 +25,7 @@
 
 @pytest.mark.parametrize(
     "scenario",
-    [
-        pytest.param(
-            OrchestrationScenario(),
-            marks=pytest.mark.skip(
-                reason=(
-                    "openai-agents instrumentation has multiple semconv gaps "
-                    "surfaced by this scenario; tracked in "
-                    "https://github.com/open-telemetry/opentelemetry-python-genai/issues/86"
-                )
-            ),
-        ),
-    ],
+    [OrchestrationScenario()],
     ids=lambda s: type(s).__name__,
 )
 def test_conformance(
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py
index 72379388..5ad48a75 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py
@@ -6,16 +6,20 @@
 from __future__ import annotations
 
 import json
+from enum import Enum
 from types import SimpleNamespace
 from typing import Any
 
 import agents.tracing as agents_tracing
 from agents.tracing import (
     agent_span,
+    custom_span,
     function_span,
     generation_span,
+    mcp_tools_span,
     response_span,
     set_trace_processors,
+    speech_group_span,
     trace,
 )
 from openai.types.responses import FunctionTool  # noqa: E402
@@ -27,7 +31,12 @@
     ContentPayload,
     GenAISemanticProcessor,
 )
+from opentelemetry.sdk.metrics import MeterProvider  # noqa: E402
+from opentelemetry.sdk.metrics.export import InMemoryMetricReader  # noqa: E402
 from opentelemetry.sdk.trace import TracerProvider  # noqa: E402
+from opentelemetry.semconv._incubating.metrics import (  # noqa: E402
+    gen_ai_metrics,
+)
 
 try:
     from opentelemetry.sdk.trace.export import (  # type: ignore[attr-defined]
@@ -59,6 +68,10 @@
 GEN_AI_TOOL_DEFINITIONS = getattr(
     GenAI, "GEN_AI_TOOL_DEFINITIONS", "gen_ai.tool.definitions"
 )
+GEN_AI_DATA_SOURCE_ID = getattr(
+    GenAI, "GEN_AI_DATA_SOURCE_ID", "gen_ai.data_source.id"
+)
+GEN_AI_TOOL_CALL_RESULT = "gen_ai.tool.call.result"
 
 
 def _instrument_with_provider(**instrument_kwargs):
@@ -111,6 +124,181 @@ def test_generation_span_creates_client_span():
         exporter.clear()
 
 
+def test_response_span_string_input_records_single_user_message():
+    instrumentor, exporter = _instrument_with_provider(
+        include_sensitive_data=True
+    )
+
+    class _Response:
+        def __init__(self) -> None:
+            self.id = "resp-456"
+            self.model = "gpt-4o-mini"
+            self.output = []
+            self.usage = None
+            self.tools = []
+
+    try:
+        provider = agents_tracing.get_trace_provider()
+        with trace("workflow") as workflow:
+            response_span_obj = provider.create_span(
+                agents_tracing.ResponseSpanData(
+                    input="single user prompt",
+                    response=_Response(),
+                ),
+                parent=workflow,
+            )
+            response_span_obj.start()
+            response_span_obj.finish()
+
+        spans = exporter.get_finished_spans()
+        response = next(
+            span
+            for span in spans
+            if span.attributes.get(GenAI.GEN_AI_RESPONSE_ID) == "resp-456"
+        )
+
+        prompt = json.loads(response.attributes[GEN_AI_INPUT_MESSAGES])
+        assert prompt == [
+            {
+                "role": "user",
+                "parts": [{"type": "text", "content": "single user prompt"}],
+            }
+        ]
+        assert response.attributes[GenAI.GEN_AI_RESPONSE_FINISH_REASONS] == (
+            "unknown",
+        )
+    finally:
+        instrumentor.uninstrument()
+        exporter.clear()
+
+
+def _assert_current_sdk_non_genai_spans(spans):
+    task = next(span for span in spans if span.name == "Agent workflow")
+    assert GenAI.GEN_AI_OPERATION_NAME not in task.attributes
+    assert task.attributes[GenAI.GEN_AI_USAGE_INPUT_TOKENS] == 20
+    assert task.attributes[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS] == 6
+
+    turn = next(span for span in spans if span.name == "turn")
+    assert GenAI.GEN_AI_OPERATION_NAME not in turn.attributes
+    assert turn.attributes[GenAI.GEN_AI_AGENT_NAME] == "Support Agent"
+    assert turn.attributes[GenAI.GEN_AI_USAGE_INPUT_TOKENS] == 8
+    assert turn.attributes[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS] == 4
+
+    mcp = next(span for span in spans if span.name == "mcp_tools")
+    assert GenAI.GEN_AI_OPERATION_NAME not in mcp.attributes
+    assert mcp.attributes[GenAI.GEN_AI_TOOL_NAME] == "list_tools"
+    assert mcp.attributes[GenAI.GEN_AI_TOOL_TYPE] == "extension"
+    assert mcp.attributes[GEN_AI_DATA_SOURCE_ID] == "filesystem"
+    assert json.loads(mcp.attributes[GEN_AI_TOOL_CALL_RESULT]) == [
+        "read_file",
+        "write_file",
+    ]
+
+    speech = next(span for span in spans if span.name == "speech_group")
+    assert GenAI.GEN_AI_OPERATION_NAME not in speech.attributes
+    assert speech.attributes[GenAI.GEN_AI_OUTPUT_TYPE] == "speech"
+
+    custom = next(span for span in spans if span.name == "application work")
+    assert GenAI.GEN_AI_OPERATION_NAME not in custom.attributes
+    assert custom.attributes["step"] == "local"
+
+
+def _assert_sandbox_custom_span_attributes(spans):
+    sandbox = next(span for span in spans if span.name == "sandbox.exec")
+    assert GenAI.GEN_AI_OPERATION_NAME not in sandbox.attributes
+    assert sandbox.attributes["sandbox.backend"] == "unix_local"
+    assert sandbox.attributes["sandbox.operation"] == "exec"
+    assert sandbox.attributes["sandbox.session.id"] == "session-123"
+    assert sandbox.attributes["exit_code"] == 0
+    assert sandbox.attributes["process.exit.code"] == 0
+    assert sandbox.attributes["error_code"] == "workspace_read_not_found"
+    assert sandbox.attributes["path.parts"] == ("case", "scenario.json")
+    assert json.loads(sandbox.attributes["nested"]) == {
+        "path": "missing.txt",
+        "error_code": "workspace_read_not_found",
+    }
+    assert "none" not in sandbox.attributes
+    assert GenAI.GEN_AI_OPERATION_NAME not in sandbox.attributes
+    assert "otel.status_code" not in sandbox.attributes
+    assert (
+        sandbox.attributes[ServerAttributes.SERVER_ADDRESS] == "api.openai.com"
+    )
+
+
+def test_current_agents_sdk_span_types_do_not_emit_unknown_operations():
+    class ErrorCode(Enum):
+        WORKSPACE_READ_NOT_FOUND = "workspace_read_not_found"
+
+    instrumentor, exporter = _instrument_with_provider()
+
+    try:
+        provider = agents_tracing.get_trace_provider()
+        with trace("workflow") as workflow:
+            task_span_obj = provider.create_span(
+                agents_tracing.TaskSpanData(
+                    name="Agent workflow",
+                    usage={"input_tokens": 20, "output_tokens": 6},
+                ),
+                parent=workflow,
+            )
+            task_span_obj.start()
+            task_span_obj.finish()
+
+            turn_span_obj = provider.create_span(
+                agents_tracing.TurnSpanData(
+                    turn=1,
+                    agent_name="Support Agent",
+                    usage={"input_tokens": 8, "output_tokens": 4},
+                ),
+                parent=workflow,
+            )
+            turn_span_obj.start()
+            turn_span_obj.finish()
+
+            with mcp_tools_span(
+                server="filesystem",
+                result=["read_file", "write_file"],
+            ):
+                pass
+            with speech_group_span(input="say hello"):
+                pass
+            with custom_span(name="application work", data={"step": "local"}):
+                pass
+            with custom_span(
+                name="sandbox.exec",
+                data={
+                    "sandbox.backend": "unix_local",
+                    "sandbox.operation": "exec",
+                    "sandbox.session.id": "session-123",
+                    "exit_code": 0,
+                    "process.exit.code": 0,
+                    "error_code": ErrorCode.WORKSPACE_READ_NOT_FOUND,
+                    "path.parts": ["case", "scenario.json"],
+                    "nested": {
+                        "path": "missing.txt",
+                        "error_code": ErrorCode.WORKSPACE_READ_NOT_FOUND,
+                    },
+                    "none": None,
+                    "gen_ai.operation.name": "unknown",
+                    "otel.status_code": "ERROR",
+                    "server.address": "malicious.example",
+                },
+            ):
+                pass
+
+        spans = exporter.get_finished_spans()
+        assert all(
+            span.attributes.get(GenAI.GEN_AI_OPERATION_NAME) != "unknown"
+            for span in spans
+        )
+
+        _assert_current_sdk_non_genai_spans(spans)
+        _assert_sandbox_custom_span_attributes(spans)
+    finally:
+        instrumentor.uninstrument()
+        exporter.clear()
+
+
 def test_generation_span_without_roles_uses_text_completion():
     instrumentor, exporter = _instrument_with_provider()
 
@@ -161,6 +349,7 @@ def test_function_span_records_tool_attributes():
             tool_span.attributes[GenAI.GEN_AI_OPERATION_NAME] == "execute_tool"
         )
         assert tool_span.attributes[GenAI.GEN_AI_TOOL_NAME] == "fetch_weather"
+        assert GenAI.GEN_AI_TOOL_CALL_ID in tool_span.attributes
         assert tool_span.attributes[GenAI.GEN_AI_TOOL_TYPE] == "function"
         assert tool_span.attributes[GEN_AI_PROVIDER_NAME] == "openai"
     finally:
@@ -198,10 +387,38 @@ def test_agent_invoke_span_records_attributes():
         exporter.clear()
 
 
+def test_handoff_span_does_not_emit_undocumented_genai_operation():
+    instrumentor, exporter = _instrument_with_provider()
+
+    try:
+        provider = agents_tracing.get_trace_provider()
+        with trace("workflow") as workflow:
+            handoff = provider.create_span(
+                agents_tracing.HandoffSpanData(
+                    from_agent="triage", to_agent="weather_specialist"
+                ),
+                parent=workflow,
+            )
+            handoff.start()
+            handoff.finish()
+
+        handoff_span = next(
+            span
+            for span in exporter.get_finished_spans()
+            if span.name == "handoff"
+        )
+        assert GenAI.GEN_AI_OPERATION_NAME not in handoff_span.attributes
+        assert "gen_ai.handoff.from_agent" not in handoff_span.attributes
+        assert "gen_ai.handoff.to_agent" not in handoff_span.attributes
+    finally:
+        instrumentor.uninstrument()
+        exporter.clear()
+
+
 def _placeholder_message() -> dict[str, Any]:
     return {
         "role": "user",
-        "parts": [{"type": "text", "content": "readacted"}],
+        "parts": [{"type": "text", "content": "redacted"}],
     }
 
 
@@ -237,7 +454,7 @@ def test_agent_content_aggregation_skips_duplicate_snapshots():
             {"role": "user", "parts": [{"type": "text", "content": "hello"}]},
             {
                 "role": "user",
-                "parts": [{"type": "text", "content": "readacted"}],
+                "parts": [{"type": "text", "content": "redacted"}],
             },
         ]
     )
@@ -397,6 +614,7 @@ def test_agent_span_collects_child_messages():
             {
                 "role": "assistant",
                 "parts": [{"type": "text", "content": "hello"}],
+                "finish_reason": "unknown",
             }
         ]
 
@@ -549,3 +767,35 @@ def __init__(self) -> None:
     finally:
         instrumentor.uninstrument()
         exporter.clear()
+
+
+def test_metrics_are_emitted_with_configured_meter_provider():
+    metric_reader = InMemoryMetricReader()
+    meter_provider = MeterProvider(metric_readers=[metric_reader])
+    instrumentor, exporter = _instrument_with_provider(
+        meter_provider=meter_provider
+    )
+
+    try:
+        with trace("workflow"):
+            with generation_span(
+                input=[{"role": "user", "content": "hi"}],
+                output=[{"type": "text", "content": "hello"}],
+                model="gpt-4o-mini",
+                usage={"input_tokens": 12, "output_tokens": 3},
+            ):
+                pass
+
+        metrics_data = metric_reader.get_metrics_data()
+        metrics = {
+            metric.name: metric
+            for resource_metric in metrics_data.resource_metrics
+            for scope_metric in resource_metric.scope_metrics
+            for metric in scope_metric.metrics
+        }
+        assert gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION in metrics
+        assert gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE in metrics
+    finally:
+        instrumentor.uninstrument()
+        exporter.clear()
+        meter_provider.shutdown()
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_span_processor_unit.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_span_processor_unit.py
index 0a2d2cda..58d46e1f 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_span_processor_unit.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_span_processor_unit.py
@@ -18,6 +18,7 @@
     AgentSpanData,
     FunctionSpanData,
     GenerationSpanData,
+    HandoffSpanData,
     ResponseSpanData,
 )
 
@@ -184,11 +185,18 @@ def test_operation_and_span_naming(processor_setup):
         == sp.GenAIOperationName.CHAT
     )
 
+    assert (
+        processor._get_operation_name(
+            HandoffSpanData(from_agent=None, to_agent=None)
+        )
+        is None
+    )
+
     class UnknownSpanData:
         pass
 
     unknown = UnknownSpanData()
-    assert processor._get_operation_name(unknown) == "unknown"
+    assert processor._get_operation_name(unknown) is None
 
     assert processor._get_span_kind(GenerationSpanData()) is SpanKind.CLIENT
     assert (
@@ -232,6 +240,7 @@ def test_attribute_builders(processor_setup):
             {
                 "role": "assistant",
                 "parts": [{"type": "text", "content": "hello"}],
+                "finish_reason": "stop",
             }
         ],
         system_instructions=[{"type": "text", "content": "be helpful"}],
@@ -368,6 +377,47 @@ def __init__(self) -> None:
     assert function_attrs[sp.GEN_AI_TOOL_CALL_RESULT] == {"temperature": 70}
     assert function_attrs[sp.GEN_AI_OUTPUT_TYPE] == sp.GenAIOutputType.JSON
 
+    generation_no_finish_reason = GenerationSpanData(
+        input=[{"role": "user"}],
+        output=[{"type": "text", "content": "hello"}],
+    )
+    generation_no_finish_attrs = _collect(
+        processor._get_attributes_from_generation_span_data(
+            generation_no_finish_reason, sp.ContentPayload()
+        )
+    )
+    assert generation_no_finish_attrs[sp.GEN_AI_RESPONSE_FINISH_REASONS] == [
+        "unknown"
+    ]
+    assert processor._normalize_output_messages_to_role_parts(
+        generation_no_finish_reason
+    ) == [
+        {
+            "role": "assistant",
+            "parts": [{"type": "text", "content": "hello"}],
+            "finish_reason": "unknown",
+        }
+    ]
+
+    function_without_call_id = FunctionSpanData(
+        name="lookup_weather", input=None, output=None
+    )
+    function_attrs_with_fallback = _collect(
+        processor._get_attributes_from_function_span_data(
+            function_without_call_id, sp.ContentPayload(), "span-42"
+        )
+    )
+    assert function_attrs_with_fallback[sp.GEN_AI_TOOL_CALL_ID] == "span-42"
+
+    handoff_attrs = _collect(
+        processor._get_attributes_from_handoff_span_data(
+            HandoffSpanData(from_agent="triage", to_agent="weather")
+        )
+    )
+    assert sp.GEN_AI_OPERATION_NAME not in handoff_attrs
+    assert "gen_ai.handoff.from_agent" not in handoff_attrs
+    assert "gen_ai.handoff.to_agent" not in handoff_attrs
+
 
 def test_extract_genai_attributes_unknown_type(processor_setup):
     processor, _ = processor_setup
@@ -385,7 +435,6 @@ def __init__(self) -> None:
         )
     )
     assert attrs[sp.GEN_AI_PROVIDER_NAME] == "openai"
-    assert attrs[sp.GEN_AI_SYSTEM_KEY] == "openai"
     assert sp.GEN_AI_OPERATION_NAME not in attrs
 
 
@@ -396,8 +445,7 @@ def test_span_status_helper():
     assert status.status_code is StatusCode.ERROR
     assert status.description == "boom: bad"
 
-    ok_status = sp._get_span_status(SimpleNamespace(error=None))
-    assert ok_status.status_code is StatusCode.OK
+    assert sp._get_span_status(SimpleNamespace(error=None)) is None
 
 
 @dataclass
@@ -487,8 +535,8 @@ def test_span_lifecycle_and_shutdown(processor_setup):
         statuses["execute_tool lookup"].status_code is StatusCode.ERROR
         and statuses["execute_tool lookup"].description == "boom: bad"
     )
-    assert statuses["invoke_agent agent"].status_code is StatusCode.OK
-    assert statuses["workflow"].status_code is StatusCode.OK
+    assert statuses["invoke_agent agent"].status_code is StatusCode.UNSET
+    assert statuses["workflow"].status_code is StatusCode.UNSET
     assert (
         statuses["invoke_agent"].status_code is StatusCode.ERROR
         and statuses["invoke_agent"].description == "Application shutdown"
@@ -498,10 +546,7 @@ def test_span_lifecycle_and_shutdown(processor_setup):
         and statuses["linger"].description == "Application shutdown"
     )
     workflow_span = next(span for span in finished if span.name == "workflow")
-    assert (
-        workflow_span.attributes[sp.GEN_AI_OPERATION_NAME]
-        == sp.GenAIOperationName.INVOKE_AGENT
-    )
+    assert sp.GEN_AI_OPERATION_NAME not in workflow_span.attributes
 
 
 def test_chat_span_renamed_with_model(processor_setup):
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_zz_coverage_improvements.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_zz_coverage_improvements.py
index d9880806..231af110 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_zz_coverage_improvements.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_zz_coverage_improvements.py
@@ -320,12 +320,12 @@ def test_normalize_output_type_returns_text_for_unknown(self):
 class TestGetSpanName:
     """Tests for get_span_name function."""
 
-    def test_span_name_handoff(self):
+    def test_span_name_unsupported_operation(self):
         sp, _ = _get_modules()
         name = sp.get_span_name("agent_handoff", agent_name="target_agent")
-        assert name == "agent_handoff target_agent"
+        assert name == "agent_handoff"
 
-    def test_span_name_handoff_no_agent(self):
+    def test_span_name_unsupported_operation_no_agent(self):
         sp, _ = _get_modules()
         name = sp.get_span_name("agent_handoff")
         assert name == "agent_handoff"
@@ -493,7 +493,7 @@ def test_normalize_with_redaction(self):
         processor = self._make_processor(sensitive=False)
         messages = [{"role": "user", "content": "Secret message"}]
         normalized = processor._normalize_messages_to_role_parts(messages)
-        assert normalized[0]["parts"][0]["content"] == "readacted"
+        assert normalized[0]["parts"][0]["content"] == "redacted"
 
     def test_normalize_empty_messages_returns_empty_list(self):
         processor = self._make_processor()
@@ -546,18 +546,18 @@ def test_normalize_parts_tool_calls_and_tool_responses(self):
         tool_call = next(
             p for p in normalized[0]["parts"] if p["type"] == "tool_call"
         )
-        assert tool_call["arguments"] == "readacted"
+        assert tool_call["arguments"] == "redacted"
 
         tool_response = next(
             p
             for p in normalized[0]["parts"]
             if p["type"] == "tool_call_response"
         )
-        assert tool_response["result"] == "readacted"
+        assert tool_response["result"] == "redacted"
 
         assert normalized[1]["role"] == "tool"
         assert normalized[1]["parts"][0]["type"] == "tool_call_response"
-        assert normalized[1]["parts"][0]["result"] == "readacted"
+        assert normalized[1]["parts"][0]["result"] == "redacted"
 
 
 # ============================================================================
@@ -568,17 +568,6 @@ def test_normalize_parts_tool_calls_and_tool_responses(self):
 class TestHelperFunctions:
     """Tests for various helper functions."""
 
-    def test_is_instance_of_single_class(self):
-        sp, _ = _get_modules()
-        assert sp._is_instance_of("hello", str) is True
-        assert sp._is_instance_of(123, str) is False
-
-    def test_is_instance_of_tuple_classes(self):
-        sp, _ = _get_modules()
-        assert sp._is_instance_of("hello", (str, int)) is True
-        assert sp._is_instance_of(123, (str, int)) is True
-        assert sp._is_instance_of(3.14, (str, int)) is False
-
     def test_span_status_helper(self):
         from types import SimpleNamespace
 
@@ -592,8 +581,7 @@ def test_span_status_helper(self):
         assert status.status_code is StatusCode.ERROR
         assert status.description == "boom: bad"
 
-        ok_status = sp._get_span_status(SimpleNamespace(error=None))
-        assert ok_status.status_code is StatusCode.OK
+        assert sp._get_span_status(SimpleNamespace(error=None)) is None
 
 
 # ============================================================================
@@ -695,7 +683,7 @@ class _Histogram:
             def __init__(self) -> None:
                 self.records = []
 
-            def record(self, value, attributes) -> None:
+            def record(self, value, attributes=None) -> None:
                 self.records.append((value, attributes))
 
         duration_histogram = _Histogram()
@@ -709,8 +697,8 @@ def record(self, value, attributes) -> None:
         processor._token_usage_histogram = token_histogram
 
         span = SimpleNamespace(
-            started_at="2024-01-01T00:00:00+00:00",
-            ended_at="2024-01-01T00:00:02+00:00",
+            started_at="2024-01-01T00:00:00Z",
+            ended_at="2024-01-01T00:00:02Z",
             error={"type": "timeout"},
         )
         attributes = {
diff --git a/uv.lock b/uv.lock
index 871731a0..808d708d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1239,7 +1239,7 @@ instruments = [
 
 [package.metadata]
 requires-dist = [
-    { name = "openai-agents", marker = "extra == 'instruments'", specifier = ">=0.3.3" },
+    { name = "openai-agents", marker = "extra == 'instruments'", specifier = ">=0.17.0" },
     { name = "opentelemetry-api", specifier = ">=1.40" },
     { name = "opentelemetry-instrumentation", specifier = ">=0.61b0" },
     { name = "opentelemetry-semantic-conventions", specifier = ">=0.61b0" },