diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/README.rst b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/README.rst new file mode 100644 index 000000000..f02ad1691 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/README.rst @@ -0,0 +1,24 @@ +LoongSuite Pydantic AI Instrumentation +====================================== + +This package adapts Pydantic AI's built-in OpenTelemetry Instrumentation +Capability to LoongSuite GenAI semantic conventions. + +Use ``PydanticAIInstrumentor().instrument()`` to enable Pydantic AI's global +agent and embedding instrumentation defaults and register the LoongSuite span +processor. Add ``LoongSuiteInstrumentationCapability`` to agents that need +ReAct STEP spans: + +.. code-block:: python + + from opentelemetry.instrumentation.pydantic_ai import ( + LoongSuiteInstrumentationCapability, + PydanticAIInstrumentor, + ) + from pydantic_ai import Agent + + PydanticAIInstrumentor().instrument() + agent = Agent( + "openai:gpt-4o-mini", + capabilities=[LoongSuiteInstrumentationCapability()], + ) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/pyproject.toml new file mode 100644 index 000000000..0096a00c3 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/pyproject.toml @@ -0,0 +1,56 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "loongsuite-instrumentation-pydantic-ai" +dynamic = ["version"] +description = "LoongSuite Pydantic AI instrumentation" +readme = "README.rst" +license = "Apache-2.0" +requires-python = ">=3.10" +authors = [ + { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + "opentelemetry-api ~= 1.37", + "opentelemetry-instrumentation ~= 0.58b0", + "opentelemetry-sdk ~= 1.37", + "opentelemetry-semantic-conventions ~= 0.58b0", + "opentelemetry-util-genai", +] + +[project.optional-dependencies] +instruments = [ + "pydantic-ai >= 1.0.0", +] + +[project.entry-points.opentelemetry_instrumentor] +pydantic_ai = "opentelemetry.instrumentation.pydantic_ai:PydanticAIInstrumentor" + +[project.urls] +Homepage = "https://github.com/alibaba/loongsuite-python-agent/tree/main/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai" +Repository = "https://github.com/alibaba/loongsuite-python-agent" + +[tool.hatch.version] +path = "src/opentelemetry/instrumentation/pydantic_ai/version.py" + +[tool.hatch.build.targets.sdist] +include = [ + "src", + "tests", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/opentelemetry"] diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/__init__.py new file mode 100644 index 000000000..175296eca --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/__init__.py @@ -0,0 +1,15 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/__init__.py new file mode 100644 index 000000000..de0a6cfd5 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/__init__.py @@ -0,0 +1,142 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); + +from __future__ import annotations + +import logging +from typing import Any, Collection + +from opentelemetry import trace +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor + +from opentelemetry.instrumentation.pydantic_ai.capability import ( + LoongSuiteInstrumentationCapability, +) +from opentelemetry.instrumentation.pydantic_ai.package import _instruments +from opentelemetry.instrumentation.pydantic_ai.span_processor import ( + LoongSuiteSpanProcessor, +) +from opentelemetry.instrumentation.pydantic_ai.version import __version__ + +logger = logging.getLogger(__name__) + +__all__ = [ + "LoongSuiteInstrumentationCapability", + "LoongSuiteSpanProcessor", + "PydanticAIInstrumentor", + "__version__", +] + + +class PydanticAIInstrumentor(BaseInstrumentor): + """Enable Pydantic AI built-in instrumentation with LoongSuite normalization.""" + + def __init__(self) -> None: + super().__init__() + self._span_processor: LoongSuiteSpanProcessor | None = None + self._previous_agent_instrument: Any = None + self._previous_embedder_instrument: Any = None + self._previous_auto_capability_types: Any = None + + def instrumentation_dependencies(self) -> Collection[str]: + return _instruments + + def _instrument(self, **kwargs: Any) -> None: + tracer_provider = kwargs.get("tracer_provider") or trace.get_tracer_provider() + meter_provider = kwargs.get("meter_provider") + logger_provider = kwargs.get("logger_provider") + + self._register_span_processor(tracer_provider, meter_provider) + settings = self._build_instrumentation_settings( + tracer_provider=tracer_provider, + meter_provider=meter_provider, + logger_provider=logger_provider, + kwargs=kwargs, + ) + self._enable_pydantic_ai_defaults(settings) + + def _uninstrument(self, **kwargs: Any) -> None: + if self._span_processor is not None: + self._span_processor.disable() + self._span_processor = None + self._restore_pydantic_ai_defaults() + + def _register_span_processor( + self, + tracer_provider: Any, + meter_provider: Any, + ) -> None: + if not hasattr(tracer_provider, "add_span_processor"): + logger.warning( + "Current tracer provider does not support span processors; " + "pydantic-ai span normalization skipped." + ) + return + self._span_processor = LoongSuiteSpanProcessor( + meter_provider=meter_provider, + ) + tracer_provider.add_span_processor(self._span_processor) + + def _build_instrumentation_settings( + self, + *, + tracer_provider: Any, + meter_provider: Any, + logger_provider: Any, + kwargs: dict[str, Any], + ) -> Any: + from pydantic_ai.models.instrumented import InstrumentationSettings + + return InstrumentationSettings( + tracer_provider=tracer_provider, + meter_provider=meter_provider, + logger_provider=logger_provider, + include_binary_content=kwargs.get("include_binary_content", True), + include_content=kwargs.get("include_content", False), + version=kwargs.get("version", 5), + event_mode=kwargs.get("event_mode", "attributes"), + use_aggregated_usage_attribute_names=kwargs.get( + "use_aggregated_usage_attribute_names", + False, + ), + ) + + def _enable_pydantic_ai_defaults(self, settings: Any) -> None: + from pydantic_ai import Agent + import pydantic_ai.agent as agent_module + from pydantic_ai.embeddings import Embedder + + self._previous_agent_instrument = Agent._instrument_default + self._previous_embedder_instrument = Embedder._instrument_default + self._previous_auto_capability_types = getattr( + agent_module, + "_AUTO_INJECT_CAPABILITY_TYPES", + None, + ) + if self._previous_auto_capability_types is not None: + auto_capability_types = self._previous_auto_capability_types + if LoongSuiteInstrumentationCapability not in auto_capability_types: + agent_module._AUTO_INJECT_CAPABILITY_TYPES = ( + *auto_capability_types, + LoongSuiteInstrumentationCapability, + ) + Agent.instrument_all(settings) + Embedder.instrument_all(settings) + + def _restore_pydantic_ai_defaults(self) -> None: + try: + from pydantic_ai import Agent + import pydantic_ai.agent as agent_module + from pydantic_ai.embeddings import Embedder + + Agent.instrument_all(self._previous_agent_instrument or False) + Embedder.instrument_all(self._previous_embedder_instrument or False) + if self._previous_auto_capability_types is not None: + agent_module._AUTO_INJECT_CAPABILITY_TYPES = ( + self._previous_auto_capability_types + ) + except Exception as exc: # noqa: BLE001 + logger.debug("Failed to restore pydantic-ai instrumentation: %s", exc) + finally: + self._previous_auto_capability_types = None diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/capability.py b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/capability.py new file mode 100644 index 000000000..93df7a1bf --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/capability.py @@ -0,0 +1,212 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); + +from __future__ import annotations + +import hashlib +from typing import TYPE_CHECKING, Any + +from opentelemetry import trace +from opentelemetry.trace import StatusCode +from opentelemetry.util.genai.extended_semconv.gen_ai_extended_attributes import ( + GEN_AI_REACT_FINISH_REASON, + GEN_AI_REACT_ROUND, + GenAiSpanKindValues, +) +from pydantic_ai.capabilities.abstract import AbstractCapability + +from opentelemetry.instrumentation.pydantic_ai.span_processor import ( + FRAMEWORK_NAME, + GEN_AI_FRAMEWORK, + GEN_AI_OPERATION_NAME, + GEN_AI_SPAN_KIND, + GEN_AI_USAGE_INPUT_TOKENS, + GEN_AI_USAGE_OUTPUT_TOKENS, + GEN_AI_USAGE_TOTAL_TOKENS, + normalize_genai_attributes, +) + +if TYPE_CHECKING: + from pydantic_ai.capabilities.abstract import ( + AgentNode, + NodeResult, + ValidatedToolArgs, + WrapModelRequestHandler, + WrapNodeRunHandler, + WrapRunHandler, + WrapToolExecuteHandler, + ) + from pydantic_ai.messages import ModelResponse, ToolCallPart + from pydantic_ai.run import AgentRunResult + from pydantic_ai.tools import AgentDepsT, RunContext, ToolDefinition + + +class LoongSuiteInstrumentationCapability(AbstractCapability[Any]): + """Pydantic AI Capability that adds LoongSuite GenAI span semantics.""" + + def get_ordering(self): + from pydantic_ai.capabilities.abstract import CapabilityOrdering + from pydantic_ai.capabilities.instrumentation import Instrumentation + + return CapabilityOrdering(wrapped_by=(Instrumentation,)) + + async def wrap_run( + self, + ctx: RunContext[AgentDepsT], + *, + handler: WrapRunHandler, + ) -> AgentRunResult[Any]: + span = trace.get_current_span() + agent_name = _agent_name(ctx) + run_id = getattr(ctx, "run_id", None) + _set_attributes( + span, + { + GEN_AI_FRAMEWORK: FRAMEWORK_NAME, + GEN_AI_SPAN_KIND: GenAiSpanKindValues.AGENT.value, + "gen_ai.agent.id": _agent_id(agent_name, run_id), + }, + ) + result = await handler() + _set_total_tokens_from_usage(span, getattr(ctx, "usage", None)) + return result + + async def wrap_model_request( + self, + ctx: RunContext[AgentDepsT], + *, + request_context: Any, + handler: WrapModelRequestHandler, + ) -> ModelResponse: + span = trace.get_current_span() + _set_attributes( + span, + { + GEN_AI_FRAMEWORK: FRAMEWORK_NAME, + GEN_AI_SPAN_KIND: GenAiSpanKindValues.LLM.value, + }, + ) + response = await handler(request_context) + _set_total_tokens_from_usage(span, getattr(response, "usage", None)) + return response + + async def wrap_tool_execute( + self, + ctx: RunContext[AgentDepsT], + *, + call: ToolCallPart, + tool_def: ToolDefinition, + args: ValidatedToolArgs, + handler: WrapToolExecuteHandler, + ) -> Any: + span = trace.get_current_span() + _set_attributes( + span, + { + GEN_AI_FRAMEWORK: FRAMEWORK_NAME, + GEN_AI_SPAN_KIND: GenAiSpanKindValues.TOOL.value, + "gen_ai.tool.description": getattr( + tool_def, + "description", + None, + ), + "gen_ai.tool.type": "function", + }, + ) + return await handler(args) + + async def wrap_node_run( + self, + ctx: RunContext[AgentDepsT], + *, + node: AgentNode[AgentDepsT], + handler: WrapNodeRunHandler[AgentDepsT], + ) -> NodeResult[AgentDepsT]: + if not _is_model_request_node(node): + return await handler(node) + + tracer = trace.get_tracer(__name__) + attributes = { + GEN_AI_FRAMEWORK: FRAMEWORK_NAME, + GEN_AI_OPERATION_NAME: "react", + GEN_AI_SPAN_KIND: GenAiSpanKindValues.STEP.value, + GEN_AI_REACT_ROUND: _react_round(ctx), + } + with tracer.start_as_current_span( + "react step", + attributes=attributes, + record_exception=False, + set_status_on_exception=False, + ) as span: + try: + result = await handler(node) + except Exception as exc: + span.record_exception(exc, escaped=True) + span.set_status(StatusCode.ERROR) + span.set_attribute(GEN_AI_REACT_FINISH_REASON, "error") + raise + span.set_attribute( + GEN_AI_REACT_FINISH_REASON, + _finish_reason(result), + ) + return result + + +def _set_total_tokens_from_usage(span: trace.Span, usage: Any) -> None: + if usage is None or not span.is_recording(): + return + attrs = {} + if hasattr(usage, "opentelemetry_attributes"): + attrs = usage.opentelemetry_attributes() + input_tokens = _to_int(attrs.get(GEN_AI_USAGE_INPUT_TOKENS)) + output_tokens = _to_int(attrs.get(GEN_AI_USAGE_OUTPUT_TOKENS)) + if input_tokens is not None and output_tokens is not None: + span.set_attribute(GEN_AI_USAGE_TOTAL_TOKENS, input_tokens + output_tokens) + + +def _set_attributes(span: trace.Span, attributes: dict[str, Any]) -> None: + if not span.is_recording(): + return + normalized = normalize_genai_attributes( + {key: value for key, value in attributes.items() if value is not None} + ) + for key, value in normalized.items(): + span.set_attribute(key, value) + + +def _agent_name(ctx: Any) -> str: + agent = getattr(ctx, "agent", None) + return (getattr(agent, "name", None) if agent is not None else None) or "agent" + + +def _agent_id(agent_name: str, run_id: str | None) -> str: + raw = f"{agent_name}:{run_id or ''}" + return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32] + + +def _react_round(ctx: Any) -> int: + value = _to_int(getattr(ctx, "run_step", None)) + if value is None: + return 1 + return max(1, value) + + +def _is_model_request_node(node: Any) -> bool: + return type(node).__name__ == "ModelRequestNode" + + +def _finish_reason(result: Any) -> str: + name = type(result).__name__ + if name == "End": + return "stop" + return name or "unknown" + + +def _to_int(value: Any) -> int | None: + if isinstance(value, bool) or value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/package.py b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/package.py new file mode 100644 index 000000000..786951c8c --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/package.py @@ -0,0 +1,5 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); + +_instruments = ("pydantic-ai >= 1.0.0",) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/span_processor.py b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/span_processor.py new file mode 100644 index 000000000..16dcfbd92 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/span_processor.py @@ -0,0 +1,364 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); + +from __future__ import annotations + +import logging +from collections import deque +from typing import Any + +from opentelemetry import metrics +from opentelemetry.context import Context +from opentelemetry.sdk.trace import ReadableSpan +from opentelemetry.sdk.trace.export import SpanProcessor +from opentelemetry.trace import Span, StatusCode +from opentelemetry.util.genai.extended_semconv.gen_ai_extended_attributes import ( + GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, + GEN_AI_SPAN_KIND, + GEN_AI_USAGE_TOTAL_TOKENS, + GenAiSpanKindValues, +) + +logger = logging.getLogger(__name__) + +GEN_AI_FRAMEWORK = "gen_ai.framework" +GEN_AI_OPERATION_NAME = "gen_ai.operation.name" +GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens" +GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens" +GEN_AI_REQUEST_MODEL = "gen_ai.request.model" +GEN_AI_RESPONSE_MODEL = "gen_ai.response.model" + +FRAMEWORK_NAME = "pydantic-ai" +SLOW_CALL_THRESHOLD_SECONDS = 1.0 +MAX_PROVIDER_TRACE_IDS = 2048 + +OPERATION_TO_SPAN_KIND = { + "create_agent": GenAiSpanKindValues.AGENT.value, + "invoke_agent": GenAiSpanKindValues.AGENT.value, + "chat": GenAiSpanKindValues.LLM.value, + "generate_content": GenAiSpanKindValues.LLM.value, + "text_completion": GenAiSpanKindValues.LLM.value, + "execute_tool": GenAiSpanKindValues.TOOL.value, + "embeddings": GenAiSpanKindValues.EMBEDDING.value, + "react": GenAiSpanKindValues.STEP.value, + "enter": GenAiSpanKindValues.ENTRY.value, + "retrieval": GenAiSpanKindValues.RETRIEVER.value, +} + + +def infer_span_kind(attributes: dict[str, Any]) -> str | None: + operation_name = attributes.get(GEN_AI_OPERATION_NAME) + if isinstance(operation_name, str): + mapped = OPERATION_TO_SPAN_KIND.get(operation_name) + if mapped: + return mapped + return None + + +def normalize_genai_attributes(attributes: dict[str, Any]) -> dict[str, Any]: + normalized = dict(attributes) + if GEN_AI_OPERATION_NAME in normalized: + normalized.setdefault(GEN_AI_FRAMEWORK, FRAMEWORK_NAME) + if GEN_AI_SPAN_KIND not in normalized: + span_kind = infer_span_kind(normalized) + if span_kind is not None: + normalized[GEN_AI_SPAN_KIND] = span_kind + + input_tokens = _to_int(normalized.get(GEN_AI_USAGE_INPUT_TOKENS)) + output_tokens = _to_int(normalized.get(GEN_AI_USAGE_OUTPUT_TOKENS)) + if ( + GEN_AI_USAGE_TOTAL_TOKENS not in normalized + and input_tokens is not None + and output_tokens is not None + ): + normalized[GEN_AI_USAGE_TOTAL_TOKENS] = input_tokens + output_tokens + return normalized + + +class LoongSuiteSpanProcessor(SpanProcessor): + """Normalize Pydantic AI spans to LoongSuite GenAI semantics.""" + + def __init__( + self, + *, + meter_provider: metrics.MeterProvider | None = None, + slow_call_threshold_seconds: float = SLOW_CALL_THRESHOLD_SECONDS, + ) -> None: + self._enabled = True + self._slow_call_threshold_seconds = slow_call_threshold_seconds + self._provider_llm_trace_ids: set[int] = set() + self._provider_llm_trace_order: deque[int] = deque( + maxlen=MAX_PROVIDER_TRACE_IDS + ) + self._meter = ( + meter_provider.get_meter(__name__) + if meter_provider is not None + else metrics.get_meter(__name__) + ) + self._genai_calls_count = self._meter.create_counter("genai_calls_count") + self._genai_calls_error_count = self._meter.create_counter( + "genai_calls_error_count" + ) + self._genai_calls_slow_count = self._meter.create_counter( + "genai_calls_slow_count" + ) + self._genai_calls_duration = self._meter.create_histogram( + "genai_calls_duration_seconds", + unit="s", + ) + self._genai_usage_tokens = self._meter.create_counter( + "genai_llm_usage_tokens", + unit="{token}", + ) + self._genai_first_token = self._meter.create_histogram( + "genai_llm_first_token_seconds", + unit="s", + ) + self._arms_request_metrics: dict[str, dict[str, Any]] = {} + + def on_start( + self, + span: Span, + parent_context: Context | None = None, + ) -> None: + if not self._enabled or not span.is_recording(): + return + attributes = getattr(span, "attributes", None) + if attributes is None: + return + if not _is_pydantic_ai_span(span, dict(attributes)): + return + normalized = normalize_genai_attributes(dict(attributes)) + for key, value in normalized.items(): + if attributes.get(key) != value: + span.set_attribute(key, value) + + def on_end(self, span: ReadableSpan) -> None: + if not self._enabled: + return + current_attributes = dict(span.attributes or {}) + if _is_provider_llm_span(span, current_attributes): + self._remember_provider_llm_trace(span) + return + if not _is_pydantic_ai_span(span, current_attributes): + return + attributes = normalize_genai_attributes(current_attributes) + self._replace_readable_span_attributes(span, attributes) + self._record_metrics(span, attributes) + + def shutdown(self) -> None: + self._enabled = False + + def force_flush(self, timeout_millis: int = 30000) -> bool: + return True + + def disable(self) -> None: + self._enabled = False + + def _replace_readable_span_attributes( + self, + span: ReadableSpan, + attributes: dict[str, Any], + ) -> None: + try: + span._attributes = attributes # type: ignore[attr-defined] + except Exception as exc: # noqa: BLE001 + logger.debug("Failed to normalize pydantic-ai span: %s", exc) + + def _record_metrics( + self, + span: ReadableSpan, + attributes: dict[str, Any], + ) -> None: + span_kind = attributes.get(GEN_AI_SPAN_KIND) + if span_kind not in { + GenAiSpanKindValues.AGENT.value, + GenAiSpanKindValues.LLM.value, + GenAiSpanKindValues.EMBEDDING.value, + GenAiSpanKindValues.TOOL.value, + GenAiSpanKindValues.STEP.value, + }: + return + if ( + span_kind == GenAiSpanKindValues.LLM.value + and self._has_provider_llm_span(span) + ): + return + + metric_attributes = { + "modelName": _model_name(attributes), + "spanKind": span_kind, + } + call_type = _call_type(attributes) + duration_seconds = _duration_seconds(span) + self._genai_calls_count.add(1, metric_attributes) + self._record_arms_request_count(call_type) + if duration_seconds is not None: + self._genai_calls_duration.record( + duration_seconds, + metric_attributes, + ) + self._record_arms_request_duration(call_type, duration_seconds) + if duration_seconds > self._slow_call_threshold_seconds: + self._genai_calls_slow_count.add(1, metric_attributes) + self._record_arms_request_slow_count(call_type) + + if span.status.status_code == StatusCode.ERROR: + self._genai_calls_error_count.add(1, metric_attributes) + self._record_arms_request_error_count(call_type) + + if span_kind == GenAiSpanKindValues.LLM.value: + self._record_llm_metrics(attributes, metric_attributes) + + def _record_llm_metrics( + self, + attributes: dict[str, Any], + metric_attributes: dict[str, Any], + ) -> None: + input_tokens = _to_int(attributes.get(GEN_AI_USAGE_INPUT_TOKENS)) + output_tokens = _to_int(attributes.get(GEN_AI_USAGE_OUTPUT_TOKENS)) + if input_tokens is not None: + self._genai_usage_tokens.add( + input_tokens, + {**metric_attributes, "usageType": "input"}, + ) + if output_tokens is not None: + self._genai_usage_tokens.add( + output_tokens, + {**metric_attributes, "usageType": "output"}, + ) + + ttft_ns = _to_int(attributes.get(GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN)) + if ttft_ns is not None: + self._genai_first_token.record( + ttft_ns / 1_000_000_000, + metric_attributes, + ) + + def _record_arms_request_count(self, call_type: str) -> None: + self._arms_metrics(call_type)["count"].add(1) + + def _record_arms_request_error_count(self, call_type: str) -> None: + self._arms_metrics(call_type)["error_count"].add(1) + + def _record_arms_request_slow_count(self, call_type: str) -> None: + self._arms_metrics(call_type)["slow_count"].add(1) + + def _record_arms_request_duration( + self, + call_type: str, + duration_seconds: float, + ) -> None: + self._arms_metrics(call_type)["duration"].record(duration_seconds) + + def _arms_metrics(self, call_type: str) -> dict[str, Any]: + metrics_by_type = self._arms_request_metrics.get(call_type) + if metrics_by_type is None: + metrics_by_type = { + "count": self._meter.create_counter( + f"arms_{call_type}_requests_count" + ), + "error_count": self._meter.create_counter( + f"arms_{call_type}_requests_error_count" + ), + "duration": self._meter.create_histogram( + f"arms_{call_type}_requests_seconds", + unit="s", + ), + "slow_count": self._meter.create_counter( + f"arms_{call_type}_requests_slow_count" + ), + } + self._arms_request_metrics[call_type] = metrics_by_type + return metrics_by_type + + def _remember_provider_llm_trace(self, span: ReadableSpan) -> None: + trace_id = _trace_id(span) + if trace_id is None or trace_id in self._provider_llm_trace_ids: + return + if ( + len(self._provider_llm_trace_order) + == self._provider_llm_trace_order.maxlen + ): + old_trace_id = self._provider_llm_trace_order.popleft() + self._provider_llm_trace_ids.discard(old_trace_id) + self._provider_llm_trace_ids.add(trace_id) + self._provider_llm_trace_order.append(trace_id) + + def _has_provider_llm_span(self, span: ReadableSpan) -> bool: + trace_id = _trace_id(span) + return trace_id is not None and trace_id in self._provider_llm_trace_ids + + +def _to_int(value: Any) -> int | None: + if isinstance(value, bool) or value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _duration_seconds(span: ReadableSpan) -> float | None: + if span.start_time is None or span.end_time is None: + return None + return (span.end_time - span.start_time) / 1_000_000_000 + + +def _model_name(attributes: dict[str, Any]) -> str: + for key in (GEN_AI_RESPONSE_MODEL, GEN_AI_REQUEST_MODEL, "model_name"): + value = attributes.get(key) + if isinstance(value, str) and value: + return value + return "unknown" + + +def _call_type(attributes: dict[str, Any]) -> str: + operation_name = attributes.get(GEN_AI_OPERATION_NAME) + if operation_name == "execute_tool": + return "tool" + if isinstance(operation_name, str) and operation_name: + return operation_name + span_kind = attributes.get(GEN_AI_SPAN_KIND) + if isinstance(span_kind, str) and span_kind: + return span_kind.lower() + return "unknown" + + +def _trace_id(span: Any) -> int | None: + context = getattr(span, "context", None) + trace_id = getattr(context, "trace_id", None) + if isinstance(trace_id, int): + return trace_id + return None + + +def _scope_name(span: Any) -> str | None: + scope = getattr(span, "instrumentation_scope", None) + scope_name = getattr(scope, "name", None) + return scope_name if isinstance(scope_name, str) else None + + +def _is_provider_llm_span(span: Any, attributes: dict[str, Any]) -> bool: + if _is_pydantic_ai_span(span, attributes): + return False + operation_name = attributes.get(GEN_AI_OPERATION_NAME) + span_kind = attributes.get(GEN_AI_SPAN_KIND) + return operation_name in { + "chat", + "generate_content", + "text_completion", + } or span_kind == GenAiSpanKindValues.LLM.value + + +def _is_pydantic_ai_span(span: Any, attributes: dict[str, Any]) -> bool: + if attributes.get(GEN_AI_FRAMEWORK) == FRAMEWORK_NAME: + return True + scope_name = _scope_name(span) + if scope_name in { + "pydantic-ai", + "opentelemetry.instrumentation.pydantic_ai.capability", + }: + return True + return any(key.startswith("pydantic_ai.") for key in attributes) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/version.py b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/version.py new file mode 100644 index 000000000..5fd301e2e --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/src/opentelemetry/instrumentation/pydantic_ai/version.py @@ -0,0 +1,15 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "0.1.0" diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/tests/requirements.latest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/tests/requirements.latest.txt new file mode 100644 index 000000000..a2fe4ee7d --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/tests/requirements.latest.txt @@ -0,0 +1,3 @@ +-e util/opentelemetry-util-genai +-e instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai +pydantic-ai diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/tests/test_span_processor.py b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/tests/test_span_processor.py new file mode 100644 index 000000000..475b7b0da --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/tests/test_span_processor.py @@ -0,0 +1,240 @@ +import asyncio + +from opentelemetry.trace import StatusCode + +from opentelemetry.instrumentation.pydantic_ai.capability import ( + LoongSuiteInstrumentationCapability, +) +from opentelemetry.instrumentation.pydantic_ai.span_processor import ( + GEN_AI_FRAMEWORK, + GEN_AI_SPAN_KIND, + GEN_AI_USAGE_TOTAL_TOKENS, + LoongSuiteSpanProcessor, + normalize_genai_attributes, +) + + +def test_normalize_genai_attributes_adds_framework_and_span_kind(): + attrs = normalize_genai_attributes( + { + "gen_ai.operation.name": "chat", + "gen_ai.usage.input_tokens": 3, + "gen_ai.usage.output_tokens": 5, + } + ) + + assert attrs[GEN_AI_FRAMEWORK] == "pydantic-ai" + assert attrs[GEN_AI_SPAN_KIND] == "LLM" + assert attrs[GEN_AI_USAGE_TOTAL_TOKENS] == 8 + + +def test_normalize_genai_attributes_preserves_existing_values(): + attrs = normalize_genai_attributes( + { + "gen_ai.operation.name": "execute_tool", + GEN_AI_FRAMEWORK: "custom", + GEN_AI_SPAN_KIND: "RETRIEVER", + GEN_AI_USAGE_TOTAL_TOKENS: 10, + "gen_ai.usage.input_tokens": 1, + "gen_ai.usage.output_tokens": 2, + } + ) + + assert attrs[GEN_AI_FRAMEWORK] == "custom" + assert attrs[GEN_AI_SPAN_KIND] == "RETRIEVER" + assert attrs[GEN_AI_USAGE_TOTAL_TOKENS] == 10 + + +def test_on_end_records_genai_and_arms_metrics(): + meter = FakeMeterProvider().meter + processor = LoongSuiteSpanProcessor( + meter_provider=FakeMeterProvider(meter), + slow_call_threshold_seconds=0.5, + ) + span = FakeReadableSpan( + { + "gen_ai.operation.name": "execute_tool", + GEN_AI_FRAMEWORK: "pydantic-ai", + }, + duration_seconds=1.25, + status_code=StatusCode.ERROR, + ) + + processor.on_end(span) + + assert span.attributes[GEN_AI_SPAN_KIND] == "TOOL" + metric_attributes = {"modelName": "unknown", "spanKind": "TOOL"} + assert meter.instruments["genai_calls_count"].measurements == [ + (1, metric_attributes) + ] + assert meter.instruments["genai_calls_duration_seconds"].measurements == [ + (1.25, metric_attributes) + ] + assert meter.instruments["genai_calls_error_count"].measurements == [ + (1, metric_attributes) + ] + assert meter.instruments["genai_calls_slow_count"].measurements == [ + (1, metric_attributes) + ] + assert meter.instruments["arms_tool_requests_count"].measurements == [(1, None)] + assert meter.instruments["arms_tool_requests_seconds"].measurements == [ + (1.25, None) + ] + assert meter.instruments["arms_tool_requests_error_count"].measurements == [ + (1, None) + ] + assert meter.instruments["arms_tool_requests_slow_count"].measurements == [ + (1, None) + ] + + +def test_provider_llm_span_suppresses_framework_llm_metrics(): + meter = FakeMeterProvider().meter + processor = LoongSuiteSpanProcessor(meter_provider=FakeMeterProvider(meter)) + trace_id = 123 + + processor.on_end( + FakeReadableSpan( + {"gen_ai.operation.name": "chat"}, + scope_name="opentelemetry.instrumentation.openai_v2", + trace_id=trace_id, + ) + ) + processor.on_end( + FakeReadableSpan( + { + "gen_ai.operation.name": "chat", + GEN_AI_FRAMEWORK: "pydantic-ai", + }, + scope_name="pydantic-ai", + trace_id=trace_id, + ) + ) + + assert meter.instruments["genai_calls_count"].measurements == [] + assert "arms_chat_requests_count" not in meter.instruments + + +def test_wrap_node_run_creates_step_span(monkeypatch): + tracer = FakeTracer() + monkeypatch.setattr( + "opentelemetry.instrumentation.pydantic_ai.capability.trace.get_tracer", + lambda _name: tracer, + ) + capability = LoongSuiteInstrumentationCapability() + node = type("ModelRequestNode", (), {})() + ctx = type("Ctx", (), {"run_step": 2})() + + async def handler(received_node): + assert received_node is node + return type("End", (), {})() + + result = asyncio.run( + capability.wrap_node_run( + ctx, + node=node, + handler=handler, + ) + ) + + assert type(result).__name__ == "End" + assert tracer.span.name == "react step" + assert tracer.span.attributes["gen_ai.operation.name"] == "react" + assert tracer.span.attributes[GEN_AI_SPAN_KIND] == "STEP" + assert tracer.span.attributes["gen_ai.react.round"] == 2 + assert tracer.span.attributes["gen_ai.react.finish_reason"] == "stop" + + +class FakeMeterProvider: + def __init__(self, meter=None): + self.meter = meter or FakeMeter() + + def get_meter(self, _name): + return self.meter + + +class FakeMeter: + def __init__(self): + self.instruments = {} + + def create_counter(self, name, unit=None): + return self._instrument(name) + + def create_histogram(self, name, unit=None): + return self._instrument(name) + + def _instrument(self, name): + instrument = FakeInstrument() + self.instruments[name] = instrument + return instrument + + +class FakeInstrument: + def __init__(self): + self.measurements = [] + + def add(self, value, attributes=None): + self.measurements.append((value, attributes)) + + def record(self, value, attributes=None): + self.measurements.append((value, attributes)) + + +class FakeReadableSpan: + def __init__( + self, + attributes, + *, + duration_seconds=0.25, + status_code=StatusCode.UNSET, + scope_name="pydantic-ai", + trace_id=1, + ): + self._attributes = dict(attributes) + self.start_time = 0 + self.end_time = int(duration_seconds * 1_000_000_000) + self.status = type("Status", (), {"status_code": status_code})() + self.instrumentation_scope = type("Scope", (), {"name": scope_name})() + self.context = type("Context", (), {"trace_id": trace_id})() + + @property + def attributes(self): + return self._attributes + + +class FakeSpan: + def __init__(self, name, attributes): + self.name = name + self.attributes = dict(attributes) + + def set_attribute(self, key, value): + self.attributes[key] = value + + def record_exception(self, exc, escaped=False): + self.exception = exc + + def set_status(self, status): + self.status = status + + +class FakeSpanContextManager: + def __init__(self, span): + self.span = span + + def __enter__(self): + return self.span + + def __exit__(self, exc_type, exc, traceback): + return False + + +class FakeTracer: + def start_as_current_span( + self, + name, + attributes=None, + record_exception=None, + set_status_on_exception=None, + ): + self.span = FakeSpan(name, attributes or {}) + return FakeSpanContextManager(self.span) diff --git a/tox.ini b/tox.ini index fb431ac36..9ff707c92 100644 --- a/tox.ini +++ b/tox.ini @@ -17,6 +17,10 @@ envlist = py3{10,11,12,13,14}-test-instrumentation-openai_agents-v2-{oldest,latest} lint-instrumentation-openai_agents-v2 + ; instrumentation-pydantic-ai + py3{10,11,12,13,14}-test-instrumentation-pydantic-ai-latest + lint-instrumentation-pydantic-ai + ; instrumentation-vertexai py3{9,10,11,12,13,14}-test-instrumentation-vertexai-{oldest,latest} # Disabling pypy3 as shapely does not have wheels and fails to compile @@ -485,6 +489,11 @@ deps = openai_agents-latest: -r {toxinidir}/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/requirements.latest.txt lint-instrumentation-openai_agents-v2: -r {toxinidir}/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/requirements.oldest.txt + pydantic-ai-latest: {[testenv]test_deps} + pydantic-ai-latest: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/tests/requirements.latest.txt + lint-instrumentation-pydantic-ai: {[testenv]test_deps} + lint-instrumentation-pydantic-ai: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/tests/requirements.latest.txt + vertexai-oldest: -r {toxinidir}/instrumentation-genai/opentelemetry-instrumentation-vertexai/tests/requirements.oldest.txt vertexai-latest: {[testenv]test_deps} vertexai-latest: -r {toxinidir}/instrumentation-genai/opentelemetry-instrumentation-vertexai/tests/requirements.latest.txt @@ -891,6 +900,8 @@ commands = lint-instrumentation-openai-v2: sh -c "cd instrumentation-genai && pylint --rcfile ../.pylintrc opentelemetry-instrumentation-openai-v2" test-instrumentation-openai_agents-v2: pytest {toxinidir}/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests {posargs} lint-instrumentation-openai_agents-v2: sh -c "cd instrumentation-genai && pylint --rcfile ../.pylintrc opentelemetry-instrumentation-openai-agents-v2" + test-instrumentation-pydantic-ai: pytest {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-pydantic-ai/tests {posargs} + lint-instrumentation-pydantic-ai: sh -c "cd instrumentation-loongsuite && pylint --rcfile ../.pylintrc loongsuite-instrumentation-pydantic-ai" test-instrumentation-vertexai: pytest {toxinidir}/instrumentation-genai/opentelemetry-instrumentation-vertexai/tests --vcr-record=none {posargs} lint-instrumentation-vertexai: sh -c "cd instrumentation-genai && pylint --rcfile ../.pylintrc opentelemetry-instrumentation-vertexai"