diff --git a/README-zh.md b/README-zh.md index aa5aa075c..f4e58a003 100644 --- a/README-zh.md +++ b/README-zh.md @@ -429,6 +429,9 @@ loongsuite-instrument \ ```bash export LOONGSUITE_PYTHON_SITE_BOOTSTRAP=True + # 交互式 CLI / 应用可选:不要把通用成功提示写入 stdout。 + export LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS=False + export LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STATUS_FILE=/tmp/loongsuite-site-bootstrap-status.json ``` **步骤 4 — 创建 `~/.loongsuite/bootstrap-config.json`** @@ -445,7 +448,7 @@ loongsuite-instrument \ } ``` - 然后执行 `python demo.py`。如需使用 **console** exporter、其他后端、改用 **`loongsuite-instrument`**(而非直接 `python`),或查看完整优先级/边界场景,请阅读 [loongsuite-site-bootstrap/README.md](loongsuite-site-bootstrap/README.md)。 + 然后执行 `python demo.py`。如需使用 **console** exporter、其他后端、改用 **`loongsuite-instrument`**(而非直接 `python`)、控制成功提示输出,或查看完整优先级/边界场景,请阅读 [loongsuite-site-bootstrap/README.md](loongsuite-site-bootstrap/README.md)。 > **Beta:**Site-bootstrap 会影响其启用环境中的所有 Python 进程,生产环境使用前请先阅读包 README。 diff --git a/README.md b/README.md index 6c0f6ecdd..c45e0d72f 100644 --- a/README.md +++ b/README.md @@ -423,6 +423,9 @@ Run **without** changing codes or bootstrap commands: a **`.pth` hook** loads Lo ```bash export LOONGSUITE_PYTHON_SITE_BOOTSTRAP=True + # Optional for interactive CLIs/apps: suppress the generic success line on stdout. + export LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS=False + export LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STATUS_FILE=/tmp/loongsuite-site-bootstrap-status.json ``` **Step 4 — Create `~/.loongsuite/bootstrap-config.json`** with the OpenTelemetry environments keys you need. @@ -437,7 +440,7 @@ Run **without** changing codes or bootstrap commands: a **`.pth` hook** loads Lo } ``` - Then run `python demo.py`. For **console** exporters, other backends, using **`loongsuite-instrument`** instead of plain `python`, or full precedence / edge cases, see [loongsuite-site-bootstrap/README.md](loongsuite-site-bootstrap/README.md). + Then run `python demo.py`. For **console** exporters, other backends, using **`loongsuite-instrument`** instead of plain `python`, success logging controls, or full precedence / edge cases, see [loongsuite-site-bootstrap/README.md](loongsuite-site-bootstrap/README.md). > **Beta:** Site-bootstrap affects every Python process in the environment where it is enabled; read the package README before using it in production. diff --git a/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py b/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py index 8f642567c..4024f8826 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py +++ b/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py @@ -115,7 +115,7 @@ def on_chat_model_start( for sub_messages in messages: for message in sub_messages: # Cast to Any to avoid type checking issues with LangChain's complex content type - raw_content: Any = message.content # type: ignore[misc] + raw_content: Any = message.content role = message.type parts: list[Text] = [] diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/_wrapper.py b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/_wrapper.py index b3e9753b1..38a9cc85e 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/_wrapper.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/_wrapper.py @@ -70,6 +70,7 @@ from opentelemetry.util.genai.types import Error, LLMInvocation from .utils import ( + apply_entry_baggage_identity, convert_agent_response_to_output_messages, convert_chatresponse_to_output_messages, create_agent_invocation, @@ -182,6 +183,7 @@ def hook(agent_self: Any, kwargs: dict) -> None: state.react_round += 1 inv = ReactStepInvocation(round=state.react_round) + apply_entry_baggage_identity(inv) handler.start_react_step(inv, context=state.original_context) state.active_step = inv state.pending_acting_count = 0 diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/patch.py index 7fb2cd58c..dfdbfc4f9 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/patch.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/patch.py @@ -34,6 +34,11 @@ from opentelemetry.util.genai.span_utils import _apply_error_attributes from opentelemetry.util.genai.types import Error +from .utils import ( + apply_entry_baggage_identity, + entry_baggage_identity_attributes, +) + logger = logging.getLogger(__name__) @@ -407,6 +412,7 @@ async def wrap_tool_call(wrapped, instance, args, kwargs, handler): tool_description=tool_description, tool_call_arguments=tool_args, ) + apply_entry_baggage_identity(invocation) # --- Skill attributes --- # @@ -479,6 +485,7 @@ async def wrap_formatter_format(wrapped, instance, args, kwargs, tracer=None): try: # Record only basic information span.set_attribute("gen_ai.operation.name", "format") + span.set_attributes(entry_baggage_identity_attributes()) # Execute the wrapped async call result = await wrapped(*args, **kwargs) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/utils.py index 96ebfcabf..54ff15d97 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/utils.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/utils.py @@ -35,9 +35,14 @@ from agentscope.model import ChatModelBase, ChatResponse from pydantic import BaseModel +from opentelemetry import baggage from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) +from opentelemetry.util.genai.extended_semconv.gen_ai_extended_attributes import ( + GEN_AI_SESSION_ID, + GEN_AI_USER_ID, +) from opentelemetry.util.genai.extended_types import ( EmbeddingInvocation, InvokeAgentInvocation, @@ -87,6 +92,43 @@ class AgentScopeGenAiProviderName(str, Enum): ] +def _current_baggage_value(key: str) -> str | None: + try: + value = baggage.get_baggage(key) + except Exception: + return None + if value is None: + return None + text = str(value).strip() + return text or None + + +def entry_baggage_identity_attributes() -> dict[str, str]: + """Return entry-level identity from current OpenTelemetry Baggage. + + QwenPaw opens an Entry span before AgentScope runs and writes + ``gen_ai.session.id`` / ``gen_ai.user.id`` into Baggage. AgentScope has + its own ``run_id``; when both instrumentations are active, entry baggage is + the request-level identity and should color downstream spans. + """ + attributes: dict[str, str] = {} + session_id = _current_baggage_value(GEN_AI_SESSION_ID) + user_id = _current_baggage_value(GEN_AI_USER_ID) + if session_id: + attributes[GEN_AI_SESSION_ID] = session_id + if user_id: + attributes[GEN_AI_USER_ID] = user_id + return attributes + + +def apply_entry_baggage_identity(invocation: Any) -> str | None: + """Copy entry-level identity baggage onto a GenAI invocation.""" + attributes = entry_baggage_identity_attributes() + for key, value in attributes.items(): + invocation.attributes.setdefault(key, value) + return attributes.get(GEN_AI_SESSION_ID) + + def get_provider_name(chat_model: ChatModelBase) -> str: """Parse chat model provider name""" classname = chat_model.__class__.__name__ @@ -318,6 +360,9 @@ def create_llm_invocation( provider=provider_name, input_messages=input_messages, ) + entry_session_id = apply_entry_baggage_identity(invocation) + if entry_session_id and invocation.conversation_id is None: + invocation.conversation_id = entry_session_id # Set optional request parameters if present if call_kwargs.get("max_tokens"): @@ -353,6 +398,7 @@ def create_embedding_invocation( request_model=request_model, provider=provider_name, ) + apply_entry_baggage_identity(invocation) # Set encoding formats if present if call_kwargs.get("encoding_formats"): @@ -392,16 +438,18 @@ def create_agent_invocation( except Exception as e: logger.debug(f"Error converting agent input messages: {e}") + entry_session_id = _current_baggage_value(GEN_AI_SESSION_ID) invocation = InvokeAgentInvocation( provider=provider_name, agent_name=getattr(reply_instance, "name", "unknown_agent"), agent_id=getattr(reply_instance, "id", "unknown"), agent_description=inspect.getdoc(reply_instance.__class__) or "No description available", - conversation_id=_config.run_id, + conversation_id=entry_session_id or _config.run_id, request_model=request_model, input_messages=input_messages, ) + apply_entry_baggage_identity(invocation) # Set system instruction if available if hasattr(reply_instance, "sys_prompt") and reply_instance.sys_prompt: diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/tests/test_utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/tests/test_utils.py index f088e68e6..b8ade817f 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/tests/test_utils.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/tests/test_utils.py @@ -17,17 +17,32 @@ Tests for utility functions in opentelemetry.instrumentation.agentscope.utils """ +from types import SimpleNamespace + from agentscope.message import Msg, ToolResultBlock from agentscope.tracing._converter import ( _convert_block_to_part as _convert_block_to_part_framework, ) +from opentelemetry import baggage +from opentelemetry import context as otel_context +from opentelemetry.instrumentation.agentscope import utils as utils_module from opentelemetry.instrumentation.agentscope.utils import ( _convert_block_to_part as _convert_block_to_part_local, ) from opentelemetry.instrumentation.agentscope.utils import ( + apply_entry_baggage_identity, convert_agentscope_messages_to_genai_format, + create_agent_invocation, + create_embedding_invocation, + create_llm_invocation, + entry_baggage_identity_attributes, +) +from opentelemetry.util.genai.extended_semconv.gen_ai_extended_attributes import ( + GEN_AI_SESSION_ID, + GEN_AI_USER_ID, ) +from opentelemetry.util.genai.extended_types import ReactStepInvocation from opentelemetry.util.genai.types import ToolCallResponse @@ -103,3 +118,106 @@ def test_convert_with_framework_converter(self): part_obj = converted[0].parts[0] assert isinstance(part_obj, ToolCallResponse) assert part_obj.response == "framework output" + + def test_create_agent_invocation_prefers_entry_baggage_identity( + self, monkeypatch + ): + monkeypatch.setattr( + utils_module._config, "run_id", "agentscope-run-id" + ) + ctx = baggage.set_baggage(GEN_AI_SESSION_ID, "entry-session") + ctx = baggage.set_baggage(GEN_AI_USER_ID, "entry-user", ctx) + token = otel_context.attach(ctx) + try: + invocation = create_agent_invocation( + SimpleNamespace( + model=None, + name="TestAgent", + id="agent-id", + sys_prompt=None, + ), + tuple(), + {}, + ) + finally: + otel_context.detach(token) + + assert invocation.conversation_id == "entry-session" + assert invocation.attributes[GEN_AI_SESSION_ID] == "entry-session" + assert invocation.attributes[GEN_AI_USER_ID] == "entry-user" + + def test_entry_baggage_identity_attributes(self): + ctx = baggage.set_baggage(GEN_AI_SESSION_ID, "entry-session") + ctx = baggage.set_baggage(GEN_AI_USER_ID, "entry-user", ctx) + token = otel_context.attach(ctx) + try: + attributes = entry_baggage_identity_attributes() + finally: + otel_context.detach(token) + + assert attributes == { + GEN_AI_SESSION_ID: "entry-session", + GEN_AI_USER_ID: "entry-user", + } + + def test_create_agent_invocation_falls_back_to_agentscope_run_id( + self, monkeypatch + ): + monkeypatch.setattr( + utils_module._config, "run_id", "agentscope-run-id" + ) + + invocation = create_agent_invocation( + SimpleNamespace( + model=None, + name="TestAgent", + id="agent-id", + sys_prompt=None, + ), + tuple(), + {}, + ) + + assert invocation.conversation_id == "agentscope-run-id" + assert GEN_AI_SESSION_ID not in invocation.attributes + assert GEN_AI_USER_ID not in invocation.attributes + + def test_model_invocations_copy_entry_baggage_identity(self): + ctx = baggage.set_baggage(GEN_AI_SESSION_ID, "entry-session") + ctx = baggage.set_baggage(GEN_AI_USER_ID, "entry-user", ctx) + token = otel_context.attach(ctx) + try: + llm_invocation = create_llm_invocation( + SimpleNamespace(model_name="qwen-max"), + tuple(), + {}, + ) + embedding_invocation = create_embedding_invocation( + SimpleNamespace(model_name="text-embedding-v4"), + tuple(), + {}, + ) + finally: + otel_context.detach(token) + + assert llm_invocation.conversation_id == "entry-session" + assert llm_invocation.attributes[GEN_AI_SESSION_ID] == "entry-session" + assert llm_invocation.attributes[GEN_AI_USER_ID] == "entry-user" + assert ( + embedding_invocation.attributes[GEN_AI_SESSION_ID] + == "entry-session" + ) + assert embedding_invocation.attributes[GEN_AI_USER_ID] == "entry-user" + + def test_react_step_invocation_copies_entry_baggage_identity(self): + ctx = baggage.set_baggage(GEN_AI_SESSION_ID, "entry-session") + ctx = baggage.set_baggage(GEN_AI_USER_ID, "entry-user", ctx) + token = otel_context.attach(ctx) + try: + invocation = ReactStepInvocation(round=1) + apply_entry_baggage_identity(invocation) + finally: + otel_context.detach(token) + + assert invocation.attributes[GEN_AI_SESSION_ID] == "entry-session" + assert invocation.attributes[GEN_AI_USER_ID] == "entry-user" diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py index 830159f5c..6ec443590 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py @@ -270,7 +270,7 @@ def _handle_llm_start(self, run: Run) -> None: rd = _RunData( run_kind="llm", span=invocation.span, - context=set_span_in_context(invocation.span) + context=otel_context.get_current() if invocation.span else None, invocation=invocation, @@ -413,9 +413,7 @@ def _start_agent(self, run: Run) -> None: rd = _RunData( run_kind="agent", span=invocation.span, - context=set_span_in_context(invocation.span) - if invocation.span - else None, + context=otel_context.get_current() if invocation.span else None, invocation=invocation, is_langgraph_react=_has_langgraph_react_metadata(run), ) @@ -437,7 +435,12 @@ def _start_chain(self, run: Run) -> None: span.set_attribute(INPUT_VALUE, _safe_json(inputs)) # Attach chain span context so non-LangChain children nest correctly. - ctx = set_span_in_context(span) + current_context = ( + parent_ctx + if parent_ctx is not None + else otel_context.get_current() + ) + ctx = set_span_in_context(span, current_context) token = otel_context.attach(ctx) # Propagate inside_langgraph_react from parent so that @@ -576,7 +579,7 @@ def _on_tool_start(self, run: Run) -> None: rd = _RunData( run_kind="tool", span=invocation.span, - context=set_span_in_context(invocation.span) + context=otel_context.get_current() if invocation.span else None, invocation=invocation, @@ -634,7 +637,7 @@ def _on_retriever_start(self, run: Run) -> None: rd = _RunData( run_kind="retriever", span=invocation.span, - context=set_span_in_context(invocation.span) + context=otel_context.get_current() if invocation.span else None, invocation=invocation, @@ -724,7 +727,7 @@ def _enter_react_step(self, agent_run_id: UUID) -> None: self._handler.start_react_step(inv, context=agent_rd.original_context) step_ctx = ( - set_span_in_context(inv.span) + otel_context.get_current() if inv.span else agent_rd.original_context ) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_agent_spans.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_agent_spans.py index 00da288db..1161cd9dc 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_agent_spans.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_agent_spans.py @@ -14,17 +14,41 @@ """Tests for Agent span creation — verifying AGENT_RUN_NAMES detection.""" +from uuid import uuid4 + +from opentelemetry.instrumentation.langchain.internal._tracer import ( + LoongsuiteTracer, +) from opentelemetry.instrumentation.langchain.internal._utils import ( AGENT_RUN_NAMES, _is_agent_run, ) +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAI, +) +from opentelemetry.util.genai.extended_handler import ExtendedTelemetryHandler class _FakeRun: """Minimal stub that looks like a langchain Run for unit tests.""" - def __init__(self, name: str): + def __init__( + self, + name: str, + parent_run_id=None, + inputs=None, + outputs=None, + extra=None, + ): + self.id = uuid4() self.name = name + self.parent_run_id = parent_run_id + self.inputs = inputs or {} + self.outputs = outputs or {} + self.extra = extra or {} + self.metadata = {} + self.serialized = {} + self.error = None class TestAgentDetection: @@ -51,3 +75,50 @@ def test_none_name_not_detected(self): def test_agent_run_names_immutable(self): assert isinstance(AGENT_RUN_NAMES, frozenset) + + +def test_agent_context_colors_child_llm_and_tool_spans( + tracer_provider, span_exporter +): + handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider) + tracer = LoongsuiteTracer( + handler=handler, + tracer_provider=tracer_provider, + ) + + agent_run = _FakeRun( + "AgentExecutor", + inputs={"input": "plan a search"}, + ) + tracer._start_agent(agent_run) + + llm_run = _FakeRun( + "ChatOpenAI", + parent_run_id=agent_run.id, + inputs={"prompts": ["plan a search"]}, + extra={"invocation_params": {"model_name": "gpt-4o-mini"}}, + ) + tracer._handle_llm_start(llm_run) + llm_run.outputs = {"generations": [[{"text": "call search"}]]} + tracer._on_llm_end(llm_run) + + tool_run = _FakeRun( + "search", + parent_run_id=agent_run.id, + inputs={"input": "query"}, + outputs={"output": "result"}, + ) + tracer._on_tool_start(tool_run) + tracer._on_tool_end(tool_run) + + agent_run.outputs = {"output": "done"} + tracer._on_chain_end(agent_run) + + spans = span_exporter.get_finished_spans() + llm_span = next(span for span in spans if span.name == "chat gpt-4o-mini") + tool_span = next( + span for span in spans if span.name == "execute_tool search" + ) + + assert llm_span.attributes[GenAI.GEN_AI_AGENT_NAME] == "AgentExecutor" + assert tool_span.attributes[GenAI.GEN_AI_AGENT_NAME] == "AgentExecutor" diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-qwenpaw/README.md b/instrumentation-loongsuite/loongsuite-instrumentation-qwenpaw/README.md index 058a0a676..5ba2eaa47 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-qwenpaw/README.md +++ b/instrumentation-loongsuite/loongsuite-instrumentation-qwenpaw/README.md @@ -56,6 +56,18 @@ put `"LOONGSUITE_PYTHON_SITE_BOOTSTRAP": "true"` in `bootstrap-config.json` (see below); environment variables take **precedence** over the file for any key that is already set in the process. +QwenPaw is an interactive app, so stdout is user-visible. If you do not want the +generic Site-bootstrap success line in QwenPaw stdout, also set: + +```bash +export LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS=False +export LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STATUS_FILE=/tmp/qwenpaw-loongsuite-bootstrap.json +``` + +The status file is an optional local confirmation that the bootstrap hook ran; +the real access check is still whether the configured backend receives QwenPaw +entry / AgentScope child spans after a user turn. + **2.4 — Configure export via `~/.loongsuite/bootstrap-config.json`** Create the directory and file if needed. The JSON root must be an object; string @@ -84,8 +96,9 @@ Example for quick local debugging with **console** exporters: } ``` -After a successful run you should see a line on stdout such as: -`loongsuite-site-bootstrap: started successfully (OpenTelemetry auto-instrumentation initialized).` +By default, a successful run prints a Site-bootstrap success line to stdout. +When `LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS=False`, use the optional +status file above or verify the generated trace in your backend instead. Do not start Python with `python -S` (that disables `site` and `.pth` processing). > **Beta / scope:** With the hook enabled, **every** Python process in that @@ -137,3 +150,7 @@ call inside the agent. Calls to models, tools, and other AgentScope primitives are **not** duplicated here: use AgentScope (and your existing model client) instrumentations alongside this package so they appear as child spans under this entry when configured. +When AgentScope spans run under a QwenPaw Entry span, the QwenPaw +`gen_ai.session.id` / `gen_ai.user.id` values are propagated through +OpenTelemetry baggage so downstream AgentScope LLM, agent, embedding, and tool +spans carry the same request identity. diff --git a/loongsuite-site-bootstrap/README.md b/loongsuite-site-bootstrap/README.md index 5a6ecd74f..8b7fcc9d9 100644 --- a/loongsuite-site-bootstrap/README.md +++ b/loongsuite-site-bootstrap/README.md @@ -60,12 +60,40 @@ export LOONGSUITE_PYTHON_SITE_BOOTSTRAP=True 从而使用 [`loongsuite-distro`](../loongsuite-distro) 中的 `LoongSuiteDistro` / `LoongSuiteConfigurator`(与 `loongsuite-instrument` + `OTEL_PYTHON_DISTRO=loongsuite` 一致)。上述两项仍使用 **`setdefault`**,在 JSON 补齐之后执行;若环境或 JSON 已为同名变量赋值,则保持已有取值。 +## 成功提示与静默模式 + +默认情况下,为保持兼容,`initialize()` 成功后会向 **stdout** 打印一行: + +```text +loongsuite-site-bootstrap: started successfully (OpenTelemetry auto-instrumentation initialized). +``` + +如果应用把 stdout 作为协议输出、交互终端或前端日志流,可关闭这行成功提示: + +```bash +export LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS=False +``` + +关闭成功提示不影响失败日志:自动注入失败时仍会通过 bootstrap logger 记录错误。若需要无 stdout 的成功确认,可让 bootstrap 写状态文件: + +```bash +export LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STATUS_FILE=/tmp/loongsuite-site-bootstrap-status.json +``` + +成功时文件内容类似: + +```json +{"initialized":true,"pid":12345,"version":"0.6.0"} +``` + +成功后 bootstrap 还会在当前进程内设置 `LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STARTED=true`,也可以通过 `loongsuite_site_bootstrap.is_initialized()` 读取当前 bootstrap 状态。真正的业务接入是否成功仍应以导出端是否收到对应 trace / metrics 为准。 + ## 行为说明 - 安装后 wheel 会在 `site-packages` 根目录释放 `loongsuite-site-bootstrap.pth`,其中含一行 `import loongsuite_site_bootstrap`,依赖 CPython `site` 对 `.pth` 中 `import` 行的标准行为。 - 不使用 `python -S`(禁用 `site`)时才会生效。 - 作用范围是**当前 Python 环境**内所有启用了本 bootstrap 的进程,不仅是某一应用入口。 -- 在 **`LOONGSUITE_PYTHON_SITE_BOOTSTRAP` 已开启且 `initialize()` 成功结束**后,会向 **stdout** 打印一行英文:`loongsuite-site-bootstrap: started successfully (OpenTelemetry auto-instrumentation initialized).`(本包自带一个仅绑定在 `loongsuite_site_bootstrap` logger 上的 `StreamHandler`,不依赖应用是否已配置 `logging`。) +- 本包自带一个仅绑定在 `loongsuite_site_bootstrap` logger 上的 `StreamHandler`,不依赖应用是否已配置 `logging`。 ## 卸载 diff --git a/loongsuite-site-bootstrap/src/loongsuite_site_bootstrap/__init__.py b/loongsuite-site-bootstrap/src/loongsuite_site_bootstrap/__init__.py index e8b4dae40..4dc7eb41d 100644 --- a/loongsuite-site-bootstrap/src/loongsuite_site_bootstrap/__init__.py +++ b/loongsuite-site-bootstrap/src/loongsuite_site_bootstrap/__init__.py @@ -43,7 +43,14 @@ from loongsuite_site_bootstrap.version import __version__ LOONGSUITE_PYTHON_SITE_BOOTSTRAP = "LOONGSUITE_PYTHON_SITE_BOOTSTRAP" +LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS = ( + "LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS" +) +LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STATUS_FILE = ( + "LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STATUS_FILE" +) _LOGGER: logging.Logger = logging.getLogger(__name__) +_INITIALIZED = False def _configure_bootstrap_logging() -> None: @@ -77,6 +84,46 @@ def _is_truthy_string(val: str) -> bool: return val.strip().lower() == "true" +def _is_falsey_string(val: str) -> bool: + """Return True for common explicit-off strings.""" + return val.strip().lower() in {"false", "0", "no", "off"} + + +def _should_log_success() -> bool: + val = os.environ.get(LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS) + if val is None: + return True + return not _is_falsey_string(val) + + +def _write_status_file(initialized: bool, error: str | None = None) -> None: + path_value = os.environ.get(LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STATUS_FILE) + if not path_value: + return + path = Path(path_value).expanduser() + payload: dict[str, object] = { + "initialized": initialized, + "pid": os.getpid(), + "version": __version__, + } + if error: + payload["error"] = error + try: + path.parent.mkdir(parents=True, exist_ok=True) + tmp_path = path.with_name(f".{path.name}.{os.getpid()}.tmp") + tmp_path.write_text( + json.dumps(payload, separators=(",", ":")) + "\n", + encoding="utf-8", + ) + os.replace(tmp_path, path) + except Exception: + _LOGGER.debug( + "loongsuite-site-bootstrap: failed to write status file %s", + path, + exc_info=True, + ) + + def _read_bootstrap_config_file() -> dict[str, str] | None: path = Path.home() / ".loongsuite" / "bootstrap-config.json" if not path.is_file(): @@ -164,6 +211,7 @@ def _run_bootstrap_if_enabled() -> None: def _run_auto_instrumentation() -> None: + global _INITIALIZED # noqa: PLW0603 # Align with loongsuite-distro + opentelemetry-instrument / sitecustomize os.environ.setdefault("OTEL_PYTHON_DISTRO", "loongsuite") os.environ.setdefault("OTEL_PYTHON_CONFIGURATOR", "loongsuite") @@ -175,6 +223,8 @@ def _run_auto_instrumentation() -> None: initialize() except Exception: + _INITIALIZED = False + _write_status_file(False, "OpenTelemetry auto-instrumentation failed") _LOGGER.exception( "loongsuite-site-bootstrap: OpenTelemetry auto-instrumentation failed " "(import or initialize); continuing without instrumentation. " @@ -182,12 +232,26 @@ def _run_auto_instrumentation() -> None: ) return - _LOGGER.info( - "loongsuite-site-bootstrap: started successfully " - "(OpenTelemetry auto-instrumentation initialized)." - ) + _INITIALIZED = True + os.environ["LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STARTED"] = "true" + _write_status_file(True) + if _should_log_success(): + _LOGGER.info( + "loongsuite-site-bootstrap: started successfully " + "(OpenTelemetry auto-instrumentation initialized)." + ) + + +def is_initialized() -> bool: + return _INITIALIZED _run_bootstrap_if_enabled() -__all__ = ["LOONGSUITE_PYTHON_SITE_BOOTSTRAP", "__version__"] +__all__ = [ + "LOONGSUITE_PYTHON_SITE_BOOTSTRAP", + "LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS", + "LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STATUS_FILE", + "__version__", + "is_initialized", +] diff --git a/loongsuite-site-bootstrap/tests/test_bootstrap_status.py b/loongsuite-site-bootstrap/tests/test_bootstrap_status.py new file mode 100644 index 000000000..9bbf53e7e --- /dev/null +++ b/loongsuite-site-bootstrap/tests/test_bootstrap_status.py @@ -0,0 +1,47 @@ +import importlib +import json +import sys + + +def _load_bootstrap(monkeypatch): + monkeypatch.setenv("LOONGSUITE_PYTHON_SITE_BOOTSTRAP", "false") + sys.modules.pop("loongsuite_site_bootstrap", None) + return importlib.import_module("loongsuite_site_bootstrap") + + +def test_write_status_file_uses_atomic_json_payload(tmp_path, monkeypatch): + bootstrap = _load_bootstrap(monkeypatch) + status_path = tmp_path / "loongsuite-site-bootstrap-status.json" + monkeypatch.setenv( + bootstrap.LOONGSUITE_PYTHON_SITE_BOOTSTRAP_STATUS_FILE, + str(status_path), + ) + + bootstrap._write_status_file(True) + + payload = json.loads(status_path.read_text(encoding="utf-8")) + assert payload == { + "initialized": True, + "pid": payload["pid"], + "version": bootstrap.__version__, + } + assert isinstance(payload["pid"], int) + assert list(tmp_path.glob(".*.tmp")) == [] + + +def test_should_log_success_defaults_on_and_accepts_common_falsey( + monkeypatch, +): + bootstrap = _load_bootstrap(monkeypatch) + monkeypatch.delenv( + bootstrap.LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS, + raising=False, + ) + assert bootstrap._should_log_success() is True + + for value in ("False", "0", "no", "off"): + monkeypatch.setenv( + bootstrap.LOONGSUITE_PYTHON_SITE_BOOTSTRAP_LOG_SUCCESS, + value, + ) + assert bootstrap._should_log_success() is False diff --git a/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md b/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md index fbb5d887f..87f72ff93 100644 --- a/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md +++ b/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md @@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Propagate agent name through the `gen_ai.agent.name` Baggage key during + `start_invoke_agent` and automatically apply it to nested GenAI child span + attributes, including LLM, embedding, tool, retrieval, rerank, memory, entry, + and ReAct step invocations. + ## Version 0.6.0 (2026-06-03) There are no changelog entries for this release. diff --git a/util/opentelemetry-util-genai/README-loongsuite.rst b/util/opentelemetry-util-genai/README-loongsuite.rst index 348540f29..30d5403fe 100644 --- a/util/opentelemetry-util-genai/README-loongsuite.rst +++ b/util/opentelemetry-util-genai/README-loongsuite.rst @@ -28,7 +28,10 @@ OpenTelemetry Util for GenAI - LoongSuite 扩展 本实现在上游 GenAI Util 能力之上提供扩展,主要包括: - **llm**:聊天/补全类调用;支持多模态消息的**外置存储与 URI 替换**(见第 4 节),减轻 Trace 体积。 -- **invoke_agent / create_agent**:Agent 调用与创建。 +- **invoke_agent / create_agent**:Agent 调用与创建;``invoke_agent`` 可将 + Agent 名称写入 Baggage key ``gen_ai.agent.name``,使其下游 LLM、 + 工具、检索、ReAct step 等 GenAI 子 Span 自动带上 + ``gen_ai.agent.name`` 属性。 - **embedding**:向量嵌入。 - **execute_tool**:工具/函数执行。 - 当工具执行对应某个 skill 的加载动作时,可额外写入 ``gen_ai.skill.*`` 语义属性。 diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/extended_handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/extended_handler.py index 2126acfc7..3e6cfee7e 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/extended_handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/extended_handler.py @@ -119,7 +119,12 @@ RerankInvocation, RetrievalInvocation, ) -from opentelemetry.util.genai.handler import TelemetryHandler, _safe_detach +from opentelemetry.util.genai.handler import ( + TelemetryHandler, + _current_context, + _inject_agent_name_from_baggage, + _safe_detach, +) from opentelemetry.util.genai.span_utils import _apply_error_attributes from opentelemetry.util.genai.types import Error, LLMInvocation @@ -257,8 +262,9 @@ def start_create_agent( # calculation using timeit.default_timer. invocation.monotonic_start_s = timeit.default_timer() invocation.span = span + current_context = _current_context(context) invocation.context_token = otel_context.attach( - set_span_in_context(span) + set_span_in_context(span, current_context) ) return invocation @@ -327,8 +333,10 @@ def start_embedding( # calculation using timeit.default_timer. invocation.monotonic_start_s = timeit.default_timer() invocation.span = span + current_context = _current_context(context) + _inject_agent_name_from_baggage(invocation, current_context) invocation.context_token = otel_context.attach( - set_span_in_context(span) + set_span_in_context(span, current_context) ) return invocation @@ -397,8 +405,10 @@ def start_execute_tool( # calculation using timeit.default_timer. invocation.monotonic_start_s = timeit.default_timer() invocation.span = span + current_context = _current_context(context) + _inject_agent_name_from_baggage(invocation, current_context) invocation.context_token = otel_context.attach( - set_span_in_context(span) + set_span_in_context(span, current_context) ) return invocation @@ -474,9 +484,15 @@ def start_invoke_agent( # calculation using timeit.default_timer. invocation.monotonic_start_s = timeit.default_timer() invocation.span = span - invocation.context_token = otel_context.attach( - set_span_in_context(span) - ) + current_context = _current_context(context) + ctx = set_span_in_context(span, current_context) + if invocation.agent_name: + ctx = baggage.set_baggage( + GenAI.GEN_AI_AGENT_NAME, + invocation.agent_name, + ctx, + ) + invocation.context_token = otel_context.attach(ctx) return invocation def stop_invoke_agent( @@ -567,8 +583,10 @@ def start_retrieval( # calculation using timeit.default_timer. invocation.monotonic_start_s = timeit.default_timer() invocation.span = span + current_context = _current_context(context) + _inject_agent_name_from_baggage(invocation, current_context) invocation.context_token = otel_context.attach( - set_span_in_context(span) + set_span_in_context(span, current_context) ) return invocation @@ -637,8 +655,10 @@ def start_rerank( # calculation using timeit.default_timer. invocation.monotonic_start_s = timeit.default_timer() invocation.span = span + current_context = _current_context(context) + _inject_agent_name_from_baggage(invocation, current_context) invocation.context_token = otel_context.attach( - set_span_in_context(span) + set_span_in_context(span, current_context) ) return invocation @@ -708,8 +728,10 @@ def start_memory( # calculation using timeit.default_timer. invocation.monotonic_start_s = timeit.default_timer() invocation.span = span + current_context = _current_context(context) + _inject_agent_name_from_baggage(invocation, current_context) invocation.context_token = otel_context.attach( - set_span_in_context(span) + set_span_in_context(span, current_context) ) return invocation @@ -790,7 +812,9 @@ def start_entry( invocation.monotonic_start_s = timeit.default_timer() invocation.span = span - ctx = set_span_in_context(span) + current_context = _current_context(context) + _inject_agent_name_from_baggage(invocation, current_context) + ctx = set_span_in_context(span, current_context) if invocation.session_id is not None: ctx = baggage.set_baggage( _GEN_AI_SESSION_ID, invocation.session_id, ctx @@ -865,8 +889,10 @@ def start_react_step( ) invocation.monotonic_start_s = timeit.default_timer() invocation.span = span + current_context = _current_context(context) + _inject_agent_name_from_baggage(invocation, current_context) invocation.context_token = otel_context.attach( - set_span_in_context(span) + set_span_in_context(span, current_context) ) return invocation diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index 7621f8f3d..052d35c5b 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -64,8 +64,9 @@ import logging import timeit from contextlib import contextmanager -from typing import Iterator +from typing import Any, Iterator, Protocol +from opentelemetry import baggage from opentelemetry import context as otel_context from opentelemetry._logs import ( LoggerProvider, @@ -76,6 +77,9 @@ Context, ) from opentelemetry.metrics import MeterProvider, get_meter +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAI, +) from opentelemetry.semconv.schemas import Schemas from opentelemetry.trace import ( Span, @@ -90,12 +94,22 @@ _apply_llm_finish_attributes, _maybe_emit_llm_event, ) -from opentelemetry.util.genai.types import Error, LLMInvocation +from opentelemetry.util.genai.types import ( + Error, + LLMInvocation, +) from opentelemetry.util.genai.version import __version__ # LoongSuite Extension logger = logging.getLogger(__name__) +# LoongSuite Extension +_AGENT_NAME_BAGGAGE_KEY = GenAI.GEN_AI_AGENT_NAME + + +class _InvocationWithAttributes(Protocol): + attributes: dict[str, Any] + # LoongSuite Extension def _safe_detach(token: object) -> None: @@ -116,6 +130,22 @@ def _safe_detach(token: object) -> None: ) +def _current_context(context: Context | None = None) -> Context: + if context is not None: + return context + return otel_context.get_current() + + +def _inject_agent_name_from_baggage( + invocation: _InvocationWithAttributes, context: Context +) -> None: + if GenAI.GEN_AI_AGENT_NAME in invocation.attributes: + return + agent_name = baggage.get_baggage(_AGENT_NAME_BAGGAGE_KEY, context=context) + if agent_name: + invocation.attributes[GenAI.GEN_AI_AGENT_NAME] = agent_name + + class TelemetryHandler: """ High-level handler managing GenAI invocation lifecycles and emitting @@ -165,18 +195,21 @@ def start_llm( context: Context | None = None, # LoongSuite Extension ) -> LLMInvocation: """Start an LLM invocation and create a pending span entry.""" + current_context = _current_context(context) + _inject_agent_name_from_baggage(invocation, current_context) + # Create a span and attach it as current; keep the token to detach later span = self._tracer.start_span( name=f"{invocation.operation_name} {invocation.request_model}", kind=SpanKind.CLIENT, - context=context, # LoongSuite Extension + context=current_context, # LoongSuite Extension ) # Record a monotonic start timestamp (seconds) for duration # calculation using timeit.default_timer. invocation.monotonic_start_s = timeit.default_timer() invocation.span = span invocation.context_token = otel_context.attach( - set_span_in_context(span) + set_span_in_context(span, current_context) ) return invocation diff --git a/util/opentelemetry-util-genai/tests/test_extended_handler.py b/util/opentelemetry-util-genai/tests/test_extended_handler.py index 5723a25fb..e279d9f43 100644 --- a/util/opentelemetry-util-genai/tests/test_extended_handler.py +++ b/util/opentelemetry-util-genai/tests/test_extended_handler.py @@ -104,6 +104,8 @@ Uri, ) +_AGENT_NAME_BAGGAGE_KEY = "gen_ai.agent.name" + def patch_env_vars( stability_mode, content_capturing=None, emit_event=None, **extra_env_vars @@ -632,6 +634,128 @@ def test_invoke_agent_manual_start_and_stop(self): ) # Note: total_tokens is not set when only input_tokens is available + def test_invoke_agent_propagates_agent_name_baggage(self): + invocation = InvokeAgentInvocation( + provider="test-provider", + agent_name="BaggageAgent", + ) + + self.telemetry_handler.start_invoke_agent(invocation) + try: + current_baggage = get_all_baggage() + self.assertEqual( + current_baggage.get(_AGENT_NAME_BAGGAGE_KEY), + "BaggageAgent", + ) + finally: + self.telemetry_handler.stop_invoke_agent(invocation) + + restored_baggage = get_all_baggage() + self.assertNotIn(_AGENT_NAME_BAGGAGE_KEY, restored_baggage) + + def test_nested_invoke_agent_baggage_overrides_and_restores(self): + parent_invocation = InvokeAgentInvocation( + provider="test-provider", + agent_name="ParentAgent", + ) + child_invocation = InvokeAgentInvocation( + provider="test-provider", + agent_name="ChildAgent", + ) + + self.telemetry_handler.start_invoke_agent(parent_invocation) + try: + self.assertEqual( + get_all_baggage().get(_AGENT_NAME_BAGGAGE_KEY), + "ParentAgent", + ) + + self.telemetry_handler.start_invoke_agent(child_invocation) + try: + self.assertEqual( + get_all_baggage().get(_AGENT_NAME_BAGGAGE_KEY), + "ChildAgent", + ) + finally: + self.telemetry_handler.stop_invoke_agent(child_invocation) + + self.assertEqual( + get_all_baggage().get(_AGENT_NAME_BAGGAGE_KEY), + "ParentAgent", + ) + finally: + self.telemetry_handler.stop_invoke_agent(parent_invocation) + + restored_baggage = get_all_baggage() + self.assertNotIn(_AGENT_NAME_BAGGAGE_KEY, restored_baggage) + + def test_agent_context_colors_llm_and_tool_spans(self): + agent_invocation = InvokeAgentInvocation( + provider="test-provider", + agent_name="PlannerAgent", + ) + + self.telemetry_handler.start_invoke_agent(agent_invocation) + try: + llm_invocation = LLMInvocation( + provider="openai", + request_model="gpt-4o-mini", + ) + self.telemetry_handler.start_llm(llm_invocation) + self.telemetry_handler.stop_llm(llm_invocation) + + tool_invocation = ExecuteToolInvocation(tool_name="search") + self.telemetry_handler.start_execute_tool(tool_invocation) + self.telemetry_handler.stop_execute_tool(tool_invocation) + finally: + self.telemetry_handler.stop_invoke_agent(agent_invocation) + + spans = self.span_exporter.get_finished_spans() + llm_span = next( + span for span in spans if span.name == "chat gpt-4o-mini" + ) + tool_span = next( + span for span in spans if span.name == "execute_tool search" + ) + self.assertEqual( + llm_span.attributes.get(GenAI.GEN_AI_AGENT_NAME), + "PlannerAgent", + ) + self.assertEqual( + tool_span.attributes.get(GenAI.GEN_AI_AGENT_NAME), + "PlannerAgent", + ) + + def test_explicit_agent_parent_context_colors_llm_span(self): + agent_invocation = InvokeAgentInvocation( + provider="test-provider", + agent_name="ExplicitParentAgent", + ) + + self.telemetry_handler.start_invoke_agent(agent_invocation) + try: + parent_context = context_api.get_current() + llm_invocation = LLMInvocation( + provider="openai", + request_model="gpt-4o-mini", + ) + self.telemetry_handler.start_llm( + llm_invocation, + context=parent_context, + ) + self.telemetry_handler.stop_llm(llm_invocation) + finally: + self.telemetry_handler.stop_invoke_agent(agent_invocation) + + spans = self.span_exporter.get_finished_spans() + llm_span = next( + span for span in spans if span.name == "chat gpt-4o-mini" + ) + self.assertEqual( + llm_span.attributes.get(GenAI.GEN_AI_AGENT_NAME), + "ExplicitParentAgent", + ) + def test_invoke_agent_error_handling(self): class AgentInvocationError(RuntimeError): pass