From d95497bd4a95efc69a0f95ed925a82e25fcebb5c Mon Sep 17 00:00:00 2001 From: sanjeed5 Date: Fri, 6 Mar 2026 00:25:09 +0530 Subject: [PATCH] feat(telemetry): emit system prompt on chat spans per GenAI semconv System prompt was missing from chat (model invoke) span events, so observability backends could not render the full conversation context on individual LLM calls. Legacy mode (v1.36): emits a gen_ai.system.message span event before the conversation message events. Latest experimental mode: emits gen_ai.system_instructions on the gen_ai.client.inference.operation.details event, keeping it separate from gen_ai.input.messages per the spec guidance that system instructions provided separately from chat history should use gen_ai.system_instructions. Backwards compatible: new optional params default to None. --- src/strands/event_loop/event_loop.py | 2 + src/strands/telemetry/tracer.py | 43 +++ tests/strands/event_loop/test_event_loop.py | 3 + tests/strands/telemetry/test_tracer.py | 372 +++++++++++++------- 4 files changed, 285 insertions(+), 135 deletions(-) diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py index 3113ddb79..52b29b303 100644 --- a/src/strands/event_loop/event_loop.py +++ b/src/strands/event_loop/event_loop.py @@ -313,6 +313,8 @@ async def _handle_model_execution( parent_span=cycle_span, model_id=model_id, custom_trace_attributes=agent.trace_attributes, + system_prompt=agent.system_prompt, + system_prompt_content=agent._system_prompt_content, ) with trace_api.use_span(model_invoke_span, end_on_exit=True): await agent.hooks.invoke_callbacks_async( diff --git a/src/strands/telemetry/tracer.py b/src/strands/telemetry/tracer.py index 80fb86c40..a88109e6b 100644 --- a/src/strands/telemetry/tracer.py +++ b/src/strands/telemetry/tracer.py @@ -284,6 +284,8 @@ def start_model_invoke_span( parent_span: Span | None = None, model_id: str | None = None, custom_trace_attributes: Mapping[str, AttributeValue] | None = None, + system_prompt: str | None = None, + system_prompt_content: list | None = None, **kwargs: Any, ) -> Span: """Start a new span for a model invocation. @@ -293,6 +295,8 @@ def start_model_invoke_span( parent_span: Optional parent span to link this span to. model_id: Optional identifier for the model being invoked. custom_trace_attributes: Optional mapping of custom trace attributes to include in the span. + system_prompt: Optional system prompt string provided to the model. + system_prompt_content: Optional list of system prompt content blocks. **kwargs: Additional attributes to add to the span. Returns: @@ -310,6 +314,7 @@ def start_model_invoke_span( attributes.update({k: v for k, v in kwargs.items() if isinstance(v, (str, int, float, bool))}) span = self._start_span("chat", parent_span, attributes=attributes, span_kind=trace_api.SpanKind.INTERNAL) + self._add_system_prompt_event(span, system_prompt, system_prompt_content) self._add_event_messages(span, messages) return span @@ -812,6 +817,44 @@ def _get_common_attributes( ) return dict(common_attributes) + def _add_system_prompt_event( + self, + span: Span, + system_prompt: str | None = None, + system_prompt_content: list | None = None, + ) -> None: + """Emit system prompt as a span event per OTel GenAI semantic conventions. + + In legacy mode (v1.36), emits a ``gen_ai.system.message`` event. + In latest experimental mode, emits ``gen_ai.system_instructions`` on the + ``gen_ai.client.inference.operation.details`` event, since Strands passes + system instructions separately from chat history. + + Args: + span: The span to add the event to. + system_prompt: Optional system prompt string. + system_prompt_content: Optional list of system prompt content blocks. + """ + if not system_prompt and not system_prompt_content: + return + + content_blocks = system_prompt_content if system_prompt_content else [{"text": system_prompt}] + + if self.use_latest_genai_conventions: + parts = self._map_content_blocks_to_otel_parts(content_blocks) + self._add_event( + span, + "gen_ai.client.inference.operation.details", + {"gen_ai.system_instructions": serialize(parts)}, + to_span_attributes=self.is_langfuse, + ) + else: + self._add_event( + span, + "gen_ai.system.message", + {"content": serialize(content_blocks)}, + ) + def _add_event_messages(self, span: Span, messages: Messages) -> None: """Adds messages as event to the provided span based on the current GenAI conventions. diff --git a/tests/strands/event_loop/test_event_loop.py b/tests/strands/event_loop/test_event_loop.py index 8c6155e20..226d42440 100644 --- a/tests/strands/event_loop/test_event_loop.py +++ b/tests/strands/event_loop/test_event_loop.py @@ -545,6 +545,9 @@ async def test_event_loop_cycle_creates_spans( mock_get_tracer.assert_called_once() mock_tracer.start_event_loop_cycle_span.assert_called_once() mock_tracer.start_model_invoke_span.assert_called_once() + call_kwargs = mock_tracer.start_model_invoke_span.call_args[1] + assert call_kwargs["system_prompt"] == agent.system_prompt + assert call_kwargs["system_prompt_content"] == agent._system_prompt_content mock_tracer.end_model_invoke_span.assert_called_once() mock_tracer.end_event_loop_cycle_span.assert_called_once() diff --git a/tests/strands/telemetry/test_tracer.py b/tests/strands/telemetry/test_tracer.py index da7f010e2..9ba4dbe7b 100644 --- a/tests/strands/telemetry/test_tracer.py +++ b/tests/strands/telemetry/test_tracer.py @@ -140,25 +140,37 @@ def test_start_model_invoke_span(mock_tracer): messages = [{"role": "user", "content": [{"text": "Hello"}]}] model_id = "test-model" custom_attrs = {"custom_key": "custom_value", "user_id": "12345"} + system_prompt = "You are a helpful assistant" span = tracer.start_model_invoke_span( - messages=messages, agent_name="TestAgent", model_id=model_id, custom_trace_attributes=custom_attrs + messages=messages, + agent_name="TestAgent", + model_id=model_id, + custom_trace_attributes=custom_attrs, + system_prompt=system_prompt, ) mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "chat" assert mock_tracer.start_span.call_args[1]["kind"] == SpanKind.INTERNAL - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.operation.name": "chat", - "gen_ai.system": "strands-agents", - "custom_key": "custom_value", - "user_id": "12345", - "gen_ai.request.model": model_id, - "agent_name": "TestAgent", - }) - mock_span.add_event.assert_called_with( - "gen_ai.user.message", attributes={"content": json.dumps(messages[0]["content"])} + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.operation.name": "chat", + "gen_ai.system": "strands-agents", + "custom_key": "custom_value", + "user_id": "12345", + "gen_ai.request.model": model_id, + "agent_name": "TestAgent", + } + ) + + calls = mock_span.add_event.call_args_list + assert len(calls) == 2 + assert calls[0] == mock.call( + "gen_ai.system.message", + attributes={"content": serialize([{"text": system_prompt}])}, ) + assert calls[1] == mock.call("gen_ai.user.message", attributes={"content": json.dumps(messages[0]["content"])}) assert span is not None @@ -182,20 +194,34 @@ def test_start_model_invoke_span_latest_conventions(mock_tracer, monkeypatch): }, ] model_id = "test-model" + system_prompt = "You are a calculator assistant" - span = tracer.start_model_invoke_span(messages=messages, agent_name="TestAgent", model_id=model_id) + span = tracer.start_model_invoke_span( + messages=messages, agent_name="TestAgent", model_id=model_id, system_prompt=system_prompt + ) mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "chat" assert mock_tracer.start_span.call_args[1]["kind"] == SpanKind.INTERNAL - - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.operation.name": "chat", - "gen_ai.provider.name": "strands-agents", - "gen_ai.request.model": model_id, - "agent_name": "TestAgent", - }) - mock_span.add_event.assert_called_with( + + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.operation.name": "chat", + "gen_ai.provider.name": "strands-agents", + "gen_ai.request.model": model_id, + "agent_name": "TestAgent", + } + ) + + calls = mock_span.add_event.call_args_list + assert len(calls) == 2 + assert calls[0] == mock.call( + "gen_ai.client.inference.operation.details", + attributes={ + "gen_ai.system_instructions": serialize([{"type": "text", "content": system_prompt}]), + }, + ) + assert calls[1] == mock.call( "gen_ai.client.inference.operation.details", attributes={ "gen_ai.input.messages": serialize( @@ -222,6 +248,54 @@ def test_start_model_invoke_span_latest_conventions(mock_tracer, monkeypatch): assert span is not None +def test_start_model_invoke_span_without_system_prompt(mock_tracer): + """Test that no system prompt event is emitted when system_prompt is None.""" + with mock.patch("strands.telemetry.tracer.trace_api.get_tracer", return_value=mock_tracer): + tracer = Tracer() + tracer.tracer = mock_tracer + + mock_span = mock.MagicMock() + mock_tracer.start_span.return_value = mock_span + + messages = [{"role": "user", "content": [{"text": "Hello"}]}] + + span = tracer.start_model_invoke_span(messages=messages, model_id="test-model") + + assert mock_span.add_event.call_count == 1 + mock_span.add_event.assert_called_once_with( + "gen_ai.user.message", attributes={"content": json.dumps(messages[0]["content"])} + ) + assert span is not None + + +def test_start_model_invoke_span_with_system_prompt_content(mock_tracer): + """Test that system_prompt_content takes priority over system_prompt string.""" + with mock.patch("strands.telemetry.tracer.trace_api.get_tracer", return_value=mock_tracer): + tracer = Tracer() + tracer.tracer = mock_tracer + + mock_span = mock.MagicMock() + mock_tracer.start_span.return_value = mock_span + + messages = [{"role": "user", "content": [{"text": "Hello"}]}] + system_prompt_content = [{"text": "You are helpful"}, {"text": "Be concise"}] + + span = tracer.start_model_invoke_span( + messages=messages, + model_id="test-model", + system_prompt="ignored string", + system_prompt_content=system_prompt_content, + ) + + calls = mock_span.add_event.call_args_list + assert len(calls) == 2 + assert calls[0] == mock.call( + "gen_ai.system.message", + attributes={"content": serialize(system_prompt_content)}, + ) + assert span is not None + + def test_end_model_invoke_span(mock_span): """Test ending a model invoke span.""" tracer = Tracer() @@ -232,15 +306,17 @@ def test_end_model_invoke_span(mock_span): tracer.end_model_invoke_span(mock_span, message, usage, metrics, stop_reason) - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.usage.prompt_tokens": 10, - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.completion_tokens": 20, - "gen_ai.usage.output_tokens": 20, - "gen_ai.usage.total_tokens": 30, - "gen_ai.server.time_to_first_token": 10, - "gen_ai.server.request.duration": 20, - }) + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.usage.prompt_tokens": 10, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.completion_tokens": 20, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "gen_ai.server.time_to_first_token": 10, + "gen_ai.server.request.duration": 20, + } + ) mock_span.add_event.assert_called_with( "gen_ai.choice", attributes={"message": json.dumps(message["content"]), "finish_reason": "end_turn"}, @@ -259,15 +335,17 @@ def test_end_model_invoke_span_latest_conventions(mock_span, monkeypatch): tracer.end_model_invoke_span(mock_span, message, usage, metrics, stop_reason) - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.usage.prompt_tokens": 10, - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.completion_tokens": 20, - "gen_ai.usage.output_tokens": 20, - "gen_ai.usage.total_tokens": 30, - "gen_ai.server.time_to_first_token": 10, - "gen_ai.server.request.duration": 20, - }) + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.usage.prompt_tokens": 10, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.completion_tokens": 20, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "gen_ai.server.time_to_first_token": 10, + "gen_ai.server.request.duration": 20, + } + ) mock_span.add_event.assert_called_with( "gen_ai.client.inference.operation.details", attributes={ @@ -300,15 +378,17 @@ def test_start_tool_call_span(mock_tracer): mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "execute_tool test-tool" - - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.tool.name": "test-tool", - "gen_ai.system": "strands-agents", - "gen_ai.operation.name": "execute_tool", - "gen_ai.tool.call.id": "123", - "session_id": "abc123", - "environment": "production", - }) + + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.tool.name": "test-tool", + "gen_ai.system": "strands-agents", + "gen_ai.operation.name": "execute_tool", + "gen_ai.tool.call.id": "123", + "session_id": "abc123", + "environment": "production", + } + ) mock_span.add_event.assert_any_call( "gen_ai.tool.message", attributes={"role": "tool", "content": json.dumps({"param": "value"}), "id": "123"} ) @@ -331,13 +411,15 @@ def test_start_tool_call_span_latest_conventions(mock_tracer, monkeypatch): mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "execute_tool test-tool" - - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.tool.name": "test-tool", - "gen_ai.provider.name": "strands-agents", - "gen_ai.operation.name": "execute_tool", - "gen_ai.tool.call.id": "123", - }) + + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.tool.name": "test-tool", + "gen_ai.provider.name": "strands-agents", + "gen_ai.operation.name": "execute_tool", + "gen_ai.tool.call.id": "123", + } + ) mock_span.add_event.assert_called_with( "gen_ai.client.inference.operation.details", attributes={ @@ -377,14 +459,16 @@ def test_start_swarm_call_span_with_string_task(mock_tracer): mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "invoke_swarm" - - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.operation.name": "invoke_swarm", - "gen_ai.system": "strands-agents", - "gen_ai.agent.name": "swarm", - "workflow_id": "wf-789", - "priority": "high", - }) + + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.operation.name": "invoke_swarm", + "gen_ai.system": "strands-agents", + "gen_ai.agent.name": "swarm", + "workflow_id": "wf-789", + "priority": "high", + } + ) mock_span.add_event.assert_any_call("gen_ai.user.message", attributes={"content": "Design foo bar"}) assert span is not None @@ -404,12 +488,14 @@ def test_start_swarm_span_with_contentblock_task(mock_tracer): mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "invoke_swarm" - - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.operation.name": "invoke_swarm", - "gen_ai.system": "strands-agents", - "gen_ai.agent.name": "swarm", - }) + + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.operation.name": "invoke_swarm", + "gen_ai.system": "strands-agents", + "gen_ai.agent.name": "swarm", + } + ) mock_span.add_event.assert_any_call( "gen_ai.user.message", attributes={"content": '[{"text": "Original Task: foo bar"}]'} ) @@ -460,12 +546,14 @@ def test_start_swarm_span_with_contentblock_task_latest_conventions(mock_tracer, mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "invoke_swarm" - - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.operation.name": "invoke_swarm", - "gen_ai.provider.name": "strands-agents", - "gen_ai.agent.name": "swarm", - }) + + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.operation.name": "invoke_swarm", + "gen_ai.provider.name": "strands-agents", + "gen_ai.agent.name": "swarm", + } + ) mock_span.add_event.assert_any_call( "gen_ai.client.inference.operation.details", attributes={ @@ -528,13 +616,15 @@ def test_start_graph_call_span(mock_tracer): mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "execute_tool test-tool" - - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.operation.name": "execute_tool", - "gen_ai.system": "strands-agents", - "gen_ai.tool.name": "test-tool", - "gen_ai.tool.call.id": "123", - }) + + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.operation.name": "execute_tool", + "gen_ai.system": "strands-agents", + "gen_ai.tool.name": "test-tool", + "gen_ai.tool.call.id": "123", + } + ) mock_span.add_event.assert_any_call( "gen_ai.tool.message", attributes={"role": "tool", "content": json.dumps({"param": "value"}), "id": "123"} ) @@ -608,12 +698,14 @@ def test_start_event_loop_cycle_span(mock_tracer): mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "execute_event_loop_cycle" - - mock_span.set_attributes.assert_called_once_with({ - "event_loop.cycle_id": "cycle-123", - "request_id": "req-456", - "trace_level": "debug", - }) + + mock_span.set_attributes.assert_called_once_with( + { + "event_loop.cycle_id": "cycle-123", + "request_id": "req-456", + "trace_level": "debug", + } + ) mock_span.add_event.assert_any_call( "gen_ai.user.message", attributes={"content": json.dumps([{"text": "Hello"}])} ) @@ -637,7 +729,7 @@ def test_start_event_loop_cycle_span_latest_conventions(mock_tracer, monkeypatch mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "execute_event_loop_cycle" - + mock_span.set_attributes.assert_called_once_with({"event_loop.cycle_id": "cycle-123"}) mock_span.add_event.assert_any_call( "gen_ai.client.inference.operation.details", @@ -731,14 +823,16 @@ def test_start_agent_span(mock_tracer): assert mock_tracer.start_span.call_args[1]["name"] == "invoke_agent WeatherAgent" assert mock_tracer.start_span.call_args[1]["kind"] == SpanKind.INTERNAL - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.operation.name": "invoke_agent", - "gen_ai.system": "strands-agents", - "gen_ai.agent.name": "WeatherAgent", - "gen_ai.request.model": model_id, - "gen_ai.agent.tools": json.dumps(tools), - "custom_attr": "value", - }) + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.operation.name": "invoke_agent", + "gen_ai.system": "strands-agents", + "gen_ai.agent.name": "WeatherAgent", + "gen_ai.request.model": model_id, + "gen_ai.agent.tools": json.dumps(tools), + "custom_attr": "value", + } + ) mock_span.add_event.assert_any_call("gen_ai.user.message", attributes={"content": json.dumps(content)}) assert span is not None @@ -768,15 +862,17 @@ def test_start_agent_span_latest_conventions(mock_tracer, monkeypatch): mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "invoke_agent WeatherAgent" - - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.operation.name": "invoke_agent", - "gen_ai.provider.name": "strands-agents", - "gen_ai.agent.name": "WeatherAgent", - "gen_ai.request.model": model_id, - "gen_ai.agent.tools": json.dumps(tools), - "custom_attr": "value", - }) + + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.operation.name": "invoke_agent", + "gen_ai.provider.name": "strands-agents", + "gen_ai.agent.name": "WeatherAgent", + "gen_ai.request.model": model_id, + "gen_ai.agent.tools": json.dumps(tools), + "custom_attr": "value", + } + ) mock_span.add_event.assert_any_call( "gen_ai.client.inference.operation.details", attributes={ @@ -919,17 +1015,19 @@ def test_end_model_invoke_span_with_cache_metrics(mock_span): tracer.end_model_invoke_span(mock_span, message, usage, metrics, stop_reason) - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.usage.prompt_tokens": 10, - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.completion_tokens": 20, - "gen_ai.usage.output_tokens": 20, - "gen_ai.usage.total_tokens": 30, - "gen_ai.usage.cache_read_input_tokens": 5, - "gen_ai.usage.cache_write_input_tokens": 3, - "gen_ai.server.request.duration": 10, - "gen_ai.server.time_to_first_token": 5, - }) + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.usage.prompt_tokens": 10, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.completion_tokens": 20, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "gen_ai.usage.cache_read_input_tokens": 5, + "gen_ai.usage.cache_write_input_tokens": 3, + "gen_ai.server.request.duration": 10, + "gen_ai.server.time_to_first_token": 5, + } + ) def test_end_agent_span_with_cache_metrics(mock_span): @@ -953,15 +1051,17 @@ def test_end_agent_span_with_cache_metrics(mock_span): tracer.end_agent_span(mock_span, mock_response) - mock_span.set_attributes.assert_called_once_with({ - "gen_ai.usage.prompt_tokens": 50, - "gen_ai.usage.input_tokens": 50, - "gen_ai.usage.completion_tokens": 100, - "gen_ai.usage.output_tokens": 100, - "gen_ai.usage.total_tokens": 150, - "gen_ai.usage.cache_read_input_tokens": 25, - "gen_ai.usage.cache_write_input_tokens": 10, - }) + mock_span.set_attributes.assert_called_once_with( + { + "gen_ai.usage.prompt_tokens": 50, + "gen_ai.usage.input_tokens": 50, + "gen_ai.usage.completion_tokens": 100, + "gen_ai.usage.output_tokens": 100, + "gen_ai.usage.total_tokens": 150, + "gen_ai.usage.cache_read_input_tokens": 25, + "gen_ai.usage.cache_write_input_tokens": 10, + } + ) mock_span.set_status.assert_called_once_with(StatusCode.OK) mock_span.end.assert_called_once() @@ -1519,18 +1619,20 @@ def test_end_model_invoke_span_langfuse_adds_attributes(mock_span, monkeypatch): } ] ) - + assert mock_span.set_attributes.call_count == 2 mock_span.set_attributes.assert_any_call({"gen_ai.output.messages": expected_output}) - mock_span.set_attributes.assert_any_call({ - "gen_ai.usage.prompt_tokens": 10, - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.completion_tokens": 20, - "gen_ai.usage.output_tokens": 20, - "gen_ai.usage.total_tokens": 30, - "gen_ai.server.time_to_first_token": 10, - "gen_ai.server.request.duration": 20, - }) + mock_span.set_attributes.assert_any_call( + { + "gen_ai.usage.prompt_tokens": 10, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.completion_tokens": 20, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "gen_ai.server.time_to_first_token": 10, + "gen_ai.server.request.duration": 20, + } + ) mock_span.add_event.assert_called_with( "gen_ai.client.inference.operation.details",