diff --git a/src/instana/options.py b/src/instana/options.py index affaa266..d31bc456 100644 --- a/src/instana/options.py +++ b/src/instana/options.py @@ -1,4 +1,4 @@ -# (c) Copyright IBM Corp. 2021 +# (c) Copyright IBM Corp. 2021, 2025 # (c) Copyright Instana Inc. 2016 """ @@ -50,6 +50,10 @@ def __init__(self, **kwds: Dict[str, Any]) -> None: # enabled_spans lists all categories and types that should be enabled, preceding disabled_spans self.enabled_spans = [] + # Stack trace configuration - global defaults + self.stack_trace_level = "all" # Options: "all", "error", "none" + self.stack_trace_length = 30 # Default: 30, recommended range: 10-40 + self.set_trace_configurations() # Defaults @@ -122,6 +126,33 @@ def set_trace_configurations(self) -> None: ) self.set_disable_trace_configurations() + self.set_stack_trace_configurations() + + def set_stack_trace_configurations(self) -> None: + # Stack trace level configuration + if "INSTANA_STACK_TRACE" in os.environ: + level = os.environ["INSTANA_STACK_TRACE"].lower() + if level in ["all", "error", "none"]: + self.stack_trace_level = level + else: + logger.warning( + f"Invalid INSTANA_STACK_TRACE value: {level}. Must be 'all', 'error', or 'none'. Using default 'all'" + ) + + # Stack trace length configuration + if "INSTANA_STACK_TRACE_LENGTH" in os.environ: + try: + length = int(os.environ["INSTANA_STACK_TRACE_LENGTH"]) + if length >= 1: + self.stack_trace_length = length + else: + logger.warning( + "INSTANA_STACK_TRACE_LENGTH must be positive. Using default 30" + ) + except ValueError: + logger.warning( + "Invalid INSTANA_STACK_TRACE_LENGTH value. Must be an integer. Using default 30" + ) def set_disable_trace_configurations(self) -> None: disabled_spans = [] diff --git a/src/instana/span/span.py b/src/instana/span/span.py index f05a01f0..0319569f 100644 --- a/src/instana/span/span.py +++ b/src/instana/span/span.py @@ -1,4 +1,4 @@ -# (c) Copyright IBM Corp. 2021 +# (c) Copyright IBM Corp. 2021, 2025 # (c) Copyright Instana Inc. 2017 """ @@ -36,6 +36,7 @@ from instana.recorder import StanRecorder from instana.span.kind import HTTP_SPANS from instana.span.readable_span import Event, ReadableSpan +from instana.span.stack_trace import add_stack_trace_if_needed from instana.span_context import SpanContext @@ -197,6 +198,8 @@ def end(self, end_time: Optional[int] = None) -> None: self._end_time = end_time if end_time else time_ns() self._duration = self._end_time - self._start_time + add_stack_trace_if_needed(self) + self._span_processor.record_span(self._readable_span()) def mark_as_errored(self, attributes: types.Attributes = None) -> None: diff --git a/src/instana/span/stack_trace.py b/src/instana/span/stack_trace.py new file mode 100644 index 00000000..c3016fbf --- /dev/null +++ b/src/instana/span/stack_trace.py @@ -0,0 +1,156 @@ +# (c) Copyright IBM Corp. 2025 + +""" +Stack trace collection functionality for spans. + +This module provides utilities for capturing and filtering stack traces +for EXIT spans based on configuration settings. +""" + +import os +import re +import traceback +from typing import List, Optional, TYPE_CHECKING + +from instana.log import logger +from instana.span.kind import EXIT_SPANS + +if TYPE_CHECKING: + from instana.span.span import InstanaSpan + +# Regex patterns for filtering Instana internal frames +_re_tracer_frame = re.compile(r"/instana/.*\.py$") +_re_with_stan_frame = re.compile("with_instana") + + +def _should_collect_stack(level: str, is_errored: bool) -> bool: + """ + Determine if stack trace should be collected based on level and error state. + + Args: + level: Stack trace collection level ("all", "error", or "none") + is_errored: Whether the span has errors (ec > 0) + + Returns: + True if stack trace should be collected, False otherwise + """ + if level == "all": + return True + if level == "error" and is_errored: + return True + return False + + +def _should_exclude_frame(frame) -> bool: + """ + Check if a frame should be excluded from the stack trace. + + Frames are excluded if they are part of Instana's internal code, + unless INSTANA_DEBUG is set. + + Args: + frame: A frame from traceback.extract_stack() + + Returns: + True if frame should be excluded, False otherwise + """ + if "INSTANA_DEBUG" in os.environ: + return False + if _re_tracer_frame.search(frame[0]): + return True + if _re_with_stan_frame.search(frame[2]): + return True + return False + + +def _apply_stack_limit( + sanitized_stack: List[dict], limit: int, use_full_stack: bool +) -> List[dict]: + """ + Apply frame limit to the sanitized stack. + + Args: + sanitized_stack: List of stack frames + limit: Maximum number of frames to include + use_full_stack: If True, ignore the limit + + Returns: + Limited stack trace + """ + if use_full_stack or len(sanitized_stack) <= limit: + return sanitized_stack + # (limit * -1) gives us negative form of used for + # slicing from the end of the list. e.g. stack[-25:] + return sanitized_stack[(limit * -1) :] + + +def add_stack( + level: str, limit: int, is_errored: bool = False +) -> Optional[List[dict]]: + """ + Capture and return a stack trace based on configuration. + + This function collects the current call stack, filters out Instana + internal frames, and applies the configured limit. + + Args: + level: Stack trace collection level ("all", "error", or "none") + limit: Maximum number of frames to include (1-40) + is_errored: Whether the span has errors (ec > 0) + + Returns: + List of stack frames in format [{"c": file, "n": line, "m": method}, ...] + or None if stack trace should not be collected + """ + try: + # Determine if we should collect stack trace + if not _should_collect_stack(level, is_errored): + return None + + # For erroneous EXIT spans, MAY consider the whole stack + use_full_stack = is_errored + + # Enforce hard limit of 40 frames (unless errored and using full stack) + if not use_full_stack and limit > 40: + limit = 40 + + sanitized_stack = [] + trace_back = traceback.extract_stack() + trace_back.reverse() + + for frame in trace_back: + if _should_exclude_frame(frame): + continue + sanitized_stack.append({"c": frame[0], "n": frame[1], "m": frame[2]}) + + # Apply limit (unless it's an errored span and we want full stack) + return _apply_stack_limit(sanitized_stack, limit, use_full_stack) + + except Exception: + logger.debug("add_stack: ", exc_info=True) + return None + + +def add_stack_trace_if_needed(span: "InstanaSpan") -> None: + """ + Add stack trace to span based on configuration before span ends. + + This function checks if the span is an EXIT span and if so, captures + a stack trace based on the configured level and limit. + + Args: + span: The InstanaSpan to potentially add stack trace to + """ + if span.name in EXIT_SPANS: + # Get configuration from agent options + options = span._span_processor.agent.options + + # Check if span is errored + is_errored = span.attributes.get("ec", 0) > 0 + + # Capture stack trace using add_stack function + span.stack = add_stack( + level=options.stack_trace_level, + limit=options.stack_trace_length, + is_errored=is_errored + ) diff --git a/src/instana/tracer.py b/src/instana/tracer.py index 83ea05ec..8546a8a1 100644 --- a/src/instana/tracer.py +++ b/src/instana/tracer.py @@ -2,10 +2,7 @@ # (c) Copyright Instana Inc. 2016 -import os -import re import time -import traceback from contextlib import contextmanager from typing import TYPE_CHECKING, Iterator, Mapping, Optional, Type, Union @@ -30,7 +27,6 @@ from instana.propagators.text_propagator import TextPropagator from instana.recorder import StanRecorder from instana.sampling import InstanaSampler, Sampler -from instana.span.kind import EXIT_SPANS from instana.span.span import InstanaSpan, get_current_span from instana.span_context import SpanContext from instana.util.ids import generate_id @@ -138,9 +134,6 @@ def start_span( # events: Sequence[Event] = None, ) - if name in EXIT_SPANS: - self._add_stack(span) - return span @contextmanager @@ -174,39 +167,6 @@ def start_as_current_span( ) as span: yield span - def _add_stack(self, span: InstanaSpan, limit: Optional[int] = 30) -> None: - """ - Adds a backtrace to . The default length limit for - stack traces is 30 frames. A hard limit of 40 frames is enforced. - """ - try: - sanitized_stack = [] - if limit > 40: - limit = 40 - - trace_back = traceback.extract_stack() - trace_back.reverse() - for frame in trace_back: - # Exclude Instana frames unless we're in dev mode - if "INSTANA_DEBUG" not in os.environ: - if re_tracer_frame.search(frame[0]) is not None: - continue - - if re_with_stan_frame.search(frame[2]) is not None: - continue - - sanitized_stack.append({"c": frame[0], "n": frame[1], "m": frame[2]}) - - if len(sanitized_stack) > limit: - # (limit * -1) gives us negative form of used for - # slicing from the end of the list. e.g. stack[-30:] - span.stack = sanitized_stack[(limit * -1) :] - else: - span.stack = sanitized_stack - except Exception: - # No fail - pass - def _create_span_context(self, parent_context: SpanContext) -> SpanContext: """Creates a new SpanContext based on the given parent context.""" @@ -270,8 +230,3 @@ def extract( return self._propagators[format].extract(carrier, disable_w3c_trace_context) raise UnsupportedFormatException() - - -# Used by __add_stack -re_tracer_frame = re.compile(r"/instana/.*\.py$") -re_with_stan_frame = re.compile("with_instana") diff --git a/tests/span/test_span_stack_trace.py b/tests/span/test_span_stack_trace.py new file mode 100644 index 00000000..e31df2be --- /dev/null +++ b/tests/span/test_span_stack_trace.py @@ -0,0 +1,271 @@ +# (c) Copyright IBM Corp. 2025 + +"""Tests for stack trace collection functionality.""" + +from typing import Generator + +import pytest + +from instana.recorder import StanRecorder +from instana.span.span import InstanaSpan +from instana.span.stack_trace import add_stack, add_stack_trace_if_needed +from instana.span_context import SpanContext + + +class TestSpanStackTrace: + """Test stack trace collection for spans.""" + + @pytest.fixture(autouse=True) + def _resource(self) -> Generator[None, None, None]: + self.span = None + yield + if isinstance(self.span, InstanaSpan): + self.span.events.clear() + + def test_add_stack_hard_limit( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test that stack trace is capped at 40 frames even with higher limit.""" + span_name = "redis" # EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + # Manually set a high limit in options + span_processor.agent.options.stack_trace_length = 50 + + # Call add_stack directly with is_errored=False + stack = add_stack( + level=span_processor.agent.options.stack_trace_level, + limit=span_processor.agent.options.stack_trace_length, + is_errored=False + ) + + # Check if default is set + assert span_processor.agent.options.stack_trace_level == "all" + + assert stack + assert len(stack) <= 40 # Hard cap at 40 + + stack_0 = stack[0] + assert len(stack_0) == 3 + assert "c" in stack_0.keys() + assert "n" in stack_0.keys() + assert "m" in stack_0.keys() + + def test_add_stack_level_all( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test stack trace collection with level='all'.""" + span_name = "http" # EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + span_processor.agent.options.stack_trace_level = "all" + test_limit = 5 + span_processor.agent.options.stack_trace_length = test_limit + + # Non-errored span should get stack trace + stack = add_stack( + level=span_processor.agent.options.stack_trace_level, + limit=span_processor.agent.options.stack_trace_length, + is_errored=False + ) + + assert stack + assert len(stack) <= test_limit + + def test_add_stack_level_error_not_errored( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test that non-errored spans don't get stack trace with level='error'.""" + span_name = "http" # EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + span_processor.agent.options.stack_trace_level = "error" + span_processor.agent.options.stack_trace_length = 35 + + # Non-errored span should NOT get stack trace + stack = add_stack( + level=span_processor.agent.options.stack_trace_level, + limit=span_processor.agent.options.stack_trace_length, + is_errored=False + ) + + assert stack is None + + def test_add_stack_level_error_errored( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test that errored spans get full stack trace with level='error'.""" + span_name = "http" # EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + span_processor.agent.options.stack_trace_level = "error" + test_limit = 10 + span_processor.agent.options.stack_trace_length = test_limit + + # Errored span should get FULL stack trace (no limit) + stack = add_stack( + level=span_processor.agent.options.stack_trace_level, + limit=span_processor.agent.options.stack_trace_length, + is_errored=True + ) + + assert stack + # Should have more than the configured limit since it's errored + assert len(stack) >= test_limit + + def test_add_stack_level_none( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test that no stack trace is collected with level='none'.""" + span_name = "http" # EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + span_processor.agent.options.stack_trace_level = "none" + span_processor.agent.options.stack_trace_length = 20 + + # Should NOT get stack trace + stack = add_stack( + level=span_processor.agent.options.stack_trace_level, + limit=span_processor.agent.options.stack_trace_length, + is_errored=False + ) + assert stack is None + + # Even errored spans should not get stack trace + stack = add_stack( + level=span_processor.agent.options.stack_trace_level, + limit=span_processor.agent.options.stack_trace_length, + is_errored=True + ) + assert stack is None + + def test_add_stack_errored_span_full_stack( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test that errored spans get full stack regardless of level setting.""" + span_name = "mysql" # EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + # Set level to 'all' with a low limit + span_processor.agent.options.stack_trace_level = "all" + test_limit = 5 + span_processor.agent.options.stack_trace_length = test_limit + + # Errored span should get FULL stack (not limited to 5) + stack = add_stack( + level=span_processor.agent.options.stack_trace_level, + limit=span_processor.agent.options.stack_trace_length, + is_errored=True + ) + + assert stack + # Should have more than the configured limit since it's errored + assert len(stack) > test_limit + + def test_add_stack_trace_if_needed_exit_span( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test add_stack_trace_if_needed for EXIT spans.""" + span_name = "redis" # EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + # Call the function that checks if it's an EXIT span + add_stack_trace_if_needed(self.span) + + assert self.span.stack + + def test_add_stack_trace_if_needed_non_exit_span( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test add_stack_trace_if_needed for non-EXIT spans.""" + span_name = "wsgi" # Not an EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + # Call the function - should not add stack for non-EXIT spans + add_stack_trace_if_needed(self.span) + + assert not self.span.stack + + def test_add_stack_trace_if_needed_errored_span( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test add_stack_trace_if_needed detects errored spans.""" + span_name = "httpx" # EXIT span + attributes = {"ec": 1} # Mark as errored + self.span = InstanaSpan( + span_name, span_context, span_processor, attributes=attributes + ) + + test_limit = 5 + span_processor.agent.options.stack_trace_length = test_limit + + # Call the function - should detect error and use full stack + add_stack_trace_if_needed(self.span) + + assert self.span.stack + # Should have more than limit since it's errored + assert len(self.span.stack) > test_limit + + def test_span_end_collects_stack_trace( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test that span.end() triggers stack trace collection for EXIT spans.""" + span_name = "urllib3" # EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + assert not self.span.stack + + # End the span - should trigger stack trace collection + self.span.end() + + assert self.span.stack + assert self.span.end_time + + def test_stack_frame_format( + self, + span_context: SpanContext, + span_processor: StanRecorder, + ) -> None: + """Test that stack frames have correct format.""" + span_name = "postgres" # EXIT span + self.span = InstanaSpan(span_name, span_context, span_processor) + + test_limit = 5 + span_processor.agent.options.stack_trace_length = test_limit + + # Use add_stack directly + stack = add_stack( + level=span_processor.agent.options.stack_trace_level, + limit=span_processor.agent.options.stack_trace_length, + is_errored=False + ) + + assert stack + for frame in stack: + assert isinstance(frame, dict) + assert "c" in frame # file path + assert "n" in frame # line number + assert "m" in frame # method name + assert isinstance(frame["c"], str) + assert isinstance(frame["n"], int) + assert isinstance(frame["m"], str) diff --git a/tests/test_options.py b/tests/test_options.py index 4c3d3869..a532787b 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -637,4 +637,105 @@ def test_gcr_options_with_env_vars(self) -> None: assert self.gcr_options.log_level == logging.INFO -# Made with Bob +class TestStackTraceConfiguration: + """Test stack trace configuration options.""" + + @pytest.fixture(autouse=True) + def _resource(self) -> Generator[None, None, None]: + self.options = None + yield + if "tracing" in config.keys(): + del config["tracing"] + + def test_stack_trace_defaults(self) -> None: + """Test default stack trace configuration.""" + self.options = BaseOptions() + + assert self.options.stack_trace_level == "all" + assert self.options.stack_trace_length == 30 + + @pytest.mark.parametrize( + "level_value,expected_level", + [ + ("error", "error"), + ("none", "none"), + ("all", "all"), + ("ERROR", "error"), # Case insensitive + ], + ) + def test_stack_trace_level_env_var( + self, + level_value: str, + expected_level: str, + ) -> None: + """Test INSTANA_STACK_TRACE environment variable with valid values.""" + with patch.dict(os.environ, {"INSTANA_STACK_TRACE": level_value}): + self.options = BaseOptions() + assert self.options.stack_trace_level == expected_level + assert self.options.stack_trace_length == 30 # Default + + def test_stack_trace_level_env_var_invalid( + self, + caplog: pytest.LogCaptureFixture, + ) -> None: + """Test INSTANA_STACK_TRACE with invalid value falls back to default.""" + caplog.set_level(logging.WARNING, logger="instana") + with patch.dict(os.environ, {"INSTANA_STACK_TRACE": "INVALID"}): + self.options = BaseOptions() + assert self.options.stack_trace_level == "all" # Falls back to default + assert any( + "Invalid INSTANA_STACK_TRACE value" in message + for message in caplog.messages + ) + + @pytest.mark.parametrize( + "length_value,expected_length", + [ + ("25", 25), + ("60", 60), # Not capped here, capped when _add_stack() is called + ], + ) + def test_stack_trace_length_env_var( + self, + length_value: str, + expected_length: int, + ) -> None: + """Test INSTANA_STACK_TRACE_LENGTH environment variable with valid values.""" + with patch.dict(os.environ, {"INSTANA_STACK_TRACE_LENGTH": length_value}): + self.options = BaseOptions() + assert self.options.stack_trace_level == "all" # Default + assert self.options.stack_trace_length == expected_length + + @pytest.mark.parametrize( + "length_value,expected_warning", + [ + ("0", "must be positive"), + ("-5", "must be positive"), + ("invalid", "Invalid INSTANA_STACK_TRACE_LENGTH"), + ], + ) + def test_stack_trace_length_env_var_invalid( + self, + caplog: pytest.LogCaptureFixture, + length_value: str, + expected_warning: str, + ) -> None: + """Test INSTANA_STACK_TRACE_LENGTH with invalid values.""" + caplog.set_level(logging.WARNING, logger="instana") + with patch.dict(os.environ, {"INSTANA_STACK_TRACE_LENGTH": length_value}): + self.options = BaseOptions() + assert self.options.stack_trace_length == 30 # Falls back to default + assert any(expected_warning in message for message in caplog.messages) + + def test_stack_trace_both_env_vars(self) -> None: + """Test both INSTANA_STACK_TRACE and INSTANA_STACK_TRACE_LENGTH.""" + with patch.dict( + os.environ, + { + "INSTANA_STACK_TRACE": "error", + "INSTANA_STACK_TRACE_LENGTH": "15", + }, + ): + self.options = BaseOptions() + assert self.options.stack_trace_level == "error" + assert self.options.stack_trace_length == 15 diff --git a/tests/test_tracer.py b/tests/test_tracer.py index 79991d8d..13ed495e 100644 --- a/tests/test_tracer.py +++ b/tests/test_tracer.py @@ -48,28 +48,6 @@ def test_tracer_start_span( assert not span.stack -def test_tracer_start_span_with_stack(tracer_provider: InstanaTracerProvider) -> None: - span_name = "log" - tracer = InstanaTracer( - tracer_provider.sampler, - tracer_provider._span_processor, - tracer_provider._exporter, - tracer_provider._propagators, - ) - span = tracer.start_span(name=span_name) - - assert span - assert isinstance(span, InstanaSpan) - assert span.name == span_name - assert span.stack - - stack_0 = span.stack[0] - assert 3 == len(stack_0) - assert "c" in stack_0.keys() - assert "n" in stack_0.keys() - assert "m" in stack_0.keys() - - def test_tracer_start_span_Exception( mocker, tracer_provider: InstanaTracerProvider, span_context: SpanContext ) -> None: @@ -164,43 +142,3 @@ def test_tracer_create_span_context_root( assert new_span_context.trace_id == new_span_context.span_id -def test_tracer_add_stack_high_limit( - span: InstanaSpan, tracer_provider: InstanaTracerProvider -) -> None: - tracer = InstanaTracer( - tracer_provider.sampler, - tracer_provider._span_processor, - tracer_provider._exporter, - tracer_provider._propagators, - ) - tracer._add_stack(span, 50) - - assert span.stack - assert 40 >= len(span.stack) - - stack_0 = span.stack[0] - assert 3 == len(stack_0) - assert "c" in stack_0.keys() - assert "n" in stack_0.keys() - assert "m" in stack_0.keys() - - -def test_tracer_add_stack_low_limit( - span: InstanaSpan, tracer_provider: InstanaTracerProvider -) -> None: - tracer = InstanaTracer( - tracer_provider.sampler, - tracer_provider._span_processor, - tracer_provider._exporter, - tracer_provider._propagators, - ) - tracer._add_stack(span, 5) - - assert span.stack - assert 5 >= len(span.stack) - - stack_0 = span.stack[0] - assert 3 == len(stack_0) - assert "c" in stack_0.keys() - assert "n" in stack_0.keys() - assert "m" in stack_0.keys()