From 9825bcc2d8d052758a2002776a2f5401587ded7b Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 15 Jun 2026 08:43:36 +0200 Subject: [PATCH 1/7] fix logger --- haystack/logging.py | 55 +++- haystack/utils/requests_utils.py | 5 +- ...ogging-configuration-4a38bf0c8ea89fc9.yaml | 22 ++ test/test_logging.py | 243 ++++++++++++++---- 4 files changed, 266 insertions(+), 59 deletions(-) create mode 100644 releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml diff --git a/haystack/logging.py b/haystack/logging.py index 76139aa5b6..c6685d1361 100644 --- a/haystack/logging.py +++ b/haystack/logging.py @@ -8,6 +8,7 @@ import os import sys import typing +from collections.abc import Sequence from typing import Any if typing.TYPE_CHECKING: @@ -295,7 +296,11 @@ def correlate_logs_with_traces(_: "WrappedLogger", __: str, event_dict: "EventDi return event_dict -def configure_logging(use_json: bool | None = None) -> None: +def configure_logging( + use_json: bool | None = None, + logger_name: str | Sequence[str] = ("haystack", "haystack_integrations", "haystack_experimental"), + propagate: bool = True, +) -> None: """ Configure logging for Haystack. @@ -306,6 +311,20 @@ def configure_logging(use_json: bool | None = None) -> None: - If `structlog` is installed, you can JSON format all logs. Enable this by - setting the `use_json` parameter to `True` when calling this function - setting the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true` + + :param use_json: Whether to format logs as JSON. If `None`, we try to guess based on the environment. + :param logger_name: + The name (or names) of the logger our formatting handler is attached to. Defaults to Haystack's own + namespaces (`"haystack"`, `"haystack_integrations"` and `"haystack_experimental"`), so that we only touch + Haystack's own loggers and leave the logging configuration of the host application and any other libraries + running in the same process untouched. Pass an empty string (`""`) to attach the handler to the root logger + instead - this restores the legacy behavior of formatting *every* log record in the process. + :param propagate: + Whether the configured loggers should propagate their records to ancestor loggers (ultimately the root + logger). The default (`True`) keeps records flowing to handlers configured by the host application and to + capturing tools such as `pytest`'s `caplog`. Set it to `False` to make Haystack fully own the output of its + own logs - this avoids duplicate log lines when the host application also configures the root logger. It has + no effect when `logger_name=""` (the root logger has no ancestors). """ import haystack.utils.jupyter # to avoid circular imports @@ -352,11 +371,16 @@ def configure_logging(use_json: bool | None = None) -> None: shared_processors.append(correlate_logs_with_traces) structlog.configure( - processors=shared_processors + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter], + # `filter_by_level` reads the effective level from the underlying stdlib logger on *every* call, so changes + # to the log level made after `configure_logging` runs (e.g. by the host application) are respected. + processors=[ + structlog.stdlib.filter_by_level, + *shared_processors, + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], logger_factory=structlog.stdlib.LoggerFactory(ignore_frame_names=["haystack.logging"]), cache_logger_on_first_use=True, - # This is a filter that will filter out log entries that are below the log level of the root logger. - wrapper_class=structlog.make_filtering_bound_logger(min_level=logging.root.getEffectiveLevel()), + wrapper_class=structlog.stdlib.BoundLogger, ) renderers: list[Processor] @@ -392,12 +416,17 @@ def configure_logging(use_json: bool | None = None) -> None: # Use OUR `ProcessorFormatter` to format all `logging` entries. handler.setFormatter(formatter) - root_logger = logging.getLogger() - # avoid adding our handler twice - old_handlers = [ - h - for h in root_logger.handlers - if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler") - ] - new_handlers = [handler, *old_handlers] - root_logger.handlers = new_handlers + # By default we attach to Haystack's own namespaces so that we only format Haystack's log records and leave the + # loggers of the host application and other libraries in the same process untouched. Pass `logger_name=""` to + # attach to the root logger instead (legacy behavior - formats every record in the process). + logger_names = [logger_name] if isinstance(logger_name, str) else list(logger_name) + for name in logger_names: + target_logger = logging.getLogger(name) + # avoid adding our handler twice + old_handlers = [ + h + for h in target_logger.handlers + if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler") + ] + target_logger.handlers = [handler, *old_handlers] + target_logger.propagate = propagate diff --git a/haystack/utils/requests_utils.py b/haystack/utils/requests_utils.py index c5270cc055..642b0ad45e 100644 --- a/haystack/utils/requests_utils.py +++ b/haystack/utils/requests_utils.py @@ -8,7 +8,10 @@ import httpx from tenacity import after_log, before_log, retry, retry_if_exception_type, stop_after_attempt, wait_exponential -logger = logging.getLogger(__file__) +# NOTE: this uses the standard library logger (not `haystack.logging`) on purpose: tenacity's `before_log`/`after_log` +# call the logger with positional arguments, which Haystack's keyword-only patched logger would reject. We still name +# it with `__name__` so it lives under the `haystack` namespace and is picked up by `configure_logging`. +logger = logging.getLogger(__name__) def request_with_retry( diff --git a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml new file mode 100644 index 0000000000..8803b4f3ec --- /dev/null +++ b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml @@ -0,0 +1,22 @@ +--- +upgrade: + - | + ``haystack.logging.configure_logging`` now attaches its formatting handler to Haystack's own logger namespaces + (``haystack``, ``haystack_integrations`` and ``haystack_experimental``) instead of the root logger. This makes + Haystack a better-behaved library when it runs next to other services in the same process: the log records of the + host application and other libraries are no longer reformatted or duplicated by Haystack. To restore the previous + behavior of formatting every log record in the process, call ``configure_logging(logger_name="")``. +fixes: + - | + ``haystack.logging.configure_logging`` no longer freezes the log level at import time. Previously the level was + captured once when the function ran (at ``import haystack``), so a log level set later by the application was + ignored for ``structlog``-native loggers. The effective level is now read on every log call. + - | + Fixed the logger in ``haystack.utils.requests_utils`` being named after the module's file path instead of + ``haystack.utils.requests_utils``, which kept its records outside the ``haystack`` logger namespace. +features: + - | + ``haystack.logging.configure_logging`` gained two parameters: ``logger_name`` to choose which logger(s) the + formatting handler is attached to, and ``propagate`` to control whether Haystack's loggers propagate their + records to ancestor loggers. Set ``propagate=False`` to let Haystack fully own the output of its own logs and + avoid duplicate log lines when the host application also configures the root logger. diff --git a/test/test_logging.py b/test/test_logging.py index 20bdd5947f..04efb5f106 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -7,7 +7,7 @@ import logging import os import sys -from collections.abc import Generator +from collections.abc import Callable, Generator from datetime import datetime, timezone from pathlib import Path from unittest.mock import ANY @@ -25,10 +25,42 @@ @pytest.fixture(autouse=True) def reset_logging_config() -> None: - old_handlers = logging.root.handlers.copy() + # `configure_logging` attaches its handler to Haystack's own namespaces (and may flip `propagate`), so we snapshot + # and restore the root logger plus those namespaces to keep tests isolated. + names = ["haystack", "haystack_integrations", "haystack_experimental"] + root_handlers = logging.root.handlers.copy() + snapshots = {name: _snapshot_logger(name) for name in names} yield - # Reset the logging configuration after each test to avoid impacting other tests - logging.root.handlers = old_handlers + logging.root.handlers = root_handlers + for name, (handlers, propagate, level) in snapshots.items(): + logger = logging.getLogger(name) + logger.handlers = handlers + logger.propagate = propagate + logger.setLevel(level) + + +def _snapshot_logger(name: str) -> tuple[list[logging.Handler], bool, int]: + logger = logging.getLogger(name) + return logger.handlers.copy(), logger.propagate, logger.level + + +@pytest.fixture() +def restore_named_loggers() -> Generator[Callable[[str], logging.Logger], None, None]: + """Snapshot a named logger's handlers/propagate/level and restore them after the test.""" + snapshots: dict[str, tuple[list[logging.Handler], bool, int]] = {} + + def _snapshot(name: str) -> logging.Logger: + logger = logging.getLogger(name) + snapshots[name] = (logger.handlers.copy(), logger.propagate, logger.level) + return logger + + yield _snapshot + + for name, (handlers, propagate, level) in snapshots.items(): + logger = logging.getLogger(name) + logger.handlers = handlers + logger.propagate = propagate + logger.setLevel(level) @pytest.fixture() @@ -47,7 +79,7 @@ def test_skip_logging_configuration( monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # the pytest fixture caplog only captures logs being rendered from the stdlib logging module @@ -61,7 +93,7 @@ class TestStructuredLoggingConsoleRendering: def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.setLevel(logging.INFO) logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -72,7 +104,7 @@ def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -89,7 +121,7 @@ def test_console_rendered_structured_log_even_if_no_tty_but_python_config( haystack_logging.configure_logging(use_json=False) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -105,7 +137,7 @@ def test_console_rendered_structured_log_if_in_ipython( haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -121,7 +153,7 @@ def test_console_rendered_structured_log_even_in_jupyter( haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -137,7 +169,7 @@ def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env( haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -149,7 +181,7 @@ def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env( def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -170,7 +202,7 @@ def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None: def test_logging_exceptions(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") def function_that_raises_and_adds_to_stack_trace(): raise ValueError("This is an error") @@ -188,7 +220,7 @@ def function_that_raises_and_adds_to_stack_trace(): def test_logging_of_contextvars(self, capfd: CaptureFixture, set_context_var_key: str) -> None: haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -202,7 +234,7 @@ def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: M monkeypatch.setattr(sys.stderr, "isatty", lambda: False) haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -216,13 +248,13 @@ def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: M "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_as_json(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -236,14 +268,14 @@ def test_logging_as_json(self, capfd: CaptureFixture) -> None: "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None: monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -257,7 +289,7 @@ def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypat "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_of_contextvars( @@ -266,7 +298,7 @@ def test_logging_of_contextvars( monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -281,13 +313,13 @@ def test_logging_of_contextvars( "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") def function_that_raises_and_adds_to_stack_trace(): my_local_variable = "my_local_variable" # noqa: F841 @@ -306,7 +338,7 @@ def function_that_raises_and_adds_to_stack_trace(): "level": "error", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", "exception": [ { "exc_notes": [], @@ -340,7 +372,7 @@ def test_trace_log_correlation_python_logs_with_console_rendering( haystack_logging.configure_logging(use_json=False) with spying_tracer.trace("test-operation"): - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err @@ -350,7 +382,7 @@ def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, ca haystack_logging.configure_logging(use_json=True) with spying_tracer.trace("test-operation") as span: - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err @@ -365,13 +397,13 @@ def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, ca "trace_id": span.trace_id, "span_id": span.span_id, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -385,13 +417,13 @@ def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -405,7 +437,7 @@ def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } @@ -416,12 +448,12 @@ def test_correct_stack_level_with_stdlib_rendering( monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # the pytest fixture caplog only captures logs being rendered from the stdlib logging module assert caplog.messages == ["Hello, structured logging!"] - assert caplog.records[0].name == "test.test_logging" + assert caplog.records[0].name == "haystack.test_logging" # Nothing should be captured by capfd since structlog is not configured assert capfd.readouterr().err == "" @@ -429,11 +461,11 @@ def test_correct_stack_level_with_stdlib_rendering( def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err - assert "test.test_logging" in output + assert "haystack.test_logging" in output @pytest.mark.parametrize( "method, expected_level", @@ -450,7 +482,7 @@ def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) @@ -467,13 +499,13 @@ def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_le "level": expected_level, "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_log(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, "Hello, structured '{key}'!", key="logging", key1="value1", key2="value2") @@ -489,13 +521,13 @@ def test_log(self, capfd: LogCaptureFixture) -> None: "level": "debug", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_log_json_content(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, 'Hello, structured: {"key": "value"}', key="logging", key1="value1", key2="value2") @@ -511,13 +543,13 @@ def test_log_json_content(self, capfd: LogCaptureFixture) -> None: "level": "debug", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, "Hello, structured '{key}'!", key=LogCaptureFixture, key1="value1", key2="value2") @@ -533,7 +565,7 @@ def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: "level": "debug", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } @pytest.mark.parametrize( @@ -551,7 +583,7 @@ def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: def test_haystack_logger_with_positional_args(self, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) with pytest.raises(TypeError): @@ -572,7 +604,7 @@ def test_haystack_logger_with_positional_args(self, method: str, expected_level: def test_haystack_logger_with_old_interpolation(self, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) # does not raise - hence we need to check this separately @@ -600,3 +632,124 @@ def test_that_haystack_logger_is_used(self) -> None: f"{path} doesn't use the Haystack logger. Please use the Haystack logger instead of the " f"standard library logger and add plenty of keyword arguments." ) + + +class TestLoggingScope: + """ + Haystack is a library that usually runs next to other services in the same process (e.g. a web server, or another + app's logging setup). These tests pin down that `configure_logging` only touches Haystack's own loggers and does + not reformat or hijack the logs of everything else in the process. + """ + + def test_handler_is_attached_to_haystack_namespaces_and_not_root(self) -> None: + haystack_logging.configure_logging(use_json=True) + + # Haystack's own namespace and the ones used by integration and experimental packages get the handler. + for name in ["haystack", "haystack_integrations", "haystack_experimental"]: + logger = logging.getLogger(name) + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in logger.handlers) + + # The root logger - shared by every other library/service in the process - is left untouched. + assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in logging.root.handlers) + + def test_integrations_logs_are_formatted_by_haystack(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + logging.getLogger("haystack_integrations.components.demo").warning("a log line from an integration") + + output = capfd.readouterr().err + assert json.loads(output)["event"] == "a log line from an integration" + + def test_other_services_logs_are_not_reformatted_by_haystack( + self, capfd: CaptureFixture, restore_named_loggers: Callable[[str], logging.Logger] + ) -> None: + # Stand-in for another service that configured its own plain-text logging (e.g. uvicorn) before Haystack runs. + other_service = restore_named_loggers("some_other_service") + other_service.handlers = [logging.StreamHandler()] + other_service.setLevel(logging.INFO) + other_service.propagate = False + + haystack_logging.configure_logging(use_json=True) + + other_service.info("a log line from another service") + logging.getLogger("haystack.test_logging").warning("a log line from haystack") + + lines = [line for line in capfd.readouterr().err.splitlines() if line.strip()] + + # Haystack formats its own record as JSON ... + haystack_json = [json.loads(line) for line in lines if line.startswith("{") and "from haystack" in line] + assert any(record["event"] == "a log line from haystack" for record in haystack_json) + + # ... but the other service's record stays exactly as that service rendered it - plain text, not Haystack JSON. + plain_lines = [line for line in lines if "from another service" in line] + assert plain_lines + for line in plain_lines: + with pytest.raises(json.JSONDecodeError): + json.loads(line) + + def test_legacy_root_behavior_still_available_via_empty_logger_name(self, capfd: CaptureFixture) -> None: + # Opt back into the old behavior of formatting *every* record in the process. + haystack_logging.configure_logging(use_json=True, logger_name="") + + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in logging.root.handlers) + + # A non-Haystack logger is now formatted by Haystack because the handler sits on the root logger. + logging.getLogger("some_other_service").warning("formatted by haystack") + output = capfd.readouterr().err + assert json.loads(output)["event"] == "formatted by haystack" + + def test_propagate_is_true_by_default(self, caplog: LogCaptureFixture) -> None: + # The default keeps records flowing to ancestor loggers, so tooling that captures via the root logger (such as + # pytest's `caplog`) keeps working. + haystack_logging.configure_logging(use_json=True) + assert logging.getLogger("haystack").propagate is True + + with caplog.at_level(logging.WARNING, logger="haystack.test_logging"): + logging.getLogger("haystack.test_logging").warning("captured via propagation") + assert "captured via propagation" in caplog.text + + def test_propagate_false_stops_records_from_reaching_root(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True, propagate=False) + assert logging.getLogger("haystack").propagate is False + + # A plain handler on the root logger should NOT see Haystack's records when propagation is disabled. + root_handler = logging.StreamHandler() + root_handler.setFormatter(logging.Formatter("ROOT | %(message)s")) + logging.root.addHandler(root_handler) + try: + logging.getLogger("haystack.test_logging").warning("haystack owns this line") + finally: + logging.root.removeHandler(root_handler) + + output = capfd.readouterr().err + # Formatted once by Haystack (JSON), never by the root handler. + assert "ROOT |" not in output + assert json.loads(output)["event"] == "haystack owns this line" + + +class TestDynamicLogLevel: + """ + `configure_logging` runs at import time (in `haystack/__init__.py`), long before an application sets its desired + log level. These tests pin down that a log level set *after* `configure_logging` is still respected, instead of + being frozen to whatever the root level happened to be at import time. + """ + + def test_structlog_native_logger_respects_level_lowered_after_configure(self, capfd: CaptureFixture) -> None: + # Mirror the real ordering: configure while the level is high ... + logging.getLogger("haystack").setLevel(logging.WARNING) + haystack_logging.configure_logging(use_json=True) + + # ... then have the application lower the level afterwards. + logging.getLogger("haystack").setLevel(logging.DEBUG) + + structlog.get_logger("haystack.native_dynamic_level").debug("debug emitted after lowering the level") + + assert "debug emitted after lowering the level" in capfd.readouterr().err + + def test_structlog_native_logger_still_filters_below_level(self, capfd: CaptureFixture) -> None: + logging.getLogger("haystack").setLevel(logging.INFO) + haystack_logging.configure_logging(use_json=True) + + structlog.get_logger("haystack.native_filtered_level").debug("debug below the configured level") + + assert "debug below the configured level" not in capfd.readouterr().err From 7eedb323034c3f8b98646a50840753bb610d5095 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 15 Jun 2026 11:21:06 +0200 Subject: [PATCH 2/7] more fixes --- haystack/__init__.py | 5 +- haystack/logging.py | 37 +++-- ...ogging-configuration-4a38bf0c8ea89fc9.yaml | 20 ++- test/test_logging.py | 128 ++++++++++++++++++ 4 files changed, 175 insertions(+), 15 deletions(-) diff --git a/haystack/__init__.py b/haystack/__init__.py index ddbe46296c..6ee5d08553 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -18,8 +18,9 @@ from haystack.version import __version__ # noqa: F401 # Initialize the logging configuration -# This is a no-op unless `structlog` is installed -haystack.logging.configure_logging() +# This is a no-op unless `structlog` is installed, and `force=False` makes it skip configuration if `structlog` has +# already been configured by the host application (so importing Haystack does not overwrite their setup). +haystack.logging.configure_logging(force=False) # Same for tracing (no op if `opentelemetry` or `ddtrace` is not installed) haystack.tracing.auto_enable_tracing() diff --git a/haystack/logging.py b/haystack/logging.py index c6685d1361..36373c52aa 100644 --- a/haystack/logging.py +++ b/haystack/logging.py @@ -17,6 +17,10 @@ HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON" HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG" +# Attribute set on a logger once we have patched its methods. `logging.getLogger` returns a shared singleton, so we +# use this marker to patch each logger only once and avoid wrapping the already-wrapped methods on repeated calls. +_PATCHED_MARKER = "_haystack_patched" + class PatchedLogger(typing.Protocol): """Class which enables using type checkers to find wrong logger usage.""" @@ -221,16 +225,13 @@ def _patch_structlog_call_information(logger: logging.Logger) -> None: if not isinstance(logger, _FixedFindCallerLogger): return - # completely copied from structlog. We only add `haystack.logging` to the list of ignored frames + # Copied from structlog's `_FixedFindCallerLogger.findCaller`, only adding `haystack.logging` to the list of + # ignored frames so our own logging wrappers don't show up as the caller. We deliberately do not forward + # `stacklevel` to `_find_first_app_frame_and_name`: that parameter only exists in structlog >= 25.5.0 and + # structlog is an optional dependency, so forwarding it would break logging on older versions. def findCaller(stack_info: bool = False, stacklevel: int = 1) -> tuple[str, int, str, str | None]: # noqa: ARG001 - try: - sinfo: str | None - # we need to exclude `haystack.logging` from the stack - f, name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) - sinfo = _format_stack(f) if stack_info else None - except Exception as error: - print(f"Error in findCaller: {error}") - + f, _name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) + sinfo = _format_stack(f) if stack_info else None return f.f_code.co_filename, f.f_lineno, f.f_code.co_name, sinfo logger.findCaller = findCaller # type: ignore @@ -248,6 +249,11 @@ def getLogger(name: str) -> PatchedLogger: - it makes structure logging effective, not just an available feature """ logger = logging.getLogger(name) + if getattr(logger, _PATCHED_MARKER, False): + # Already patched: `logging.getLogger` returned the same singleton, so re-patching would stack the wrappers + # and interpolate the message more than once. + return typing.cast(PatchedLogger, logger) + logger.debug = patch_log_method_to_kwargs_only(logger.debug) # type: ignore logger.info = patch_log_method_to_kwargs_only(logger.info) # type: ignore logger.warn = patch_log_method_to_kwargs_only(logger.warn) # type: ignore @@ -263,6 +269,8 @@ def getLogger(name: str) -> PatchedLogger: # We also patch the `makeRecord` method to use keyword string interpolation logger.makeRecord = patch_make_records_to_use_kwarg_string_interpolation(logger.makeRecord) # type: ignore + setattr(logger, _PATCHED_MARKER, True) + return typing.cast(PatchedLogger, logger) @@ -300,6 +308,7 @@ def configure_logging( use_json: bool | None = None, logger_name: str | Sequence[str] = ("haystack", "haystack_integrations", "haystack_experimental"), propagate: bool = True, + force: bool = True, ) -> None: """ Configure logging for Haystack. @@ -325,6 +334,11 @@ def configure_logging( capturing tools such as `pytest`'s `caplog`. Set it to `False` to make Haystack fully own the output of its own logs - this avoids duplicate log lines when the host application also configures the root logger. It has no effect when `logger_name=""` (the root logger has no ancestors). + :param force: + Whether to (re)configure logging even if `structlog` has already been configured by someone else. The default + (`True`) means an explicit call always takes over. Pass `False` to make this a no-op when `structlog` is + already configured - this is used by the import-time call in `haystack/__init__.py` so that merely importing + Haystack does not overwrite a `structlog` configuration set up by the host application. """ import haystack.utils.jupyter # to avoid circular imports @@ -341,6 +355,11 @@ def configure_logging( # If the user wants to ignore structlog, we don't configure it and fall back to standard logging return + # When not forcing, skip configuration if structlog is already configured (e.g. by the host application) so we + # leave its configuration and handlers untouched. + if not force and structlog.is_configured(): + return + # We roughly follow the structlog documentation here: # https://www.structlog.org/en/stable/standard-library.html#rendering-using-structlog-based-formatters-within-logging # This means that we use structlog to format the log entries for entries emitted via `logging` and `structlog`. diff --git a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml index 8803b4f3ec..d49aaf8f3f 100644 --- a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml +++ b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml @@ -14,9 +14,21 @@ fixes: - | Fixed the logger in ``haystack.utils.requests_utils`` being named after the module's file path instead of ``haystack.utils.requests_utils``, which kept its records outside the ``haystack`` logger namespace. + - | + Importing Haystack no longer overwrites a ``structlog`` configuration that the host application already set up. + The import-time call to ``configure_logging`` now skips configuration when ``structlog`` is already configured. + - | + ``haystack.logging.getLogger`` is now idempotent. Previously, calling it more than once for the same logger name + wrapped the already-wrapped logger methods again, which caused the log message to be run through ``str.format`` + once per call. As a result a field value containing ``{...}`` could be re-interpolated and pull in the value of + another field. Each logger is now patched only once. + - | + The patched ``findCaller`` used to determine the source of a log entry no longer prints to stdout and no longer + masks errors with a misleading ``NameError``, matching structlog's own ``findCaller`` implementation. features: - | - ``haystack.logging.configure_logging`` gained two parameters: ``logger_name`` to choose which logger(s) the - formatting handler is attached to, and ``propagate`` to control whether Haystack's loggers propagate their - records to ancestor loggers. Set ``propagate=False`` to let Haystack fully own the output of its own logs and - avoid duplicate log lines when the host application also configures the root logger. + ``haystack.logging.configure_logging`` gained three parameters: ``logger_name`` to choose which logger(s) the + formatting handler is attached to, ``propagate`` to control whether Haystack's loggers propagate their records to + ancestor loggers, and ``force`` to control whether an existing ``structlog`` configuration is replaced. Set + ``propagate=False`` to let Haystack fully own the output of its own logs and avoid duplicate log lines when the + host application also configures the root logger. diff --git a/test/test_logging.py b/test/test_logging.py index 04efb5f106..3ae48ad6d7 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -14,6 +14,8 @@ import pytest import structlog +import structlog._frames +import structlog.stdlib from _pytest.capture import CaptureFixture from _pytest.logging import LogCaptureFixture from _pytest.monkeypatch import MonkeyPatch @@ -63,6 +65,23 @@ def _snapshot(name: str) -> logging.Logger: logger.setLevel(level) +@pytest.fixture() +def restore_structlog_config() -> Generator[None, None, None]: + """Snapshot the global structlog configuration and restore it after the test.""" + was_configured = structlog.is_configured() + config = structlog.get_config() + yield + if was_configured: + structlog.configure(**config) + else: + structlog.reset_defaults() + + +def _sentinel_processor(logger: object, method_name: str, event_dict: dict) -> dict: + """A no-op processor used to detect whether an existing structlog config was left untouched.""" + return event_dict + + @pytest.fixture() def set_context_var_key() -> Generator[str, None, None]: structlog.contextvars.bind_contextvars(context_var="value") @@ -753,3 +772,112 @@ def test_structlog_native_logger_still_filters_below_level(self, capfd: CaptureF structlog.get_logger("haystack.native_filtered_level").debug("debug below the configured level") assert "debug below the configured level" not in capfd.readouterr().err + + +class TestStructlogConfigIsPreserved: + """ + `structlog.configure` writes to a single process-global configuration. These tests pin down that merely importing + Haystack (which calls `configure_logging(force=False)`) does not overwrite a structlog configuration that the host + application already set up, while an explicit call still takes over. + """ + + def test_not_forced_skips_when_structlog_already_configured(self, restore_structlog_config: None) -> None: + # Stand-in for the host application configuring structlog before Haystack is imported/configured. + structlog.reset_defaults() + structlog.configure(processors=[_sentinel_processor]) + haystack_logger = logging.getLogger("haystack") + haystack_logger.handlers = [] + + haystack_logging.configure_logging(force=False) + + # The application's structlog configuration is left untouched ... + assert structlog.get_config()["processors"] == [_sentinel_processor] + # ... and we did not attach our handler on top of their setup. + assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + + def test_forced_takes_over_existing_structlog_config(self, restore_structlog_config: None) -> None: + structlog.reset_defaults() + structlog.configure(processors=[_sentinel_processor]) + haystack_logger = logging.getLogger("haystack") + haystack_logger.handlers = [] + + haystack_logging.configure_logging(use_json=True, force=True) + + assert structlog.get_config()["processors"] != [_sentinel_processor] + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + + def test_not_forced_still_configures_when_structlog_is_unconfigured(self, restore_structlog_config: None) -> None: + # This is the real import-time situation: nobody configured structlog yet, so we set up our nice defaults. + structlog.reset_defaults() + haystack_logger = logging.getLogger("haystack") + haystack_logger.handlers = [] + assert not structlog.is_configured() + + haystack_logging.configure_logging(force=False) + + assert structlog.is_configured() + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + + +class TestGetLoggerIsIdempotent: + """ + `logging.getLogger(name)` returns a process-wide singleton. `haystack.logging.getLogger` patches that shared + object in place, so calling it more than once for the same name (different modules, re-imports, ...) must not wrap + the already-wrapped methods again. The user-visible symptom of re-wrapping is that the message is run through + `str.format` once per wrap, so a field value that itself contains `{...}` gets re-interpolated. + """ + + def test_repeated_get_logger_interpolates_the_message_exactly_once(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + # Two modules grabbing the same logger name is the realistic trigger for re-wrapping. + haystack_logging.getLogger("haystack.idempotency_test") + logger = haystack_logging.getLogger("haystack.idempotency_test") + logger.setLevel(logging.INFO) + + # `a`'s value contains a `{b}` placeholder. With a single interpolation it must be left as-is; a second + # interpolation would expand it using `b` and leak "SECRET" into the message. + logger.info("Hello {a}", a="{b}", b="SECRET") + + parsed_output = json.loads(capfd.readouterr().err) + assert parsed_output["event"] == "Hello {b}" + assert "SECRET" not in parsed_output["event"] + + def test_repeated_get_logger_does_not_rewrap_methods(self) -> None: + haystack_logging.getLogger("haystack.idempotency_identity_test") + # Capture the patched methods after the first call, before the second one runs. + patched = logging.getLogger("haystack.idempotency_identity_test") + debug_after_first = patched.debug + make_record_after_first = patched.makeRecord + + haystack_logging.getLogger("haystack.idempotency_identity_test") + + # The second call must leave the already-patched methods in place, not wrap a fresh layer on top. + assert patched.debug is debug_after_first + assert patched.makeRecord is make_record_after_first + + +class TestFindCallerMatchesStructlog: + """ + `_patch_structlog_call_information` mirrors structlog's `_FixedFindCallerLogger.findCaller`, only adding + `haystack.logging` to the ignored frames. structlog itself does not guard the frame lookup, so neither do we: any + error must propagate as-is instead of being swallowed and printed to stdout. + """ + + def test_find_caller_does_not_print_or_mask_errors(self, capsys: CaptureFixture, monkeypatch: MonkeyPatch) -> None: + # Force the frame lookup to fail. It is imported inside `_patch_structlog_call_information`, so we patch the + # module attribute before patching the logger. + def boom(*args: object, **kwargs: object) -> tuple: + raise RuntimeError("frame lookup failed") + + monkeypatch.setattr(structlog._frames, "_find_first_app_frame_and_name", boom) + + logger = structlog.stdlib._FixedFindCallerLogger("haystack.find_caller_test") + haystack_logging._patch_structlog_call_information(logger) + + # The original error must propagate (not be masked by a NameError on an unbound `f`) ... + with pytest.raises(RuntimeError, match="frame lookup failed"): + logger.findCaller() + + # ... and nothing must be written to stdout. + assert capsys.readouterr().out == "" From d9189160d6f40e4697808202b6faeae190b83d98 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 22 Jun 2026 12:50:44 +0200 Subject: [PATCH 3/7] PR comments --- haystack/__init__.py | 7 +- haystack/logging.py | 72 +++++++++++-------- ...ogging-configuration-4a38bf0c8ea89fc9.yaml | 14 ++-- test/test_logging.py | 33 +++++++-- 4 files changed, 82 insertions(+), 44 deletions(-) diff --git a/haystack/__init__.py b/haystack/__init__.py index 6ee5d08553..2c92d7fce7 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -17,9 +17,10 @@ from haystack.dataclasses import Answer, Document, ExtractedAnswer, GeneratedAnswer from haystack.version import __version__ # noqa: F401 -# Initialize the logging configuration -# This is a no-op unless `structlog` is installed, and `force=False` makes it skip configuration if `structlog` has -# already been configured by the host application (so importing Haystack does not overwrite their setup). +# Initialize the logging configuration. +# This is a no-op unless `structlog` is installed. `force=False` means we only install our own scoped logging handler +# (so Haystack's logs are formatted) without touching the process-global `structlog` configuration - importing +# Haystack must not reconfigure `structlog` for the host application's own native `structlog` loggers. haystack.logging.configure_logging(force=False) # Same for tracing (no op if `opentelemetry` or `ddtrace` is not installed) diff --git a/haystack/logging.py b/haystack/logging.py index 36373c52aa..fac3042c32 100644 --- a/haystack/logging.py +++ b/haystack/logging.py @@ -21,6 +21,14 @@ # use this marker to patch each logger only once and avoid wrapping the already-wrapped methods on repeated calls. _PATCHED_MARKER = "_haystack_patched" +# Name of the formatting handler we install. We use it to find and remove our own handler across (re)configurations. +_HANDLER_NAME = "HaystackLoggingHandler" + + +def _is_haystack_logging_handler(handler: logging.Handler) -> bool: + """Return whether the given handler is the one installed by `configure_logging`.""" + return isinstance(handler, logging.StreamHandler) and getattr(handler, "name", None) == _HANDLER_NAME + class PatchedLogger(typing.Protocol): """Class which enables using type checkers to find wrong logger usage.""" @@ -335,10 +343,11 @@ def configure_logging( own logs - this avoids duplicate log lines when the host application also configures the root logger. It has no effect when `logger_name=""` (the root logger has no ancestors). :param force: - Whether to (re)configure logging even if `structlog` has already been configured by someone else. The default - (`True`) means an explicit call always takes over. Pass `False` to make this a no-op when `structlog` is - already configured - this is used by the import-time call in `haystack/__init__.py` so that merely importing - Haystack does not overwrite a `structlog` configuration set up by the host application. + Whether this call owns the process-global `structlog` configuration. The default (`True`) means an explicit + call configures `structlog` and takes over any configuration set up by someone else. Pass `False` (as the + import-time call in `haystack/__init__.py` does) to leave the global `structlog` configuration untouched and + only install our own scoped handler so Haystack's own logs are formatted. This keeps merely importing Haystack + from reconfiguring `structlog` for the host application's own native `structlog` loggers. """ import haystack.utils.jupyter # to avoid circular imports @@ -355,11 +364,6 @@ def configure_logging( # If the user wants to ignore structlog, we don't configure it and fall back to standard logging return - # When not forcing, skip configuration if structlog is already configured (e.g. by the host application) so we - # leave its configuration and handlers untouched. - if not force and structlog.is_configured(): - return - # We roughly follow the structlog documentation here: # https://www.structlog.org/en/stable/standard-library.html#rendering-using-structlog-based-formatters-within-logging # This means that we use structlog to format the log entries for entries emitted via `logging` and `structlog`. @@ -389,18 +393,23 @@ def configure_logging( # We only need that in sophisticated production setups where we want to correlate logs with traces shared_processors.append(correlate_logs_with_traces) - structlog.configure( - # `filter_by_level` reads the effective level from the underlying stdlib logger on *every* call, so changes - # to the log level made after `configure_logging` runs (e.g. by the host application) are respected. - processors=[ - structlog.stdlib.filter_by_level, - *shared_processors, - structlog.stdlib.ProcessorFormatter.wrap_for_formatter, - ], - logger_factory=structlog.stdlib.LoggerFactory(ignore_frame_names=["haystack.logging"]), - cache_logger_on_first_use=True, - wrapper_class=structlog.stdlib.BoundLogger, - ) + # `structlog.configure` writes to a single process-global configuration that affects *every* native structlog + # logger in the process, not just Haystack's. We therefore only touch it on an explicit (forced) call. The + # import-time auto-config (`force=False`) skips it and relies solely on the scoped handler installed below to + # format Haystack's own (stdlib) logs, leaving the host application's native structlog loggers untouched. + if force: + structlog.configure( + # `filter_by_level` reads the effective level from the underlying stdlib logger on *every* call, so + # changes to the log level made after `configure_logging` runs (e.g. by the host app) are respected. + processors=[ + structlog.stdlib.filter_by_level, + *shared_processors, + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], + logger_factory=structlog.stdlib.LoggerFactory(ignore_frame_names=["haystack.logging"]), + cache_logger_on_first_use=True, + wrapper_class=structlog.stdlib.BoundLogger, + ) renderers: list[Processor] if use_json: @@ -431,7 +440,7 @@ def configure_logging( ) handler = logging.StreamHandler() - handler.name = "HaystackLoggingHandler" + handler.name = _HANDLER_NAME # Use OUR `ProcessorFormatter` to format all `logging` entries. handler.setFormatter(formatter) @@ -439,13 +448,18 @@ def configure_logging( # loggers of the host application and other libraries in the same process untouched. Pass `logger_name=""` to # attach to the root logger instead (legacy behavior - formats every record in the process). logger_names = [logger_name] if isinstance(logger_name, str) else list(logger_name) + + # Remove our handler from every logger that currently carries it before re-installing. This keeps re-configuration + # idempotent and, crucially, prevents records from being emitted twice when the target changes (e.g. switching to + # the root logger via `logger_name=""` while a previous call left handlers on the namespace loggers). + existing_loggers = [logging.getLogger(), *logging.Logger.manager.loggerDict.values()] + for existing_logger in existing_loggers: + if isinstance(existing_logger, logging.Logger) and any( + _is_haystack_logging_handler(h) for h in existing_logger.handlers + ): + existing_logger.handlers = [h for h in existing_logger.handlers if not _is_haystack_logging_handler(h)] + for name in logger_names: target_logger = logging.getLogger(name) - # avoid adding our handler twice - old_handlers = [ - h - for h in target_logger.handlers - if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler") - ] - target_logger.handlers = [handler, *old_handlers] + target_logger.handlers = [handler, *target_logger.handlers] target_logger.propagate = propagate diff --git a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml index d49aaf8f3f..94185edc31 100644 --- a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml +++ b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml @@ -6,6 +6,11 @@ upgrade: Haystack a better-behaved library when it runs next to other services in the same process: the log records of the host application and other libraries are no longer reformatted or duplicated by Haystack. To restore the previous behavior of formatting every log record in the process, call ``configure_logging(logger_name="")``. + - | + Importing Haystack no longer configures ``structlog`` process-wide. The import-time setup now only installs + Haystack's own scoped logging handler (so Haystack's logs are still formatted) and leaves the global ``structlog`` + configuration untouched, so the host application's own native ``structlog`` loggers are no longer reformatted by + merely importing Haystack. ``structlog`` is configured only when you call ``configure_logging(force=True)``. fixes: - | ``haystack.logging.configure_logging`` no longer freezes the log level at import time. Previously the level was @@ -14,9 +19,6 @@ fixes: - | Fixed the logger in ``haystack.utils.requests_utils`` being named after the module's file path instead of ``haystack.utils.requests_utils``, which kept its records outside the ``haystack`` logger namespace. - - | - Importing Haystack no longer overwrites a ``structlog`` configuration that the host application already set up. - The import-time call to ``configure_logging`` now skips configuration when ``structlog`` is already configured. - | ``haystack.logging.getLogger`` is now idempotent. Previously, calling it more than once for the same logger name wrapped the already-wrapped logger methods again, which caused the log message to be run through ``str.format`` @@ -29,6 +31,6 @@ features: - | ``haystack.logging.configure_logging`` gained three parameters: ``logger_name`` to choose which logger(s) the formatting handler is attached to, ``propagate`` to control whether Haystack's loggers propagate their records to - ancestor loggers, and ``force`` to control whether an existing ``structlog`` configuration is replaced. Set - ``propagate=False`` to let Haystack fully own the output of its own logs and avoid duplicate log lines when the - host application also configures the root logger. + ancestor loggers, and ``force`` to control whether the call configures the process-global ``structlog`` (taking + over any existing configuration). Set ``propagate=False`` to let Haystack fully own the output of its own logs and + avoid duplicate log lines when the host application also configures the root logger. diff --git a/test/test_logging.py b/test/test_logging.py index 3ae48ad6d7..e288230faa 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -717,6 +717,22 @@ def test_legacy_root_behavior_still_available_via_empty_logger_name(self, capfd: output = capfd.readouterr().err assert json.loads(output)["event"] == "formatted by haystack" + def test_switching_to_root_logger_does_not_duplicate_haystack_logs(self, capfd: CaptureFixture) -> None: + # Mirror what `import haystack` does: install the handler on the Haystack namespaces. + haystack_logging.configure_logging(use_json=True) + # Then opt into root formatting. The namespace handlers must be removed - otherwise a `haystack.*` record is + # emitted twice (once by the namespace handler, once by the root handler via propagation). + haystack_logging.configure_logging(use_json=True, logger_name="") + + # The Haystack namespaces no longer carry our handler. + for name in ["haystack", "haystack_integrations", "haystack_experimental"]: + handlers = logging.getLogger(name).handlers + assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in handlers) + + logging.getLogger("haystack.demo").warning("dup check") + emitted = [line for line in capfd.readouterr().err.splitlines() if "dup check" in line] + assert len(emitted) == 1 + def test_propagate_is_true_by_default(self, caplog: LogCaptureFixture) -> None: # The default keeps records flowing to ancestor loggers, so tooling that captures via the root logger (such as # pytest's `caplog`) keeps working. @@ -781,7 +797,7 @@ class TestStructlogConfigIsPreserved: application already set up, while an explicit call still takes over. """ - def test_not_forced_skips_when_structlog_already_configured(self, restore_structlog_config: None) -> None: + def test_not_forced_preserves_existing_structlog_config(self, restore_structlog_config: None) -> None: # Stand-in for the host application configuring structlog before Haystack is imported/configured. structlog.reset_defaults() structlog.configure(processors=[_sentinel_processor]) @@ -792,8 +808,9 @@ def test_not_forced_skips_when_structlog_already_configured(self, restore_struct # The application's structlog configuration is left untouched ... assert structlog.get_config()["processors"] == [_sentinel_processor] - # ... and we did not attach our handler on top of their setup. - assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + # ... but we still install our scoped handler so Haystack's own logs are formatted (independently of import + # ordering relative to the application's structlog setup). + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) def test_forced_takes_over_existing_structlog_config(self, restore_structlog_config: None) -> None: structlog.reset_defaults() @@ -806,8 +823,10 @@ def test_forced_takes_over_existing_structlog_config(self, restore_structlog_con assert structlog.get_config()["processors"] != [_sentinel_processor] assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) - def test_not_forced_still_configures_when_structlog_is_unconfigured(self, restore_structlog_config: None) -> None: - # This is the real import-time situation: nobody configured structlog yet, so we set up our nice defaults. + def test_not_forced_installs_handler_without_configuring_structlog(self, restore_structlog_config: None) -> None: + # The real import-time situation: nobody configured structlog yet. We install our scoped handler so Haystack's + # own logs are formatted, but we must NOT touch the process-global structlog configuration (which would + # reformat the host application's own native structlog loggers). structlog.reset_defaults() haystack_logger = logging.getLogger("haystack") haystack_logger.handlers = [] @@ -815,8 +834,10 @@ def test_not_forced_still_configures_when_structlog_is_unconfigured(self, restor haystack_logging.configure_logging(force=False) - assert structlog.is_configured() + # Our scoped handler is installed ... assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + # ... but the global structlog configuration is left untouched. + assert not structlog.is_configured() class TestGetLoggerIsIdempotent: From af5730e2227dfd01722f6fd82fedba99694e9b3d Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 22 Jun 2026 13:02:04 +0200 Subject: [PATCH 4/7] rename force param to configure_structlog --- haystack/__init__.py | 8 +++---- haystack/logging.py | 24 +++++++++---------- ...ogging-configuration-4a38bf0c8ea89fc9.yaml | 9 +++---- test/test_logging.py | 16 ++++++------- 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/haystack/__init__.py b/haystack/__init__.py index 2c92d7fce7..fd6c3a9312 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -18,10 +18,10 @@ from haystack.version import __version__ # noqa: F401 # Initialize the logging configuration. -# This is a no-op unless `structlog` is installed. `force=False` means we only install our own scoped logging handler -# (so Haystack's logs are formatted) without touching the process-global `structlog` configuration - importing -# Haystack must not reconfigure `structlog` for the host application's own native `structlog` loggers. -haystack.logging.configure_logging(force=False) +# This is a no-op unless `structlog` is installed. `configure_structlog=False` means we only install our own scoped +# logging handler (so Haystack's logs are formatted) without touching the process-global `structlog` configuration - +# importing Haystack must not reconfigure `structlog` for the host application's own native `structlog` loggers. +haystack.logging.configure_logging(configure_structlog=False) # Same for tracing (no op if `opentelemetry` or `ddtrace` is not installed) haystack.tracing.auto_enable_tracing() diff --git a/haystack/logging.py b/haystack/logging.py index fac3042c32..1b8ef00906 100644 --- a/haystack/logging.py +++ b/haystack/logging.py @@ -11,6 +11,8 @@ from collections.abc import Sequence from typing import Any +import haystack.utils.jupyter + if typing.TYPE_CHECKING: from structlog.typing import EventDict, Processor, WrappedLogger @@ -316,7 +318,7 @@ def configure_logging( use_json: bool | None = None, logger_name: str | Sequence[str] = ("haystack", "haystack_integrations", "haystack_experimental"), propagate: bool = True, - force: bool = True, + configure_structlog: bool = True, ) -> None: """ Configure logging for Haystack. @@ -342,15 +344,13 @@ def configure_logging( capturing tools such as `pytest`'s `caplog`. Set it to `False` to make Haystack fully own the output of its own logs - this avoids duplicate log lines when the host application also configures the root logger. It has no effect when `logger_name=""` (the root logger has no ancestors). - :param force: - Whether this call owns the process-global `structlog` configuration. The default (`True`) means an explicit - call configures `structlog` and takes over any configuration set up by someone else. Pass `False` (as the - import-time call in `haystack/__init__.py` does) to leave the global `structlog` configuration untouched and - only install our own scoped handler so Haystack's own logs are formatted. This keeps merely importing Haystack - from reconfiguring `structlog` for the host application's own native `structlog` loggers. + :param configure_structlog: + Whether to configure the process-global `structlog` (thereby taking over any configuration set up by someone + else). The default (`True`) is what an explicit call should do. Pass `False` (as the import-time call in + `haystack/__init__.py` does) to leave the global `structlog` configuration untouched and only install our own + scoped handler so Haystack's own logs are formatted. This keeps merely importing Haystack from reconfiguring + `structlog` for the host application's own native `structlog` loggers. """ - import haystack.utils.jupyter # to avoid circular imports - try: import structlog from structlog.processors import ExceptionRenderer @@ -394,10 +394,10 @@ def configure_logging( shared_processors.append(correlate_logs_with_traces) # `structlog.configure` writes to a single process-global configuration that affects *every* native structlog - # logger in the process, not just Haystack's. We therefore only touch it on an explicit (forced) call. The - # import-time auto-config (`force=False`) skips it and relies solely on the scoped handler installed below to + # logger in the process, not just Haystack's. We therefore only touch it when explicitly asked to. The import-time + # auto-config (`configure_structlog=False`) skips it and relies solely on the scoped handler installed below to # format Haystack's own (stdlib) logs, leaving the host application's native structlog loggers untouched. - if force: + if configure_structlog: structlog.configure( # `filter_by_level` reads the effective level from the underlying stdlib logger on *every* call, so # changes to the log level made after `configure_logging` runs (e.g. by the host app) are respected. diff --git a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml index 94185edc31..414da859fe 100644 --- a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml +++ b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml @@ -10,7 +10,8 @@ upgrade: Importing Haystack no longer configures ``structlog`` process-wide. The import-time setup now only installs Haystack's own scoped logging handler (so Haystack's logs are still formatted) and leaves the global ``structlog`` configuration untouched, so the host application's own native ``structlog`` loggers are no longer reformatted by - merely importing Haystack. ``structlog`` is configured only when you call ``configure_logging(force=True)``. + merely importing Haystack. ``structlog`` is configured only when you call + ``configure_logging(configure_structlog=True)``. fixes: - | ``haystack.logging.configure_logging`` no longer freezes the log level at import time. Previously the level was @@ -31,6 +32,6 @@ features: - | ``haystack.logging.configure_logging`` gained three parameters: ``logger_name`` to choose which logger(s) the formatting handler is attached to, ``propagate`` to control whether Haystack's loggers propagate their records to - ancestor loggers, and ``force`` to control whether the call configures the process-global ``structlog`` (taking - over any existing configuration). Set ``propagate=False`` to let Haystack fully own the output of its own logs and - avoid duplicate log lines when the host application also configures the root logger. + ancestor loggers, and ``configure_structlog`` to control whether the call configures the process-global + ``structlog`` (taking over any existing configuration). Set ``propagate=False`` to let Haystack fully own the + output of its own logs and avoid duplicate log lines when the host application also configures the root logger. diff --git a/test/test_logging.py b/test/test_logging.py index e288230faa..6aa5f17702 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -793,18 +793,18 @@ def test_structlog_native_logger_still_filters_below_level(self, capfd: CaptureF class TestStructlogConfigIsPreserved: """ `structlog.configure` writes to a single process-global configuration. These tests pin down that merely importing - Haystack (which calls `configure_logging(force=False)`) does not overwrite a structlog configuration that the host - application already set up, while an explicit call still takes over. + Haystack (which calls `configure_logging(configure_structlog=False)`) does not overwrite a structlog configuration + that the host application already set up, while an explicit call still takes over. """ - def test_not_forced_preserves_existing_structlog_config(self, restore_structlog_config: None) -> None: + def test_without_configure_structlog_existing_config_is_preserved(self, restore_structlog_config: None) -> None: # Stand-in for the host application configuring structlog before Haystack is imported/configured. structlog.reset_defaults() structlog.configure(processors=[_sentinel_processor]) haystack_logger = logging.getLogger("haystack") haystack_logger.handlers = [] - haystack_logging.configure_logging(force=False) + haystack_logging.configure_logging(configure_structlog=False) # The application's structlog configuration is left untouched ... assert structlog.get_config()["processors"] == [_sentinel_processor] @@ -812,18 +812,18 @@ def test_not_forced_preserves_existing_structlog_config(self, restore_structlog_ # ordering relative to the application's structlog setup). assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) - def test_forced_takes_over_existing_structlog_config(self, restore_structlog_config: None) -> None: + def test_configure_structlog_takes_over_existing_config(self, restore_structlog_config: None) -> None: structlog.reset_defaults() structlog.configure(processors=[_sentinel_processor]) haystack_logger = logging.getLogger("haystack") haystack_logger.handlers = [] - haystack_logging.configure_logging(use_json=True, force=True) + haystack_logging.configure_logging(use_json=True, configure_structlog=True) assert structlog.get_config()["processors"] != [_sentinel_processor] assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) - def test_not_forced_installs_handler_without_configuring_structlog(self, restore_structlog_config: None) -> None: + def test_installs_handler_without_configuring_structlog(self, restore_structlog_config: None) -> None: # The real import-time situation: nobody configured structlog yet. We install our scoped handler so Haystack's # own logs are formatted, but we must NOT touch the process-global structlog configuration (which would # reformat the host application's own native structlog loggers). @@ -832,7 +832,7 @@ def test_not_forced_installs_handler_without_configuring_structlog(self, restore haystack_logger.handlers = [] assert not structlog.is_configured() - haystack_logging.configure_logging(force=False) + haystack_logging.configure_logging(configure_structlog=False) # Our scoped handler is installed ... assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) From 844cf7fdd8f833bb17e0bbd94f6d6fa120a0d86b Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 22 Jun 2026 13:09:13 +0200 Subject: [PATCH 5/7] add migration entry and shorten dev comments --- MIGRATION.md | 26 ++++++++++++++++++++++++++ haystack/logging.py | 23 +++++++++-------------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/MIGRATION.md b/MIGRATION.md index f6928a7553..a3f5713b3c 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -873,3 +873,29 @@ store.write_documents(new_documents, policy=DuplicatePolicy.OVERWRITE) new_ids = {doc.id for doc in new_documents} store.delete_documents([doc.id for doc in old_documents if doc.id not in new_ids]) ``` + +### Haystack logging no longer reconfigures logging for the whole process + +**What changed:** Importing Haystack no longer attaches its formatting handler to the root logger, and no longer +configures `structlog` process-wide. The handler is now scoped to Haystack's own logger namespaces (`haystack`, +`haystack_integrations`, `haystack_experimental`), and the global `structlog` configuration is set only when you call +`configure_logging()` explicitly. As a result, importing Haystack no longer reformats the logs of the host application +or other libraries running in the same process. + +**Why:** Haystack should behave as a well-mannered library when it runs alongside other services in the same process, +rather than taking over logging for the whole process. + +**How to migrate:** If you relied on Haystack formatting every log record in the process, opt back in explicitly. + +Before (v2.x): +```python +import haystack # formatted every log record in the process and configured structlog globally +``` + +After (v3.0): +```python +from haystack import logging + +# Restore the old behavior: format every log record in the process (also configures structlog globally). +logging.configure_logging(logger_name="") +``` diff --git a/haystack/logging.py b/haystack/logging.py index 1b8ef00906..07ba002677 100644 --- a/haystack/logging.py +++ b/haystack/logging.py @@ -235,10 +235,9 @@ def _patch_structlog_call_information(logger: logging.Logger) -> None: if not isinstance(logger, _FixedFindCallerLogger): return - # Copied from structlog's `_FixedFindCallerLogger.findCaller`, only adding `haystack.logging` to the list of - # ignored frames so our own logging wrappers don't show up as the caller. We deliberately do not forward - # `stacklevel` to `_find_first_app_frame_and_name`: that parameter only exists in structlog >= 25.5.0 and - # structlog is an optional dependency, so forwarding it would break logging on older versions. + # Copied from structlog's `_FixedFindCallerLogger.findCaller`, adding `haystack.logging` to the ignored + # frames. We don't forward `stacklevel` to `_find_first_app_frame_and_name` (added in structlog 25.5.0): + # structlog is optional and may be older. def findCaller(stack_info: bool = False, stacklevel: int = 1) -> tuple[str, int, str, str | None]: # noqa: ARG001 f, _name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) sinfo = _format_stack(f) if stack_info else None @@ -393,10 +392,9 @@ def configure_logging( # We only need that in sophisticated production setups where we want to correlate logs with traces shared_processors.append(correlate_logs_with_traces) - # `structlog.configure` writes to a single process-global configuration that affects *every* native structlog - # logger in the process, not just Haystack's. We therefore only touch it when explicitly asked to. The import-time - # auto-config (`configure_structlog=False`) skips it and relies solely on the scoped handler installed below to - # format Haystack's own (stdlib) logs, leaving the host application's native structlog loggers untouched. + # `structlog.configure` is process-global: it affects every native structlog logger, not just Haystack's. We only + # configure it when explicitly asked (`configure_structlog`); the import-time call skips it and relies on the + # scoped handler below to format Haystack's own logs, leaving the host app's structlog loggers untouched. if configure_structlog: structlog.configure( # `filter_by_level` reads the effective level from the underlying stdlib logger on *every* call, so @@ -444,14 +442,11 @@ def configure_logging( # Use OUR `ProcessorFormatter` to format all `logging` entries. handler.setFormatter(formatter) - # By default we attach to Haystack's own namespaces so that we only format Haystack's log records and leave the - # loggers of the host application and other libraries in the same process untouched. Pass `logger_name=""` to - # attach to the root logger instead (legacy behavior - formats every record in the process). + # Attach the handler to the target logger(s) - Haystack's own namespaces by default (see `logger_name`). logger_names = [logger_name] if isinstance(logger_name, str) else list(logger_name) - # Remove our handler from every logger that currently carries it before re-installing. This keeps re-configuration - # idempotent and, crucially, prevents records from being emitted twice when the target changes (e.g. switching to - # the root logger via `logger_name=""` while a previous call left handlers on the namespace loggers). + # Remove our handler from every logger that carries it before re-installing: keeps re-configuration idempotent and + # prevents double emission when the target changes (e.g. switching to the root logger via `logger_name=""`). existing_loggers = [logging.getLogger(), *logging.Logger.manager.loggerDict.values()] for existing_logger in existing_loggers: if isinstance(existing_logger, logging.Logger) and any( From 94eea647bfa87587777fbeee6b53785528a68a1e Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Wed, 24 Jun 2026 08:52:18 +0200 Subject: [PATCH 6/7] PR comments --- MIGRATION.md | 10 ++++++++++ test/test_logging.py | 7 ++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/MIGRATION.md b/MIGRATION.md index a3f5713b3c..8b223c839f 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -899,3 +899,13 @@ from haystack import logging # Restore the old behavior: format every log record in the process (also configures structlog globally). logging.configure_logging(logger_name="") ``` + +**Note on duplicate log lines:** Haystack's handler now sits on the `haystack.*` loggers, which still propagate to the +root logger. If the host application also configures a handler on the root logger, Haystack's own logs can appear +twice. To make Haystack fully own its output and stop the duplication, disable propagation: + +```python +from haystack import logging + +logging.configure_logging(propagate=False) +``` diff --git a/test/test_logging.py b/test/test_logging.py index 6aa5f17702..efd64338f0 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -631,7 +631,12 @@ def test_haystack_logger_with_old_interpolation(self, method: str, expected_leve def test_that_haystack_logger_is_used(self) -> None: """Forces the usage of the Haystack logger instead of the standard library logger.""" - allowed_list = [Path("haystack") / "logging.py"] + allowed_list = [ + Path("haystack") / "logging.py", + # Deliberately uses the stdlib logger: tenacity's before_log/after_log call it with positional args, which + # the Haystack logger rejects. + Path("haystack") / "utils" / "requests_utils.py", + ] for root, _, files in os.walk("haystack"): for file in files: path = Path(root) / file From b74a7839c500064c23f751a4bd19f99b8fdfaeeb Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Wed, 24 Jun 2026 08:59:33 +0200 Subject: [PATCH 7/7] shorten some dev comments --- haystack/__init__.py | 3 +-- haystack/logging.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/haystack/__init__.py b/haystack/__init__.py index 86287bef47..730f8b19f5 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -21,8 +21,7 @@ # Initialize the logging configuration. # This is a no-op unless `structlog` is installed. `configure_structlog=False` means we only install our own scoped -# logging handler (so Haystack's logs are formatted) without touching the process-global `structlog` configuration - -# importing Haystack must not reconfigure `structlog` for the host application's own native `structlog` loggers. +# logging handler (so Haystack's logs are formatted) without touching the process-global `structlog` configuration. haystack.logging.configure_logging(configure_structlog=False) __all__ = [ diff --git a/haystack/logging.py b/haystack/logging.py index 07ba002677..f7cdc0f268 100644 --- a/haystack/logging.py +++ b/haystack/logging.py @@ -393,8 +393,7 @@ def configure_logging( shared_processors.append(correlate_logs_with_traces) # `structlog.configure` is process-global: it affects every native structlog logger, not just Haystack's. We only - # configure it when explicitly asked (`configure_structlog`); the import-time call skips it and relies on the - # scoped handler below to format Haystack's own logs, leaving the host app's structlog loggers untouched. + # configure it when explicitly asked (`configure_structlog`). if configure_structlog: structlog.configure( # `filter_by_level` reads the effective level from the underlying stdlib logger on *every* call, so