diff --git a/MIGRATION.md b/MIGRATION.md index e93872b916..72f5cad1fe 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -1003,6 +1003,42 @@ new_ids = {doc.id for doc in new_documents} store.delete_documents([doc.id for doc in old_documents if doc.id not in new_ids]) ``` +### Haystack logging no longer reconfigures logging for the whole process + +**What changed:** Importing Haystack no longer attaches its formatting handler to the root logger, and no longer +configures `structlog` process-wide. The handler is now scoped to Haystack's own logger namespaces (`haystack`, +`haystack_integrations`, `haystack_experimental`), and the global `structlog` configuration is set only when you call +`configure_logging()` explicitly. As a result, importing Haystack no longer reformats the logs of the host application +or other libraries running in the same process. + +**Why:** Haystack should behave as a well-mannered library when it runs alongside other services in the same process, +rather than taking over logging for the whole process. + +**How to migrate:** If you relied on Haystack formatting every log record in the process, opt back in explicitly. + +Before (v2.x): +```python +import haystack # formatted every log record in the process and configured structlog globally +``` + +After (v3.0): +```python +from haystack import logging + +# Restore the old behavior: format every log record in the process (also configures structlog globally). +logging.configure_logging(logger_name="") +``` + +**Note on duplicate log lines:** Haystack's handler now sits on the `haystack.*` loggers, which still propagate to the +root logger. If the host application also configures a handler on the root logger, Haystack's own logs can appear +twice. To make Haystack fully own its output and stop the duplication, disable propagation: + +```python +from haystack import logging + +logging.configure_logging(propagate=False) +``` + ### Components now resolve API keys at warm-up **What changed:** Components that use external services now create their resources (such as API clients) during `warm_up()` instead of in `__init__`. As a consequence, a missing API key (for example, an unset environment variable behind a `Secret.from_env_var` default) is now reported at warm-up or first run rather than at construction. This affects OpenAI and Azure OpenAI components. diff --git a/haystack/__init__.py b/haystack/__init__.py index 7e1e6b96f8..730f8b19f5 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -19,9 +19,10 @@ from haystack.dataclasses import Answer, Document, ExtractedAnswer, GeneratedAnswer from haystack.version import __version__ # noqa: F401 -# Initialize the logging configuration -# This is a no-op unless `structlog` is installed -haystack.logging.configure_logging() +# Initialize the logging configuration. +# This is a no-op unless `structlog` is installed. `configure_structlog=False` means we only install our own scoped +# logging handler (so Haystack's logs are formatted) without touching the process-global `structlog` configuration. +haystack.logging.configure_logging(configure_structlog=False) __all__ = [ "Answer", diff --git a/haystack/logging.py b/haystack/logging.py index 76139aa5b6..f7cdc0f268 100644 --- a/haystack/logging.py +++ b/haystack/logging.py @@ -8,14 +8,29 @@ import os import sys import typing +from collections.abc import Sequence from typing import Any +import haystack.utils.jupyter + if typing.TYPE_CHECKING: from structlog.typing import EventDict, Processor, WrappedLogger HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON" HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG" +# Attribute set on a logger once we have patched its methods. `logging.getLogger` returns a shared singleton, so we +# use this marker to patch each logger only once and avoid wrapping the already-wrapped methods on repeated calls. +_PATCHED_MARKER = "_haystack_patched" + +# Name of the formatting handler we install. We use it to find and remove our own handler across (re)configurations. +_HANDLER_NAME = "HaystackLoggingHandler" + + +def _is_haystack_logging_handler(handler: logging.Handler) -> bool: + """Return whether the given handler is the one installed by `configure_logging`.""" + return isinstance(handler, logging.StreamHandler) and getattr(handler, "name", None) == _HANDLER_NAME + class PatchedLogger(typing.Protocol): """Class which enables using type checkers to find wrong logger usage.""" @@ -220,16 +235,12 @@ def _patch_structlog_call_information(logger: logging.Logger) -> None: if not isinstance(logger, _FixedFindCallerLogger): return - # completely copied from structlog. We only add `haystack.logging` to the list of ignored frames + # Copied from structlog's `_FixedFindCallerLogger.findCaller`, adding `haystack.logging` to the ignored + # frames. We don't forward `stacklevel` to `_find_first_app_frame_and_name` (added in structlog 25.5.0): + # structlog is optional and may be older. def findCaller(stack_info: bool = False, stacklevel: int = 1) -> tuple[str, int, str, str | None]: # noqa: ARG001 - try: - sinfo: str | None - # we need to exclude `haystack.logging` from the stack - f, name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) - sinfo = _format_stack(f) if stack_info else None - except Exception as error: - print(f"Error in findCaller: {error}") - + f, _name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) + sinfo = _format_stack(f) if stack_info else None return f.f_code.co_filename, f.f_lineno, f.f_code.co_name, sinfo logger.findCaller = findCaller # type: ignore @@ -247,6 +258,11 @@ def getLogger(name: str) -> PatchedLogger: - it makes structure logging effective, not just an available feature """ logger = logging.getLogger(name) + if getattr(logger, _PATCHED_MARKER, False): + # Already patched: `logging.getLogger` returned the same singleton, so re-patching would stack the wrappers + # and interpolate the message more than once. + return typing.cast(PatchedLogger, logger) + logger.debug = patch_log_method_to_kwargs_only(logger.debug) # type: ignore logger.info = patch_log_method_to_kwargs_only(logger.info) # type: ignore logger.warn = patch_log_method_to_kwargs_only(logger.warn) # type: ignore @@ -262,6 +278,8 @@ def getLogger(name: str) -> PatchedLogger: # We also patch the `makeRecord` method to use keyword string interpolation logger.makeRecord = patch_make_records_to_use_kwarg_string_interpolation(logger.makeRecord) # type: ignore + setattr(logger, _PATCHED_MARKER, True) + return typing.cast(PatchedLogger, logger) @@ -295,7 +313,12 @@ def correlate_logs_with_traces(_: "WrappedLogger", __: str, event_dict: "EventDi return event_dict -def configure_logging(use_json: bool | None = None) -> None: +def configure_logging( + use_json: bool | None = None, + logger_name: str | Sequence[str] = ("haystack", "haystack_integrations", "haystack_experimental"), + propagate: bool = True, + configure_structlog: bool = True, +) -> None: """ Configure logging for Haystack. @@ -306,9 +329,27 @@ def configure_logging(use_json: bool | None = None) -> None: - If `structlog` is installed, you can JSON format all logs. Enable this by - setting the `use_json` parameter to `True` when calling this function - setting the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true` - """ - import haystack.utils.jupyter # to avoid circular imports + :param use_json: Whether to format logs as JSON. If `None`, we try to guess based on the environment. + :param logger_name: + The name (or names) of the logger our formatting handler is attached to. Defaults to Haystack's own + namespaces (`"haystack"`, `"haystack_integrations"` and `"haystack_experimental"`), so that we only touch + Haystack's own loggers and leave the logging configuration of the host application and any other libraries + running in the same process untouched. Pass an empty string (`""`) to attach the handler to the root logger + instead - this restores the legacy behavior of formatting *every* log record in the process. + :param propagate: + Whether the configured loggers should propagate their records to ancestor loggers (ultimately the root + logger). The default (`True`) keeps records flowing to handlers configured by the host application and to + capturing tools such as `pytest`'s `caplog`. Set it to `False` to make Haystack fully own the output of its + own logs - this avoids duplicate log lines when the host application also configures the root logger. It has + no effect when `logger_name=""` (the root logger has no ancestors). + :param configure_structlog: + Whether to configure the process-global `structlog` (thereby taking over any configuration set up by someone + else). The default (`True`) is what an explicit call should do. Pass `False` (as the import-time call in + `haystack/__init__.py` does) to leave the global `structlog` configuration untouched and only install our own + scoped handler so Haystack's own logs are formatted. This keeps merely importing Haystack from reconfiguring + `structlog` for the host application's own native `structlog` loggers. + """ try: import structlog from structlog.processors import ExceptionRenderer @@ -351,13 +392,21 @@ def configure_logging(use_json: bool | None = None) -> None: # We only need that in sophisticated production setups where we want to correlate logs with traces shared_processors.append(correlate_logs_with_traces) - structlog.configure( - processors=shared_processors + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter], - logger_factory=structlog.stdlib.LoggerFactory(ignore_frame_names=["haystack.logging"]), - cache_logger_on_first_use=True, - # This is a filter that will filter out log entries that are below the log level of the root logger. - wrapper_class=structlog.make_filtering_bound_logger(min_level=logging.root.getEffectiveLevel()), - ) + # `structlog.configure` is process-global: it affects every native structlog logger, not just Haystack's. We only + # configure it when explicitly asked (`configure_structlog`). + if configure_structlog: + structlog.configure( + # `filter_by_level` reads the effective level from the underlying stdlib logger on *every* call, so + # changes to the log level made after `configure_logging` runs (e.g. by the host app) are respected. + processors=[ + structlog.stdlib.filter_by_level, + *shared_processors, + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], + logger_factory=structlog.stdlib.LoggerFactory(ignore_frame_names=["haystack.logging"]), + cache_logger_on_first_use=True, + wrapper_class=structlog.stdlib.BoundLogger, + ) renderers: list[Processor] if use_json: @@ -388,16 +437,23 @@ def configure_logging(use_json: bool | None = None) -> None: ) handler = logging.StreamHandler() - handler.name = "HaystackLoggingHandler" + handler.name = _HANDLER_NAME # Use OUR `ProcessorFormatter` to format all `logging` entries. handler.setFormatter(formatter) - root_logger = logging.getLogger() - # avoid adding our handler twice - old_handlers = [ - h - for h in root_logger.handlers - if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler") - ] - new_handlers = [handler, *old_handlers] - root_logger.handlers = new_handlers + # Attach the handler to the target logger(s) - Haystack's own namespaces by default (see `logger_name`). + logger_names = [logger_name] if isinstance(logger_name, str) else list(logger_name) + + # Remove our handler from every logger that carries it before re-installing: keeps re-configuration idempotent and + # prevents double emission when the target changes (e.g. switching to the root logger via `logger_name=""`). + existing_loggers = [logging.getLogger(), *logging.Logger.manager.loggerDict.values()] + for existing_logger in existing_loggers: + if isinstance(existing_logger, logging.Logger) and any( + _is_haystack_logging_handler(h) for h in existing_logger.handlers + ): + existing_logger.handlers = [h for h in existing_logger.handlers if not _is_haystack_logging_handler(h)] + + for name in logger_names: + target_logger = logging.getLogger(name) + target_logger.handlers = [handler, *target_logger.handlers] + target_logger.propagate = propagate diff --git a/haystack/utils/requests_utils.py b/haystack/utils/requests_utils.py index c5270cc055..642b0ad45e 100644 --- a/haystack/utils/requests_utils.py +++ b/haystack/utils/requests_utils.py @@ -8,7 +8,10 @@ import httpx from tenacity import after_log, before_log, retry, retry_if_exception_type, stop_after_attempt, wait_exponential -logger = logging.getLogger(__file__) +# NOTE: this uses the standard library logger (not `haystack.logging`) on purpose: tenacity's `before_log`/`after_log` +# call the logger with positional arguments, which Haystack's keyword-only patched logger would reject. We still name +# it with `__name__` so it lives under the `haystack` namespace and is picked up by `configure_logging`. +logger = logging.getLogger(__name__) def request_with_retry( diff --git a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml new file mode 100644 index 0000000000..414da859fe --- /dev/null +++ b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml @@ -0,0 +1,37 @@ +--- +upgrade: + - | + ``haystack.logging.configure_logging`` now attaches its formatting handler to Haystack's own logger namespaces + (``haystack``, ``haystack_integrations`` and ``haystack_experimental``) instead of the root logger. This makes + Haystack a better-behaved library when it runs next to other services in the same process: the log records of the + host application and other libraries are no longer reformatted or duplicated by Haystack. To restore the previous + behavior of formatting every log record in the process, call ``configure_logging(logger_name="")``. + - | + Importing Haystack no longer configures ``structlog`` process-wide. The import-time setup now only installs + Haystack's own scoped logging handler (so Haystack's logs are still formatted) and leaves the global ``structlog`` + configuration untouched, so the host application's own native ``structlog`` loggers are no longer reformatted by + merely importing Haystack. ``structlog`` is configured only when you call + ``configure_logging(configure_structlog=True)``. +fixes: + - | + ``haystack.logging.configure_logging`` no longer freezes the log level at import time. Previously the level was + captured once when the function ran (at ``import haystack``), so a log level set later by the application was + ignored for ``structlog``-native loggers. The effective level is now read on every log call. + - | + Fixed the logger in ``haystack.utils.requests_utils`` being named after the module's file path instead of + ``haystack.utils.requests_utils``, which kept its records outside the ``haystack`` logger namespace. + - | + ``haystack.logging.getLogger`` is now idempotent. Previously, calling it more than once for the same logger name + wrapped the already-wrapped logger methods again, which caused the log message to be run through ``str.format`` + once per call. As a result a field value containing ``{...}`` could be re-interpolated and pull in the value of + another field. Each logger is now patched only once. + - | + The patched ``findCaller`` used to determine the source of a log entry no longer prints to stdout and no longer + masks errors with a misleading ``NameError``, matching structlog's own ``findCaller`` implementation. +features: + - | + ``haystack.logging.configure_logging`` gained three parameters: ``logger_name`` to choose which logger(s) the + formatting handler is attached to, ``propagate`` to control whether Haystack's loggers propagate their records to + ancestor loggers, and ``configure_structlog`` to control whether the call configures the process-global + ``structlog`` (taking over any existing configuration). Set ``propagate=False`` to let Haystack fully own the + output of its own logs and avoid duplicate log lines when the host application also configures the root logger. diff --git a/test/test_logging.py b/test/test_logging.py index 20bdd5947f..efd64338f0 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -7,13 +7,15 @@ import logging import os import sys -from collections.abc import Generator +from collections.abc import Callable, Generator from datetime import datetime, timezone from pathlib import Path from unittest.mock import ANY import pytest import structlog +import structlog._frames +import structlog.stdlib from _pytest.capture import CaptureFixture from _pytest.logging import LogCaptureFixture from _pytest.monkeypatch import MonkeyPatch @@ -25,10 +27,59 @@ @pytest.fixture(autouse=True) def reset_logging_config() -> None: - old_handlers = logging.root.handlers.copy() + # `configure_logging` attaches its handler to Haystack's own namespaces (and may flip `propagate`), so we snapshot + # and restore the root logger plus those namespaces to keep tests isolated. + names = ["haystack", "haystack_integrations", "haystack_experimental"] + root_handlers = logging.root.handlers.copy() + snapshots = {name: _snapshot_logger(name) for name in names} yield - # Reset the logging configuration after each test to avoid impacting other tests - logging.root.handlers = old_handlers + logging.root.handlers = root_handlers + for name, (handlers, propagate, level) in snapshots.items(): + logger = logging.getLogger(name) + logger.handlers = handlers + logger.propagate = propagate + logger.setLevel(level) + + +def _snapshot_logger(name: str) -> tuple[list[logging.Handler], bool, int]: + logger = logging.getLogger(name) + return logger.handlers.copy(), logger.propagate, logger.level + + +@pytest.fixture() +def restore_named_loggers() -> Generator[Callable[[str], logging.Logger], None, None]: + """Snapshot a named logger's handlers/propagate/level and restore them after the test.""" + snapshots: dict[str, tuple[list[logging.Handler], bool, int]] = {} + + def _snapshot(name: str) -> logging.Logger: + logger = logging.getLogger(name) + snapshots[name] = (logger.handlers.copy(), logger.propagate, logger.level) + return logger + + yield _snapshot + + for name, (handlers, propagate, level) in snapshots.items(): + logger = logging.getLogger(name) + logger.handlers = handlers + logger.propagate = propagate + logger.setLevel(level) + + +@pytest.fixture() +def restore_structlog_config() -> Generator[None, None, None]: + """Snapshot the global structlog configuration and restore it after the test.""" + was_configured = structlog.is_configured() + config = structlog.get_config() + yield + if was_configured: + structlog.configure(**config) + else: + structlog.reset_defaults() + + +def _sentinel_processor(logger: object, method_name: str, event_dict: dict) -> dict: + """A no-op processor used to detect whether an existing structlog config was left untouched.""" + return event_dict @pytest.fixture() @@ -47,7 +98,7 @@ def test_skip_logging_configuration( monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # the pytest fixture caplog only captures logs being rendered from the stdlib logging module @@ -61,7 +112,7 @@ class TestStructuredLoggingConsoleRendering: def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.setLevel(logging.INFO) logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -72,7 +123,7 @@ def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -89,7 +140,7 @@ def test_console_rendered_structured_log_even_if_no_tty_but_python_config( haystack_logging.configure_logging(use_json=False) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -105,7 +156,7 @@ def test_console_rendered_structured_log_if_in_ipython( haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -121,7 +172,7 @@ def test_console_rendered_structured_log_even_in_jupyter( haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -137,7 +188,7 @@ def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env( haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -149,7 +200,7 @@ def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env( def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -170,7 +221,7 @@ def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None: def test_logging_exceptions(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") def function_that_raises_and_adds_to_stack_trace(): raise ValueError("This is an error") @@ -188,7 +239,7 @@ def function_that_raises_and_adds_to_stack_trace(): def test_logging_of_contextvars(self, capfd: CaptureFixture, set_context_var_key: str) -> None: haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -202,7 +253,7 @@ def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: M monkeypatch.setattr(sys.stderr, "isatty", lambda: False) haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -216,13 +267,13 @@ def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: M "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_as_json(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -236,14 +287,14 @@ def test_logging_as_json(self, capfd: CaptureFixture) -> None: "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None: monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -257,7 +308,7 @@ def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypat "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_of_contextvars( @@ -266,7 +317,7 @@ def test_logging_of_contextvars( monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -281,13 +332,13 @@ def test_logging_of_contextvars( "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") def function_that_raises_and_adds_to_stack_trace(): my_local_variable = "my_local_variable" # noqa: F841 @@ -306,7 +357,7 @@ def function_that_raises_and_adds_to_stack_trace(): "level": "error", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", "exception": [ { "exc_notes": [], @@ -340,7 +391,7 @@ def test_trace_log_correlation_python_logs_with_console_rendering( haystack_logging.configure_logging(use_json=False) with spying_tracer.trace("test-operation"): - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err @@ -350,7 +401,7 @@ def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, ca haystack_logging.configure_logging(use_json=True) with spying_tracer.trace("test-operation") as span: - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err @@ -365,13 +416,13 @@ def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, ca "trace_id": span.trace_id, "span_id": span.span_id, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -385,13 +436,13 @@ def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -405,7 +456,7 @@ def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } @@ -416,12 +467,12 @@ def test_correct_stack_level_with_stdlib_rendering( monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # the pytest fixture caplog only captures logs being rendered from the stdlib logging module assert caplog.messages == ["Hello, structured logging!"] - assert caplog.records[0].name == "test.test_logging" + assert caplog.records[0].name == "haystack.test_logging" # Nothing should be captured by capfd since structlog is not configured assert capfd.readouterr().err == "" @@ -429,11 +480,11 @@ def test_correct_stack_level_with_stdlib_rendering( def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err - assert "test.test_logging" in output + assert "haystack.test_logging" in output @pytest.mark.parametrize( "method, expected_level", @@ -450,7 +501,7 @@ def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) @@ -467,13 +518,13 @@ def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_le "level": expected_level, "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_log(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, "Hello, structured '{key}'!", key="logging", key1="value1", key2="value2") @@ -489,13 +540,13 @@ def test_log(self, capfd: LogCaptureFixture) -> None: "level": "debug", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_log_json_content(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, 'Hello, structured: {"key": "value"}', key="logging", key1="value1", key2="value2") @@ -511,13 +562,13 @@ def test_log_json_content(self, capfd: LogCaptureFixture) -> None: "level": "debug", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, "Hello, structured '{key}'!", key=LogCaptureFixture, key1="value1", key2="value2") @@ -533,7 +584,7 @@ def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: "level": "debug", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } @pytest.mark.parametrize( @@ -551,7 +602,7 @@ def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: def test_haystack_logger_with_positional_args(self, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) with pytest.raises(TypeError): @@ -572,7 +623,7 @@ def test_haystack_logger_with_positional_args(self, method: str, expected_level: def test_haystack_logger_with_old_interpolation(self, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) # does not raise - hence we need to check this separately @@ -580,7 +631,12 @@ def test_haystack_logger_with_old_interpolation(self, method: str, expected_leve def test_that_haystack_logger_is_used(self) -> None: """Forces the usage of the Haystack logger instead of the standard library logger.""" - allowed_list = [Path("haystack") / "logging.py"] + allowed_list = [ + Path("haystack") / "logging.py", + # Deliberately uses the stdlib logger: tenacity's before_log/after_log call it with positional args, which + # the Haystack logger rejects. + Path("haystack") / "utils" / "requests_utils.py", + ] for root, _, files in os.walk("haystack"): for file in files: path = Path(root) / file @@ -600,3 +656,254 @@ def test_that_haystack_logger_is_used(self) -> None: f"{path} doesn't use the Haystack logger. Please use the Haystack logger instead of the " f"standard library logger and add plenty of keyword arguments." ) + + +class TestLoggingScope: + """ + Haystack is a library that usually runs next to other services in the same process (e.g. a web server, or another + app's logging setup). These tests pin down that `configure_logging` only touches Haystack's own loggers and does + not reformat or hijack the logs of everything else in the process. + """ + + def test_handler_is_attached_to_haystack_namespaces_and_not_root(self) -> None: + haystack_logging.configure_logging(use_json=True) + + # Haystack's own namespace and the ones used by integration and experimental packages get the handler. + for name in ["haystack", "haystack_integrations", "haystack_experimental"]: + logger = logging.getLogger(name) + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in logger.handlers) + + # The root logger - shared by every other library/service in the process - is left untouched. + assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in logging.root.handlers) + + def test_integrations_logs_are_formatted_by_haystack(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + logging.getLogger("haystack_integrations.components.demo").warning("a log line from an integration") + + output = capfd.readouterr().err + assert json.loads(output)["event"] == "a log line from an integration" + + def test_other_services_logs_are_not_reformatted_by_haystack( + self, capfd: CaptureFixture, restore_named_loggers: Callable[[str], logging.Logger] + ) -> None: + # Stand-in for another service that configured its own plain-text logging (e.g. uvicorn) before Haystack runs. + other_service = restore_named_loggers("some_other_service") + other_service.handlers = [logging.StreamHandler()] + other_service.setLevel(logging.INFO) + other_service.propagate = False + + haystack_logging.configure_logging(use_json=True) + + other_service.info("a log line from another service") + logging.getLogger("haystack.test_logging").warning("a log line from haystack") + + lines = [line for line in capfd.readouterr().err.splitlines() if line.strip()] + + # Haystack formats its own record as JSON ... + haystack_json = [json.loads(line) for line in lines if line.startswith("{") and "from haystack" in line] + assert any(record["event"] == "a log line from haystack" for record in haystack_json) + + # ... but the other service's record stays exactly as that service rendered it - plain text, not Haystack JSON. + plain_lines = [line for line in lines if "from another service" in line] + assert plain_lines + for line in plain_lines: + with pytest.raises(json.JSONDecodeError): + json.loads(line) + + def test_legacy_root_behavior_still_available_via_empty_logger_name(self, capfd: CaptureFixture) -> None: + # Opt back into the old behavior of formatting *every* record in the process. + haystack_logging.configure_logging(use_json=True, logger_name="") + + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in logging.root.handlers) + + # A non-Haystack logger is now formatted by Haystack because the handler sits on the root logger. + logging.getLogger("some_other_service").warning("formatted by haystack") + output = capfd.readouterr().err + assert json.loads(output)["event"] == "formatted by haystack" + + def test_switching_to_root_logger_does_not_duplicate_haystack_logs(self, capfd: CaptureFixture) -> None: + # Mirror what `import haystack` does: install the handler on the Haystack namespaces. + haystack_logging.configure_logging(use_json=True) + # Then opt into root formatting. The namespace handlers must be removed - otherwise a `haystack.*` record is + # emitted twice (once by the namespace handler, once by the root handler via propagation). + haystack_logging.configure_logging(use_json=True, logger_name="") + + # The Haystack namespaces no longer carry our handler. + for name in ["haystack", "haystack_integrations", "haystack_experimental"]: + handlers = logging.getLogger(name).handlers + assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in handlers) + + logging.getLogger("haystack.demo").warning("dup check") + emitted = [line for line in capfd.readouterr().err.splitlines() if "dup check" in line] + assert len(emitted) == 1 + + def test_propagate_is_true_by_default(self, caplog: LogCaptureFixture) -> None: + # The default keeps records flowing to ancestor loggers, so tooling that captures via the root logger (such as + # pytest's `caplog`) keeps working. + haystack_logging.configure_logging(use_json=True) + assert logging.getLogger("haystack").propagate is True + + with caplog.at_level(logging.WARNING, logger="haystack.test_logging"): + logging.getLogger("haystack.test_logging").warning("captured via propagation") + assert "captured via propagation" in caplog.text + + def test_propagate_false_stops_records_from_reaching_root(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True, propagate=False) + assert logging.getLogger("haystack").propagate is False + + # A plain handler on the root logger should NOT see Haystack's records when propagation is disabled. + root_handler = logging.StreamHandler() + root_handler.setFormatter(logging.Formatter("ROOT | %(message)s")) + logging.root.addHandler(root_handler) + try: + logging.getLogger("haystack.test_logging").warning("haystack owns this line") + finally: + logging.root.removeHandler(root_handler) + + output = capfd.readouterr().err + # Formatted once by Haystack (JSON), never by the root handler. + assert "ROOT |" not in output + assert json.loads(output)["event"] == "haystack owns this line" + + +class TestDynamicLogLevel: + """ + `configure_logging` runs at import time (in `haystack/__init__.py`), long before an application sets its desired + log level. These tests pin down that a log level set *after* `configure_logging` is still respected, instead of + being frozen to whatever the root level happened to be at import time. + """ + + def test_structlog_native_logger_respects_level_lowered_after_configure(self, capfd: CaptureFixture) -> None: + # Mirror the real ordering: configure while the level is high ... + logging.getLogger("haystack").setLevel(logging.WARNING) + haystack_logging.configure_logging(use_json=True) + + # ... then have the application lower the level afterwards. + logging.getLogger("haystack").setLevel(logging.DEBUG) + + structlog.get_logger("haystack.native_dynamic_level").debug("debug emitted after lowering the level") + + assert "debug emitted after lowering the level" in capfd.readouterr().err + + def test_structlog_native_logger_still_filters_below_level(self, capfd: CaptureFixture) -> None: + logging.getLogger("haystack").setLevel(logging.INFO) + haystack_logging.configure_logging(use_json=True) + + structlog.get_logger("haystack.native_filtered_level").debug("debug below the configured level") + + assert "debug below the configured level" not in capfd.readouterr().err + + +class TestStructlogConfigIsPreserved: + """ + `structlog.configure` writes to a single process-global configuration. These tests pin down that merely importing + Haystack (which calls `configure_logging(configure_structlog=False)`) does not overwrite a structlog configuration + that the host application already set up, while an explicit call still takes over. + """ + + def test_without_configure_structlog_existing_config_is_preserved(self, restore_structlog_config: None) -> None: + # Stand-in for the host application configuring structlog before Haystack is imported/configured. + structlog.reset_defaults() + structlog.configure(processors=[_sentinel_processor]) + haystack_logger = logging.getLogger("haystack") + haystack_logger.handlers = [] + + haystack_logging.configure_logging(configure_structlog=False) + + # The application's structlog configuration is left untouched ... + assert structlog.get_config()["processors"] == [_sentinel_processor] + # ... but we still install our scoped handler so Haystack's own logs are formatted (independently of import + # ordering relative to the application's structlog setup). + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + + def test_configure_structlog_takes_over_existing_config(self, restore_structlog_config: None) -> None: + structlog.reset_defaults() + structlog.configure(processors=[_sentinel_processor]) + haystack_logger = logging.getLogger("haystack") + haystack_logger.handlers = [] + + haystack_logging.configure_logging(use_json=True, configure_structlog=True) + + assert structlog.get_config()["processors"] != [_sentinel_processor] + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + + def test_installs_handler_without_configuring_structlog(self, restore_structlog_config: None) -> None: + # The real import-time situation: nobody configured structlog yet. We install our scoped handler so Haystack's + # own logs are formatted, but we must NOT touch the process-global structlog configuration (which would + # reformat the host application's own native structlog loggers). + structlog.reset_defaults() + haystack_logger = logging.getLogger("haystack") + haystack_logger.handlers = [] + assert not structlog.is_configured() + + haystack_logging.configure_logging(configure_structlog=False) + + # Our scoped handler is installed ... + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + # ... but the global structlog configuration is left untouched. + assert not structlog.is_configured() + + +class TestGetLoggerIsIdempotent: + """ + `logging.getLogger(name)` returns a process-wide singleton. `haystack.logging.getLogger` patches that shared + object in place, so calling it more than once for the same name (different modules, re-imports, ...) must not wrap + the already-wrapped methods again. The user-visible symptom of re-wrapping is that the message is run through + `str.format` once per wrap, so a field value that itself contains `{...}` gets re-interpolated. + """ + + def test_repeated_get_logger_interpolates_the_message_exactly_once(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + # Two modules grabbing the same logger name is the realistic trigger for re-wrapping. + haystack_logging.getLogger("haystack.idempotency_test") + logger = haystack_logging.getLogger("haystack.idempotency_test") + logger.setLevel(logging.INFO) + + # `a`'s value contains a `{b}` placeholder. With a single interpolation it must be left as-is; a second + # interpolation would expand it using `b` and leak "SECRET" into the message. + logger.info("Hello {a}", a="{b}", b="SECRET") + + parsed_output = json.loads(capfd.readouterr().err) + assert parsed_output["event"] == "Hello {b}" + assert "SECRET" not in parsed_output["event"] + + def test_repeated_get_logger_does_not_rewrap_methods(self) -> None: + haystack_logging.getLogger("haystack.idempotency_identity_test") + # Capture the patched methods after the first call, before the second one runs. + patched = logging.getLogger("haystack.idempotency_identity_test") + debug_after_first = patched.debug + make_record_after_first = patched.makeRecord + + haystack_logging.getLogger("haystack.idempotency_identity_test") + + # The second call must leave the already-patched methods in place, not wrap a fresh layer on top. + assert patched.debug is debug_after_first + assert patched.makeRecord is make_record_after_first + + +class TestFindCallerMatchesStructlog: + """ + `_patch_structlog_call_information` mirrors structlog's `_FixedFindCallerLogger.findCaller`, only adding + `haystack.logging` to the ignored frames. structlog itself does not guard the frame lookup, so neither do we: any + error must propagate as-is instead of being swallowed and printed to stdout. + """ + + def test_find_caller_does_not_print_or_mask_errors(self, capsys: CaptureFixture, monkeypatch: MonkeyPatch) -> None: + # Force the frame lookup to fail. It is imported inside `_patch_structlog_call_information`, so we patch the + # module attribute before patching the logger. + def boom(*args: object, **kwargs: object) -> tuple: + raise RuntimeError("frame lookup failed") + + monkeypatch.setattr(structlog._frames, "_find_first_app_frame_and_name", boom) + + logger = structlog.stdlib._FixedFindCallerLogger("haystack.find_caller_test") + haystack_logging._patch_structlog_call_information(logger) + + # The original error must propagate (not be masked by a NameError on an unbound `f`) ... + with pytest.raises(RuntimeError, match="frame lookup failed"): + logger.findCaller() + + # ... and nothing must be written to stdout. + assert capsys.readouterr().out == ""