diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index 216b36aabd..3340b41d77 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -1,5 +1,7 @@ from __future__ import annotations +import os +import re import sys import json import time @@ -9,7 +11,9 @@ import inspect import logging import platform +import threading import warnings +import contextlib import email.utils from types import TracebackType from random import random @@ -831,12 +835,54 @@ def _idempotency_key(self) -> str: return f"stainless-python-retry-{uuid.uuid4()}" +# Serializes the temporary NO_PROXY mutation in `_sanitized_proxy_env` so +# overlapping client constructions from multiple threads cannot snapshot +# each other's cleaned value and leave the env permanently sanitized. +_proxy_env_lock = threading.Lock() + + +@contextlib.contextmanager +def _sanitized_proxy_env() -> Iterator[None]: + """Temporarily replace any whitespace inside `NO_PROXY` / `no_proxy` with commas. + + httpx parses these env vars by splitting on commas only, so newlines or + tabs that sneak in via Docker, .env files, or shell scripts become part + of the hostname and trigger `httpx.InvalidURL` during client construction. + Normalize them just for the duration of the wrapped httpx initialization + so the parsed proxy bypass list is well-formed, then restore the + original value. See openai/openai-python#3303. + + The mutation is serialized by `_proxy_env_lock` because `os.environ` is + process-wide; without the lock, concurrent client constructions could + snapshot each other's already-cleaned value as their "original" and + leave NO_PROXY permanently sanitized for the rest of the process. + """ + with _proxy_env_lock: + saved: Dict[str, Optional[str]] = {} + for name in ("NO_PROXY", "no_proxy"): + original = os.environ.get(name) + saved[name] = original + if original is not None and re.search(r"\s", original): + cleaned = re.sub(r"\s+", ",", original.strip()) + cleaned = re.sub(r",+", ",", cleaned).strip(",") + os.environ[name] = cleaned + try: + yield + finally: + for name, original in saved.items(): + if original is None: + os.environ.pop(name, None) + else: + os.environ[name] = original + + class _DefaultHttpxClient(httpx.Client): def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("timeout", DEFAULT_TIMEOUT) kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) - super().__init__(**kwargs) + with _sanitized_proxy_env(): + super().__init__(**kwargs) if TYPE_CHECKING: @@ -1423,7 +1469,8 @@ def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("timeout", DEFAULT_TIMEOUT) kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) - super().__init__(**kwargs) + with _sanitized_proxy_env(): + super().__init__(**kwargs) try: @@ -1441,7 +1488,8 @@ def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) - super().__init__(**kwargs) + with _sanitized_proxy_env(): + super().__init__(**kwargs) if TYPE_CHECKING: diff --git a/tests/test_client.py b/tests/test_client.py index 2d8955a58e..850fa8428a 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1292,6 +1292,56 @@ def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> N assert len(mounts) == 1 assert mounts[0][0].pattern == "https://" + def test_no_proxy_with_whitespace_does_not_raise(self, monkeypatch: pytest.MonkeyPatch) -> None: + # Regression test for openai/openai-python#3303: NO_PROXY values that + # contain newlines or other whitespace (common in Docker, .env files, + # and shell scripts) used to break client construction with + # httpx.InvalidURL because httpx splits NO_PROXY only by comma. + # The sanitizer in _DefaultHttpxClient.__init__ normalizes whitespace + # to commas just for the duration of the httpx init, then restores + # the original env var. + monkeypatch.setenv("NO_PROXY", "localhost\nexample.com\t192.168.1.1") + monkeypatch.delenv("no_proxy", raising=False) + + # Construction must not raise. + DefaultHttpxClient() + + # The original env var is restored so user code observes what it set. + assert os.environ["NO_PROXY"] == "localhost\nexample.com\t192.168.1.1" + + def test_no_proxy_concurrent_construction_preserves_original( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + # Without the lock around _sanitized_proxy_env, two threads that + # construct clients at the same time could snapshot each other's + # already-cleaned value as the "original" and leave NO_PROXY + # permanently sanitized for the rest of the process. Spin up + # several constructions in parallel and assert the env is the + # same string the caller set when everything has finished. + import threading + + original = "localhost\nexample.com\t192.168.1.1\n10.0.0.0/8" + monkeypatch.setenv("NO_PROXY", original) + monkeypatch.delenv("no_proxy", raising=False) + + errors: list[BaseException] = [] + + def construct() -> None: + try: + for _ in range(20): + DefaultHttpxClient() + except BaseException as e: + errors.append(e) + + threads = [threading.Thread(target=construct) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert not errors, f"client construction raised: {errors!r}" + assert os.environ["NO_PROXY"] == original + @pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning") def test_default_client_creation(self) -> None: # Ensure that the client can be initialized without any exceptions @@ -2552,6 +2602,15 @@ async def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch assert len(mounts) == 1 assert mounts[0][0].pattern == "https://" + async def test_no_proxy_with_whitespace_does_not_raise(self, monkeypatch: pytest.MonkeyPatch) -> None: + # Async counterpart of the sync regression test for #3303. + monkeypatch.setenv("NO_PROXY", "localhost\nexample.com\t192.168.1.1") + monkeypatch.delenv("no_proxy", raising=False) + + DefaultAsyncHttpxClient() + + assert os.environ["NO_PROXY"] == "localhost\nexample.com\t192.168.1.1" + @pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning") async def test_default_client_creation(self) -> None: # Ensure that the client can be initialized without any exceptions