From 72366a8f4204e2b33ddec1f545518891c2664338 Mon Sep 17 00:00:00 2001 From: Aryan Date: Wed, 10 Jun 2026 19:20:49 -0400 Subject: [PATCH 1/3] fix(pathfinder): make find_nvidia_binary_utility deterministic, never search CWD find_nvidia_binary_utility assembled a bounded list of trusted directories (NVIDIA wheel bin/, CONDA_PREFIX, CUDA_HOME/CUDA_PATH) and then delegated to shutil.which(name, path=trusted_dirs). On Windows shutil.which prepends the process current working directory to the search even when an explicit path= is supplied, so a binary located in an arbitrary (possibly attacker-writable) CWD could be returned in preference to the trusted CUDA / Conda / wheel binary. That violates the pathfinder contract of a deterministic lookup over a documented, bounded set of trusted roots. Replace the shutil.which delegation with an explicit resolver that searches only the trusted directories, in order, returning the first executable match. The current working directory and ambient PATH are never consulted. POSIX execute-bit (X_OK) and Windows extension semantics are preserved, so behavior is unchanged except for removing the CWD/PATH leakage. Names resolved in the existing trusted dirs return exactly as before. Rewrites the search-path tests to assert the deterministic probe order and adds TestResolveInTrustedDirs covering CWD isolation, first-match-wins, empty/duplicate dir skipping, and POSIX non-executable rejection. 
Fixes #2119 --- .../_binaries/find_nvidia_binary_utility.py | 41 ++++- .../tests/test_find_nvidia_binaries.py | 161 ++++++++++++++---- 2 files changed, 171 insertions(+), 31 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py b/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py index 10ca2e041b..18e52d5640 100644 --- a/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py +++ b/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py @@ -3,7 +3,6 @@ import functools import os -import shutil from cuda.pathfinder._binaries import supported_nvidia_binaries from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home @@ -28,6 +27,41 @@ def _normalize_utility_name(utility_name: str) -> str: return utility_name +def _is_executable_file(path: str) -> bool: + """Return True if ``path`` is a file the OS would run as an executable. + + On Windows executability is determined by the file extension (the + candidate name already carries one), so existence is sufficient. On POSIX + the execute permission bit must be set, matching ``shutil.which``. + """ + if not os.path.isfile(path): + return False + if IS_WINDOWS: + return True + return os.access(path, os.X_OK) + + +def _resolve_in_trusted_dirs(normalized_name: str, dirs: list[str]) -> str | None: + """Resolve ``normalized_name`` against ``dirs`` only, in order. + + Unlike ``shutil.which``, this never consults the current working directory + or the ambient ``PATH``. On Windows ``shutil.which`` prepends the process + CWD to the search even when an explicit ``path=`` is supplied, which lets a + binary sitting in an arbitrary CWD shadow the trusted CUDA / Conda / wheel + binary that pathfinder is contracted to discover. Searching the trusted + directories explicitly keeps the lookup deterministic and bounded. 
+ """ + seen: set[str] = set() + for directory in dirs: + if not directory or directory in seen: + continue + seen.add(directory) + candidate = os.path.join(directory, normalized_name) + if _is_executable_file(candidate): + return candidate + return None + + @functools.cache def find_nvidia_binary_utility(utility_name: str) -> str | None: """Locate a CUDA binary utility executable. @@ -73,6 +107,9 @@ def find_nvidia_binary_utility(utility_name: str) -> str | None: (``.exe``, ``.bat``, ``.cmd``). On Unix-like systems, executables are identified by the ``X_OK`` (execute) permission bit. + Lookup is restricted to the trusted directories listed above; the + process working directory and the ambient ``PATH`` are never consulted. + Example: >>> from cuda.pathfinder import find_nvidia_binary_utility >>> nvdisasm = find_nvidia_binary_utility("nvdisasm") @@ -104,4 +141,4 @@ def find_nvidia_binary_utility(utility_name: str) -> str | None: dirs.append(os.path.join(cuda_home, "bin")) normalized_name = _normalize_utility_name(utility_name) - return shutil.which(normalized_name, path=os.pathsep.join(dirs)) + return _resolve_in_trusted_dirs(normalized_name, dirs) diff --git a/cuda_pathfinder/tests/test_find_nvidia_binaries.py b/cuda_pathfinder/tests/test_find_nvidia_binaries.py index ec9740cd85..09e3cdc1b4 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_binaries.py +++ b/cuda_pathfinder/tests/test_find_nvidia_binaries.py @@ -40,6 +40,24 @@ def clear_find_binary_cache(): find_nvidia_binary_utility.cache_clear() +def _patch_exec_probe(mocker, existing=()): + """Patch the executable-file probe and record probed candidates in order. + + ``existing`` is the set of candidate paths reported as present; every other + candidate is treated as missing. Returns the list that accumulates probed + candidates so tests can assert the deterministic search order. 
+ """ + existing = set(existing) + checked: list[str] = [] + + def fake_is_executable_file(path): + checked.append(path) + return path in existing + + mocker.patch.object(binary_finder_module, "_is_executable_file", side_effect=fake_is_executable_file) + return checked + + @pytest.mark.usefixtures("clear_find_binary_cache") def test_find_binary_search_path_includes_site_packages_conda_cuda(monkeypatch, mocker): conda_prefix = os.path.join(os.sep, "conda") @@ -58,20 +76,19 @@ def test_find_binary_search_path_includes_site_packages_conda_cuda(monkeypatch, ) monkeypatch.setenv("CONDA_PREFIX", conda_prefix) mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=cuda_home) - which_mock = mocker.patch.object( - binary_finder_module.shutil, "which", return_value=os.path.join(os.sep, "resolved", "nvcc") - ) - - result = find_nvidia_binary_utility("nvcc") - - assert result == os.path.join(os.sep, "resolved", "nvcc") - find_sub_dirs_mock.assert_called_once_with(site_key.split(os.sep)) expected_dirs = [ site_dir, os.path.join(conda_prefix, "bin"), os.path.join(cuda_home, "bin"), ] - which_mock.assert_called_once_with("nvcc", path=os.pathsep.join(expected_dirs)) + checked = _patch_exec_probe(mocker) + + result = find_nvidia_binary_utility("nvcc") + + # No directory contains the binary, so every trusted dir is probed in order. 
+ assert result is None + find_sub_dirs_mock.assert_called_once_with(site_key.split(os.sep)) + assert checked == [os.path.join(d, "nvcc") for d in expected_dirs] @pytest.mark.usefixtures("clear_find_binary_cache") @@ -92,14 +109,6 @@ def test_find_binary_windows_extension_and_search_dirs(monkeypatch, mocker): ) monkeypatch.setenv("CONDA_PREFIX", conda_prefix) mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=cuda_home) - which_mock = mocker.patch.object( - binary_finder_module.shutil, "which", return_value=os.path.join(os.sep, "resolved", "nvcc.exe") - ) - - result = find_nvidia_binary_utility("nvcc") - - assert result == os.path.join(os.sep, "resolved", "nvcc.exe") - find_sub_dirs_mock.assert_called_once_with(site_key.split(os.sep)) expected_dirs = [ site_dir, os.path.join(conda_prefix, "Library", "bin"), @@ -107,7 +116,41 @@ def test_find_binary_windows_extension_and_search_dirs(monkeypatch, mocker): os.path.join(cuda_home, "bin", "x86_64"), os.path.join(cuda_home, "bin"), ] - which_mock.assert_called_once_with("nvcc.exe", path=os.pathsep.join(expected_dirs)) + checked = _patch_exec_probe(mocker) + + result = find_nvidia_binary_utility("nvcc") + + # The .exe extension is appended and the Windows-specific dirs are probed in order. 
+ assert result is None + find_sub_dirs_mock.assert_called_once_with(site_key.split(os.sep)) + assert checked == [os.path.join(d, "nvcc.exe") for d in expected_dirs] + + +@pytest.mark.usefixtures("clear_find_binary_cache") +def test_find_binary_first_matching_dir_wins(monkeypatch, mocker): + conda_prefix = os.path.join(os.sep, "conda") + cuda_home = os.path.join(os.sep, "cuda") + site_key = os.path.join("nvidia", "cuda_nvcc", "bin") + site_dir = os.path.join("site-packages", "cuda_nvcc", "bin") + + mocker.patch.object(binary_finder_module, "IS_WINDOWS", new=False) + mocker.patch.object( + binary_finder_module.supported_nvidia_binaries, + "SITE_PACKAGES_BINDIRS", + {"nvcc": (site_key,)}, + ) + mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[site_dir]) + monkeypatch.setenv("CONDA_PREFIX", conda_prefix) + mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=cuda_home) + conda_nvcc = os.path.join(conda_prefix, "bin", "nvcc") + cuda_nvcc = os.path.join(cuda_home, "bin", "nvcc") + checked = _patch_exec_probe(mocker, existing=[conda_nvcc, cuda_nvcc]) + + result = find_nvidia_binary_utility("nvcc") + + # Conda comes before CUDA_HOME, so the Conda hit wins and CUDA_HOME is never probed. 
+ assert result == conda_nvcc + assert checked == [os.path.join(site_dir, "nvcc"), conda_nvcc] @pytest.mark.usefixtures("clear_find_binary_cache") @@ -123,13 +166,14 @@ def test_find_binary_returns_none_with_no_candidates(monkeypatch, mocker): find_sub_dirs_mock = mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) monkeypatch.delenv("CONDA_PREFIX", raising=False) mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=None) - which_mock = mocker.patch.object(binary_finder_module.shutil, "which", return_value=None) + checked = _patch_exec_probe(mocker) result = find_nvidia_binary_utility("nvcc") assert result is None find_sub_dirs_mock.assert_called_once_with(site_key.split(os.sep)) - which_mock.assert_called_once_with("nvcc", path="") + # No trusted dirs were assembled, so nothing is probed at all. + assert checked == [] @pytest.mark.usefixtures("clear_find_binary_cache") @@ -142,17 +186,17 @@ def test_find_binary_without_site_packages_entry(monkeypatch, mocker): find_sub_dirs_mock = mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) monkeypatch.setenv("CONDA_PREFIX", conda_prefix) mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=cuda_home) - which_mock = mocker.patch.object(binary_finder_module.shutil, "which", return_value=None) + expected_dirs = [ + os.path.join(conda_prefix, "bin"), + os.path.join(cuda_home, "bin"), + ] + checked = _patch_exec_probe(mocker) result = find_nvidia_binary_utility("nvcc") assert result is None find_sub_dirs_mock.assert_not_called() - expected_dirs = [ - os.path.join(conda_prefix, "bin"), - os.path.join(cuda_home, "bin"), - ] - which_mock.assert_called_once_with("nvcc", path=os.pathsep.join(expected_dirs)) + assert checked == [os.path.join(d, "nvcc") for d in expected_dirs] @pytest.mark.usefixtures("clear_find_binary_cache") @@ -161,15 +205,74 @@ def 
test_find_binary_cache_negative_result(monkeypatch, mocker): mocker.patch.object(binary_finder_module.supported_nvidia_binaries, "SITE_PACKAGES_BINDIRS", {}) mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) monkeypatch.delenv("CONDA_PREFIX", raising=False) - mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=None) - which_mock = mocker.patch.object(binary_finder_module.shutil, "which", return_value=None) + cuda_home_mock = mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=None) + _patch_exec_probe(mocker) first = find_nvidia_binary_utility("nvcc") second = find_nvidia_binary_utility("nvcc") assert first is None assert second is None - which_mock.assert_called_once_with("nvcc", path="") + # The second call is served from @functools.cache, so the body runs only once. + cuda_home_mock.assert_called_once_with() + + +class TestResolveInTrustedDirs: + """Unit tests for the deterministic resolver, including the #2119 contract.""" + + @staticmethod + def _make_executable(directory, name): + path = os.path.join(str(directory), name) + with open(path, "w", encoding="utf-8") as handle: + handle.write("") + os.chmod(path, 0o700) + return path + + def test_cwd_is_not_searched(self, tmp_path, monkeypatch): + # Regression for #2119: a binary in the process CWD must never shadow + # the trusted directories the way shutil.which does on Windows. + trusted = tmp_path / "trusted" + trusted.mkdir() + evil_cwd = tmp_path / "cwd" + evil_cwd.mkdir() + empty = tmp_path / "empty" + empty.mkdir() + trusted_nvcc = self._make_executable(trusted, "nvcc") + self._make_executable(evil_cwd, "nvcc") # the decoy that must be ignored + monkeypatch.chdir(evil_cwd) + + # The only trusted dir given has no binary -> None, never the CWD copy. + assert binary_finder_module._resolve_in_trusted_dirs("nvcc", [str(empty)]) is None + # When a trusted dir holds it, that path wins regardless of CWD. 
+ assert binary_finder_module._resolve_in_trusted_dirs("nvcc", [str(empty), str(trusted)]) == trusted_nvcc + + def test_first_trusted_dir_wins(self, tmp_path): + first = tmp_path / "a" + first.mkdir() + second = tmp_path / "b" + second.mkdir() + first_nvcc = self._make_executable(first, "nvcc") + self._make_executable(second, "nvcc") + assert binary_finder_module._resolve_in_trusted_dirs("nvcc", [str(first), str(second)]) == first_nvcc + + def test_empty_and_duplicate_dirs_skipped(self, tmp_path): + present = tmp_path / "p" + present.mkdir() + nvcc = self._make_executable(present, "nvcc") + assert binary_finder_module._resolve_in_trusted_dirs("nvcc", ["", str(present), str(present)]) == nvcc + assert binary_finder_module._resolve_in_trusted_dirs("nvcc", []) is None + + @pytest.mark.skipif(binary_finder_module.IS_WINDOWS, reason="POSIX execute-bit semantics") + def test_non_executable_file_rejected_on_posix(self, tmp_path): + directory = tmp_path / "d" + directory.mkdir() + path = os.path.join(str(directory), "nvcc") + with open(path, "w", encoding="utf-8") as handle: + handle.write("") + os.chmod(path, 0o644) + assert binary_finder_module._resolve_in_trusted_dirs("nvcc", [str(directory)]) is None + os.chmod(path, 0o700) + assert binary_finder_module._resolve_in_trusted_dirs("nvcc", [str(directory)]) == path @pytest.mark.usefixtures("clear_find_binary_cache") From c3d39f026d3d485994e04870e41e590a02057816 Mon Sep 17 00:00:00 2001 From: Aryan Date: Wed, 10 Jun 2026 21:11:39 -0400 Subject: [PATCH 2/3] feat(pathfinder): add CTK-root canary fallback to find_nvidia_binary_utility After the deterministic search over the explicit trusted directories (NVIDIA wheel bin/, CONDA_PREFIX, CUDA_HOME/CUDA_PATH) misses, fall back to a CTK-root canary probe: resolve cudart through the OS dynamic loader, which honors LD_LIBRARY_PATH on Linux and the native DLL search on Windows, derive the CUDA Toolkit root from its absolute path, and search that root's bin layout. 
This addresses the concern raised on #2196: users who follow the CUDA Linux installation guide set LD_LIBRARY_PATH for libraries and PATH for executables. The bounded finder alone would stop finding the utility for them because PATH is intentionally never consulted. The canary fallback recovers that case through LD_LIBRARY_PATH instead of PATH. LD_LIBRARY_PATH is still an attack vector, but a significantly harder one to exploit than PATH, and the ambient PATH and process CWD remain unused. The canary runs only after the explicit trusted dirs miss, so the common wheel/conda/CUDA_HOME cases never spawn the resolver subprocess. The canary -> CTK-root resolution is factored into a shared resolve_ctk_root_via_canary helper reused by the dynamic-library CTK-root canary flow. Adds tests for the fallback (found, ordering, Windows bin layout, not consulted when found earlier, cached) and for resolve_ctk_root_via_canary. Adds 1.6.0 release notes for the minor version bump. --- .../_binaries/find_nvidia_binary_utility.py | 65 ++++++++++++++-- .../_dynamic_libs/load_nvidia_dynamic_lib.py | 20 ++++- .../docs/source/release/1.6.0-notes.rst | 27 +++++++ .../tests/test_ctk_root_discovery.py | 30 ++++++++ .../tests/test_find_nvidia_binaries.py | 76 ++++++++++++++++++- 5 files changed, 206 insertions(+), 12 deletions(-) create mode 100644 cuda_pathfinder/docs/source/release/1.6.0-notes.rst diff --git a/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py b/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py index 18e52d5640..6c410e2752 100644 --- a/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py +++ b/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py @@ -9,6 +9,11 @@ from cuda.pathfinder._utils.find_sub_dirs import find_sub_dirs_all_sitepackages from cuda.pathfinder._utils.platform_aware import IS_WINDOWS +# CUDA Toolkit canary library used to derive the toolkit root when it is only +# visible 
through the dynamic loader. ``cudart`` always ships with the CTK and +# matches the anchor used by the dynamic-library CTK-root canary flow. +_CTK_ROOT_CANARY_ANCHOR_LIBNAME = "cudart" + class UnsupportedBinaryError(Exception): def __init__(self, utility: str) -> None: @@ -41,6 +46,35 @@ def _is_executable_file(path: str) -> bool: return os.access(path, os.X_OK) +def _ctk_bin_subdirs(root: str) -> list[str]: + """Return the bin directories to search under a CUDA Toolkit ``root``. + + On Windows the CTK ships binaries under ``bin/x64`` (CTK 13), ``bin/x86_64``, + and ``bin`` (CTK 12); on Linux they live in ``bin``. + """ + if IS_WINDOWS: + return [ + os.path.join(root, "bin", "x64"), + os.path.join(root, "bin", "x86_64"), + os.path.join(root, "bin"), + ] + return [os.path.join(root, "bin")] + + +def _resolve_ctk_root_via_canary() -> str | None: + """Derive the CUDA Toolkit root from the ``cudart`` canary library. + + ``cudart`` is resolved by the OS dynamic loader, which honors + ``LD_LIBRARY_PATH`` on Linux and the native DLL search on Windows, and the + toolkit root is derived from its absolute path. The ambient ``PATH`` is + never consulted. The loader module is imported lazily to avoid pulling the + dynamic-library machinery in at import time. + """ + from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import resolve_ctk_root_via_canary + + return resolve_ctk_root_via_canary(_CTK_ROOT_CANARY_ANCHOR_LIBNAME) + + def _resolve_in_trusted_dirs(normalized_name: str, dirs: list[str]) -> str | None: """Resolve ``normalized_name`` against ``dirs`` only, in order. @@ -99,6 +133,15 @@ def find_nvidia_binary_utility(utility_name: str) -> str | None: ``bin/x64``, ``bin/x86_64``, and ``bin`` subdirectories on Windows, or just ``bin`` on Linux. + 4. 
**CTK-root canary fallback** + + - Only when steps 1-3 miss: resolve the ``cudart`` library through the + OS dynamic loader (which honors ``LD_LIBRARY_PATH`` on Linux and the + native DLL search on Windows), derive the CUDA Toolkit root from it, + and search that root's bin layout. This finds the utility for users + who follow the CUDA install guide and set ``LD_LIBRARY_PATH`` for + libraries without also setting ``CUDA_HOME`` / ``CUDA_PATH``. + Note: Results are cached using ``@functools.cache`` for performance. The cache persists for the lifetime of the process. @@ -107,8 +150,9 @@ def find_nvidia_binary_utility(utility_name: str) -> str | None: (``.exe``, ``.bat``, ``.cmd``). On Unix-like systems, executables are identified by the ``X_OK`` (execute) permission bit. - Lookup is restricted to the trusted directories listed above; the - process working directory and the ambient ``PATH`` are never consulted. + Lookup is restricted to the trusted directories and the canary-derived + CTK root listed above; the process working directory and the ambient + ``PATH`` are never consulted. Example: >>> from cuda.pathfinder import find_nvidia_binary_utility @@ -135,10 +179,17 @@ def find_nvidia_binary_utility(utility_name: str) -> str | None: # 3. Search in CUDA Toolkit (CUDA_HOME/CUDA_PATH) if (cuda_home := get_cuda_path_or_home()) is not None: - if IS_WINDOWS: - dirs.append(os.path.join(cuda_home, "bin", "x64")) - dirs.append(os.path.join(cuda_home, "bin", "x86_64")) - dirs.append(os.path.join(cuda_home, "bin")) + dirs.extend(_ctk_bin_subdirs(cuda_home)) normalized_name = _normalize_utility_name(utility_name) - return _resolve_in_trusted_dirs(normalized_name, dirs) + found = _resolve_in_trusted_dirs(normalized_name, dirs) + if found is not None: + return found + + # 4. CTK-root canary fallback: only when the explicit trusted dirs above + # miss. Resolve cudart via the dynamic loader (honors LD_LIBRARY_PATH), + # derive the toolkit root, and search its bin layout. 
PATH is never used. + ctk_root = _resolve_ctk_root_via_canary() + if ctk_root is not None: + return _resolve_in_trusted_dirs(normalized_name, _ctk_bin_subdirs(ctk_root)) + return None diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py index a7a8965d2e..63ff80ac0a 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py @@ -136,13 +136,25 @@ def _loadable_via_canary_subprocess(libname: str, *, timeout: float = _CANARY_PR return _resolve_system_loaded_abs_path_in_subprocess(libname, timeout=timeout) is not None +def resolve_ctk_root_via_canary(canary_libname: str) -> str | None: + """Resolve the CUDA Toolkit root from a system-loadable canary library. + + The canary library's absolute path is resolved by the OS dynamic loader in + an isolated subprocess, which honors ``LD_LIBRARY_PATH`` on Linux and the + native DLL search on Windows. The toolkit root is then derived from that + path. Returns ``None`` if the canary cannot be resolved or no root can be + derived. The ambient ``PATH`` is never consulted. 
+ """ + canary_abs_path = _resolve_system_loaded_abs_path_in_subprocess(canary_libname) + if canary_abs_path is None: + return None + return derive_ctk_root(canary_abs_path) + + def _try_ctk_root_canary(ctx: SearchContext) -> str | None: """Try CTK-root canary fallback for descriptor-configured libraries.""" for canary_libname in ctx.desc.ctk_root_canary_anchor_libnames: - canary_abs_path = _resolve_system_loaded_abs_path_in_subprocess(canary_libname) - if canary_abs_path is None: - continue - ctk_root = derive_ctk_root(canary_abs_path) + ctk_root = resolve_ctk_root_via_canary(canary_libname) if ctk_root is None: continue find = find_via_ctk_root(ctx, ctk_root) diff --git a/cuda_pathfinder/docs/source/release/1.6.0-notes.rst b/cuda_pathfinder/docs/source/release/1.6.0-notes.rst new file mode 100644 index 0000000000..c905f7072d --- /dev/null +++ b/cuda_pathfinder/docs/source/release/1.6.0-notes.rst @@ -0,0 +1,27 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +.. py:currentmodule:: cuda.pathfinder + +``cuda-pathfinder`` 1.6.0 Release notes +======================================= + +Highlights +---------- + +* :func:`find_nvidia_binary_utility` now resolves binaries through a bounded, + deterministic search of trusted directories instead of ``shutil.which``. The + process working directory and the ambient ``PATH`` are never consulted, which + closes a lookup ambiguity on Windows where ``shutil.which`` prepends the CWD + even when an explicit search path is supplied. + (`PR #2196 <https://github.com/NVIDIA/cuda-python/pull/2196>`_) + +* :func:`find_nvidia_binary_utility` gains a CTK-root canary fallback. When the + NVIDIA wheel, ``CONDA_PREFIX``, and ``CUDA_HOME`` / ``CUDA_PATH`` directories + all miss, ``cudart`` is resolved through the OS dynamic loader, which honors + ``LD_LIBRARY_PATH`` on Linux and the native DLL search on Windows. 
The CUDA + Toolkit root is derived from that path and its ``bin`` layout is searched. + This locates the utility for users who follow the CUDA installation guide and + set ``LD_LIBRARY_PATH`` for libraries without also setting ``CUDA_HOME`` / + ``CUDA_PATH``, while still never falling back to ``PATH``. + (`PR #2196 <https://github.com/NVIDIA/cuda-python/pull/2196>`_) diff --git a/cuda_pathfinder/tests/test_ctk_root_discovery.py b/cuda_pathfinder/tests/test_ctk_root_discovery.py index 19cbe847d2..9ad148dccd 100644 --- a/cuda_pathfinder/tests/test_ctk_root_discovery.py +++ b/cuda_pathfinder/tests/test_ctk_root_discovery.py @@ -17,6 +17,7 @@ _load_lib_no_cache, _resolve_system_loaded_abs_path_in_subprocess, _try_ctk_root_canary, + resolve_ctk_root_via_canary, ) from cuda.pathfinder._dynamic_libs.search_steps import ( SearchContext, @@ -369,6 +370,35 @@ def test_canary_skips_when_abs_path_none(mocker): assert _try_ctk_root_canary(_ctx("nvvm")) is None +# --------------------------------------------------------------------------- +# resolve_ctk_root_via_canary (shared by lib and binary discovery) +# --------------------------------------------------------------------------- + + +def test_resolve_ctk_root_via_canary_returns_root(tmp_path, mocker): + ctk_root = tmp_path / "cuda-13" + _create_cudart_in_ctk(ctk_root) + probe = mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value=_fake_canary_path(ctk_root), + ) + assert resolve_ctk_root_via_canary("cudart") == str(ctk_root) + probe.assert_called_once_with("cudart") + + +def test_resolve_ctk_root_via_canary_none_when_probe_fails(mocker): + mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", return_value=None) + assert resolve_ctk_root_via_canary("cudart") is None + + +def test_resolve_ctk_root_via_canary_none_when_unrecognized(mocker): + mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value=os.path.join(os.sep, "weird", "path", "libcudart.so.13"), + ) + assert 
resolve_ctk_root_via_canary("cudart") is None + + # --------------------------------------------------------------------------- # _load_lib_no_cache search-order # --------------------------------------------------------------------------- diff --git a/cuda_pathfinder/tests/test_find_nvidia_binaries.py b/cuda_pathfinder/tests/test_find_nvidia_binaries.py index 09e3cdc1b4..7f53039c77 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_binaries.py +++ b/cuda_pathfinder/tests/test_find_nvidia_binaries.py @@ -76,6 +76,7 @@ def test_find_binary_search_path_includes_site_packages_conda_cuda(monkeypatch, ) monkeypatch.setenv("CONDA_PREFIX", conda_prefix) mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=cuda_home) + mocker.patch.object(binary_finder_module, "_resolve_ctk_root_via_canary", return_value=None) expected_dirs = [ site_dir, os.path.join(conda_prefix, "bin"), @@ -109,6 +110,7 @@ def test_find_binary_windows_extension_and_search_dirs(monkeypatch, mocker): ) monkeypatch.setenv("CONDA_PREFIX", conda_prefix) mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=cuda_home) + mocker.patch.object(binary_finder_module, "_resolve_ctk_root_via_canary", return_value=None) expected_dirs = [ site_dir, os.path.join(conda_prefix, "Library", "bin"), @@ -142,6 +144,7 @@ def test_find_binary_first_matching_dir_wins(monkeypatch, mocker): mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[site_dir]) monkeypatch.setenv("CONDA_PREFIX", conda_prefix) mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=cuda_home) + mocker.patch.object(binary_finder_module, "_resolve_ctk_root_via_canary", return_value=None) conda_nvcc = os.path.join(conda_prefix, "bin", "nvcc") cuda_nvcc = os.path.join(cuda_home, "bin", "nvcc") checked = _patch_exec_probe(mocker, existing=[conda_nvcc, cuda_nvcc]) @@ -153,6 +156,72 @@ def test_find_binary_first_matching_dir_wins(monkeypatch, mocker): 
assert checked == [os.path.join(site_dir, "nvcc"), conda_nvcc] +@pytest.mark.usefixtures("clear_find_binary_cache") +def test_find_binary_ctk_root_canary_fallback(monkeypatch, mocker): + # When the explicit trusted dirs (wheels, conda, CUDA_HOME/PATH) all miss, + # the cudart-canary-derived CTK root is searched last. + ctk_root = os.path.join(os.sep, "opt", "cuda") + + mocker.patch.object(binary_finder_module, "IS_WINDOWS", new=False) + mocker.patch.object(binary_finder_module.supported_nvidia_binaries, "SITE_PACKAGES_BINDIRS", {}) + mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) + monkeypatch.delenv("CONDA_PREFIX", raising=False) + mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=None) + canary_mock = mocker.patch.object(binary_finder_module, "_resolve_ctk_root_via_canary", return_value=ctk_root) + ctk_nvcc = os.path.join(ctk_root, "bin", "nvcc") + checked = _patch_exec_probe(mocker, existing=[ctk_nvcc]) + + result = find_nvidia_binary_utility("nvcc") + + assert result == ctk_nvcc + canary_mock.assert_called_once_with() + # No earlier trusted dirs existed, so the only probe is the canary bin dir. 
+ assert checked == [ctk_nvcc] + + +@pytest.mark.usefixtures("clear_find_binary_cache") +def test_find_binary_canary_windows_bin_layout(monkeypatch, mocker): + ctk_root = os.path.join("C:", os.sep, "cuda") + + mocker.patch.object(binary_finder_module, "IS_WINDOWS", new=True) + mocker.patch.object(binary_finder_module.supported_nvidia_binaries, "SITE_PACKAGES_BINDIRS", {}) + mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) + monkeypatch.delenv("CONDA_PREFIX", raising=False) + mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=None) + mocker.patch.object(binary_finder_module, "_resolve_ctk_root_via_canary", return_value=ctk_root) + expected_dirs = [ + os.path.join(ctk_root, "bin", "x64"), + os.path.join(ctk_root, "bin", "x86_64"), + os.path.join(ctk_root, "bin"), + ] + checked = _patch_exec_probe(mocker) + + result = find_nvidia_binary_utility("nvcc") + + assert result is None + assert checked == [os.path.join(d, "nvcc.exe") for d in expected_dirs] + + +@pytest.mark.usefixtures("clear_find_binary_cache") +def test_find_binary_canary_not_consulted_when_found_earlier(monkeypatch, mocker): + # An earlier trusted dir hit must short-circuit before the canary subprocess. 
+ conda_prefix = os.path.join(os.sep, "conda") + + mocker.patch.object(binary_finder_module, "IS_WINDOWS", new=False) + mocker.patch.object(binary_finder_module.supported_nvidia_binaries, "SITE_PACKAGES_BINDIRS", {}) + mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) + monkeypatch.setenv("CONDA_PREFIX", conda_prefix) + mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=None) + canary_mock = mocker.patch.object(binary_finder_module, "_resolve_ctk_root_via_canary", return_value=None) + conda_nvcc = os.path.join(conda_prefix, "bin", "nvcc") + _patch_exec_probe(mocker, existing=[conda_nvcc]) + + result = find_nvidia_binary_utility("nvcc") + + assert result == conda_nvcc + canary_mock.assert_not_called() + + @pytest.mark.usefixtures("clear_find_binary_cache") def test_find_binary_returns_none_with_no_candidates(monkeypatch, mocker): site_key = os.path.join("nvidia", "cuda_nvcc", "bin") @@ -166,6 +235,7 @@ def test_find_binary_returns_none_with_no_candidates(monkeypatch, mocker): find_sub_dirs_mock = mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) monkeypatch.delenv("CONDA_PREFIX", raising=False) mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=None) + mocker.patch.object(binary_finder_module, "_resolve_ctk_root_via_canary", return_value=None) checked = _patch_exec_probe(mocker) result = find_nvidia_binary_utility("nvcc") @@ -186,6 +256,7 @@ def test_find_binary_without_site_packages_entry(monkeypatch, mocker): find_sub_dirs_mock = mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) monkeypatch.setenv("CONDA_PREFIX", conda_prefix) mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=cuda_home) + mocker.patch.object(binary_finder_module, "_resolve_ctk_root_via_canary", return_value=None) expected_dirs = [ os.path.join(conda_prefix, "bin"), 
os.path.join(cuda_home, "bin"), @@ -206,6 +277,7 @@ def test_find_binary_cache_negative_result(monkeypatch, mocker): mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) monkeypatch.delenv("CONDA_PREFIX", raising=False) cuda_home_mock = mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=None) + canary_mock = mocker.patch.object(binary_finder_module, "_resolve_ctk_root_via_canary", return_value=None) _patch_exec_probe(mocker) first = find_nvidia_binary_utility("nvcc") @@ -213,8 +285,10 @@ def test_find_binary_cache_negative_result(monkeypatch, mocker): assert first is None assert second is None - # The second call is served from @functools.cache, so the body runs only once. + # The second call is served from @functools.cache, so the body runs only + # once, including the canary fallback. cuda_home_mock.assert_called_once_with() + canary_mock.assert_called_once_with() class TestResolveInTrustedDirs: From d14dc18b0fd95269b9ab950ed5d8eac71075f113 Mon Sep 17 00:00:00 2001 From: Aryan Date: Wed, 10 Jun 2026 21:17:28 -0400 Subject: [PATCH 3/3] fix(pathfinder): satisfy mypy no-any-return in canary helpers pre-commit.ci mypy flagged returning Any from resolve_ctk_root_via_canary and _resolve_ctk_root_via_canary (both declared -> str | None), because derive_ctk_root resolves to Any under the pathfinder mypy config. Bind the result to an annotated local before returning, matching the pattern used elsewhere in the package. 
--- .../cuda/pathfinder/_binaries/find_nvidia_binary_utility.py | 3 ++- .../cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py b/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py index 6c410e2752..fcc2c74de4 100644 --- a/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py +++ b/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py @@ -72,7 +72,8 @@ def _resolve_ctk_root_via_canary() -> str | None: """ from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import resolve_ctk_root_via_canary - return resolve_ctk_root_via_canary(_CTK_ROOT_CANARY_ANCHOR_LIBNAME) + ctk_root: str | None = resolve_ctk_root_via_canary(_CTK_ROOT_CANARY_ANCHOR_LIBNAME) + return ctk_root def _resolve_in_trusted_dirs(normalized_name: str, dirs: list[str]) -> str | None: diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py index 63ff80ac0a..22495ed290 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py @@ -148,7 +148,8 @@ def resolve_ctk_root_via_canary(canary_libname: str) -> str | None: canary_abs_path = _resolve_system_loaded_abs_path_in_subprocess(canary_libname) if canary_abs_path is None: return None - return derive_ctk_root(canary_abs_path) + ctk_root: str | None = derive_ctk_root(canary_abs_path) + return ctk_root def _try_ctk_root_canary(ctx: SearchContext) -> str | None: