diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
new file mode 100644
index 0000000000..902b57d6e4
--- /dev/null
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import sys
+
+from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
+from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
+
+if IS_WINDOWS:
+    from cuda.pathfinder._dynamic_libs.load_dl_windows import load_with_system_search
+else:
+    from cuda.pathfinder._dynamic_libs.load_dl_linux import load_with_system_search
+
+
+def _probe_canary_abs_path(libname: str) -> str | None:
+    loaded: LoadedDL | None = load_with_system_search(libname)
+    if loaded is None:
+        return None
+    abs_path = loaded.abs_path
+    if not isinstance(abs_path, str):
+        return None
+    return abs_path
+
+
+def probe_canary_abs_path_and_print_json(libname: str) -> None:
+    print(json.dumps(_probe_canary_abs_path(libname)))  # noqa: T201
+
+
+def main(argv: list[str] | None = None) -> int:
+    args = sys.argv[1:] if argv is None else argv
+    if len(args) != 1:
+        return 2
+    probe_canary_abs_path_and_print_json(args[0])
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
index 65c9f4bf3c..6265992e4a 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
@@ -101,7 +101,7 @@ def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_
     for rel_path in rel_paths:
         for dirname in sorted(glob.glob(os.path.join(anchor_point, rel_path))):
             if os.path.isdir(dirname):
-                return dirname
+                return os.path.normpath(dirname)
     return None
 
 
@@ -152,6 +152,57 @@ def _find_dll_using_lib_dir(
     return None
 
 
+def _derive_ctk_root_linux(resolved_lib_path: str) -> str | None:
+    """Derive the CTK installation root from a resolved library path on Linux.
+
+    Standard system CTK layout: ``$CTK_ROOT/lib64/libfoo.so.XX``
+    (some installs use ``lib`` instead of ``lib64``).
+
+    Returns None if the path doesn't match a recognized layout.
+    """
+    lib_dir = os.path.dirname(resolved_lib_path)
+    basename = os.path.basename(lib_dir)
+    if basename in ("lib64", "lib"):
+        return os.path.dirname(lib_dir)
+    return None
+
+
+def _derive_ctk_root_windows(resolved_lib_path: str) -> str | None:
+    """Derive the CTK installation root from a resolved library path on Windows.
+
+    Handles two CTK layouts:
+    - CTK 13: ``$CTK_ROOT/bin/x64/foo.dll``
+    - CTK 12: ``$CTK_ROOT/bin/foo.dll``
+
+    Returns None if the path doesn't match a recognized layout.
+
+    Uses ``ntpath`` explicitly so the function is testable on any platform.
+    """
+    import ntpath
+
+    lib_dir = ntpath.dirname(resolved_lib_path)
+    basename = ntpath.basename(lib_dir).lower()
+    if basename == "x64":
+        parent = ntpath.dirname(lib_dir)
+        if ntpath.basename(parent).lower() == "bin":
+            return ntpath.dirname(parent)
+    elif basename == "bin":
+        return ntpath.dirname(lib_dir)
+    return None
+
+
+def derive_ctk_root(resolved_lib_path: str) -> str | None:
+    """Derive the CTK installation root from a resolved library path.
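+
+    For example (illustrative path), ``/usr/local/cuda-13/lib64/libcudart.so.13``
+    yields ``/usr/local/cuda-13``.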
+
+    Given the absolute path of a loaded CTK shared library, walk up the
+    directory tree to find the CTK root. Returns None if the path doesn't
+    match any recognized CTK directory layout.
+    """
+    if IS_WINDOWS:
+        return _derive_ctk_root_windows(resolved_lib_path)
+    return _derive_ctk_root_linux(resolved_lib_path)
+
+
 class _FindNvidiaDynamicLib:
     def __init__(self, libname: str):
         self.libname = libname
@@ -185,6 +236,16 @@ def try_with_conda_prefix(self) -> str | None:
     def try_with_cuda_home(self) -> str | None:
         return self._find_using_lib_dir(_find_lib_dir_using_cuda_home(self.libname))
 
+    def try_via_ctk_root(self, ctk_root: str) -> str | None:
+        """Find the library under a derived CTK root directory.
+
+        Uses :func:`_find_lib_dir_using_anchor_point` which already knows
+        about non-standard sub-paths (e.g. ``nvvm/lib64`` for nvvm).
+        """
+        return self._find_using_lib_dir(
+            _find_lib_dir_using_anchor_point(self.libname, anchor_point=ctk_root, linux_lib_dir="lib64")
+        )
+
     def _find_using_lib_dir(self, lib_dir: str | None) -> str | None:
         if lib_dir is None:
             return None
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
index 8de2a5511e..1597a5b8b8 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
@@ -2,16 +2,22 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import functools
+import json
 import struct
 import sys
 
-from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import _FindNvidiaDynamicLib
+from cuda.pathfinder._dynamic_libs.canary_probe_subprocess import probe_canary_abs_path_and_print_json
+from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
+    _FindNvidiaDynamicLib,
+    derive_ctk_root,
+)
 from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError, LoadedDL, load_dependencies
 from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
     SUPPORTED_LINUX_SONAMES,
     SUPPORTED_WINDOWS_DLLS,
 )
 from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
+from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process
 
 if IS_WINDOWS:
     from cuda.pathfinder._dynamic_libs.load_dl_windows import (
@@ -60,6 +66,66 @@ def _load_driver_lib_no_cache(libname: str) -> LoadedDL:
     )
 
 
+# Libs that reside on the standard linker path in system CTK installs.
+# Used to discover the CTK root when a lib with a non-standard path
+# (e.g. nvvm under $CTK_ROOT/nvvm/lib64) can't be found directly.
+_CTK_ROOT_CANARY_LIBNAMES = ("cudart",)
+
+
+def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
+    """Resolve a library's system-search absolute path in a child process.
+
+    This keeps any side-effects of loading the canary library scoped to the
+    child process instead of polluting the current process.
+    """
+    try:
+        result = run_in_spawned_child_process(
+            probe_canary_abs_path_and_print_json,
+            args=(libname,),
+            timeout=10.0,
+        )
+    except (OSError, RuntimeError):
+        return None
+    if result.returncode != 0:
+        return None
+
+    # Read the final non-empty stdout line in case earlier lines are emitted.
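+    # (With default arguments, ``json.dumps`` emits a single line, so the
+    # payload is always the last line; any earlier lines would be stray
+    # output from import-time side effects in the child.)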
+    lines = [line for line in result.stdout.splitlines() if line.strip()]
+    if not lines:
+        return None
+    try:
+        payload = json.loads(lines[-1])
+    except json.JSONDecodeError:
+        return None
+    if isinstance(payload, str):
+        return payload
+    return None
+
+
+def _try_ctk_root_canary(finder: _FindNvidiaDynamicLib) -> str | None:
+    """Derive the CTK root from a system-installed canary lib.
+
+    For libs like nvvm whose shared object doesn't reside on the standard
+    linker path, we locate a well-known CTK lib that IS on the linker path
+    via system search, derive the CTK installation root from its resolved
+    path, and then look for the target lib relative to that root.
+
+    The canary load is performed in a subprocess to avoid introducing loader
+    state into the current process.
+    """
+    for canary_libname in _CTK_ROOT_CANARY_LIBNAMES:
+        canary_abs_path = _resolve_system_loaded_abs_path_in_subprocess(canary_libname)
+        if canary_abs_path is None:
+            continue
+        ctk_root = derive_ctk_root(canary_abs_path)
+        if ctk_root is None:
+            continue
+        abs_path: str | None = finder.try_via_ctk_root(ctk_root)
+        if abs_path is not None:
+            return abs_path
+    return None
+
+
 def _load_lib_no_cache(libname: str) -> LoadedDL:
     if libname in _DRIVER_ONLY_LIBNAMES:
         return _load_driver_lib_no_cache(libname)
@@ -90,11 +156,21 @@ def _load_lib_no_cache(libname: str) -> LoadedDL:
     loaded = load_with_system_search(libname)
     if loaded is not None:
         return loaded
+
     abs_path = finder.try_with_cuda_home()
-    if abs_path is None:
-        finder.raise_not_found_error()
-    else:
+    if abs_path is not None:
         found_via = "CUDA_HOME"
+    else:
+        # Canary probe: if the direct system search and CUDA_HOME both
+        # failed (e.g. nvvm isn't on the linker path and CUDA_HOME is
+        # unset), try to discover the CTK root by loading a well-known CTK
+        # lib in a subprocess, then look for the target lib relative to
+        # that root.
+        abs_path = _try_ctk_root_canary(finder)
+        if abs_path is not None:
+            found_via = "system-ctk-root"
+        else:
+            finder.raise_not_found_error()
 
     return load_with_abs_path(libname, abs_path, found_via)
 
@@ -164,6 +240,14 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
 
        - If set, use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).
 
+    5. **CTK root canary probe**
+
+       - For libraries whose shared object doesn't reside on the standard
+         linker path (e.g. ``libnvvm.so`` lives under ``$CTK_ROOT/nvvm/lib64``),
+         attempt to discover the CTK installation root by system-loading a
+         well-known CTK library (``cudart``) in a subprocess, then derive
+         the root from its resolved absolute path.
+
     **Driver libraries** (``"cuda"``, ``"nvml"``):
 
     These are part of the NVIDIA display driver (not the CUDA Toolkit) and
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py b/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py
new file mode 100644
index 0000000000..cba0390861
--- /dev/null
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py
@@ -0,0 +1,133 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import multiprocessing
+import queue  # for Empty
+import sys
+import traceback
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass
+from io import StringIO
+from typing import Any
+
+PROCESS_KILLED = -9
+PROCESS_NO_RESULT = -999
+
+
+# Similar to https://docs.python.org/3/library/subprocess.html#subprocess.CompletedProcess
+# (args, check_returncode() are intentionally not supported here.)
+@dataclass
+class CompletedProcess:
+    returncode: int
+    stdout: str
+    stderr: str
+
+
+class ChildProcessWrapper:
+    def __init__(
+        self,
+        result_queue: Any,
+        target: Callable[..., None],
+        args: Sequence[Any] | None,
+        kwargs: dict[str, Any] | None,
+    ) -> None:
+        self.target = target
+        self.args = () if args is None else args
+        self.kwargs = {} if kwargs is None else kwargs
+        self.result_queue = result_queue
+
+    def __call__(self) -> None:
+        # Capture stdout/stderr
+        old_stdout = sys.stdout
+        old_stderr = sys.stderr
+        sys.stdout = StringIO()
+        sys.stderr = StringIO()
+
+        try:
+            self.target(*self.args, **self.kwargs)
+            returncode = 0
+        except SystemExit as e:  # Handle sys.exit()
+            returncode = e.code if isinstance(e.code, int) else 0
+        except BaseException:
+            traceback.print_exc()
+            returncode = 1
+        finally:
+            # Collect outputs and restore streams
+            stdout = sys.stdout.getvalue()
+            stderr = sys.stderr.getvalue()
+            sys.stdout = old_stdout
+            sys.stderr = old_stderr
+            try:  # noqa: SIM105
+                self.result_queue.put((returncode, stdout, stderr))
+            except Exception:  # noqa: S110
+                # If the queue is broken (e.g., parent gone), drop the result (best effort).
+                pass
+
+
+def run_in_spawned_child_process(
+    target: Callable[..., None],
+    *,
+    args: Sequence[Any] | None = None,
+    kwargs: dict[str, Any] | None = None,
+    timeout: float | None = None,
+    rethrow: bool = False,
+) -> CompletedProcess:
+    """Run `target` in a spawned child process, capturing stdout/stderr.
+
+    The provided `target` must be defined at the top level of a module, and must
+    be importable in the spawned child process. Lambdas, closures, or interactively
+    defined functions (e.g., in Jupyter notebooks) will not work.
+
+    If `rethrow=True` and the child process exits with a nonzero code,
+    raises ChildProcessError with the captured stderr.
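+
+    Illustrative usage (``_probe`` is a hypothetical module-level function)::
+
+        def _probe(libname: str) -> None:
+            print(libname.upper())
+
+        result = run_in_spawned_child_process(_probe, args=("cudart",), timeout=5.0)
+        assert result.returncode == 0
+        assert result.stdout.strip() == "CUDART"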
+ """ + ctx = multiprocessing.get_context("spawn") + result_queue = ctx.Queue() + process = ctx.Process(target=ChildProcessWrapper(result_queue, target, args, kwargs)) + process.start() + + try: + process.join(timeout) + if process.is_alive(): + process.terminate() + process.join() + result = CompletedProcess( + returncode=PROCESS_KILLED, + stdout="", + stderr=f"Process timed out after {timeout} seconds and was terminated.", + ) + else: + try: + returncode, stdout, stderr = result_queue.get(timeout=1.0) + except (queue.Empty, EOFError): + result = CompletedProcess( + returncode=PROCESS_NO_RESULT, + stdout="", + stderr="Process exited or crashed before returning results.", + ) + else: + result = CompletedProcess( + returncode=returncode, + stdout=stdout, + stderr=stderr, + ) + + if rethrow and result.returncode != 0: + raise ChildProcessError( + f"Child process exited with code {result.returncode}.\n" + "--- stderr-from-child-process ---\n" + f"{result.stderr}" + "\n" + ) + + return result + + finally: + try: + result_queue.close() + result_queue.join_thread() + except Exception: # noqa: S110 + pass + if process.is_alive(): + process.kill() + process.join() diff --git a/cuda_pathfinder/tests/spawned_process_runner.py b/cuda_pathfinder/tests/spawned_process_runner.py index f4440743f5..ac0418445c 100644 --- a/cuda_pathfinder/tests/spawned_process_runner.py +++ b/cuda_pathfinder/tests/spawned_process_runner.py @@ -1,127 +1,6 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -import multiprocessing -import queue # for Empty -import sys -import traceback -from collections.abc import Callable, Sequence -from dataclasses import dataclass -from io import StringIO -from typing import Any +from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process -PROCESS_KILLED = -9 -PROCESS_NO_RESULT = -999 - - -# Similar to https://docs.python.org/3/library/subprocess.html#subprocess.CompletedProcess -# (args, check_returncode() are intentionally not supported here.) 
-@dataclass
-class CompletedProcess:
-    returncode: int
-    stdout: str
-    stderr: str
-
-
-class ChildProcessWrapper:
-    def __init__(self, result_queue, target, args, kwargs):
-        self.target = target
-        self.args = () if args is None else args
-        self.kwargs = {} if kwargs is None else kwargs
-        self.result_queue = result_queue
-
-    def __call__(self):
-        # Capture stdout/stderr
-        old_stdout = sys.stdout
-        old_stderr = sys.stderr
-        sys.stdout = StringIO()
-        sys.stderr = StringIO()
-
-        try:
-            self.target(*self.args, **self.kwargs)
-            returncode = 0
-        except SystemExit as e:  # Handle sys.exit()
-            returncode = e.code if isinstance(e.code, int) else 0
-        except BaseException:
-            traceback.print_exc()
-            returncode = 1
-        finally:
-            # Collect outputs and restore streams
-            stdout = sys.stdout.getvalue()
-            stderr = sys.stderr.getvalue()
-            sys.stdout = old_stdout
-            sys.stderr = old_stderr
-            try:  # noqa: SIM105
-                self.result_queue.put((returncode, stdout, stderr))
-            except Exception:  # noqa: S110
-                # If the queue is broken (e.g., parent gone), best effort logging
-                pass
-
-
-def run_in_spawned_child_process(
-    target: Callable[..., None],
-    *,
-    args: Sequence[Any] | None = None,
-    kwargs: dict[str, Any] | None = None,
-    timeout: float | None = None,
-    rethrow: bool = False,
-) -> CompletedProcess:
-    """Run `target` in a spawned child process, capturing stdout/stderr.
-
-    The provided `target` must be defined at the top level of a module, and must
-    be importable in the spawned child process. Lambdas, closures, or interactively
-    defined functions (e.g., in Jupyter notebooks) will not work.
-
-    If `rethrow=True` and the child process exits with a nonzero code,
-    raises ChildProcessError with the captured stderr.
-    """
-    ctx = multiprocessing.get_context("spawn")
-    result_queue = ctx.Queue()
-    process = ctx.Process(target=ChildProcessWrapper(result_queue, target, args, kwargs))
-    process.start()
-
-    try:
-        process.join(timeout)
-        if process.is_alive():
-            process.terminate()
-            process.join()
-            result = CompletedProcess(
-                returncode=PROCESS_KILLED,
-                stdout="",
-                stderr=f"Process timed out after {timeout} seconds and was terminated.",
-            )
-        else:
-            try:
-                returncode, stdout, stderr = result_queue.get(timeout=1.0)
-            except (queue.Empty, EOFError):
-                result = CompletedProcess(
-                    returncode=PROCESS_NO_RESULT,
-                    stdout="",
-                    stderr="Process exited or crashed before returning results.",
-                )
-            else:
-                result = CompletedProcess(
-                    returncode=returncode,
-                    stdout=stdout,
-                    stderr=stderr,
-                )
-
-        if rethrow and result.returncode != 0:
-            raise ChildProcessError(
-                f"Child process exited with code {result.returncode}.\n"
-                "--- stderr-from-child-process ---\n"
-                f"{result.stderr}"
-                "\n"
-            )
-
-        return result
-
-    finally:
-        try:
-            result_queue.close()
-            result_queue.join_thread()
-        except Exception:  # noqa: S110
-            pass
-        if process.is_alive():
-            process.kill()
-            process.join()
+__all__ = ["run_in_spawned_child_process"]
diff --git a/cuda_pathfinder/tests/test_ctk_root_discovery.py b/cuda_pathfinder/tests/test_ctk_root_discovery.py
new file mode 100644
index 0000000000..71a61c86c0
--- /dev/null
+++ b/cuda_pathfinder/tests/test_ctk_root_discovery.py
@@ -0,0 +1,276 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+
+from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
+    _derive_ctk_root_linux,
+    _derive_ctk_root_windows,
+    _FindNvidiaDynamicLib,
+    derive_ctk_root,
+)
+from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
+from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import (
+    _load_lib_no_cache,
+    _try_ctk_root_canary,
+)
+from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
+
+_MODULE = "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib"
+_FIND_MODULE = "cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib"
+
+
+# ---------------------------------------------------------------------------
+# Platform-aware test helpers
+# ---------------------------------------------------------------------------
+
+
+def _create_nvvm_in_ctk(ctk_root):
+    """Create a fake nvvm lib in the platform-appropriate CTK subdirectory."""
+    if IS_WINDOWS:
+        nvvm_dir = ctk_root / "nvvm" / "bin"
+        nvvm_dir.mkdir(parents=True)
+        nvvm_lib = nvvm_dir / "nvvm64.dll"
+    else:
+        nvvm_dir = ctk_root / "nvvm" / "lib64"
+        nvvm_dir.mkdir(parents=True)
+        nvvm_lib = nvvm_dir / "libnvvm.so"
+    nvvm_lib.write_bytes(b"fake")
+    return nvvm_lib
+
+
+def _create_cudart_in_ctk(ctk_root):
+    """Create a fake cudart lib in the platform-appropriate CTK subdirectory."""
+    if IS_WINDOWS:
+        lib_dir = ctk_root / "bin"
+        lib_dir.mkdir(parents=True)
+        lib_file = lib_dir / "cudart64_12.dll"
+    else:
+        lib_dir = ctk_root / "lib64"
+        lib_dir.mkdir(parents=True)
+        lib_file = lib_dir / "libcudart.so"
+    lib_file.write_bytes(b"fake")
+    return lib_file
+
+
+def _fake_canary_path(ctk_root):
+    """Return the path a system-loaded canary lib would resolve to."""
+    if IS_WINDOWS:
+        return str(ctk_root / "bin" / "cudart64_13.dll")
+    return str(ctk_root / "lib64" / "libcudart.so.13")
+
+
+# ---------------------------------------------------------------------------
+# derive_ctk_root
+# ---------------------------------------------------------------------------
+
+
+def test_derive_ctk_root_linux_lib64():
+    assert _derive_ctk_root_linux("/usr/local/cuda-13/lib64/libcudart.so.13") == "/usr/local/cuda-13"
+
+
+def test_derive_ctk_root_linux_lib():
+    assert _derive_ctk_root_linux("/opt/cuda/lib/libcudart.so.12") == "/opt/cuda"
+
+
+def test_derive_ctk_root_linux_unrecognized():
+    assert _derive_ctk_root_linux("/some/weird/path/libcudart.so.13") is None
+
+
+def test_derive_ctk_root_linux_root_level():
+    assert _derive_ctk_root_linux("/lib64/libcudart.so.13") == "/"
+
+
+def test_derive_ctk_root_windows_ctk13():
+    path = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin\x64\cudart64_13.dll"
+    assert _derive_ctk_root_windows(path) == r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0"
+
+
+def test_derive_ctk_root_windows_ctk12():
+    path = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin\cudart64_12.dll"
+    assert _derive_ctk_root_windows(path) == r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8"
+
+
+def test_derive_ctk_root_windows_unrecognized():
+    assert _derive_ctk_root_windows(r"C:\weird\cudart64_13.dll") is None
+
+
+def test_derive_ctk_root_windows_case_insensitive_bin():
+    assert _derive_ctk_root_windows(r"C:\CUDA\Bin\cudart64_12.dll") == r"C:\CUDA"
+
+
+def test_derive_ctk_root_windows_case_insensitive_x64():
+    assert _derive_ctk_root_windows(r"C:\CUDA\BIN\X64\cudart64_13.dll") == r"C:\CUDA"
+
+
+def test_derive_ctk_root_dispatches_to_linux(mocker):
+    mocker.patch(f"{_FIND_MODULE}.IS_WINDOWS", False)
+    assert derive_ctk_root("/usr/local/cuda/lib64/libcudart.so.13") == "/usr/local/cuda"
derive_ctk_root("/usr/local/cuda/lib64/libcudart.so.13") == "/usr/local/cuda" + + +def test_derive_ctk_root_dispatches_to_windows(mocker): + mocker.patch(f"{_FIND_MODULE}.IS_WINDOWS", True) + assert derive_ctk_root(r"C:\CUDA\v13\bin\cudart64_13.dll") == r"C:\CUDA\v13" + + +# --------------------------------------------------------------------------- +# _FindNvidiaDynamicLib.try_via_ctk_root +# --------------------------------------------------------------------------- + + +def test_try_via_ctk_root_finds_nvvm(tmp_path): + ctk_root = tmp_path / "cuda-13" + nvvm_lib = _create_nvvm_in_ctk(ctk_root) + + assert _FindNvidiaDynamicLib("nvvm").try_via_ctk_root(str(ctk_root)) == str(nvvm_lib) + + +def test_try_via_ctk_root_returns_none_when_dir_missing(tmp_path): + ctk_root = tmp_path / "cuda-13" + ctk_root.mkdir() + + assert _FindNvidiaDynamicLib("nvvm").try_via_ctk_root(str(ctk_root)) is None + + +def test_try_via_ctk_root_regular_lib(tmp_path): + ctk_root = tmp_path / "cuda-13" + cudart_lib = _create_cudart_in_ctk(ctk_root) + + assert _FindNvidiaDynamicLib("cudart").try_via_ctk_root(str(ctk_root)) == str(cudart_lib) + + +# --------------------------------------------------------------------------- +# _try_ctk_root_canary +# --------------------------------------------------------------------------- + + +def _make_loaded_dl(path, found_via): + return LoadedDL(path, False, 0xDEAD, found_via) + + +def test_canary_finds_nvvm(tmp_path, mocker): + ctk_root = tmp_path / "cuda-13" + _create_cudart_in_ctk(ctk_root) + nvvm_lib = _create_nvvm_in_ctk(ctk_root) + + probe = mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value=_fake_canary_path(ctk_root), + ) + parent_system_loader = mocker.patch(f"{_MODULE}.load_with_system_search") + + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) == str(nvvm_lib) + probe.assert_called_once_with("cudart") + parent_system_loader.assert_not_called() + + +def test_canary_returns_none_when_subprocess_probe_fails(mocker): + mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", return_value=None) + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None + + +def test_canary_returns_none_when_ctk_root_unrecognized(mocker): + mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value="/weird/path/libcudart.so.13", + ) + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None + + +def test_canary_returns_none_when_nvvm_not_in_ctk_root(tmp_path, mocker): + ctk_root = tmp_path / "cuda-13" + # Create only the canary lib dir, not nvvm + _create_cudart_in_ctk(ctk_root) + + mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value=_fake_canary_path(ctk_root), + ) + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None + + +def test_canary_skips_when_abs_path_none(mocker): + mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", return_value=None) + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None + + +# --------------------------------------------------------------------------- +# _load_lib_no_cache search-order +# --------------------------------------------------------------------------- + + +@pytest.fixture +def _isolate_load_cascade(mocker): + """Disable the search steps that run before system-search in _load_lib_no_cache. + + This lets the ordering tests focus on system-search, CUDA_HOME, and the + canary probe without needing a real site-packages or conda environment. 
+ """ + # No wheels installed + mocker.patch.object(_FindNvidiaDynamicLib, "try_site_packages", return_value=None) + # No conda env + mocker.patch.object(_FindNvidiaDynamicLib, "try_with_conda_prefix", return_value=None) + # Lib not already loaded by another component + mocker.patch(f"{_MODULE}.check_if_already_loaded_from_elsewhere", return_value=None) + # Skip transitive dependency loading + mocker.patch(f"{_MODULE}.load_dependencies") + + +@pytest.mark.usefixtures("_isolate_load_cascade") +def test_cuda_home_takes_priority_over_canary(tmp_path, mocker): + # Two competing CTK roots: one from CUDA_HOME, one the canary would find. + cuda_home_root = tmp_path / "cuda-home" + nvvm_home_lib = _create_nvvm_in_ctk(cuda_home_root) + + canary_root = tmp_path / "cuda-system" + _create_cudart_in_ctk(canary_root) + _create_nvvm_in_ctk(canary_root) + + canary_mock = mocker.MagicMock(return_value=_fake_canary_path(canary_root)) + + # System search finds nothing for nvvm. + mocker.patch(f"{_MODULE}.load_with_system_search", return_value=None) + # Canary subprocess probe would find cudart if consulted. + mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", side_effect=canary_mock) + # CUDA_HOME points to a separate root that also has nvvm + mocker.patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=str(cuda_home_root)) + # Capture the final load call + mocker.patch( + f"{_MODULE}.load_with_abs_path", + side_effect=lambda _libname, path, via: _make_loaded_dl(path, via), + ) + + result = _load_lib_no_cache("nvvm") + + # CUDA_HOME must win; the canary should never have been consulted + assert result.found_via == "CUDA_HOME" + assert result.abs_path == str(nvvm_home_lib) + canary_mock.assert_not_called() + + +@pytest.mark.usefixtures("_isolate_load_cascade") +def test_canary_fires_only_after_all_earlier_steps_fail(tmp_path, mocker): + canary_root = tmp_path / "cuda-system" + _create_cudart_in_ctk(canary_root) + nvvm_lib = _create_nvvm_in_ctk(canary_root) + + # System search: nvvm not on linker path. + mocker.patch(f"{_MODULE}.load_with_system_search", return_value=None) + # Canary subprocess probe finds cudart under a system CTK root. + mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value=_fake_canary_path(canary_root), + ) + # No CUDA_HOME set + mocker.patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=None) + # Capture the final load call + mocker.patch( + f"{_MODULE}.load_with_abs_path", + side_effect=lambda _libname, path, via: _make_loaded_dl(path, via), + ) + + result = _load_lib_no_cache("nvvm") + + assert result.found_via == "system-ctk-root" + assert result.abs_path == str(nvvm_lib)