Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
fc8e238
fixed issues with cmake and numba in conda package and successfully b…
brendancol Jan 29, 2026
0bbc738
small updates to windows version
brendancol Jan 29, 2026
ace0a34
resolved conflict
brendancol Jan 29, 2026
2ea7d4c
working on fixing windows conda recipe
brendancol Jan 29, 2026
acdbf6f
fixing ci
brendancol Jan 29, 2026
d072e75
fixing windows conda recipe
brendancol Jan 29, 2026
c3a06dc
small updates to windows version
brendancol Jan 29, 2026
803620e
fixing window conda build
brendancol Jan 29, 2026
aa5c37e
fixing window build
brendancol Jan 29, 2026
16af3d6
small updates to windows version
brendancol Jan 29, 2026
80344dd
fixing window build
brendancol Jan 29, 2026
0cff5c0
fixing window build
brendancol Jan 29, 2026
fc4c2a5
fixing window build
brendancol Jan 29, 2026
52838bd
fixing window build
brendancol Jan 29, 2026
d09ca05
fixing window build
brendancol Jan 29, 2026
2804e8d
fixing window build
brendancol Jan 29, 2026
cc9703a
fixing window build
brendancol Jan 30, 2026
68af42b
fixing window build
brendancol Jan 30, 2026
34bc42a
fixing window build
brendancol Jan 30, 2026
d5dc9db
fixing window build
brendancol Jan 30, 2026
b59c1c7
fixing window build
brendancol Jan 30, 2026
dafd8f4
fixing window build
brendancol Jan 30, 2026
e5fcbc0
fixing window build
brendancol Jan 30, 2026
03f1a1d
recompiled ptx file & small notebook updates
brendancol Jan 30, 2026
bc3570f
updated notebook to clip collar
brendancol Jan 30, 2026
cb8c332
Merge branch 'master' into fixes-34-fix-conda-package
brendancol Jan 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 87 additions & 4 deletions conda-recipe/bld.bat
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ nvcc -ptx ^
-I"%OptiX_INSTALL_DIR%\include" ^
-I"%SRC_DIR%\cuda" ^
--use_fast_math ^
--allow-unsupported-compiler ^
-o "%SRC_DIR%\rtxpy\kernel.ptx" ^
"%SRC_DIR%\cuda\kernel.cu"
if errorlevel 1 (
Expand All @@ -89,14 +90,96 @@ if errorlevel 1 (
exit /b 1
)

cd /d "%OTK_PYOPTIX_DIR%\optix"
echo Building and installing otk-pyoptix...
"%PYTHON%" -m pip install . --no-deps --no-build-isolation -vv
:: Verify cmake is available (installed via conda)
where cmake >nul 2>&1
if errorlevel 1 (
echo ERROR: Failed to install otk-pyoptix
echo ERROR: cmake not found. Ensure cmake is in build requirements.
exit /b 1
)
echo Found cmake at:
where cmake

:: Make sure the MSVC compiler driver (cl.exe) can be resolved on PATH before
:: attempting the native build; conda-build is expected to have activated the
:: Visual Studio environment by this point.
where cl >nul 2>&1 || (
    echo.
    echo ERROR: C++ compiler ^(cl.exe^) not found.
    echo.
    echo Please ensure Visual Studio Build Tools are installed and activated.
    echo You can install them from: https://visualstudio.microsoft.com/visual-cpp-build-tools/
    echo.
    echo If already installed, run this build from a "Developer Command Prompt"
    echo or run vcvars64.bat before building.
    echo.
    exit /b 1
)

:: Report which cl.exe was picked up so the build log records the toolchain.
echo Found C++ compiler at:
where cl

:: Pre-clone pybind11 without submodules to avoid FetchContent submodule update failures.
:: The checkout is placed under %SRC_DIR% and pinned to the v2.13.6 tag.
echo Pre-cloning pybind11 to avoid submodule issues...
set "PYBIND11_DIR=%SRC_DIR%\pybind11-src"

:: git clone refuses to write into an existing non-empty directory, so remove
:: any checkout left behind by an earlier (failed or repeated) run first.
if exist "%PYBIND11_DIR%" rmdir /s /q "%PYBIND11_DIR%"

git clone --depth 1 --branch v2.13.6 https://github.com/pybind/pybind11.git "%PYBIND11_DIR%"
if errorlevel 1 (
    echo ERROR: Failed to clone pybind11
    exit /b 1
)

:: Tell CMake to use our pre-cloned pybind11 instead of fetching
set "FETCHCONTENT_SOURCE_DIR_PYBIND11=%PYBIND11_DIR%"
echo Using pre-cloned pybind11 at %PYBIND11_DIR%

:: Enter the otk-pyoptix python package directory. Everything below (the
:: CMakeLists.txt patch and the pip build) uses paths relative to it, so bail
:: out immediately if the directory cannot be entered.
pushd "%OTK_PYOPTIX_DIR%\optix"
if errorlevel 1 (
    echo ERROR: Could not enter "%OTK_PYOPTIX_DIR%\optix"
    exit /b 1
)

:: Patch CMakeLists.txt to use our pre-cloned pybind11 and skip submodule updates
echo Patching CMakeLists.txt to use local pybind11...

:: Convert backslashes to forward slashes for CMake
set "PYBIND11_DIR_CMAKE=%PYBIND11_DIR:\=/%"

:: Prepend the FETCHCONTENT_SOURCE_DIR_PYBIND11 setting to CMakeLists.txt.
:: PYBIND11_DIR_CMAKE is assigned before this parenthesised block is parsed,
:: so ordinary %...% expansion is sufficient and the line does not depend on
:: delayed expansion being enabled elsewhere in the script.
(
    echo set^(FETCHCONTENT_SOURCE_DIR_PYBIND11 "%PYBIND11_DIR_CMAKE%" CACHE PATH "pybind11 source" FORCE^)
    type CMakeLists.txt
) > "%SRC_DIR%\CMakeLists_new.txt"
move /y "%SRC_DIR%\CMakeLists_new.txt" CMakeLists.txt >nul
if errorlevel 1 (
    echo ERROR: Failed to write patched CMakeLists.txt
    popd
    exit /b 1
)

echo Patched CMakeLists.txt - first 2 lines:
powershell -Command "Get-Content CMakeLists.txt -Head 2"

:: Set OptiX path for cmake/pip build process (exactly like run_gpu_test.bat)
set "OPTIX_PATH=%OptiX_INSTALL_DIR%"
set "CMAKE_PREFIX_PATH=%OptiX_INSTALL_DIR%;%CMAKE_PREFIX_PATH%"

:: Clear conda-build injected CMAKE variables that break the build
set CMAKE_GENERATOR=
set CMAKE_GENERATOR_PLATFORM=
set CMAKE_GENERATOR_TOOLSET=

:: Pre-install build dependencies so we can use --no-build-isolation
echo Installing build dependencies...
"%PYTHON%" -m pip install setuptools wheel

echo Building with OptiX_INSTALL_DIR=%OptiX_INSTALL_DIR%
echo FETCHCONTENT_SOURCE_DIR_PYBIND11=%FETCHCONTENT_SOURCE_DIR_PYBIND11%

:: Pass pybind11 source dir to CMake via CMAKE_ARGS (used by scikit-build and setuptools).
:: Use the forward-slash form computed above so CMake receives the same path
:: spelling as the one written into the patched CMakeLists.txt.
set "CMAKE_ARGS=-DFETCHCONTENT_SOURCE_DIR_PYBIND11=%PYBIND11_DIR_CMAKE%"

:: Use --no-build-isolation so environment variables are visible to CMake
"%PYTHON%" -m pip install . -v --no-build-isolation
if errorlevel 1 (
    echo.
    echo ERROR: Failed to install otk-pyoptix
    echo.
    echo If the error mentions OptiX not found, try setting manually:
    echo set OptiX_INSTALL_DIR=%OptiX_INSTALL_DIR%
    echo set OPTIX_PATH=%OptiX_INSTALL_DIR%
    echo.
    popd
    exit /b 1
)
popd
echo otk-pyoptix installed successfully
echo.

Expand Down
15 changes: 10 additions & 5 deletions conda-recipe/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
# Channels to use for build dependencies
channel_sources:
- conda-forge,nvidia

# CUDA versions to build against
cuda_compiler_version:
- "12.6"
- conda-forge

# Python versions to support
python:
Expand All @@ -13,7 +9,16 @@ python:
- "3.12"
- "3.13"

# NumPy version (needed to avoid conda-build warning)
numpy:
- "1.26"

# NumPy version handling:
# - Python 3.10-3.12: numpy 1.26 (last 1.x series)
# - Python 3.13+: numpy 2.1 (required for Python 3.13 support)
# Note: numpy pins are set conditionally in meta.yaml based on Python version
#
# Windows build notes:
# - Windows builds use system CUDA Toolkit (not conda packages)
# - Ensure CUDA Toolkit 12.x is installed and nvcc is in PATH
# - Linux builds use conda-provided CUDA packages
24 changes: 14 additions & 10 deletions conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,34 @@ build:

requirements:
build:
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- {{ compiler('c') }} # [linux]
- {{ compiler('cxx') }} # [linux]
- conda-forge::cmake
- conda-forge::git
- cuda-nvcc
- cuda-cudart-dev
- cuda-nvrtc-dev
- conda-forge::git # [linux]
- cuda-nvcc >=12 # [linux]
- cuda-cudart-dev >=12 # [linux]
- cuda-nvrtc-dev >=12 # [linux]
# Windows uses system CUDA Toolkit and Visual Studio Build Tools

host:
- python
- pip
- setuptools >=68
- wheel
- cuda-version >=12
- cuda-cudart-dev
- cuda-nvrtc-dev
- conda-forge::cmake
- conda-forge::git # [win]
# Linux CUDA packages
- cuda-version >=12 # [linux]
- cuda-cudart-dev >=12 # [linux]
- cuda-nvrtc-dev >=12 # [linux]

run:
- python >=3.10
- numpy >=1.21,<3 # [py<313]
- numpy >=2.0,<3 # [py>=313]
- numba >=0.56
- cupy >=12.0
- cuda-version >=12
- cuda-version >=12 # [linux]
- __cuda # [linux]

test:
Expand Down
766 changes: 673 additions & 93 deletions examples/ham_radio_viewshed_analysis.ipynb

Large diffs are not rendered by default.

124 changes: 83 additions & 41 deletions rtxpy/kernel.ptx
Original file line number Diff line number Diff line change
Expand Up @@ -11,67 +11,89 @@
.address_size 64

// .globl __raygen__main
.const .align 8 .b8 params[24];
.const .align 8 .b8 params[40];

.visible .entry __raygen__main()
{
.reg .pred %p<3>;
.reg .f32 %f<10>;
.reg .b32 %r<83>;
.reg .b64 %rd<10>;
.reg .b32 %r<117>;
.reg .b64 %rd<19>;


// begin inline asm
call (%r1), _optix_get_launch_index_x, ();
call (%r33), _optix_get_launch_index_x, ();
// end inline asm
// begin inline asm
call (%r2), _optix_get_launch_index_y, ();
call (%r34), _optix_get_launch_index_y, ();
// end inline asm
// begin inline asm
call (%r3), _optix_get_launch_index_z, ();
call (%r35), _optix_get_launch_index_z, ();
// end inline asm
// begin inline asm
call (%r4), _optix_get_launch_dimension_x, ();
call (%r36), _optix_get_launch_dimension_x, ();
// end inline asm
// begin inline asm
call (%r5), _optix_get_launch_dimension_y, ();
call (%r37), _optix_get_launch_dimension_y, ();
// end inline asm
mad.lo.s32 %r77, %r5, %r3, %r2;
mad.lo.s32 %r78, %r77, %r4, %r1;
ld.const.u64 %rd2, [params+8];
cvta.to.global.u64 %rd3, %rd2;
mul.wide.u32 %rd4, %r78, 32;
add.s64 %rd5, %rd3, %rd4;
ld.global.f32 %f1, [%rd5];
ld.global.f32 %f2, [%rd5+4];
ld.global.f32 %f3, [%rd5+8];
ld.global.f32 %f7, [%rd5+12];
ld.global.f32 %f4, [%rd5+16];
ld.global.f32 %f5, [%rd5+20];
ld.global.f32 %f6, [%rd5+24];
ld.global.f32 %f8, [%rd5+28];
ld.const.u64 %rd1, [params];
mad.lo.s32 %r109, %r37, %r35, %r34;
mad.lo.s32 %r110, %r109, %r36, %r33;
cvt.u64.u32 %rd1, %r110;
ld.const.u64 %rd5, [params+8];
cvta.to.global.u64 %rd6, %rd5;
mul.wide.u32 %rd7, %r110, 32;
add.s64 %rd8, %rd6, %rd7;
ld.global.f32 %f1, [%rd8];
ld.global.f32 %f2, [%rd8+4];
ld.global.f32 %f3, [%rd8+8];
ld.global.f32 %f7, [%rd8+12];
ld.global.f32 %f4, [%rd8+16];
ld.global.f32 %f5, [%rd8+20];
ld.global.f32 %f6, [%rd8+24];
ld.global.f32 %f8, [%rd8+28];
ld.const.u64 %rd4, [params];
mov.f32 %f9, 0f00000000;
mov.u32 %r42, 1;
mov.u32 %r44, 4;
mov.u32 %r76, 0;
// begin inline asm
call(%r6,%r7,%r8,%r9,%r10,%r11,%r12,%r13,%r14,%r15,%r16,%r17,%r18,%r19,%r20,%r21,%r22,%r23,%r24,%r25,%r26,%r27,%r28,%r29,%r30,%r31,%r32,%r33,%r34,%r35,%r36,%r37),_optix_trace_typed_32,(%r76,%rd1,%f1,%f2,%f3,%f4,%f5,%f6,%f7,%f8,%f9,%r42,%r76,%r76,%r42,%r76,%r44,%r79,%r80,%r81,%r82,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76);
// end inline asm
ld.const.u64 %rd6, [params+16];
cvta.to.global.u64 %rd7, %rd6;
mul.wide.u32 %rd8, %r78, 16;
add.s64 %rd9, %rd7, %rd8;
st.global.u32 [%rd9], %r6;
st.global.u32 [%rd9+4], %r7;
st.global.u32 [%rd9+8], %r8;
st.global.u32 [%rd9+12], %r9;
mov.u32 %r74, 1;
mov.u32 %r76, 6;
mov.u32 %r108, 0;
// begin inline asm
call(%r38,%r39,%r40,%r41,%r42,%r43,%r44,%r45,%r46,%r47,%r48,%r49,%r50,%r51,%r52,%r53,%r54,%r55,%r56,%r57,%r58,%r59,%r60,%r61,%r62,%r63,%r64,%r65,%r66,%r67,%r68,%r69),_optix_trace_typed_32,(%r108,%rd4,%f1,%f2,%f3,%f4,%f5,%f6,%f7,%f8,%f9,%r74,%r108,%r108,%r74,%r108,%r76,%r111,%r112,%r113,%r114,%r115,%r116,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108);
// end inline asm
ld.const.u64 %rd9, [params+16];
cvta.to.global.u64 %rd10, %rd9;
mul.wide.u32 %rd11, %r110, 16;
add.s64 %rd12, %rd10, %rd11;
st.global.u32 [%rd12], %r38;
st.global.u32 [%rd12+4], %r39;
st.global.u32 [%rd12+8], %r40;
st.global.u32 [%rd12+12], %r41;
ld.const.u64 %rd2, [params+24];
setp.eq.s64 %p1, %rd2, 0;
@%p1 bra $L__BB0_2;

cvta.to.global.u64 %rd13, %rd2;
shl.b64 %rd14, %rd1, 2;
add.s64 %rd15, %rd13, %rd14;
st.global.u32 [%rd15], %r42;

$L__BB0_2:
ld.const.u64 %rd3, [params+32];
setp.eq.s64 %p2, %rd3, 0;
@%p2 bra $L__BB0_4;

cvta.to.global.u64 %rd16, %rd3;
shl.b64 %rd17, %rd1, 2;
add.s64 %rd18, %rd16, %rd17;
st.global.u32 [%rd18], %r43;

$L__BB0_4:
ret;

}
// .globl __miss__miss
.visible .entry __miss__miss()
{
.reg .b32 %r<9>;
.reg .b32 %r<13>;


mov.u32 %r8, 0;
Expand All @@ -92,21 +114,30 @@
// begin inline asm
call _optix_set_payload, (%r7, %r8);
// end inline asm
mov.u32 %r9, 4;
mov.u32 %r12, -1;
// begin inline asm
call _optix_set_payload, (%r9, %r12);
// end inline asm
mov.u32 %r11, 5;
// begin inline asm
call _optix_set_payload, (%r11, %r12);
// end inline asm
ret;

}
// .globl __closesthit__chit
.visible .entry __closesthit__chit()
{
.reg .f32 %f<37>;
.reg .b32 %r<14>;
.reg .b32 %r<19>;
.reg .b64 %rd<3>;


// begin inline asm
call (%f1), _optix_get_ray_tmax, ();
// end inline asm
cvt.rzi.ftz.u32.f32 %r13, %f1;
cvt.rzi.ftz.u32.f32 %r18, %f1;
// begin inline asm
call (%rd1), _optix_get_gas_traversable_handle, ();
// end inline asm
Expand Down Expand Up @@ -145,7 +176,7 @@
mul.ftz.f32 %f33, %f24, %f31;
neg.ftz.f32 %f34, %f33;
mul.ftz.f32 %f35, %f31, %f27;
cvt.rn.f32.u32 %f36, %r13;
cvt.rn.f32.u32 %f36, %r18;
mov.b32 %r6, %f36;
mov.u32 %r5, 0;
// begin inline asm
Expand All @@ -166,6 +197,17 @@
// begin inline asm
call _optix_set_payload, (%r11, %r12);
// end inline asm
mov.u32 %r13, 4;
// begin inline asm
call _optix_set_payload, (%r13, %r1);
// end inline asm
// begin inline asm
call (%r15), _optix_read_instance_id, ();
// end inline asm
mov.u32 %r16, 5;
// begin inline asm
call _optix_set_payload, (%r16, %r15);
// end inline asm
ret;

}
Expand Down
2 changes: 1 addition & 1 deletion run_gpu_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ if not exist "cuda\kernel.cu" (
echo ERROR: cuda\kernel.cu not found. Are you in the rtxpy directory?
exit /b 1
)
nvcc -ptx -arch=sm_%GPU_ARCH% -I"%OptiX_INSTALL_DIR%\include" -Icuda --use_fast_math -o rtxpy\kernel.ptx cuda\kernel.cu
nvcc -ptx -arch=sm_%GPU_ARCH% -I"%OptiX_INSTALL_DIR%\include" -Icuda --use_fast_math -allow-unsupported-compiler -o rtxpy\kernel.ptx cuda\kernel.cu
if errorlevel 1 (
echo ERROR: PTX compilation failed
exit /b 1
Expand Down
Loading