From cd45fec29ab2752c509404a7a56ca78ba0271070 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 28 Jan 2026 06:58:44 -0800
Subject: [PATCH 1/7] added tests for nan behavior in vertex / ray inputs

---
 rtxpy/tests/test_simple.py | 73 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/rtxpy/tests/test_simple.py b/rtxpy/tests/test_simple.py
index 378c054..7ed3947 100644
--- a/rtxpy/tests/test_simple.py
+++ b/rtxpy/tests/test_simple.py
@@ -30,3 +30,76 @@ def test_simple(test_cupy):
     res = optix.trace(rays,  hits, 1)
     assert res == 0
     np.testing.assert_almost_equal(hits, [100.0, 0.0, 0.0, 1.0])
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_nan_in_ray_input(test_cupy):
+    """Test behavior when ray input contains NaN values."""
+    if test_cupy:
+        if not has_cupy:
+            pytest.skip("cupy not available")
+
+        import cupy
+        backend = cupy
+    else:
+        import numpy
+        backend = numpy
+
+    # Valid mesh (unit square made of 2 triangles)
+    verts = backend.float32([0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0])
+    triangles = backend.int32([0, 1, 2, 2, 1, 3])
+
+    # Ray with NaN in origin (ox = NaN)
+    rays = backend.float32([np.nan, 0.33, 100, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+
+    optix = RTX()
+    res = optix.build(0, verts, triangles)
+    assert res == 0
+
+    res = optix.trace(rays, hits, 1)
+    assert res == 0
+
+    # When ray origin contains NaN, the ray should miss (t = -1.0)
+    # or produce NaN in the hit result
+    t_value = float(hits[0])
+    assert t_value == -1.0 or np.isnan(t_value), \
+        f"Expected miss (t=-1.0) or NaN for ray with NaN origin, got t={t_value}"
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_nan_in_vertex_input(test_cupy):
+    """Test behavior when vertex data contains NaN values."""
+    if test_cupy:
+        if not has_cupy:
+            pytest.skip("cupy not available")
+
+        import cupy
+        backend = cupy
+    else:
+        import numpy
+        backend = numpy
+
+    # Mesh with NaN in one vertex (vertex 0 has NaN z-coordinate)
+    verts = backend.float32([0, 0, np.nan, 1, 0, 0, 0, 1, 0, 1, 1, 0])
+    triangles = backend.int32([0, 1, 2, 2, 1, 3])
+
+    # Valid ray pointing down at the mesh
+    rays = backend.float32([0.33, 0.33, 100, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+
+    optix = RTX()
+    res = optix.build(0, verts, triangles)
+    # Build may succeed even with NaN vertices (OptiX doesn't validate)
+    # The behavior depends on OptiX implementation
+
+    if res == 0:
+        res = optix.trace(rays, hits, 1)
+        assert res == 0
+
+        # With NaN in triangle 0's vertex, behavior is undefined but should not crash
+        # Triangle 1 (vertices 2,1,3) should still be valid
+        # The ray at (0.33, 0.33) could hit either triangle depending on exact geometry
+        t_value = float(hits[0])
+        # Result should be a valid float (hit, miss, or NaN - but not crash)
+        assert np.isfinite(t_value) or np.isnan(t_value) or t_value == -1.0

From 9ee82509242e3448d72c7b1e7fb73e424bc12f4a Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 28 Jan 2026 07:06:02 -0800
Subject: [PATCH 2/7] added tests to cover input xr.DataArray nan behavior

---
 rtxpy/tests/test_simple.py | 263 +++++++++++++++++++++++++++++++++++++
 1 file changed, 263 insertions(+)

diff --git a/rtxpy/tests/test_simple.py b/rtxpy/tests/test_simple.py
index 7ed3947..f2d44fc 100644
--- a/rtxpy/tests/test_simple.py
+++ b/rtxpy/tests/test_simple.py
@@ -1,9 +1,56 @@
 import numpy as np
 import pytest
+import xarray as xr
 
 from rtxpy import RTX, has_cupy
 
 
+def triangulate_elevation(elevation_data, backend):
+    """
+    Convert a 2D elevation array to vertices and triangles for mesh creation.
+    This matches the logic in examples/mesh_utils.py triangulateTerrain function.
+
+    Args:
+        elevation_data: 2D numpy or cupy array of elevation values (H x W)
+        backend: numpy or cupy module to use for array creation
+
+    Returns:
+        verts: Flattened vertex buffer (H*W*3 float32)
+        triangles: Flattened index buffer ((H-1)*(W-1)*2*3 int32)
+    """
+    H, W = elevation_data.shape
+    num_vertices = H * W
+    num_triangles = (H - 1) * (W - 1) * 2
+
+    verts = backend.zeros(num_vertices * 3, dtype=backend.float32)
+    triangles = backend.zeros(num_triangles * 3, dtype=backend.int32)
+
+    # Create vertices
+    for h in range(H):
+        for w in range(W):
+            mesh_index = h * W + w
+            offset = 3 * mesh_index
+            verts[offset] = w  # x coordinate
+            verts[offset + 1] = h  # y coordinate
+            verts[offset + 2] = float(elevation_data[h, w])  # z = elevation
+
+    # Create triangles (two per grid cell)
+    for h in range(H - 1):
+        for w in range(W - 1):
+            mesh_index = h * W + w
+            tri_offset = 6 * (h * (W - 1) + w)
+            # First triangle
+            triangles[tri_offset + 0] = mesh_index + W
+            triangles[tri_offset + 1] = mesh_index + W + 1
+            triangles[tri_offset + 2] = mesh_index
+            # Second triangle
+            triangles[tri_offset + 3] = mesh_index + W + 1
+            triangles[tri_offset + 4] = mesh_index + 1
+            triangles[tri_offset + 5] = mesh_index
+
+    return verts, triangles
+
+
 @pytest.mark.parametrize("test_cupy", [False, True])
 def test_simple(test_cupy):
     if test_cupy:
@@ -103,3 +150,219 @@ def test_nan_in_vertex_input(test_cupy):
         t_value = float(hits[0])
         # Result should be a valid float (hit, miss, or NaN - but not crash)
         assert np.isfinite(t_value) or np.isnan(t_value) or t_value == -1.0
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_nan_in_elevation_data_single_cell(test_cupy):
+    """Test behavior when elevation xarray.DataArray contains a single NaN value."""
+    if test_cupy:
+        if not has_cupy:
+            pytest.skip("cupy not available")
+
+        import cupy
+        backend = cupy
+    else:
+        import numpy
+        backend = numpy
+
+    # Create a 3x3 elevation grid with one NaN value in the center
+    elevation = np.array([
+        [1.0, 1.0, 1.0],
+        [1.0, np.nan, 1.0],
+        [1.0, 1.0, 1.0]
+    ], dtype=np.float32)
+
+    da = xr.DataArray(
+        elevation,
+        dims=['y', 'x'],
+        coords={'y': [0, 1, 2], 'x': [0, 1, 2]}
+    )
+
+    # Triangulate the elevation data
+    verts, triangles = triangulate_elevation(da.values, backend)
+
+    optix = RTX()
+    res = optix.build(0, verts, triangles)
+    # Build may succeed even with NaN in vertex data
+
+    if res == 0:
+        # Trace a ray pointing down at the center (where NaN is)
+        rays = backend.float32([1.0, 1.0, 100, 0, 0, 0, -1, 1000])
+        hits = backend.float32([0, 0, 0, 0])
+
+        res = optix.trace(rays, hits, 1)
+        assert res == 0
+
+        # With NaN in elevation, the ray may miss or produce undefined results
+        # but should not crash
+        t_value = float(hits[0])
+        assert np.isfinite(t_value) or np.isnan(t_value) or t_value == -1.0
+
+        # Trace a ray at a corner (away from NaN) - should hit valid geometry
+        rays_corner = backend.float32([0.25, 0.25, 100, 0, 0, 0, -1, 1000])
+        hits_corner = backend.float32([0, 0, 0, 0])
+
+        res = optix.trace(rays_corner, hits_corner, 1)
+        assert res == 0
+
+        # This ray targets the corner triangle which should be valid
+        t_corner = float(hits_corner[0])
+        # Result should be valid (not crash)
+        assert np.isfinite(t_corner) or np.isnan(t_corner) or t_corner == -1.0
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_nan_in_elevation_data_edge(test_cupy):
+    """Test behavior when elevation xarray.DataArray has NaN on the edge."""
+    if test_cupy:
+        if not has_cupy:
+            pytest.skip("cupy not available")
+
+        import cupy
+        backend = cupy
+    else:
+        import numpy
+        backend = numpy
+
+    # Create a 4x4 elevation grid with NaN on one edge
+    elevation = np.array([
+        [np.nan, 1.0, 1.0, 1.0],
+        [1.0, 2.0, 2.0, 1.0],
+        [1.0, 2.0, 2.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0]
+    ], dtype=np.float32)
+
+    da = xr.DataArray(
+        elevation,
+        dims=['y', 'x'],
+        coords={'y': [0, 1, 2, 3], 'x': [0, 1, 2, 3]}
+    )
+
+    verts, triangles = triangulate_elevation(da.values, backend)
+
+    optix = RTX()
+    res = optix.build(0, verts, triangles)
+
+    if res == 0:
+        # Trace a ray at the NaN corner
+        rays_nan = backend.float32([0.25, 0.25, 100, 0, 0, 0, -1, 1000])
+        hits_nan = backend.float32([0, 0, 0, 0])
+
+        res = optix.trace(rays_nan, hits_nan, 1)
+        assert res == 0
+
+        # Ray near NaN vertex - behavior undefined but should not crash
+        t_nan = float(hits_nan[0])
+        assert np.isfinite(t_nan) or np.isnan(t_nan) or t_nan == -1.0
+
+        # Trace a ray far from the NaN area - should hit valid geometry
+        rays_valid = backend.float32([2.5, 2.5, 100, 0, 0, 0, -1, 1000])
+        hits_valid = backend.float32([0, 0, 0, 0])
+
+        res = optix.trace(rays_valid, hits_valid, 1)
+        assert res == 0
+
+        # This area has valid elevation data, should get a valid hit
+        t_valid = float(hits_valid[0])
+        # Expect a hit (positive t value) in the valid region
+        assert t_valid > 0 or t_valid == -1.0, \
+            f"Expected hit or miss in valid region, got t={t_valid}"
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_nan_in_elevation_data_all_nan(test_cupy):
+    """Test behavior when elevation xarray.DataArray is entirely NaN."""
+    if test_cupy:
+        if not has_cupy:
+            pytest.skip("cupy not available")
+
+        import cupy
+        backend = cupy
+    else:
+        import numpy
+        backend = numpy
+
+    # Create a 3x3 elevation grid with all NaN values
+    elevation = np.full((3, 3), np.nan, dtype=np.float32)
+
+    da = xr.DataArray(
+        elevation,
+        dims=['y', 'x'],
+        coords={'y': [0, 1, 2], 'x': [0, 1, 2]}
+    )
+
+    verts, triangles = triangulate_elevation(da.values, backend)
+
+    optix = RTX()
+    res = optix.build(0, verts, triangles)
+
+    # Build might succeed or fail with all NaN vertices
+    if res == 0:
+        rays = backend.float32([1.0, 1.0, 100, 0, 0, 0, -1, 1000])
+        hits = backend.float32([0, 0, 0, 0])
+
+        res = optix.trace(rays, hits, 1)
+        assert res == 0
+
+        # With all NaN vertices, should miss or return NaN but not crash
+        t_value = float(hits[0])
+        assert np.isnan(t_value) or t_value == -1.0, \
+            f"Expected miss or NaN for all-NaN mesh, got t={t_value}"
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_nan_in_elevation_data_sparse(test_cupy):
+    """Test behavior with sparse NaN pattern in elevation data."""
+    if test_cupy:
+        if not has_cupy:
+            pytest.skip("cupy not available")
+
+        import cupy
+        backend = cupy
+    else:
+        import numpy
+        backend = numpy
+
+    # Create a 5x5 elevation grid with sparse NaN values (checkerboard-like pattern)
+    elevation = np.array([
+        [1.0, 2.0, np.nan, 2.0, 1.0],
+        [2.0, 3.0, 4.0, 3.0, 2.0],
+        [np.nan, 4.0, 5.0, 4.0, np.nan],
+        [2.0, 3.0, 4.0, 3.0, 2.0],
+        [1.0, 2.0, np.nan, 2.0, 1.0]
+    ], dtype=np.float32)
+
+    da = xr.DataArray(
+        elevation,
+        dims=['y', 'x'],
+        coords={'y': range(5), 'x': range(5)}
+    )
+
+    verts, triangles = triangulate_elevation(da.values, backend)
+
+    optix = RTX()
+    res = optix.build(0, verts, triangles)
+
+    if res == 0:
+        # Trace multiple rays across the surface
+        # Ray at center (valid area)
+        rays_center = backend.float32([2.0, 2.0, 100, 0, 0, 0, -1, 1000])
+        hits_center = backend.float32([0, 0, 0, 0])
+
+        res = optix.trace(rays_center, hits_center, 1)
+        assert res == 0
+
+        t_center = float(hits_center[0])
+        # Center should be valid
+        assert np.isfinite(t_center) or np.isnan(t_center) or t_center == -1.0
+
+        # Ray near a NaN area
+        rays_nan_area = backend.float32([0.5, 2.0, 100, 0, 0, 0, -1, 1000])
+        hits_nan_area = backend.float32([0, 0, 0, 0])
+
+        res = optix.trace(rays_nan_area, hits_nan_area, 1)
+        assert res == 0
+
+        # Near NaN - should not crash
+        t_nan_area = float(hits_nan_area[0])
+        assert np.isfinite(t_nan_area) or np.isnan(t_nan_area) or t_nan_area == -1.0

From 0c59990e4fd02dea90e41c67ff3c27f8b460dcd5 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 28 Jan 2026 07:12:44 -0800
Subject: [PATCH 3/7] added coverage for numpy dtypes

---
 rtxpy/tests/test_simple.py | 76 +++++++++++++++++++++++++++++++-------
 1 file changed, 62 insertions(+), 14 deletions(-)

diff --git a/rtxpy/tests/test_simple.py b/rtxpy/tests/test_simple.py
index f2d44fc..a8cf699 100644
--- a/rtxpy/tests/test_simple.py
+++ b/rtxpy/tests/test_simple.py
@@ -4,6 +4,24 @@
 
 from rtxpy import RTX, has_cupy
 
+# All numpy numeric dtypes to test for elevation input
+NUMPY_NUMERIC_DTYPES = [
+    # Floating point types
+    np.float16,
+    np.float32,
+    np.float64,
+    # Signed integer types
+    np.int8,
+    np.int16,
+    np.int32,
+    np.int64,
+    # Unsigned integer types
+    np.uint8,
+    np.uint16,
+    np.uint32,
+    np.uint64,
+]
+
 
 def triangulate_elevation(elevation_data, backend):
     """
@@ -153,7 +171,8 @@ def test_nan_in_vertex_input(test_cupy):
 
 
 @pytest.mark.parametrize("test_cupy", [False, True])
-def test_nan_in_elevation_data_single_cell(test_cupy):
+@pytest.mark.parametrize("dtype", NUMPY_NUMERIC_DTYPES)
+def test_nan_in_elevation_data_single_cell(test_cupy, dtype):
     """Test behavior when elevation xarray.DataArray contains a single NaN value."""
     if test_cupy:
         if not has_cupy:
@@ -166,11 +185,16 @@ def test_nan_in_elevation_data_single_cell(test_cupy):
         backend = numpy
 
     # Create a 3x3 elevation grid with one NaN value in the center
-    elevation = np.array([
+    # For integer dtypes, NaN must be converted to a value (0) before casting
+    elevation_float = np.array([
         [1.0, 1.0, 1.0],
         [1.0, np.nan, 1.0],
         [1.0, 1.0, 1.0]
-    ], dtype=np.float32)
+    ], dtype=np.float64)
+    if np.issubdtype(dtype, np.integer):
+        elevation = np.nan_to_num(elevation_float, nan=0).astype(dtype)
+    else:
+        elevation = elevation_float.astype(dtype)
 
     da = xr.DataArray(
         elevation,
@@ -212,7 +236,8 @@ def test_nan_in_elevation_data_single_cell(test_cupy):
 
 
 @pytest.mark.parametrize("test_cupy", [False, True])
-def test_nan_in_elevation_data_edge(test_cupy):
+@pytest.mark.parametrize("dtype", NUMPY_NUMERIC_DTYPES)
+def test_nan_in_elevation_data_edge(test_cupy, dtype):
     """Test behavior when elevation xarray.DataArray has NaN on the edge."""
     if test_cupy:
         if not has_cupy:
@@ -225,12 +250,17 @@ def test_nan_in_elevation_data_edge(test_cupy):
         backend = numpy
 
     # Create a 4x4 elevation grid with NaN on one edge
-    elevation = np.array([
+    # For integer dtypes, NaN must be converted to a value (0) before casting
+    elevation_float = np.array([
         [np.nan, 1.0, 1.0, 1.0],
         [1.0, 2.0, 2.0, 1.0],
         [1.0, 2.0, 2.0, 1.0],
         [1.0, 1.0, 1.0, 1.0]
-    ], dtype=np.float32)
+    ], dtype=np.float64)
+    if np.issubdtype(dtype, np.integer):
+        elevation = np.nan_to_num(elevation_float, nan=0).astype(dtype)
+    else:
+        elevation = elevation_float.astype(dtype)
 
     da = xr.DataArray(
         elevation,
@@ -270,7 +300,8 @@ def test_nan_in_elevation_data_edge(test_cupy):
 
 
 @pytest.mark.parametrize("test_cupy", [False, True])
-def test_nan_in_elevation_data_all_nan(test_cupy):
+@pytest.mark.parametrize("dtype", NUMPY_NUMERIC_DTYPES)
+def test_nan_in_elevation_data_all_nan(test_cupy, dtype):
     """Test behavior when elevation xarray.DataArray is entirely NaN."""
     if test_cupy:
         if not has_cupy:
@@ -283,7 +314,12 @@ def test_nan_in_elevation_data_all_nan(test_cupy):
         backend = numpy
 
     # Create a 3x3 elevation grid with all NaN values
-    elevation = np.full((3, 3), np.nan, dtype=np.float32)
+    # For integer dtypes, NaN must be converted to a value (0) before casting
+    elevation_float = np.full((3, 3), np.nan, dtype=np.float64)
+    if np.issubdtype(dtype, np.integer):
+        elevation = np.nan_to_num(elevation_float, nan=0).astype(dtype)
+    else:
+        elevation = elevation_float.astype(dtype)
 
     da = xr.DataArray(
         elevation,
@@ -304,14 +340,21 @@ def test_nan_in_elevation_data_all_nan(test_cupy):
         res = optix.trace(rays, hits, 1)
         assert res == 0
 
-        # With all NaN vertices, should miss or return NaN but not crash
+        # With all NaN vertices (for float dtypes), should miss or return NaN but not crash
+        # For integer dtypes, NaN gets converted to a valid integer, so we may get a hit
         t_value = float(hits[0])
-        assert np.isnan(t_value) or t_value == -1.0, \
-            f"Expected miss or NaN for all-NaN mesh, got t={t_value}"
+        is_float_dtype = np.issubdtype(dtype, np.floating)
+        if is_float_dtype:
+            assert np.isnan(t_value) or t_value == -1.0, \
+                f"Expected miss or NaN for all-NaN mesh, got t={t_value}"
+        else:
+            # Integer dtypes: NaN converted to int, mesh is valid, may hit or miss
+            assert np.isfinite(t_value) or np.isnan(t_value) or t_value == -1.0
 
 
 @pytest.mark.parametrize("test_cupy", [False, True])
-def test_nan_in_elevation_data_sparse(test_cupy):
+@pytest.mark.parametrize("dtype", NUMPY_NUMERIC_DTYPES)
+def test_nan_in_elevation_data_sparse(test_cupy, dtype):
     """Test behavior with sparse NaN pattern in elevation data."""
     if test_cupy:
         if not has_cupy:
@@ -324,13 +367,18 @@ def test_nan_in_elevation_data_sparse(test_cupy):
         backend = numpy
 
     # Create a 5x5 elevation grid with sparse NaN values (checkerboard-like pattern)
-    elevation = np.array([
+    # For integer dtypes, NaN must be converted to a value (0) before casting
+    elevation_float = np.array([
         [1.0, 2.0, np.nan, 2.0, 1.0],
         [2.0, 3.0, 4.0, 3.0, 2.0],
         [np.nan, 4.0, 5.0, 4.0, np.nan],
         [2.0, 3.0, 4.0, 3.0, 2.0],
         [1.0, 2.0, np.nan, 2.0, 1.0]
-    ], dtype=np.float32)
+    ], dtype=np.float64)
+    if np.issubdtype(dtype, np.integer):
+        elevation = np.nan_to_num(elevation_float, nan=0).astype(dtype)
+    else:
+        elevation = elevation_float.astype(dtype)
 
     da = xr.DataArray(
         elevation,

From b10bc1f6a5db7ad8cd7cd8f57cbd43e8fe66a5eb Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 28 Jan 2026 07:19:09 -0800
Subject: [PATCH 4/7] add cupy as test dep.

---
 .github/workflows/gpu-test.yml | 2 +-
 pyproject.toml                 | 2 +-
 setup.cfg                      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/gpu-test.yml b/.github/workflows/gpu-test.yml
index 0e61d6a..7b8c61b 100644
--- a/.github/workflows/gpu-test.yml
+++ b/.github/workflows/gpu-test.yml
@@ -130,7 +130,7 @@ jobs:
       - name: Install rtxpy with CUDA dependencies
         run: |
           python -m pip install -U pip
-          python -m pip install -ve .[tests,cuda12]
+          python -m pip install -ve .[tests]
           python -m pip list
 
       - name: Run GPU tests
diff --git a/pyproject.toml b/pyproject.toml
index b1e1b8d..dbd8a6d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,7 @@ dependencies = [
 # Install cupy via: conda install -c conda-forge cupy
 # Note: otk-pyoptix must be installed separately from NVIDIA
 # See: https://github.com/NVIDIA/otk-pyoptix
-tests = ["pytest"]
+tests = ["pytest", "cupy"]
 
 [project.urls]
 Homepage = "https://github.com/makepath/rtxpy"
diff --git a/setup.cfg b/setup.cfg
index d46a44d..addf0c2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -3,7 +3,7 @@
 
 [metadata]
 author = makepath
-author_email = a.soklev@gmail.com
+author_email = brendancol@gmail.com
 license = MIT
 license_files = LICENSE
 

From f5db0e049253955e0a1ff444e63265a42a2d6f3c Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 28 Jan 2026 07:27:14 -0800
Subject: [PATCH 5/7] fixing ci: install cupy via conda, revert cupy test extra

---
 .github/workflows/gpu-test.yml | 16 ++++++++++++++--
 pyproject.toml                 |  2 +-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/gpu-test.yml b/.github/workflows/gpu-test.yml
index 7b8c61b..45af971 100644
--- a/.github/workflows/gpu-test.yml
+++ b/.github/workflows/gpu-test.yml
@@ -30,10 +30,18 @@ jobs:
         with:
           fetch-depth: 0
 
-      - name: Install Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+      - name: Setup Miniconda
+        uses: conda-incubator/setup-miniconda@v3
         with:
           python-version: ${{ matrix.python-version }}
+          activate-environment: test
+          auto-activate-base: false
+          miniconda-version: "latest"
+
+      - name: Install CuPy
+        shell: bash -el {0}
+        run: |
+          conda install -y -c conda-forge cupy
 
       - name: Verify GPU
         run: |
@@ -117,6 +125,7 @@ jobs:
           head -15 rtxpy/kernel.ptx
 
       - name: Install otk-pyoptix from source
+        shell: bash -el {0}
         run: |
           echo "Using OptiX from: ${OptiX_INSTALL_DIR}"
 
@@ -128,16 +137,19 @@ jobs:
           pip install .
 
       - name: Install rtxpy with CUDA dependencies
+        shell: bash -el {0}
         run: |
           python -m pip install -U pip
           python -m pip install -ve .[tests]
           python -m pip list
 
       - name: Run GPU tests
+        shell: bash -el {0}
         run: |
           python -m pytest -v rtxpy/tests
 
       - name: Test basic ray tracing
+        shell: bash -el {0}
         run: |
           python -c "
           from rtxpy import RTX
diff --git a/pyproject.toml b/pyproject.toml
index dbd8a6d..b1e1b8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,7 @@ dependencies = [
 # Install cupy via: conda install -c conda-forge cupy
 # Note: otk-pyoptix must be installed separately from NVIDIA
 # See: https://github.com/NVIDIA/otk-pyoptix
-tests = ["pytest", "cupy"]
+tests = ["pytest"]
 
 [project.urls]
 Homepage = "https://github.com/makepath/rtxpy"

From 907b149428756d77e95a3dbedb35d5ed365f532a Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 28 Jan 2026 07:35:06 -0800
Subject: [PATCH 6/7] fixing ci: install xarray alongside cupy

---
 .github/workflows/gpu-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/gpu-test.yml b/.github/workflows/gpu-test.yml
index 45af971..37aeb53 100644
--- a/.github/workflows/gpu-test.yml
+++ b/.github/workflows/gpu-test.yml
@@ -41,7 +41,7 @@ jobs:
       - name: Install CuPy
         shell: bash -el {0}
         run: |
-          conda install -y -c conda-forge cupy
+          conda install -y -c conda-forge cupy xarray
 
       - name: Verify GPU
         run: |

From ca6607cbe79a4e8845bf1484e2600a8ae9b117e1 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 28 Jan 2026 08:31:54 -0800
Subject: [PATCH 7/7] added multi-instance support fixes #2

---
 rtxpy/rtx.py               | 418 +++++++++++++++++++++++++++++-
 rtxpy/tests/test_simple.py | 513 +++++++++++++++++++++++++++++++++++++
 2 files changed, 924 insertions(+), 7 deletions(-)

diff --git a/rtxpy/rtx.py b/rtxpy/rtx.py
index 5d57f51..b0466d7 100644
--- a/rtxpy/rtx.py
+++ b/rtxpy/rtx.py
@@ -8,6 +8,8 @@
 import os
 import atexit
 import struct
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
 
 # CRITICAL: cupy must be imported before optix for proper CUDA context sharing
 import cupy
@@ -18,6 +20,24 @@
 import numpy as np
 
 
+# -----------------------------------------------------------------------------
+# Data structures for multi-GAS support
+# -----------------------------------------------------------------------------
+
+@dataclass
+class _GASEntry:
+    """Storage for a single Geometry Acceleration Structure."""
+    gas_id: str  # presumably the key of this entry in _state.gas_entries (confirm)
+    gas_handle: int  # Traversable handle; _build_ias packs it into the instance
+    gas_buffer: cupy.ndarray  # Must keep reference to prevent GC
+    vertices_hash: int  # NOTE(review): not read in the visible code; purpose TBC
+    transform: List[float] = field(default_factory=lambda: [
+        1.0, 0.0, 0.0, 0.0,  # Row 0: [Xx, Xy, Xz, Tx]
+        0.0, 1.0, 0.0, 0.0,  # Row 1: [Yx, Yy, Yz, Ty]
+        0.0, 0.0, 1.0, 0.0,  # Row 2: [Zx, Zy, Zz, Tz]
+    ])  # 12 floats (3x4 row-major affine transform); defaults to identity
+
+
 # -----------------------------------------------------------------------------
 # Singleton state management
 # -----------------------------------------------------------------------------
@@ -37,11 +57,19 @@ def __init__(self):
         self.hit_pg = None
         self.sbt = None
 
-        # Acceleration structure cache
+        # Single-GAS mode acceleration structure cache
         self.gas_handle = 0
         self.gas_buffer = None
         self.current_hash = 0xFFFFFFFFFFFFFFFF  # uint64(-1)
 
+        # Multi-GAS mode state
+        self.gas_entries: Dict[str, _GASEntry] = {}  # Dict[str, _GASEntry]
+        self.ias_handle = 0
+        self.ias_buffer = None
+        self.ias_dirty = True
+        self.instances_buffer = None
+        self.single_gas_mode = True  # False when multi-GAS active
+
         # Device memory for params
         self.d_params = None
 
@@ -62,11 +90,19 @@ def cleanup(self):
         self.d_rays_size = 0
         self.d_hits_size = 0
 
-        # Free acceleration structure
+        # Free single-GAS mode acceleration structure
         self.gas_buffer = None
         self.gas_handle = 0
         self.current_hash = 0xFFFFFFFFFFFFFFFF
 
+        # Free multi-GAS mode resources
+        self.gas_entries = {}
+        self.ias_handle = 0
+        self.ias_buffer = None
+        self.ias_dirty = True
+        self.instances_buffer = None
+        self.single_gas_mode = True
+
         # OptiX objects are automatically cleaned up by Python GC
         self.sbt = None
         self.pipeline = None
@@ -147,7 +183,7 @@ def _init_optix():
 
     pipeline_options = optix.PipelineCompileOptions(
         usesMotionBlur=False,
-        traversableGraphFlags=optix.TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS,
+        traversableGraphFlags=optix.TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY,
         numPayloadValues=4,
         numAttributeValues=2,
         exceptionFlags=optix.EXCEPTION_FLAG_NONE,
@@ -222,7 +258,7 @@ def _init_optix():
         dc_from_traversal,
         dc_from_state,
         continuation,
-        1,  # maxTraversableDepth
+        2,  # maxTraversableDepth (IAS -> GAS = 2 levels)
     )
 
     # Create shader binding table
@@ -278,10 +314,190 @@ def _create_sbt():
 # Acceleration structure building
 # -----------------------------------------------------------------------------
 
+def _build_gas_for_geometry(vertices, indices):
+    """
+    Build a single GAS (Geometry Acceleration Structure) for the given mesh.
+
+    Args:
+        vertices: Vertex buffer (Nx3, flattened); coerced to float32 on GPU
+        indices: Index buffer (Mx3, flattened); coerced to int32 on GPU
+
+    Returns:
+        Tuple of (gas_handle, gas_buffer), or (0, None) for an empty mesh
+    """
+    global _state
+
+    if not _state.initialized:
+        _init_optix()
+
+    # Ensure data is on GPU as cupy arrays in the layout declared to OptiX
+    # below (FLOAT3 vertices and 32-bit index triplets, both 12-byte stride).
+    # cupy.asarray is applied even to cupy inputs: an array that already has
+    # the right dtype is passed through, while e.g. float64 vertices are
+    # converted instead of being misread through the float32 stride.
+    d_vertices = cupy.asarray(vertices, dtype=cupy.float32)
+
+    # Indices get the same treatment so any integer dtype is accepted.
+    # NOTE(review): non-contiguous inputs are not repacked here.
+    d_indices = cupy.asarray(indices, dtype=cupy.int32)
+
+    # Calculate counts
+    num_vertices = d_vertices.size // 3
+    num_triangles = d_indices.size // 3
+
+    if num_vertices == 0 or num_triangles == 0:
+        return 0, None
+
+    # Build input
+    build_input = optix.BuildInputTriangleArray(
+        vertexBuffers_=[d_vertices.data.ptr],
+        vertexFormat=optix.VERTEX_FORMAT_FLOAT3,
+        vertexStrideInBytes=12,  # 3 * sizeof(float)
+        indexBuffer=d_indices.data.ptr,
+        numIndexTriplets=num_triangles,
+        indexFormat=optix.INDICES_FORMAT_UNSIGNED_INT3,
+        indexStrideInBytes=12,  # 3 * sizeof(int)
+        flags_=[optix.GEOMETRY_FLAG_DISABLE_ANYHIT],
+        numSbtRecords=1,
+    )
+    build_input.numVertices = num_vertices
+
+    # Acceleration structure options
+    accel_options = optix.AccelBuildOptions(
+        buildFlags=optix.BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS,
+        operation=optix.BUILD_OPERATION_BUILD,
+    )
+
+    # Compute memory requirements
+    buffer_sizes = _state.context.accelComputeMemoryUsage(
+        [accel_options],
+        [build_input],
+    )
+
+    # Allocate buffers
+    d_temp = cupy.zeros(buffer_sizes.tempSizeInBytes, dtype=cupy.uint8)
+    gas_buffer = cupy.zeros(buffer_sizes.outputSizeInBytes, dtype=cupy.uint8)
+
+    # Build acceleration structure
+    gas_handle = _state.context.accelBuild(
+        0,  # stream
+        [accel_options],
+        [build_input],
+        d_temp.data.ptr,
+        buffer_sizes.tempSizeInBytes,
+        gas_buffer.data.ptr,
+        buffer_sizes.outputSizeInBytes,
+        [],  # emitted properties
+    )
+
+    return gas_handle, gas_buffer
+
+
+def _build_ias():
+    """
+    Rebuild the top-level Instance Acceleration Structure (IAS).
+
+    One instance record is emitted per entry in ``_state.gas_entries``,
+    carrying that entry's transform and GAS handle. The resulting handle
+    and backing buffers are stored on ``_state`` and ``ias_dirty`` is
+    cleared. With no entries registered the IAS is reset to handle 0.
+
+    Instance ids follow the iteration order of ``gas_entries``; every
+    instance uses SBT offset 0 and visibility mask 0xFF.
+    """
+    global _state
+
+    if not _state.initialized:
+        _init_optix()
+
+    entries = list(_state.gas_entries.values())
+    if not entries:
+        # Nothing to instance: drop any previous IAS and mark it clean.
+        _state.ias_handle = 0
+        _state.ias_buffer = None
+        _state.ias_dirty = False
+        return
+
+    # Host-side image of one OptixInstance record (80 bytes in total):
+    #   float  transform[12]       48 bytes (3x4 row-major)
+    #   uint32 instanceId           4 bytes
+    #   uint32 sbtOffset            4 bytes
+    #   uint32 visibilityMask       4 bytes
+    #   uint32 flags                4 bytes
+    #   uint64 traversableHandle    8 bytes
+    #   uint32 pad[2]               8 bytes
+    # struct's native alignment puts the uint64 at offset 64, so a single
+    # format string reproduces the layout without manual offsets.
+    record = struct.Struct('12f4IQ2I')
+    count = len(entries)
+    packed = bytearray(count * record.size)
+
+    # Fill one record per GAS entry.
+    for index, entry in enumerate(entries):
+        record.pack_into(
+            packed,
+            index * record.size,
+            *entry.transform,  # 12 floats, 48 bytes
+            index,  # instanceId
+            0,  # sbtOffset - all use same hit group (SBT index 0)
+            0xFF,  # visibilityMask - 0xFF = visible to all rays
+            0,  # flags - OPTIX_INSTANCE_FLAG_NONE = 0
+            entry.gas_handle,  # traversableHandle
+            0,  # pad[0]
+            0,  # pad[1]
+        )
+
+    # Copy instances to GPU; the device buffer is kept on _state.
+    _state.instances_buffer = cupy.array(
+        np.frombuffer(packed, dtype=np.uint8)
+    )
+
+    # Describe the instance array to OptiX.
+    ias_input = optix.BuildInputInstanceArray(
+        instances=_state.instances_buffer.data.ptr,
+        numInstances=count,
+    )
+
+    # Plain build; the ALLOW_UPDATE flag is requested as well.
+    ias_options = optix.AccelBuildOptions(
+        buildFlags=optix.BUILD_FLAG_ALLOW_UPDATE,
+        operation=optix.BUILD_OPERATION_BUILD,
+    )
+
+    # Ask OptiX how much scratch and output memory the build needs.
+    sizes = _state.context.accelComputeMemoryUsage(
+        [ias_options],
+        [ias_input],
+    )
+    temp_bytes = sizes.tempSizeInBytes
+    output_bytes = sizes.outputSizeInBytes
+
+    # Scratch space is local; the output buffer is stored on _state so it
+    # stays referenced after this function returns.
+    scratch = cupy.zeros(temp_bytes, dtype=cupy.uint8)
+    _state.ias_buffer = cupy.zeros(output_bytes, dtype=cupy.uint8)
+
+    # Build the IAS and record its traversable handle.
+    _state.ias_handle = _state.context.accelBuild(
+        0,  # stream
+        [ias_options],
+        [ias_input],
+        scratch.data.ptr,
+        temp_bytes,
+        _state.ias_buffer.data.ptr,
+        output_bytes,
+        [],  # emitted properties
+    )
+
+    _state.ias_dirty = False
+
+
 def _build_accel(hash_value: int, vertices, indices) -> int:
     """
     Build an OptiX acceleration structure for the given triangle mesh.
 
+    This enables single-GAS mode and clears any multi-GAS state.
+
     Args:
         hash_value: Hash to identify this geometry (for caching)
         vertices: Vertex buffer (Nx3 float32, flattened)
@@ -295,6 +511,15 @@ def _build_accel(hash_value: int, vertices, indices) -> int:
     if not _state.initialized:
         _init_optix()
 
+    # Clear multi-GAS state when switching to single-GAS mode
+    if not _state.single_gas_mode:
+        _state.gas_entries = {}
+        _state.ias_handle = 0
+        _state.ias_buffer = None
+        _state.ias_dirty = True
+        _state.instances_buffer = None
+        _state.single_gas_mode = True
+
     # Check if we already have this acceleration structure cached
     if _state.current_hash == hash_value:
         return 0
@@ -374,6 +599,9 @@ def _trace_rays(rays, hits, num_rays: int) -> int:
     """
     Trace rays against the current acceleration structure.
 
+    Supports both single-GAS mode (using gas_handle) and multi-GAS mode
+    (using IAS that references multiple GAS).
+
     Args:
         rays: Ray buffer (Nx8 float32: ox,oy,oz,tmin,dx,dy,dz,tmax)
         hits: Hit buffer (Nx4 float32: t,nx,ny,nz)
@@ -387,8 +615,18 @@ def _trace_rays(rays, hits, num_rays: int) -> int:
     if not _state.initialized:
         return -1
 
-    if _state.gas_handle == 0:
-        return -1
+    # Determine which traversable handle to use
+    if _state.single_gas_mode:
+        if _state.gas_handle == 0:
+            return -1
+        trace_handle = _state.gas_handle
+    else:
+        # Multi-GAS mode: rebuild IAS if dirty
+        if _state.ias_dirty:
+            _build_ias()
+        if _state.ias_handle == 0:
+            return -1
+        trace_handle = _state.ias_handle
 
     # Size check
     if rays.size != num_rays * 8 or hits.size != num_rays * 4:
@@ -424,7 +662,7 @@ def _trace_rays(rays, hits, num_rays: int) -> int:
     # Pack params: handle(8 bytes) + rays_ptr(8 bytes) + hits_ptr(8 bytes)
     params_data = struct.pack(
         'QQQ',
-        _state.gas_handle,
+        trace_handle,
         d_rays.data.ptr,
         d_hits.data.ptr,
     )
@@ -493,6 +731,9 @@ def trace(self, rays, hits, numRays: int) -> int:
         """
         Trace rays against the current acceleration structure.
 
+        Works with both single-GAS mode (after build()) and multi-GAS mode
+        (after add_geometry()).
+
         Args:
             rays: Ray buffer (8 float32 per ray: ox,oy,oz,tmin,dx,dy,dz,tmax)
             hits: Hit buffer (4 float32 per hit: t,nx,ny,nz)
@@ -503,3 +744,166 @@ def trace(self, rays, hits, numRays: int) -> int:
             0 on success, non-zero on error
         """
         return _trace_rays(rays, hits, numRays)
+
+    # -------------------------------------------------------------------------
+    # Multi-GAS API
+    # -------------------------------------------------------------------------
+
+    def add_geometry(self, geometry_id: str, vertices, indices,
+                     transform: Optional[List[float]] = None) -> int:
+        """
+        Add a geometry (GAS) to the scene with an optional transform.
+
+        This enables multi-GAS mode. If called after build(), the single-GAS
+        state is cleared. Adding a geometry with an existing ID replaces it.
+        A transform of the wrong length is rejected before any state changes.
+
+        Args:
+            geometry_id: Unique identifier for this geometry
+            vertices: Vertex buffer (flattened float32 array, 3 floats per vertex)
+            indices: Index buffer (flattened int32 array, 3 ints per triangle)
+            transform: Optional 12-float list representing a 3x4 row-major
+                      affine transform matrix. Defaults to identity.
+                      Format: [Xx, Xy, Xz, Tx, Yx, Yy, Yz, Ty, Zx, Zy, Zz, Tz]
+
+        Returns:
+            0 on success, -1 on an invalid transform or a failed GAS build
+        """
+        # Validate the transform first so that a bad argument can neither
+        # discard the current single-GAS state nor cost a wasted GAS build.
+        if transform is None:
+            transform = [
+                1.0, 0.0, 0.0, 0.0,
+                0.0, 1.0, 0.0, 0.0,
+                0.0, 0.0, 1.0, 0.0,
+            ]
+        else:
+            transform = list(transform)
+            if len(transform) != 12:
+                return -1
+
+        if not _state.initialized:
+            _init_optix()
+
+        # Switch to multi-GAS mode if currently in single-GAS mode
+        if _state.single_gas_mode:
+            _state.gas_handle = 0
+            _state.gas_buffer = None
+            _state.current_hash = 0xFFFFFFFFFFFFFFFF
+            _state.single_gas_mode = False
+
+        # Build the GAS for this geometry
+        gas_handle, gas_buffer = _build_gas_for_geometry(vertices, indices)
+        if gas_handle == 0:
+            return -1
+
+        # Hash the vertex bytes (device arrays are copied to the host first)
+        if isinstance(vertices, cupy.ndarray):
+            vertices_for_hash = vertices.get()
+        else:
+            vertices_for_hash = np.asarray(vertices)
+        vertices_hash = hash(vertices_for_hash.tobytes())
+
+        # Create or update the GAS entry
+        _state.gas_entries[geometry_id] = _GASEntry(
+            gas_id=geometry_id,
+            gas_handle=gas_handle,
+            gas_buffer=gas_buffer,
+            vertices_hash=vertices_hash,
+            transform=transform,
+        )
+
+        # Mark IAS as needing rebuild
+        _state.ias_dirty = True
+
+        return 0
+
+    def remove_geometry(self, geometry_id: str) -> int:
+        """
+        Drop a previously added geometry from the multi-GAS scene.
+
+        Args:
+            geometry_id: Identifier of the geometry to drop
+
+        Returns:
+            0 when the geometry was removed, -1 when the ID is unknown
+        """
+        entries = _state.gas_entries
+        try:
+            del entries[geometry_id]
+        except KeyError:
+            return -1
+
+        # The set of instances changed, so flag the IAS for a rebuild
+        _state.ias_dirty = True
+        return 0
+
+    def update_transform(self, geometry_id: str,
+                        transform: List[float]) -> int:
+        """
+        Replace the instance transform of a geometry already in the scene.
+
+        Args:
+            geometry_id: Identifier of the geometry whose transform changes
+            transform: 12 floats forming a 3x4 row-major affine matrix,
+                      laid out as
+                      [Xx, Xy, Xz, Tx, Yx, Yy, Yz, Ty, Zx, Zy, Zz, Tz]
+
+        Returns:
+            0 on success, -1 for an unknown ID or a transform whose length
+            is not 12
+        """
+        try:
+            entry = _state.gas_entries[geometry_id]
+        except KeyError:
+            return -1
+
+        matrix = list(transform)
+        if len(matrix) != 12:
+            return -1
+
+        entry.transform = matrix
+        _state.ias_dirty = True
+        return 0
+
+    def list_geometries(self) -> List[str]:
+        """
+        Return the IDs of every geometry currently registered in the scene.
+
+        Returns:
+            A new list holding the geometry IDs
+        """
+        return list(_state.gas_entries)
+
+    def get_geometry_count(self) -> int:
+        """
+        Report how many geometries are registered in the multi-GAS scene.
+
+        Returns:
+            Number of registered geometries (0 in single-GAS mode)
+        """
+        return len(_state.gas_entries)
+
+    def clear_scene(self) -> None:
+        """
+        Drop every geometry and put the tracer back into single-GAS mode.
+
+        Both build() (single-GAS mode) and add_geometry() (multi-GAS mode)
+        may be used afterwards.
+        """
+        state = _state
+
+        # Multi-GAS bookkeeping: empty registry, no IAS, no instance buffer
+        state.gas_entries = {}
+        state.instances_buffer = None
+        state.ias_buffer = None
+        state.ias_handle = 0
+        state.ias_dirty = True
+
+        # Single-GAS bookkeeping: no handle, no buffer, all-ones hash
+        state.gas_buffer = None
+        state.gas_handle = 0
+        state.current_hash = 0xFFFFFFFFFFFFFFFF
+
+        # Back to single-GAS mode
+        state.single_gas_mode = True
diff --git a/rtxpy/tests/test_simple.py b/rtxpy/tests/test_simple.py
index a8cf699..42b4ff0 100644
--- a/rtxpy/tests/test_simple.py
+++ b/rtxpy/tests/test_simple.py
@@ -414,3 +414,516 @@ def test_nan_in_elevation_data_sparse(test_cupy, dtype):
         # Near NaN - should not crash
         t_nan_area = float(hits_nan_area[0])
         assert np.isfinite(t_nan_area) or np.isnan(t_nan_area) or t_nan_area == -1.0
+
+
+# =============================================================================
+# Multi-GAS Tests
+# =============================================================================
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_multi_gas_two_meshes(test_cupy):
+    """The nearer of two stacked instances of one mesh is the one hit."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    # One triangle in the z=0 plane, instanced twice below
+    triangle = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = backend.int32([0, 1, 2])
+
+    # First instance: default (identity) transform, stays at z=0
+    status = scene.add_geometry("ground", triangle, faces)
+    assert status == 0
+
+    # Second instance: same mesh lifted to z=5
+    # (identity rotation, translation (0, 0, 5))
+    lift = [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 5]
+    status = scene.add_geometry("elevated", triangle, faces, transform=lift)
+    assert status == 0
+
+    # Shoot straight down from z=10 through the triangle interior
+    rays = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+
+    status = scene.trace(rays, hits, 1)
+    assert status == 0
+
+    # The elevated copy is closer, so the hit distance is about 5
+    distance = float(hits[0])
+    np.testing.assert_almost_equal(distance, 5.0, decimal=1)
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_multi_gas_with_transform(test_cupy):
+    """A translated instance is hit at its new location only."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    # Triangle defined at the origin in the z=0 plane
+    triangle = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = backend.int32([0, 1, 2])
+
+    # Instance it shifted by (10, 0, 0)
+    shift_x = [1, 0, 0, 10, 0, 1, 0, 0, 0, 0, 1, 0]
+    status = scene.add_geometry("translated", triangle, faces, transform=shift_x)
+    assert status == 0
+
+    # Downward ray from (10.5, 0.33, 10) lands on the shifted triangle
+    rays_hit = backend.float32([10.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits_hit = backend.float32([0, 0, 0, 0])
+
+    status = scene.trace(rays_hit, hits_hit, 1)
+    assert status == 0
+
+    distance = float(hits_hit[0])
+    np.testing.assert_almost_equal(distance, 10.0, decimal=1)
+
+    # Downward ray over the untransformed location finds nothing
+    rays_miss = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits_miss = backend.float32([0, 0, 0, 0])
+
+    status = scene.trace(rays_miss, hits_miss, 1)
+    assert status == 0
+    assert float(hits_miss[0]) == -1.0  # t = -1 marks a miss
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_multi_gas_many_geometries(test_cupy):
+    """A scene holding 100 instances still resolves the correct hit."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    # Half-unit triangle in the z=0 plane
+    triangle = backend.float32([0, 0, 0, 0.5, 0, 0, 0.25, 0.5, 0])
+    faces = backend.int32([0, 1, 2])
+
+    # Lay out 100 instances on a 10x10 grid with a pitch of 2 units
+    num_geoms = 100
+    for index in range(num_geoms):
+        row, col = divmod(index, 10)
+        placement = [1, 0, 0, col * 2, 0, 1, 0, row * 2, 0, 0, 1, 0]
+        status = scene.add_geometry(
+            f"mesh_{index}", triangle, faces, transform=placement)
+        assert status == 0
+
+    assert scene.get_geometry_count() == num_geoms
+
+    # (4.25, 4.25) lies inside the instance at grid cell (2, 2), i.e. mesh_22
+    rays = backend.float32([4.25, 4.25, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+
+    status = scene.trace(rays, hits, 1)
+    assert status == 0
+
+    distance = float(hits[0])
+    np.testing.assert_almost_equal(distance, 10.0, decimal=1)
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_remove_geometry(test_cupy):
+    """Removing a geometry drops only that ID; unknown IDs report -1."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    triangle = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = backend.int32([0, 1, 2])
+
+    # Register two instances, the second shifted by 5 along x
+    scene.add_geometry("mesh1", triangle, faces)
+    scene.add_geometry("mesh2", triangle, faces, transform=[1, 0, 0, 5, 0, 1, 0, 0, 0, 0, 1, 0])
+
+    assert scene.get_geometry_count() == 2
+    assert "mesh1" in scene.list_geometries()
+    assert "mesh2" in scene.list_geometries()
+
+    # Removing a registered ID succeeds
+    status = scene.remove_geometry("mesh1")
+    assert status == 0
+
+    assert scene.get_geometry_count() == 1
+    assert "mesh1" not in scene.list_geometries()
+    assert "mesh2" in scene.list_geometries()
+
+    # Removing an unknown ID reports an error
+    status = scene.remove_geometry("nonexistent")
+    assert status == -1
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_replace_geometry(test_cupy):
+    """Test that adding geometry under an existing ID replaces the old mesh."""
+    if test_cupy:
+        if not has_cupy:
+            pytest.skip("cupy not available")
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    rtx = RTX()
+    rtx.clear_scene()  # Clear any state from previous tests
+
+    verts1 = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    verts2 = backend.float32([0, 0, 5, 1, 0, 5, 0.5, 1, 5])  # At z=5
+    tris = backend.int32([0, 1, 2])
+
+    # Add initial geometry at z=0; a failed add must fail the test right here
+    assert rtx.add_geometry("mesh", verts1, tris) == 0
+
+    # Ray should hit at z=0 (distance 10)
+    rays = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+    assert rtx.trace(rays, hits, 1) == 0
+    np.testing.assert_almost_equal(float(hits[0]), 10.0, decimal=1)
+
+    # Replace with geometry at z=5
+    assert rtx.add_geometry("mesh", verts2, tris) == 0
+    assert rtx.get_geometry_count() == 1  # Still only one geometry
+
+    # Now should hit at z=5 (distance 5)
+    hits = backend.float32([0, 0, 0, 0])
+    assert rtx.trace(rays, hits, 1) == 0
+    np.testing.assert_almost_equal(float(hits[0]), 5.0, decimal=1)
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_update_transform(test_cupy):
+    """Test that update_transform() moves a geometry and rejects bad input."""
+    if test_cupy:
+        if not has_cupy:
+            pytest.skip("cupy not available")
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    rtx = RTX()
+    rtx.clear_scene()  # Clear any state from previous tests
+
+    verts = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    tris = backend.int32([0, 1, 2])
+
+    # Add geometry at origin; a failed add must fail the test right here
+    assert rtx.add_geometry("mesh", verts, tris) == 0
+
+    # Ray at origin hits
+    rays_origin = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+    assert rtx.trace(rays_origin, hits, 1) == 0
+    np.testing.assert_almost_equal(float(hits[0]), 10.0, decimal=1)
+
+    # Update transform to translate by (10, 0, 0)
+    res = rtx.update_transform("mesh", [1, 0, 0, 10, 0, 1, 0, 0, 0, 0, 1, 0])
+    assert res == 0
+
+    # Now ray at origin should miss (the trace itself must still succeed)
+    hits = backend.float32([0, 0, 0, 0])
+    assert rtx.trace(rays_origin, hits, 1) == 0
+    assert float(hits[0]) == -1.0
+
+    # Ray at new position should hit
+    rays_new = backend.float32([10.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+    assert rtx.trace(rays_new, hits, 1) == 0
+    np.testing.assert_almost_equal(float(hits[0]), 10.0, decimal=1)
+
+    # Update non-existent should fail
+    res = rtx.update_transform("nonexistent", [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0])
+    assert res == -1
+
+    # Invalid transform length should fail
+    res = rtx.update_transform("mesh", [1, 0, 0])
+    assert res == -1
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_list_geometries(test_cupy):
+    """list_geometries() and get_geometry_count() track registrations."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    triangle = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = backend.int32([0, 1, 2])
+
+    # A cleared scene reports nothing
+    assert scene.get_geometry_count() == 0
+    assert scene.list_geometries() == []
+
+    # Register three instances of the same mesh under distinct IDs
+    names = ("a", "b", "c")
+    for name in names:
+        scene.add_geometry(name, triangle, faces)
+
+    # Both accessors reflect the three registrations
+    assert scene.get_geometry_count() == 3
+    listed = scene.list_geometries()
+    for name in names:
+        assert name in listed
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_clear_scene(test_cupy):
+    """clear_scene() empties the scene and leaves build() usable."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    triangle = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = backend.int32([0, 1, 2])
+
+    # Populate the scene with two instances
+    scene.add_geometry("mesh1", triangle, faces)
+    scene.add_geometry("mesh2", triangle, faces)
+    assert scene.get_geometry_count() == 2
+
+    # After clearing, no geometry IDs remain
+    scene.clear_scene()
+    assert scene.get_geometry_count() == 0
+    assert scene.list_geometries() == []
+
+    # The single-GAS build() path works on the cleared scene
+    status = scene.build(123, triangle, faces)
+    assert status == 0
+    assert scene.getHash() == 123
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_backward_compat_single_gas(test_cupy):
+    """The original build()/trace() workflow keeps working unchanged."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    triangle = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = backend.int32([0, 1, 2])
+
+    # Build through the pre-existing single-GAS entry point
+    status = scene.build(12345, triangle, faces)
+    assert status == 0
+    assert scene.getHash() == 12345
+
+    # A downward ray from z=10 reaches the z=0 triangle after 10 units
+    rays = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+    status = scene.trace(rays, hits, 1)
+    assert status == 0
+    np.testing.assert_almost_equal(float(hits[0]), 10.0, decimal=1)
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_switch_multi_to_single(test_cupy):
+    """build() discards multi-GAS geometries and traces the single GAS."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    triangle = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = backend.int32([0, 1, 2])
+
+    # Begin in multi-GAS mode with two instances
+    scene.add_geometry("mesh1", triangle, faces)
+    scene.add_geometry("mesh2", triangle, faces, transform=[1, 0, 0, 5, 0, 1, 0, 0, 0, 0, 1, 0])
+    assert scene.get_geometry_count() == 2
+
+    # build() moves the tracer over to single-GAS mode
+    status = scene.build(999, triangle, faces)
+    assert status == 0
+
+    # The multi-GAS registry is emptied by the switch
+    assert scene.get_geometry_count() == 0
+
+    # Tracing now goes against the single GAS
+    rays = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+    status = scene.trace(rays, hits, 1)
+    assert status == 0
+    np.testing.assert_almost_equal(float(hits[0]), 10.0, decimal=1)
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_switch_single_to_multi(test_cupy):
+    """add_geometry() resets the single-GAS hash and traces via the IAS."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    triangle = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = backend.int32([0, 1, 2])
+
+    # Begin in single-GAS mode
+    scene.build(888, triangle, faces)
+    assert scene.getHash() == 888
+
+    # The first add_geometry() call moves the tracer to multi-GAS mode
+    scene.add_geometry("mesh", triangle, faces)
+
+    # The stored hash is reset to its all-ones cleared value
+    assert scene.getHash() == 0xFFFFFFFFFFFFFFFF
+
+    # Tracing now goes through the IAS
+    rays = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+    status = scene.trace(rays, hits, 1)
+    assert status == 0
+    np.testing.assert_almost_equal(float(hits[0]), 10.0, decimal=1)
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_empty_scene(test_cupy):
+    """Test that tracing fails cleanly once every geometry has been removed."""
+    if test_cupy:
+        if not has_cupy:
+            pytest.skip("cupy not available")
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    rtx = RTX()
+    rtx.clear_scene()  # Clear any state from previous tests
+
+    verts = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    tris = backend.int32([0, 1, 2])
+
+    # Add and then remove all geometry; a failed add would also leave it empty
+    assert rtx.add_geometry("mesh", verts, tris) == 0
+    assert rtx.remove_geometry("mesh") == 0
+    assert rtx.get_geometry_count() == 0
+
+    # Trace should fail gracefully (return error)
+    rays = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+    res = rtx.trace(rays, hits, 1)
+    assert res == -1  # No geometry to trace against
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_trace_miss_multi_gas(test_cupy):
+    """A ray far from every instance reports a miss in multi-GAS mode."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+        backend = cupy
+    else:
+        backend = np
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    triangle = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = backend.int32([0, 1, 2])
+
+    scene.add_geometry("mesh", triangle, faces)
+
+    # Downward ray at (100, 100), well outside the unit-sized triangle
+    rays = backend.float32([100, 100, 10, 0, 0, 0, -1, 1000])
+    hits = backend.float32([0, 0, 0, 0])
+
+    status = scene.trace(rays, hits, 1)
+    assert status == 0
+    assert float(hits[0]) == -1.0  # t = -1 marks a miss
+
+
+@pytest.mark.parametrize("test_cupy", [False, True])
+def test_cupy_buffers_multi_gas(test_cupy):
+    """Numpy mesh data combined with ray/hit buffers on either backend."""
+    if test_cupy and not has_cupy:
+        pytest.skip("cupy not available")
+    if test_cupy:
+        import cupy
+
+    scene = RTX()
+    scene.clear_scene()  # Drop anything left over from earlier tests
+
+    # Mesh data stays in numpy regardless of where the ray buffers live
+    triangle = np.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0])
+    faces = np.int32([0, 1, 2])
+
+    scene.add_geometry("mesh", triangle, faces)
+
+    # Ray and hit buffers are allocated on the backend under test
+    if test_cupy:
+        rays = cupy.array([0.5, 0.33, 10, 0, 0, 0, -1, 1000], dtype=cupy.float32)
+        hits = cupy.zeros(4, dtype=cupy.float32)
+    else:
+        rays = np.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000])
+        hits = np.float32([0, 0, 0, 0])
+
+    status = scene.trace(rays, hits, 1)
+    assert status == 0
+
+    # Bring cupy results back to the host before comparing
+    host_hits = hits
+    if test_cupy:
+        host_hits = hits.get()
+
+    # Downward ray from z=10 onto the z=0 triangle: distance about 10
+    np.testing.assert_almost_equal(host_hits[0], 10.0, decimal=1)