diff --git a/.claude/sweep-test-coverage-state.csv b/.claude/sweep-test-coverage-state.csv index bc102371..b6ea1749 100644 --- a/.claude/sweep-test-coverage-state.csv +++ b/.claude/sweep-test-coverage-state.csv @@ -5,3 +5,8 @@ Pass 17 (2026-05-18): added test_mask_nodata_gpu_vrt_2052.py closing Cat 1 HIGH polygonize,2026-05-27,2537,MEDIUM,4,"Pass 2 (2026-05-27): added test_polygonize_atol_rtol_backend_coverage_2026_05_27.py with 15 tests, all passing on a CUDA host. Closes Cat 4 MEDIUM parameter-coverage gap on atol/rtol forwarding through the cupy and dask+cupy backends. atol/rtol were exposed by #2173 / #2194 and thread through _polygonize_cupy (polygonize.py:808) and _polygonize_dask (polygonize.py:1719); the dask path further plumbs them into dask.delayed(_polygonize_chunk)(...) at lines 1748-1754 and into _bucket_key_for_value for cross-chunk merge bucketing at lines 1757-1758. Pre-existing tests covered non-default atol/rtol only on numpy and dask+numpy. The cupy and dask+cupy dispatchers were untested -- a regression dropping the kwargs there would silently change the float polygon count and would not be caught. Same dispatcher-silently-drops-kwarg pattern fixed by #1561 / #1605 / #1685 / #1810 / #1974 on adjacent GeoTIFF surfaces. 15 tests: cupy strict-equality + default-tolerance pin on _REPRO_2173, dask+cupy strict-equality single-chunk + multi-chunk (engages cross-chunk merge bucket) + default-tolerance multi-chunk pin, cupy intermediate-atol small/large pair, dask+cupy intermediate-atol single/multi-chunk small + single-chunk large, cupy integer atol-ignored matrix, dask+cupy integer atol-ignored single-chunk + multi-chunk, cupy rtol-only large/small matrix. Mutation against _polygonize_cupy float branch (drop atol/rtol kwargs in the _polygonize_numpy forward call at polygonize.py:823-825) flips 3 of 5 cupy tests red; mutation against dask.delayed(_polygonize_chunk)(...) at polygonize.py:1748-1754 (drop atol, rtol args) flips 2 of 6 dask+cupy tests red. Confirmed clean restore via md5sum. Source untouched. Filed issue #2537 (test-only). Cat 4 MEDIUM (parameter coverage on cupy + dask+cupy atol/rtol forwarding). Pass 1 (2026-05-19): added test_polygonize_coverage_2026_05_19.py with 58 tests, all passing on a CUDA host. Closes Cat 3 HIGH 1x1 / Nx1 single-column geometric gaps (Nx1 exercises the nx==1 padding path at polygonize.py:565 and the cupy nx==1 numpy-fallback at polygonize.py:671), Cat 3 MEDIUM 1xN single-row and all-equal-value rasters on all four backends. Closes Cat 2 HIGH NaN parity for cupy + dask+cupy (numpy/dask were already covered by test_polygonize_nan_pixels_excluded*), Cat 2 MEDIUM all-NaN raster on all four backends, Cat 2 HIGH +/-Inf pins on all four backends. Filed source-bug issue #2155: numpy/dask/dask+cupy backends silently absorb Inf cells into adjacent finite polygons because _is_close reduces abs(inf-inf) to nan; cupy backend handles Inf correctly. Pins lock the asymmetric behaviour so the fix is visible. Closes Cat 1 MEDIUM simplify_tolerance + mask= parity gaps on dask+cupy backend (numpy/cupy/dask were already covered). Closes Cat 4 MEDIUM column_name non-default value across geopandas/spatialpandas/geojson return types and Cat 4 MEDIUM validation error paths (bad connectivity, bad transform length, mask shape mismatch, mask underlying-type mismatch). Cat 5 N/A: polygonize returns lists/dataframes, not a DataArray with attrs to propagate." rasterize,2026-05-27,,HIGH,1;2;4,"Pass 3 (2026-05-27): added test_rasterize_coverage_2026_05_27.py with 23 tests, all passing on a CUDA host. Closes Cat 1 HIGH eager-cupy merge-mode parity gap: pass-1 only pinned merge='last' on a single non-overlapping polygon via TestCuPy.test_cupy_matches_numpy, and the Inf-burn tests in pass-2 only partially exercised sum/min/max on eager cupy; the parametrised six-mode parity test (last/first/max/min/sum/count) that TestDaskNumpy and TestDaskCupy carry had no eager-cupy twin, so a routing regression that swapped any of the six GPU atomic kernels in _ensure_gpu_kernels (rasterize.py:1308-1556) would slip past the dask+cupy tiled-finalize tests. Pin a three-way overlapping polygon scene plus a three-way overlapping point scene across all six modes on the eager cupy backend, with sanity checks (first!=last, min1; a future GPU atomic optimisation that deduped identical-value writes would silently break density rasters. Closes Cat 4 MEDIUM name= kwarg thread-through on dask+numpy / eager cupy / dask+cupy (the eager numpy path was the only one with name= coverage at TestBasic.test_output_name). Source untouched. Pass 2 (2026-05-21): added test_rasterize_coverage_2026_05_21.py with 58 tests, all passing on a CUDA host. Closes Cat 2 HIGH +/-Inf and NaN burn-value gaps that pass-1 left untouched: pin +Inf / -Inf / Inf+(-Inf)/NaN polygon, point, and line burn behaviour across numpy / cupy / dask+numpy / dask+cupy, plus Inf+finite under sum stays Inf, Inf+(-Inf) under sum collapses to NaN, min(Inf, 1.0) and max(-Inf, 1.0) pick the finite value, and Inf-as-bound is rejected with the same ValueError as NaN-as-bound (pass-1 only tested the NaN-bound rejection). Closes Cat 1 MEDIUM nested GeometryCollection on all four backends: a GC inside a GC has no direct test today even though rasterize.py:1995 documents recursive unpacking, and the deeply-nested-3-levels eager test pins the recursion depth limit isn't 1 or 2. Closes Cat 1 MEDIUM columns= (multi-column) parity on cupy and dask+cupy (TestMultiColumn covered numpy/dask+numpy only); pin three columns of props on GPU so the (N, P) loop survives the kernel boundary. Closes Cat 3 LOW rectangular-pixel parity with resolution=(rx, ry) across backends. Filed source-bug issue #2255: GPU max/min merge silently suppresses NaN burn values -- CPU returns NaN (1.0 > NaN is False, keeps NaN); GPU returns 1.0 because the kernel inits the output buffer to -inf for max (or +inf for min) and atomicMax/Min is NaN-suppressing under IEEE device semantics. Pinned both the CPU NaN-propagating behaviour and the GPU NaN-suppressing behaviour as paired tests (test_nan_burn_overlaps_max_cpu_propagates vs test_nan_burn_overlaps_max_gpu_suppresses_nan, plus test_nan_burn_single_geom_max_gpu_returns_neg_inf for the single-write-on-GPU-returns-buffer-init case) so the divergence is visible in CI until the GPU kernels are aligned. Source untouched. Pass 1 (2026-05-17): added test_rasterize_coverage_2026_05_17.py with 34 tests, all passing on a CUDA host. Closes four documented public-API gaps left after the pass-0 audit. (1) Cat 3 HIGH 1x1 single-pixel raster -- test_rasterize.py covers 1xN strips and Nx1 strips but never width=1 AND height=1, so the polygon scanline / line Bresenham / point burn kernels all ship without the single-cell degenerate case; the new TestSinglePixelRaster class pins polygon/point/line on eager numpy plus polygon parity across cupy / dask+numpy / dask+cupy. (2) Cat 4 HIGH like= template-raster parameter is documented at rasterize.py:2038 and implemented by _extract_grid_from_like (line 1930) but no test exercises it; TestLikeParameter pins dtype/bounds/coords inheritance, the three override branches (dtype, bounds, width/height), the three validation branches (not-DataArray, 3D, wrong dim names) and like= on all four backends. Mutation against the like-dtype branch (rasterize.py:2183-2184) flipped the inheritance test red. (3) Cat 4 HIGH resolution= happy path -- only the oversize-rejection error path was tested (line 304); TestResolutionParameter pins the scalar branch, the tuple branch, the ceil-and-clamp-to-1 semantics, and resolution= on all four backends. (4) Cat 4 HIGH non-empty GeometryCollection unpacking is documented at rasterize.py:1995 and implemented by _classify_geometries_loop (line 228) but only the empty-GC case was tested (line 269); TestGeometryCollection pins polygon+point and polygon+line+point collections on eager numpy plus parity across cupy / dask+numpy / dask+cupy so the loop classifier's polygon/line/point sub-bucketing has direct coverage. Cat 1 MEDIUM gap closed: eager cupy all_touched=True parity vs eager numpy (TestEagerCupyAllTouched) -- the existing test only covered dask+cupy all_touched, leaving the direct GPU all_touched kernel untested. Cat 2 MEDIUM gap closed: int32 dtype with default NaN fill silently casts to the int32-min sentinel (TestIntegerDtypeNanFill) -- pin the cast so any future ValueError-raises switch is visible as a code-review diff. Pre-existing 143 passing + 2 skipped tests in test_rasterize.py untouched." reproject,2026-05-10,,HIGH,1;4;5,"Added 39 tests: LiteCRS direct coverage, itrf_transform behaviour/roundtrip/array, itrf_frames, geoid_height numerical correctness + raster happy-path, vertical helpers (ellipsoidal<->orthometric/depth), reproject() lat/lon and latitude/longitude dim propagation. Note: _merge_arrays_cupy is imported but unused (no cupy merge dispatch in merge()); flagged as feature gap not test gap." +Pass 17 (2026-05-18): added test_mask_nodata_gpu_vrt_2052.py closing Cat 1 HIGH backend-coverage gap on the mask_nodata= opt-out kwarg (#2052). The kwarg was added in #2052 and wired through the four public readers (open_geotiff, read_geotiff_gpu, read_geotiff_dask, read_vrt), but test_mask_nodata_kwarg_2052.py only exercised the eager-numpy and dask+numpy branches. The pure-GPU mask gating at _backends/gpu.py:709, the dask+GPU dispatcher forwarding at _backends/gpu.py:991, the eager VRT mask gating at _backends/vrt.py:320, and the chunked VRT graph builder at _backends/vrt.py:408/588 had zero direct coverage. 19 new tests, all passing on GPU host: GPU eager + dask+GPU mask_nodata=False preserves uint16, GPU defaults still promote to float64, dispatcher thread-through for open_geotiff(gpu=True, mask_nodata=False) and open_geotiff(gpu=True, chunks=N, mask_nodata=False), VRT eager and chunked branches mirror, cross-backend parity (eager vs dask, eager vs GPU, eager vs dask+GPU, eager vs VRT) bit-exact under mask_nodata=False, direct read_geotiff_dask entry-point coverage. Fixture uses tiled+deflate compression so the pure nvCOMP decode path is exercised, not the CPU-fallback piggyback path. Mutation against gpu.py:709 (force mask_nodata=True) flipped 4 GPU tests red; mutation against vrt.py eager mask gate flipped 4 VRT tests red. Cat 1 HIGH (backend coverage on mask_nodata=False for GPU, dask+GPU, VRT eager, VRT chunked). Pass 16 (2026-05-15): added test_max_cloud_bytes_dispatcher_silent_drop_2026_05_15.py closing Cat 4 HIGH parameter-coverage gap on the open_geotiff dispatcher's max_cloud_bytes kwarg. The kwarg was added in #1928 (eager fsspec budget) and re-ordered into the canonical reader signature by #1957, but open_geotiff only forwards it to _read_to_array on the eager non-VRT branch (__init__.py:431). The GPU branch at line 410, the dask branch at line 422, and the VRT branch at line 362 never reference the kwarg, so open_geotiff(p, max_cloud_bytes=8, gpu=True) / open_geotiff(p, max_cloud_bytes=8, chunks=N) / open_geotiff(vrt, max_cloud_bytes=8) all silently drop the budget. Same class of dispatcher-silently-drops-backend-kwarg bug fixed by #1561 / #1605 / #1685 / #1810 for other kwargs; the two sibling kwargs on_gpu_failure (line 339) and missing_sources (line 355) already raise ValueError when used on a path where they do not apply. 11 tests: 4 xfail(strict=True) pinning the fix surface (gpu, dask, vrt, dask+gpu), 3 passing pins on the current silent-drop behaviour so the fix is visible as a diff, 4 positive pins that the eager local + file-like paths accept the kwarg (docstring no-op contract). Filed issue #1974 for the dispatcher fix (sweep is test-only). Cat 4 HIGH (silent backend-kwarg drop). Pass 15 (2026-05-15): added test_write_vrt_bool_nodata_1921.py closing Cat 1 HIGH backend-parity gap on bool nodata rejection. Issue #1911 added the isinstance(nodata, (bool, np.bool_)) -> TypeError guard at to_geotiff and build_geo_tags, but the sibling writers were left unchecked: write_vrt(nodata=True) silently emits True into the VRT XML (str(True) drops the sentinel because no reader parses 'True' as numeric); write_geotiff_gpu direct call relies on the build_geo_tags defense-in-depth rather than an entry-point check, so a future refactor moving that guard would regress the GPU writer with no test coverage. 17 new tests: 4 xfail (strict=True) pinning the write_vrt fix surface (issue #1921), 1 passing pin on the current buggy str(True) emission so the fix is visible as a diff, 6 numeric/None happy-path tests on write_vrt, 4 GPU writer direct-call bool-reject tests (4 dtypes x 1 call), 1 to_geotiff(gpu=True) dispatcher thread-through. Filed issue #1921 for the write_vrt fix (sweep is test-only). Cat 1 HIGH (write_vrt backend parity bug) + Cat 1 MEDIUM (write_geotiff_gpu defense-in-depth pin). Pass 14 (2026-05-15): added test_dask_streaming_write_degenerate_2026_05_15.py closing Cat 3 HIGH and Cat 2 HIGH/MEDIUM gaps on the dask streaming write path (to_geotiff with dask-backed DataArray, #1084). test_streaming_write.py covered 100x100 with a NaN block plus a 2x2 small raster but had nothing 1-pixel-row, 1-pixel-column, all-NaN, all-Inf, or +/-Inf-mixed. The streaming tile-row segmenter (#1485) on a 1-pixel-tall raster and the streaming nodata-mask coercion on an all-NaN chunk were reachable only with a dask input and had no direct coverage; a regression on either would not surface from the eager numpy path or the write_geotiff_gpu path (pass 5 covered the GPU writer's degenerate shapes). 16 new tests, all passing: 1x1 chunk-matches-shape + nodata-attr round-trip + uint16, 1xN single chunk + chunks-split-columns + wide-segmented-by-buffer (#1485 streaming_buffer_bytes=1 forces the segmenter), Nx1 single chunk + chunks-split-rows, all-NaN with finite sentinel + all-NaN without sentinel, mixed NaN/+Inf/-Inf preserving Inf bit-exact + sentinel masking NaN only, all-+Inf and all--Inf, predictor=3 (float predictor) round-trip on float32 + float64 plus int-dtype ValueError. predictor=3 streaming coverage extends the small-chunk and int-rejection geometry around test_predictor_fp_write_1313.test_predictor3_streaming_dask (which already covers a 128x192 predictor=3 dask streaming write with a Predictor-tag assertion). Cat 3 HIGH (1x1/1xN/Nx1) + Cat 2 HIGH (all-NaN with sentinel) + Cat 2 MEDIUM (mixed-Inf, all-Inf) + Cat 4 MEDIUM (predictor=3 streaming). Pass 13 (2026-05-13): added test_size_param_validation_gpu_vrt_1776.py closing Cat 4 HIGH parameter-coverage gap on size-arg validation. Issue #1752 added tile_size validation to to_geotiff and chunks validation to read_geotiff_dask, but the matching kwargs on three sibling entry points were left unchecked: write_geotiff_gpu(tile_size=) raised ZeroDivisionError for 0, struct.error for -1, TypeError for 256.0; read_geotiff_gpu(chunks=) and read_vrt(chunks=) raised ZeroDivisionError for 0 and silently accepted negative values. Factored two shared validators (_validate_tile_size_arg, _validate_chunks_arg) and called them up front from each entry point. 34 new tests, all passing on GPU host: tile_size matrix on write_geotiff_gpu (0/-1/256.0/True/False/positive/np.int64), chunks matrix on read_geotiff_gpu and read_vrt (0/-1/(0,N)/(N,-1)/wrong-length/bool/non-int/(N,float)/positive/np.int64), dispatcher thread-through tests (open_geotiff(gpu=True, chunks=0), to_geotiff(gpu=True, tile_size=0)). Pre-existing 13 #1752 tests still pass after refactor. Filed issue #1776. Pass 12 (2026-05-12): added test_gpu_writer_overview_mode_and_compression_level_1740.py closing Cat 4 HIGH and Cat 4 MEDIUM parameter-coverage gaps. (1) write_geotiff_gpu(overview_resampling='mode') and the dedicated _block_reduce_2d_gpu mode-fallback branch (_gpu_decode.py:3051-3056) had zero direct tests; six of the seven overview_resampling modes were covered (mean/nearest by test_features, min/max/median by pass 6, cubic by test_signature_parity_1631) but mode was the odd one out -- a regression dropping the mode dispatch from _block_reduce_2d_gpu would fall through to the mean reshape branch and emit wrong overview pixels for integer rasters. (2) write_geotiff_gpu(compression_level=) documented as accepted-but-ignored had no test; the CPU writer rejects out-of-range levels with ValueError, the GPU writer is documented not to -- a regression wiring the GPU writer up to the CPU range validator would silently break every to_geotiff(gpu=True, compression_level=X) caller for in-range levels and noisily for out-of-range. 19 tests, all passing on GPU host: _block_reduce_2d_gpu(method='mode') CPU-parity on 4x4 deterministic + random 8x8 + dtype-preserved across u8/u16/i16/i32, write_geotiff_gpu(cog=True, overview_resampling='mode') end-to-end round trip, to_geotiff(gpu=True, ..., overview_resampling='mode') dispatcher thread-through, GPU-vs-CPU pixel parity on 8x8 input, write_geotiff_gpu(compression_level=) in-range matrix on zstd/deflate, out-of-range matrix (zstd=999/-5, deflate=50/0) accepted without raising + round-trip preserved, to_geotiff(gpu=True, compression_level=999) dispatcher thread-through, companion CPU rejects-OOR pin to lock the asymmetry. Mutation against the mode branch (drop the 'if method == mode' block in _block_reduce_2d_gpu) flipped 9 mode tests red. Filed issue #1740. Pass 11 (2026-05-12): added test_gpu_writer_cpu_fallback_codecs_2026_05_12.py closing a Cat 4 HIGH parameter-coverage gap on write_geotiff_gpu compression= modes for the CPU-fallback codecs (lzw, packbits, lz4, lerc, jpeg2000/j2k). Pass 7 (test_gpu_writer_compression_modes_2026_05_11) covered only none/deflate/zstd/jpeg; the remaining five codecs route through dedicated branches in gpu_compress_tiles (_gpu_decode.py:2974-3019) with CPU fallbacks (lerc_compress, jpeg2000_compress, cpu_compress) that had zero direct tests via write_geotiff_gpu. A regression in routing/tag-wiring/fallback dispatch would ship silently because the internal reader uses the same compression-tag table. 17 tests, all passing on GPU host: lzw/packbits/lz4 round-trip + compression-tag pin on uint16, lerc lossless float32 + uint16 round-trip + tag pin, jpeg2000 uint8 single-band + RGB multi-band lossless round-trip + j2k-alias parity + tag pin, GPU-vs-CPU writer pixel parity for lzw/packbits, to_geotiff(gpu=True, compression=lzw/packbits) dispatcher thread-through. Mutation against compression dispatch (swap lzw bytes to zstd; swap lerc bytes to deflate) flipped round-trip tests red. Filed issue #1706. Pass 10 (2026-05-12): added test_kwarg_behaviour_2026_05_12_v2.py closing two Cat 4 HIGH parameter-coverage gaps. (1) write_geotiff_gpu(predictor=True/2/3) had zero direct tests; the GPU writer threads predictor= through normalize_predictor and gpu_compress_tiles into five CUDA encode kernels (_predictor_encode_kernel_u8/u16/u32/u64 for predictor=2, _fp_predictor_encode_kernel for predictor=3) and a regression dropping the encode-kernel calls would ship corrupt files. (2) read_vrt(window=) had no behaviour tests (only a signature pin in test_signature_annotations_1654); the kwarg is documented and _vrt.read_vrt implements full windowed-read semantics (clip, multi-source overlap, src/dst scaling, GeoTransform origin shift on coords + attrs['transform']). 23 tests, all passing on GPU host: predictor=True/2 round-trips on u8/u16/i32 + 3-band RGB samples_per_pixel stride; predictor=3 lossless round-trip on f32 and f64; predictor=3 int-dtype ValueError (CPU/GPU parity); CPU/GPU pixel-exact parity for pred=2 u16 and pred=3 f32; read_vrt(window=) subregion + full + clamp-overflow + clamp-negative + 2x1 mosaic seam straddle + offset past seam + transform-attr origin shift + y/x coords half-pixel shift + window+band + window+chunks (dask) + window+gpu (cupy) + window+gpu+chunks (dask+cupy). Mutation against the encode dispatch flipped 7 predictor tests red. Filed issue #1690. Pass 9 (2026-05-12): added test_kwarg_behaviour_2026_05_12.py closing three Cat 4 MEDIUM parameter-coverage gaps plus one Cat 4 LOW error path. write_vrt documented kwargs (relative/crs_wkt/nodata) had a smoke-test pinning that the kwargs are accepted but no test verified the override *effect* -- a regression dropping the override branch and silently using the default-from-first-source would ship undetected. read_geotiff_gpu(dtype=) cast had zero direct tests; the eager path has TestDtypeEager and dask has TestDtypeDask but the GPU branch had no equivalent. write_geotiff_gpu(bigtiff=) threads through to _assemble_tiff(force_bigtiff=) but no test asserted the on-disk header byte switches; the CPU writer had it via test_features::test_force_bigtiff_via_public_api. write_vrt(source_files=[]) ValueError was uncovered. 26 tests, all passing on GPU host: write_vrt relative=True/False XML attribute + path inspection + parse-back round-trip, write_vrt crs_wkt= override distinct-from-default XML check, write_vrt nodata= override + default-from-source coverage, write_vrt([]) ValueError + no-file side effect, read_geotiff_gpu dtype= matrix (float64->float32, float64->float16, uint16->int32, uint16->uint8, float-to-int raise, dtype=None preserves native), open_geotiff(gpu=True, dtype=) dispatcher, read_geotiff_gpu(chunks=, dtype=) dask+GPU branch, write_geotiff_gpu bigtiff=True/False/None header verification, to_geotiff(gpu=True, bigtiff=True) dispatcher thread-through. Pass 8 (2026-05-11): added test_lz4_compression_level_2026_05_11.py closing Cat 4 MEDIUM parameter-coverage gap on compression='lz4' + compression_level=. _LEVEL_RANGES advertises lz4: (0, 16) but only deflate (1, 9) and zstd (1, 22) had direct level boundary + round-trip + reject tests. The range check is the gatekeeper -- lz4_compress silently accepts any int level -- so a regression dropping 'lz4' from _LEVEL_RANGES would ship undetected. 18 tests, all passing: round-trip at levels 0/1/9/16 (lossless), default-level no-arg path, higher-level-not-larger smoke check on compressible input, out-of-range reject at -1/-10/17/100 on eager path, valid-range message format pin (lz4 valid: 0-16), dask streaming round-trip at 0/1/8/16, dask streaming out-of-range reject at -1/17/50 (separate _LEVEL_RANGES call site). Pass 7 (2026-05-11): added test_gpu_writer_compression_modes_2026_05_11.py closing Cat 4 HIGH gap on write_geotiff_gpu compression= modes. The writer documents zstd (default, fastest GPU), deflate, jpeg, and none, but only deflate + none had round-trip tests; the default zstd and the jpeg (nvJPEG/Pillow) paths shipped without targeted coverage. 11 new tests, all passing on GPU host: zstd round-trip + default-codec pinning, jpeg round-trip on 3-band RGB uint8 + 1-band greyscale, TIFF compression-tag header check across none/deflate/zstd/jpeg, plain deflate + none round-trips outside the COG/sentinel paths, and a cross-codec lossless parity check (zstd/deflate/none agree pixel-exact). nvJPEG path was exercised live, not just the Pillow fallback. Pass 6 (2026-05-11): added test_overview_resampling_min_max_median_2026_05_11.py covering Cat 4 HIGH parameter-coverage gap on overview_resampling=min/max/median. CPU end-to-end paths were already covered by test_cog_overview_nodata_1613::test_cpu_cog_overview_aggregations_ignore_sentinel; the GPU end-to-end paths and the direct CPU+GPU block-reducer branches had no targeted tests, so a regression on those code paths would ship undetected. 26 tests, all passing on GPU host: block-reducer unit tests (finite + partial-NaN), end-to-end COG writes for both to_geotiff and write_geotiff_gpu, CPU/GPU parity for to_geotiff(gpu=True), CPU nodata-sentinel regression check, and ValueError error-path tests for unknown method names on both backends. Pass 5 (2026-05-11): added test_degenerate_shapes_backends_2026_05_11.py covering Cat 3 HIGH geometric gaps (1x1 / 1xN / Nx1 reads on dask+numpy, GPU, dask+cupy backends; 1x1 / 1xN / Nx1 writes through write_geotiff_gpu) and Cat 2 MEDIUM NaN/Inf gaps (all-NaN read on GPU + dask+cupy, Inf / -Inf reads on all non-eager backends, NaN sentinel mask on dask read path including sentinel block split across chunk boundary). 23 tests, all passing on GPU host. Prior passes still hold: pass 4 (r4) closed read_geotiff_gpu/dask name= + max_pixels= kwargs (Cat 4), pass 3 (r3) closed read_vrt GPU/dask+GPU backend dispatch (Cat 1) and dtype/name kwargs (Cat 4)." +polygonize,2026-05-27,2537,MEDIUM,4,"Pass 2 (2026-05-27): added test_polygonize_atol_rtol_backend_coverage_2026_05_27.py with 15 tests, all passing on a CUDA host. Closes Cat 4 MEDIUM parameter-coverage gap on atol/rtol forwarding through the cupy and dask+cupy backends. atol/rtol were exposed by #2173 / #2194 and thread through _polygonize_cupy (polygonize.py:808) and _polygonize_dask (polygonize.py:1719); the dask path further plumbs them into dask.delayed(_polygonize_chunk)(...) at lines 1748-1754 and into _bucket_key_for_value for cross-chunk merge bucketing at lines 1757-1758. Pre-existing tests covered non-default atol/rtol only on numpy and dask+numpy. The cupy and dask+cupy dispatchers were untested -- a regression dropping the kwargs there would silently change the float polygon count and would not be caught. Same dispatcher-silently-drops-kwarg pattern fixed by #1561 / #1605 / #1685 / #1810 / #1974 on adjacent GeoTIFF surfaces. 15 tests: cupy strict-equality + default-tolerance pin on _REPRO_2173, dask+cupy strict-equality single-chunk + multi-chunk (engages cross-chunk merge bucket) + default-tolerance multi-chunk pin, cupy intermediate-atol small/large pair, dask+cupy intermediate-atol single/multi-chunk small + single-chunk large, cupy integer atol-ignored matrix, dask+cupy integer atol-ignored single-chunk + multi-chunk, cupy rtol-only large/small matrix. Mutation against _polygonize_cupy float branch (drop atol/rtol kwargs in the _polygonize_numpy forward call at polygonize.py:823-825) flips 3 of 5 cupy tests red; mutation against dask.delayed(_polygonize_chunk)(...) at polygonize.py:1748-1754 (drop atol, rtol args) flips 2 of 6 dask+cupy tests red. Confirmed clean restore via md5sum. Source untouched. Filed issue #2537 (test-only). Cat 4 MEDIUM (parameter coverage on cupy + dask+cupy atol/rtol forwarding). Pass 1 (2026-05-19): added test_polygonize_coverage_2026_05_19.py with 58 tests, all passing on a CUDA host. Closes Cat 3 HIGH 1x1 / Nx1 single-column geometric gaps (Nx1 exercises the nx==1 padding path at polygonize.py:565 and the cupy nx==1 numpy-fallback at polygonize.py:671), Cat 3 MEDIUM 1xN single-row and all-equal-value rasters on all four backends. Closes Cat 2 HIGH NaN parity for cupy + dask+cupy (numpy/dask were already covered by test_polygonize_nan_pixels_excluded*), Cat 2 MEDIUM all-NaN raster on all four backends, Cat 2 HIGH +/-Inf pins on all four backends. Filed source-bug issue #2155: numpy/dask/dask+cupy backends silently absorb Inf cells into adjacent finite polygons because _is_close reduces abs(inf-inf) to nan; cupy backend handles Inf correctly. Pins lock the asymmetric behaviour so the fix is visible. Closes Cat 1 MEDIUM simplify_tolerance + mask= parity gaps on dask+cupy backend (numpy/cupy/dask were already covered). Closes Cat 4 MEDIUM column_name non-default value across geopandas/spatialpandas/geojson return types and Cat 4 MEDIUM validation error paths (bad connectivity, bad transform length, mask shape mismatch, mask underlying-type mismatch). Cat 5 N/A: polygonize returns lists/dataframes, not a DataArray with attrs to propagate." +rasterize,2026-05-21,2255,HIGH,1;2;3,"Pass 2 (2026-05-21): added test_rasterize_coverage_2026_05_21.py with 58 tests, all passing on a CUDA host. Closes Cat 2 HIGH +/-Inf and NaN burn-value gaps that pass-1 left untouched: pin +Inf / -Inf / Inf+(-Inf)/NaN polygon, point, and line burn behaviour across numpy / cupy / dask+numpy / dask+cupy, plus Inf+finite under sum stays Inf, Inf+(-Inf) under sum collapses to NaN, min(Inf, 1.0) and max(-Inf, 1.0) pick the finite value, and Inf-as-bound is rejected with the same ValueError as NaN-as-bound (pass-1 only tested the NaN-bound rejection). Closes Cat 1 MEDIUM nested GeometryCollection on all four backends: a GC inside a GC has no direct test today even though rasterize.py:1995 documents recursive unpacking, and the deeply-nested-3-levels eager test pins the recursion depth limit isn't 1 or 2. Closes Cat 1 MEDIUM columns= (multi-column) parity on cupy and dask+cupy (TestMultiColumn covered numpy/dask+numpy only); pin three columns of props on GPU so the (N, P) loop survives the kernel boundary. Closes Cat 3 LOW rectangular-pixel parity with resolution=(rx, ry) across backends. Filed source-bug issue #2255: GPU max/min merge silently suppresses NaN burn values -- CPU returns NaN (1.0 > NaN is False, keeps NaN); GPU returns 1.0 because the kernel inits the output buffer to -inf for max (or +inf for min) and atomicMax/Min is NaN-suppressing under IEEE device semantics. Pinned both the CPU NaN-propagating behaviour and the GPU NaN-suppressing behaviour as paired tests (test_nan_burn_overlaps_max_cpu_propagates vs test_nan_burn_overlaps_max_gpu_suppresses_nan, plus test_nan_burn_single_geom_max_gpu_returns_neg_inf for the single-write-on-GPU-returns-buffer-init case) so the divergence is visible in CI until the GPU kernels are aligned. Source untouched. Pass 1 (2026-05-17): added test_rasterize_coverage_2026_05_17.py with 34 tests, all passing on a CUDA host. Closes four documented public-API gaps left after the pass-0 audit. (1) Cat 3 HIGH 1x1 single-pixel raster -- test_rasterize.py covers 1xN strips and Nx1 strips but never width=1 AND height=1, so the polygon scanline / line Bresenham / point burn kernels all ship without the single-cell degenerate case; the new TestSinglePixelRaster class pins polygon/point/line on eager numpy plus polygon parity across cupy / dask+numpy / dask+cupy. (2) Cat 4 HIGH like= template-raster parameter is documented at rasterize.py:2038 and implemented by _extract_grid_from_like (line 1930) but no test exercises it; TestLikeParameter pins dtype/bounds/coords inheritance, the three override branches (dtype, bounds, width/height), the three validation branches (not-DataArray, 3D, wrong dim names) and like= on all four backends. Mutation against the like-dtype branch (rasterize.py:2183-2184) flipped the inheritance test red. (3) Cat 4 HIGH resolution= happy path -- only the oversize-rejection error path was tested (line 304); TestResolutionParameter pins the scalar branch, the tuple branch, the ceil-and-clamp-to-1 semantics, and resolution= on all four backends. (4) Cat 4 HIGH non-empty GeometryCollection unpacking is documented at rasterize.py:1995 and implemented by _classify_geometries_loop (line 228) but only the empty-GC case was tested (line 269); TestGeometryCollection pins polygon+point and polygon+line+point collections on eager numpy plus parity across cupy / dask+numpy / dask+cupy so the loop classifier's polygon/line/point sub-bucketing has direct coverage. Cat 1 MEDIUM gap closed: eager cupy all_touched=True parity vs eager numpy (TestEagerCupyAllTouched) -- the existing test only covered dask+cupy all_touched, leaving the direct GPU all_touched kernel untested. Cat 2 MEDIUM gap closed: int32 dtype with default NaN fill silently casts to the int32-min sentinel (TestIntegerDtypeNanFill) -- pin the cast so any future ValueError-raises switch is visible as a code-review diff. Pre-existing 143 passing + 2 skipped tests in test_rasterize.py untouched." +reproject,2026-05-10,,HIGH,1;4;5,"Added 39 tests: LiteCRS direct coverage, itrf_transform behaviour/roundtrip/array, itrf_frames, geoid_height numerical correctness + raster happy-path, vertical helpers (ellipsoidal<->orthometric/depth), reproject() lat/lon and latitude/longitude dim propagation. Note: _merge_arrays_cupy is imported but unused (no cupy merge dispatch in merge()); flagged as feature gap not test gap." +zonal,2026-05-27,,HIGH,1;3;4;5,"Pass 1 (2026-05-27): added test_zonal_backend_coverage_2026_05_27.py with 32 tests, all passing on a CUDA host. Closes Cat 1 HIGH backend-coverage gaps: crosstab cupy + dask+cupy (_crosstab_cupy / _crosstab_dask_cupy were dispatched but never invoked by tests), regions cupy + dask+cupy (_regions_cupy via cupyx.scipy.ndimage + _regions_dask_cupy), trim dask+numpy + cupy + dask+cupy (_trim_bounds_dask isnan path and cupy data.get() path), crop dask+numpy + cupy + dask+cupy (_crop_bounds_dask + cupy data.get() path), apply 3D cupy + dask+cupy (per-layer kernel launch over the third axis in _apply_cupy and _apply_dask_cupy). Existing test_zonal.py covered only numpy + dask+numpy for crosstab/regions/trim/crop and 2D-only for cupy apply. Closes Cat 3 MEDIUM 1x1 / 1xN / Nx1 strip edge cases for trim, crop, and regions. Closes Cat 4 LOW pins: regions(neighborhood=6) ValueError, suggest_zonal_canvas(crs='Geographic') aspect-ratio pin and invalid-crs KeyError, crosstab cupy zone_ids/cat_ids filter, crosstab cupy agg='percentage'. Closes Cat 5 MEDIUM: regions coords/attrs propagation across numpy + dask+numpy, trim/crop name='trim'/'crop' default + attrs preservation. Also pins the documented numpy-vs-dask trim asymmetry on NaN sentinel (numpy _trim does equality which never matches NaN; dask _trim_bounds_dask has dedicated isnan branch). Mutation against the cupy.asnumpy() conversion in _crosstab_cupy flipped test_crosstab_cupy_matches_numpy red. Source untouched." diff --git a/xrspatial/tests/test_zonal_backend_coverage_2026_05_27.py b/xrspatial/tests/test_zonal_backend_coverage_2026_05_27.py new file mode 100644 index 00000000..aed729bf --- /dev/null +++ b/xrspatial/tests/test_zonal_backend_coverage_2026_05_27.py @@ -0,0 +1,692 @@ +"""Backend coverage tests for xrspatial.zonal. + +Closes HIGH-severity backend-coverage gaps surfaced by the test-coverage +sweep on 2026-05-27 (deep-sweep-test-coverage-zonal-2026-05-27). + +Module ``xrspatial/zonal.py`` registers four-backend dispatchers +(``ArrayTypeFunctionMapping``) for ``crosstab``, ``regions``, ``apply`` +(3D), and also has dedicated cupy / dask branches inside ``trim`` and +``crop``. Prior to this file the existing ``test_zonal.py`` only +exercised the numpy + dask+numpy paths for crosstab/regions/trim/crop +and the 2D-only path for apply on cupy backends. A regression on the +``_crosstab_cupy``, ``_crosstab_dask_cupy``, ``_regions_cupy``, +``_regions_dask_cupy``, ``_trim_bounds_dask``, ``_crop_bounds_dask``, +or 3D cupy ``apply`` branches would ship undetected. + +Tests in this file: + +- Cat 1 HIGH: crosstab cupy + dask+cupy parity vs numpy +- Cat 1 HIGH: regions cupy + dask+cupy parity vs numpy +- Cat 1 HIGH: trim cupy + dask+numpy + dask+cupy parity vs numpy +- Cat 1 HIGH: crop cupy + dask+numpy + dask+cupy parity vs numpy +- Cat 1 HIGH: apply 3D cupy + dask+cupy parity vs numpy +- Cat 3 MEDIUM: 1x1 single-pixel raster on regions/trim/crop +- Cat 3 MEDIUM: Nx1 / 1xN strip on regions/trim +- Cat 4 LOW: regions invalid neighborhood ValueError pin +- Cat 4 LOW: suggest_zonal_canvas Geographic CRS pin +- Cat 5 MEDIUM: trim/crop attrs preservation + +CUDA was available on the host when this file was authored (2026-05-27); +tests parametrized over ``cupy``/``dask+cupy`` execute live unless the +GPU-skip decorator skips them on a non-CUDA host. +""" +from __future__ import annotations + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +try: + import dask.array as da +except ImportError: + da = None + +from xrspatial.zonal import ( + apply, crop, crosstab, regions, suggest_zonal_canvas, trim, +) + +from .general_checks import ( + create_test_raster, cuda_and_cupy_available, dask_array_available, + has_dask_array, +) + +# Local alias kept for readability at decorator sites in this file. +dask_required = dask_array_available + + +def _to_numpy(arr): + """Bring a DataArray-or-array result to a numpy array.""" + if hasattr(arr, 'data'): + arr = arr.data + if da is not None and isinstance(arr, da.Array): + arr = arr.compute() + if hasattr(arr, 'get'): # cupy + arr = arr.get() + return np.asarray(arr) + + +def _to_pandas(df): + """Bring a (dask) DataFrame to pandas.""" + if hasattr(df, 'compute'): + return df.compute() + return df + + +def _canonical_labels(a): + """Re-label a region map so labels are assigned in raster-scan order. + + scipy.ndimage.label and cupyx.scipy.ndimage.label may emit labels in + different orders for the same input. Canonicalising lets the parity + tests compare cell partitions, not raw label values. + """ + out = np.full_like(a, -1, dtype=np.int64) + seen = {} + next_id = 0 + flat = a.ravel() + for i, v in enumerate(flat): + if not np.isfinite(v): + continue + key = float(v) + if key not in seen: + seen[key] = next_id + next_id += 1 + out.ravel()[i] = seen[key] + return out + + +# --------------------------------------------------------------------------- +# Cat 1 HIGH -- crosstab backend coverage (cupy / dask+cupy) +# --------------------------------------------------------------------------- + +@pytest.fixture +def crosstab_zones_values(): + """Small zones + values fixture for crosstab parity tests.""" + zones = np.array( + [[1, 1, 2, 2], + [1, 1, 2, 2], + [3, 3, 4, 4]], + dtype=np.float64, + ) + values = np.array( + [[10, 10, 20, 20], + [10, 20, 20, 30], + [30, 30, 10, 10]], + dtype=np.float64, + ) + return zones, values + + +@cuda_and_cupy_available +def test_crosstab_cupy_matches_numpy(crosstab_zones_values): + """_crosstab_cupy parity vs _crosstab_numpy (Cat 1 HIGH).""" + import cupy as cp + zones_np, values_np = crosstab_zones_values + + zones_n = xr.DataArray(zones_np, dims=['y', 'x']) + values_n = xr.DataArray(values_np, dims=['y', 'x']) + df_np = crosstab(zones_n, values_n) + + zones_c = xr.DataArray(cp.asarray(zones_np), dims=['y', 'x']) + values_c = xr.DataArray(cp.asarray(values_np), dims=['y', 'x']) + df_cp = crosstab(zones_c, values_c) + + # cupy path returns a pandas DataFrame after _crosstab_cupy converts back + assert isinstance(df_cp, pd.DataFrame) + pd.testing.assert_frame_equal( + df_cp.sort_values('zone').reset_index(drop=True), + df_np.sort_values('zone').reset_index(drop=True), + check_dtype=False, + ) + + +@cuda_and_cupy_available +@dask_required +def test_crosstab_dask_cupy_matches_numpy(crosstab_zones_values): + """_crosstab_dask_cupy parity vs _crosstab_numpy (Cat 1 HIGH).""" + import cupy as cp + zones_np, values_np = crosstab_zones_values + + zones_n = xr.DataArray(zones_np, dims=['y', 'x']) + values_n = xr.DataArray(values_np, dims=['y', 'x']) + df_np = crosstab(zones_n, values_n) + + zones_dc = xr.DataArray( + da.from_array(cp.asarray(zones_np), chunks=(2, 2)), + dims=['y', 'x'], + ) + values_dc = xr.DataArray( + da.from_array(cp.asarray(values_np), chunks=(2, 2)), + dims=['y', 'x'], + ) + df_dc = _to_pandas(crosstab(zones_dc, values_dc)) + + pd.testing.assert_frame_equal( + df_dc.sort_values('zone').reset_index(drop=True), + df_np.sort_values('zone').reset_index(drop=True), + check_dtype=False, + ) + + +@cuda_and_cupy_available +def test_crosstab_cupy_percentage(crosstab_zones_values): + """crosstab cupy with agg='percentage' (Cat 4 MEDIUM).""" + import cupy as cp + zones_np, values_np = crosstab_zones_values + + zones_n = xr.DataArray(zones_np, dims=['y', 'x']) + values_n = xr.DataArray(values_np, dims=['y', 'x']) + df_np = crosstab(zones_n, values_n, agg='percentage') + + zones_c = xr.DataArray(cp.asarray(zones_np), dims=['y', 'x']) + values_c = xr.DataArray(cp.asarray(values_np), dims=['y', 'x']) + df_cp = crosstab(zones_c, values_c, agg='percentage') + + pd.testing.assert_frame_equal( + df_cp.sort_values('zone').reset_index(drop=True), + df_np.sort_values('zone').reset_index(drop=True), + check_dtype=False, + ) + + +# --------------------------------------------------------------------------- +# Cat 1 HIGH -- regions backend coverage (cupy / dask+cupy) +# --------------------------------------------------------------------------- + +@pytest.fixture +def regions_input(): + return np.array( + [[1, 1, 0, 2, 2], + [1, 1, 0, 2, 2], + [0, 0, 0, 0, 0], + [3, 3, 0, 3, 3], + [3, 3, 0, 3, 3]], + dtype=np.float64, + ) + + +@cuda_and_cupy_available +def test_regions_cupy_matches_numpy(regions_input): + """_regions_cupy parity vs _regions_numpy (Cat 1 HIGH).""" + import cupy as cp + + arr_np = xr.DataArray(regions_input, dims=['y', 'x']) + arr_cp = xr.DataArray(cp.asarray(regions_input), dims=['y', 'x']) + + out_np = _to_numpy(regions(arr_np, neighborhood=4)) + out_cp = _to_numpy(regions(arr_cp, neighborhood=4)) + + # Labels may differ between scipy / cupyx; partitions must match. + np.testing.assert_array_equal( + _canonical_labels(out_np), _canonical_labels(out_cp), + ) + + +@cuda_and_cupy_available +@dask_required +def test_regions_dask_cupy_matches_numpy(regions_input): + """_regions_dask_cupy parity vs _regions_numpy (Cat 1 HIGH).""" + import cupy as cp + + arr_np = xr.DataArray(regions_input, dims=['y', 'x']) + arr_dc = xr.DataArray( + da.from_array(cp.asarray(regions_input), chunks=(3, 3)), + dims=['y', 'x'], + ) + + out_np = _to_numpy(regions(arr_np, neighborhood=4)) + out_dc = _to_numpy(regions(arr_dc, neighborhood=4)) + + np.testing.assert_array_equal( + _canonical_labels(out_np), _canonical_labels(out_dc), + ) + + +@cuda_and_cupy_available +def test_regions_cupy_eight_connectivity(): + """8-connectivity branch of _regions_cupy (Cat 4 MEDIUM).""" + import cupy as cp + + diag = np.array( + [[1, 0, 1], + [0, 1, 0], + [1, 0, 1]], + dtype=np.float64, + ) + arr_cp = xr.DataArray(cp.asarray(diag), dims=['y', 'x']) + out = _to_numpy(regions(arr_cp, neighborhood=8)) + # 8-connected: all 1s merge into one region, all 0s into another + finite_labels = out[np.isfinite(out)] + assert len(np.unique(finite_labels)) == 2 + + +def test_regions_invalid_neighborhood_raises(): + """Cat 4 LOW: regions(neighborhood=6) must raise ValueError.""" + arr = xr.DataArray(np.array([[1, 1], [1, 1]], dtype=np.float64), dims=['y', 'x']) + with pytest.raises(ValueError, match="neighborhood"): + regions(arr, neighborhood=6) + + +# --------------------------------------------------------------------------- +# Cat 1 HIGH -- trim backend coverage (cupy / dask+numpy / dask+cupy) +# --------------------------------------------------------------------------- + +@pytest.fixture +def trim_input(): + return np.array( + [[0, 0, 0, 0], + [0, 4, 0, 0], + [0, 4, 4, 0], + [0, 1, 1, 0], + [0, 0, 0, 0]], + dtype=np.float64, + ) + + +@cuda_and_cupy_available +def test_trim_cupy_matches_numpy(trim_input): + """trim cupy branch (data.get() + _trim) parity (Cat 1 HIGH).""" + import cupy as cp + + arr_np = xr.DataArray(trim_input, dims=['y', 'x']) + arr_cp = xr.DataArray(cp.asarray(trim_input), dims=['y', 'x']) + + out_np = trim(arr_np, values=(0.0,)) + out_cp = trim(arr_cp, values=(0.0,)) + + assert out_cp.shape == out_np.shape == (3, 2) + np.testing.assert_array_equal(_to_numpy(out_cp), _to_numpy(out_np)) + + +@dask_required +def test_trim_dask_numpy_matches_numpy(trim_input): + """trim dask path (_trim_bounds_dask) parity (Cat 1 HIGH).""" + arr_np = xr.DataArray(trim_input, dims=['y', 'x']) + arr_da = xr.DataArray( + da.from_array(trim_input, chunks=(3, 2)), dims=['y', 'x'], + ) + + out_np = trim(arr_np, values=(0.0,)) + out_da = trim(arr_da, values=(0.0,)) + + assert out_da.shape == out_np.shape == (3, 2) + np.testing.assert_array_equal(_to_numpy(out_da), _to_numpy(out_np)) + + +@cuda_and_cupy_available +@dask_required +def test_trim_dask_cupy_matches_numpy(trim_input): + """trim dask+cupy parity (Cat 1 HIGH).""" + import cupy as cp + + arr_np = xr.DataArray(trim_input, dims=['y', 'x']) + arr_dc = xr.DataArray( + da.from_array(cp.asarray(trim_input), chunks=(3, 2)), + dims=['y', 'x'], + ) + + out_np = trim(arr_np, values=(0.0,)) + out_dc = trim(arr_dc, values=(0.0,)) + + assert out_dc.shape == out_np.shape == (3, 2) + np.testing.assert_array_equal(_to_numpy(out_dc), _to_numpy(out_np)) + + +@dask_required +def test_trim_dask_nan_values(trim_input): + """Cat 2 HIGH: dask trim with default NaN sentinel. + + The dask branch ``_trim_bounds_dask`` has a dedicated ``isnan`` path + (zonal.py:2287) that the numpy backend lacks: numpy's ``_trim`` uses + a plain equality check, which never matches NaN. This test pins the + backend asymmetry so any future change is visible as a diff. + """ + arr_with_nan = np.where(trim_input == 0, np.nan, trim_input) + + arr_np = xr.DataArray(arr_with_nan, dims=['y', 'x']) + arr_da = xr.DataArray( + da.from_array(arr_with_nan, chunks=(3, 2)), dims=['y', 'x'], + ) + + # Dask path trims the all-NaN frame to the bounding box of finite + # data. Interior NaNs (the original 0 in the middle of row 1) are + # preserved. + out_da = trim(arr_da, values=(np.nan,)) + out_da_np = _to_numpy(out_da) + assert out_da_np.shape == (3, 2) + # bounding box covers rows 1-3, cols 1-2 of the input + expected = np.array([[4.0, np.nan], + [4.0, 4.0], + [1.0, 1.0]]) + np.testing.assert_array_equal(out_da_np, expected) + + # numpy path doesn't match NaN with equality, so the result is + # unchanged. Pin this asymmetry so a future change is visible. + out_np_nan = trim(arr_np, values=(np.nan,)) + assert out_np_nan.shape == arr_np.shape + + +def test_trim_preserves_name_attribute(): + """Cat 5 MEDIUM: trim should set name to 'trim' by default.""" + arr = xr.DataArray( + np.array([[0, 0, 0], [0, 5, 0], [0, 0, 0]], dtype=np.float64), + dims=['y', 'x'], + attrs={'res': (1.0, 1.0), 'crs': 'EPSG:4326'}, + ) + out = trim(arr, values=(0.0,)) + assert out.name == 'trim' + # attrs propagated from input + assert out.attrs.get('res') == (1.0, 1.0) + assert out.attrs.get('crs') == 'EPSG:4326' + + +# --------------------------------------------------------------------------- +# Cat 1 HIGH -- crop backend coverage (cupy / dask+numpy / dask+cupy) +# --------------------------------------------------------------------------- + +@pytest.fixture +def crop_input(): + arr = np.array( + [[0, 4, 0, 3], + [0, 4, 4, 3], + [0, 1, 1, 3], + [0, 1, 1, 3], + [0, 0, 0, 0]], + dtype=np.float64, + ) + return arr + + +@cuda_and_cupy_available +def test_crop_cupy_matches_numpy(crop_input): + """crop cupy branch (data.get() + _crop) parity (Cat 1 HIGH).""" + import cupy as cp + + zones_np = xr.DataArray(crop_input, dims=['y', 'x']) + zones_cp = xr.DataArray(cp.asarray(crop_input), dims=['y', 'x']) + + out_np = crop(zones_np, zones_np, zone_ids=(1.0, 3.0)) + out_cp = crop(zones_cp, zones_cp, zone_ids=(1.0, 3.0)) + + assert out_cp.shape == out_np.shape == (4, 3) + np.testing.assert_array_equal(_to_numpy(out_cp), _to_numpy(out_np)) + + +@dask_required +def test_crop_dask_numpy_matches_numpy(crop_input): + """crop dask path (_crop_bounds_dask) parity (Cat 1 HIGH).""" + zones_np = xr.DataArray(crop_input, dims=['y', 'x']) + zones_da = xr.DataArray( + da.from_array(crop_input, chunks=(3, 2)), dims=['y', 'x'], + ) + + out_np = crop(zones_np, zones_np, zone_ids=(1.0, 3.0)) + out_da = crop(zones_da, zones_da, zone_ids=(1.0, 3.0)) + + assert out_da.shape == out_np.shape == (4, 3) + np.testing.assert_array_equal(_to_numpy(out_da), _to_numpy(out_np)) + + +@cuda_and_cupy_available +@dask_required +def test_crop_dask_cupy_matches_numpy(crop_input): + """crop dask+cupy parity (Cat 1 HIGH).""" + import cupy as cp + + zones_np = xr.DataArray(crop_input, dims=['y', 'x']) + zones_dc = xr.DataArray( + da.from_array(cp.asarray(crop_input), chunks=(3, 2)), + dims=['y', 'x'], + ) + values_dc = xr.DataArray( + da.from_array(cp.asarray(crop_input), chunks=(3, 2)), + dims=['y', 'x'], + ) + + out_np = crop(zones_np, zones_np, zone_ids=(1.0, 3.0)) + out_dc = crop(zones_dc, values_dc, zone_ids=(1.0, 3.0)) + + assert out_dc.shape == out_np.shape == (4, 3) + np.testing.assert_array_equal(_to_numpy(out_dc), _to_numpy(out_np)) + + +def test_crop_preserves_name_attribute(crop_input): + """Cat 5 MEDIUM: crop should set name to 'crop' by default.""" + arr = xr.DataArray( + crop_input, + dims=['y', 'x'], + attrs={'res': (1.0, 1.0), 'crs': 'EPSG:4326'}, + ) + out = crop(arr, arr, zone_ids=(1.0, 3.0)) + assert out.name == 'crop' + # attrs propagated from input values + assert out.attrs.get('res') == (1.0, 1.0) + + +# --------------------------------------------------------------------------- +# Cat 1 HIGH -- apply 3D cupy / dask+cupy backend coverage +# --------------------------------------------------------------------------- + +@cuda_and_cupy_available +def test_apply_3d_cupy(): + """apply on 3D values with cupy backend (Cat 1 HIGH). + + Exercises the 3D branch of _apply_cupy (zonal.py:1655-1660): per-layer + kernel launch over the third axis. + """ + import cupy as cp + + zones_data = np.array([[1, 0], [0, 2]], dtype=np.int32) + values_data = np.ones((2, 2, 3), dtype=np.float64) * 5.0 + + zones = xr.DataArray(cp.asarray(zones_data), dims=['y', 'x']) + vals = xr.DataArray(cp.asarray(values_data), dims=['y', 'x', 'band']) + + result = apply(zones, vals, lambda x: x + 10, nodata=0) + result_np = _to_numpy(result) + + # Zone-1 cell and zone-2 cell incremented + np.testing.assert_array_equal(result_np[0, 0, :], [15.0, 15.0, 15.0]) + np.testing.assert_array_equal(result_np[1, 1, :], [15.0, 15.0, 15.0]) + # nodata cells stay at 5.0 + np.testing.assert_array_equal(result_np[0, 1, :], [5.0, 5.0, 5.0]) + np.testing.assert_array_equal(result_np[1, 0, :], [5.0, 5.0, 5.0]) + + +@cuda_and_cupy_available +@dask_required +def test_apply_3d_dask_cupy(): + """apply on 3D values with dask+cupy backend (Cat 1 HIGH). + + Exercises the 3D branch of _apply_dask_cupy (zonal.py:1722-1731): per-layer + map_blocks + da.stack. + """ + import cupy as cp + + zones_data = np.array([[1, 0], [0, 2]], dtype=np.int32) + values_data = np.ones((2, 2, 3), dtype=np.float64) * 5.0 + + zones = xr.DataArray( + da.from_array(cp.asarray(zones_data), chunks=(2, 2)), + dims=['y', 'x'], + ) + vals = xr.DataArray( + da.from_array(cp.asarray(values_data), chunks=(2, 2, 3)), + dims=['y', 'x', 'band'], + ) + + result = apply(zones, vals, lambda x: x + 10, nodata=0) + result_np = _to_numpy(result) + + np.testing.assert_array_equal(result_np[0, 0, :], [15.0, 15.0, 15.0]) + np.testing.assert_array_equal(result_np[1, 1, :], [15.0, 15.0, 15.0]) + np.testing.assert_array_equal(result_np[0, 1, :], [5.0, 5.0, 5.0]) + np.testing.assert_array_equal(result_np[1, 0, :], [5.0, 5.0, 5.0]) + + +# --------------------------------------------------------------------------- +# Cat 3 MEDIUM -- 1x1 single-pixel raster edge cases +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy']) +def test_trim_single_pixel(backend): + """Cat 3 MEDIUM: trim on a 1x1 raster (no padding to trim).""" + if 'dask' in backend and not has_dask_array(): + pytest.skip("Requires dask.array") + + arr = create_test_raster(np.array([[5.0]]), backend, chunks=(1, 1)) + out = trim(arr, values=(0.0,)) + assert out.shape == (1, 1) + np.testing.assert_array_equal(_to_numpy(out), np.array([[5.0]])) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy']) +def test_crop_single_pixel(backend): + """Cat 3 MEDIUM: crop on a 1x1 raster.""" + if 'dask' in backend and not has_dask_array(): + pytest.skip("Requires dask.array") + + arr = create_test_raster(np.array([[3.0]]), backend, chunks=(1, 1)) + out = crop(arr, arr, zone_ids=(3.0,)) + assert out.shape == (1, 1) + np.testing.assert_array_equal(_to_numpy(out), np.array([[3.0]])) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy']) +def test_regions_strip_1xN(backend): + """Cat 3 MEDIUM: regions on a 1xN strip.""" + if 'dask' in backend and not has_dask_array(): + pytest.skip("Requires dask.array") + + arr = create_test_raster( + np.array([[1.0, 1.0, 0.0, 2.0, 2.0]]), backend, chunks=(1, 3), + ) + out = regions(arr, neighborhood=4) + out_np = _to_numpy(out) + # three connected regions: the two 1s, the zero, the two 2s + assert len(np.unique(out_np[np.isfinite(out_np)])) == 3 + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy']) +def test_regions_strip_Nx1(backend): + """Cat 3 MEDIUM: regions on an Nx1 strip.""" + if 'dask' in backend and not has_dask_array(): + pytest.skip("Requires dask.array") + + arr = create_test_raster( + np.array([[1.0], [1.0], [0.0], [2.0], [2.0]]), + backend, chunks=(3, 1), + ) + out = regions(arr, neighborhood=4) + out_np = _to_numpy(out) + # three connected regions + assert len(np.unique(out_np[np.isfinite(out_np)])) == 3 + + +# --------------------------------------------------------------------------- +# Cat 4 LOW -- suggest_zonal_canvas Geographic CRS pin +# --------------------------------------------------------------------------- + +def test_suggest_zonal_canvas_geographic_crs(): + """Cat 4 LOW: pin Geographic CRS branch of suggest_zonal_canvas. + + Geographic uses a 2:1 (x:y) aspect ratio (extent ±180 / ±90). This + test pins that ratio so a regression that changes the extent surfaces. + """ + h_g, w_g = suggest_zonal_canvas( + smallest_area=1.0, + x_range=(-10.0, 10.0), + y_range=(-5.0, 5.0), + crs='Geographic', + min_pixels=25, + ) + assert isinstance(h_g, int) and isinstance(w_g, int) + assert h_g > 0 and w_g > 0 + + # Geographic has 2:1 (x:y) aspect ratio -> width/height should land + # near 4x (because input x_range = 2*y_range and aspect_ratio = 2) + # The exact math: full_aspect_ratio = (360/180) = 2, + # h = sqrt(total_pixels / 2), w = 2*h. + # Then canvas_h *= y_range/180 (= 10/180), canvas_w *= x_range/360 (= 20/360). + # Both fractions are 1/18, so canvas_w / canvas_h = w/h = 2. + assert w_g == 2 * h_g + + +def test_suggest_zonal_canvas_invalid_crs_raises(): + """Cat 4 LOW: invalid CRS triggers KeyError in CRS lookup.""" + with pytest.raises(KeyError): + suggest_zonal_canvas( + smallest_area=1.0, + x_range=(0.0, 1.0), + y_range=(0.0, 1.0), + crs='NotAValidProjection', + ) + + +# --------------------------------------------------------------------------- +# Cat 4 MEDIUM -- crosstab parameter coverage on cupy backend +# --------------------------------------------------------------------------- + +@cuda_and_cupy_available +def test_crosstab_cupy_with_zone_ids_filter(): + """crosstab cupy with zone_ids subset (Cat 4 MEDIUM).""" + import cupy as cp + + zones_np = np.array( + [[1, 1, 2, 2], + [1, 1, 2, 2], + [3, 3, 4, 4]], + dtype=np.float64, + ) + values_np = np.array( + [[10, 10, 20, 20], + [10, 20, 20, 30], + [30, 30, 10, 10]], + dtype=np.float64, + ) + + zones_c = xr.DataArray(cp.asarray(zones_np), dims=['y', 'x']) + values_c = xr.DataArray(cp.asarray(values_np), dims=['y', 'x']) + df_cp = crosstab(zones_c, values_c, zone_ids=[1.0, 3.0]) + + # filtered to zones 1 and 3 only + assert set(df_cp['zone'].tolist()) == {1.0, 3.0} + + +@cuda_and_cupy_available +def test_crosstab_cupy_with_cat_ids_filter(): + """crosstab cupy with cat_ids subset (Cat 4 MEDIUM).""" + import cupy as cp + + zones_np = np.array([[1, 1, 2, 2], [1, 1, 2, 2]], dtype=np.float64) + values_np = np.array([[10, 20, 30, 40], [10, 20, 30, 40]], dtype=np.float64) + + zones_c = xr.DataArray(cp.asarray(zones_np), dims=['y', 'x']) + values_c = xr.DataArray(cp.asarray(values_np), dims=['y', 'x']) + df_cp = crosstab(zones_c, values_c, cat_ids=[10.0, 30.0]) + + cols = set(df_cp.columns) - {'zone'} + assert cols == {10.0, 30.0} + + +# --------------------------------------------------------------------------- +# Cat 5 MEDIUM -- regions coords / attrs propagation +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy']) +def test_regions_preserves_coords_and_attrs(backend, regions_input): + """Cat 5 MEDIUM: regions must propagate coords + attrs.""" + if 'dask' in backend and not has_dask_array(): + pytest.skip("Requires dask.array") + + arr = create_test_raster( + regions_input, backend, + attrs={'res': (0.5, 0.5), 'crs': 'EPSG:4326', 'custom': 'tag'}, + chunks=(3, 3), + ) + out = regions(arr, neighborhood=4) + + assert out.dims == arr.dims + assert out.attrs == arr.attrs + for coord in arr.coords: + np.testing.assert_allclose(out[coord].data, arr[coord].data)