diff --git a/xrspatial/geotiff/tests/parity/test_finalization.py b/xrspatial/geotiff/tests/parity/test_finalization.py new file mode 100644 index 00000000..46537ecf --- /dev/null +++ b/xrspatial/geotiff/tests/parity/test_finalization.py @@ -0,0 +1,1102 @@ +"""Cross-backend parity for the read finalization pipeline (epic #2162). + +Sibling to ``parity/test_backend_matrix.py``. Where ``test_backend_matrix`` +asserts pixel/coord/attrs parity over a fixture matrix, this file pins the +shared read-finalization plumbing that wave 2 of #2162 centralised into +helpers in ``_attrs`` / ``_validation``. Three sections, each a former +top-level file: + +Section 1 -- Dispatcher kwarg parity (#2175) + ``_validate_dispatch_kwargs`` runs at the top of every public read + entry point so ``overview_level``, ``max_cloud_bytes``, + ``missing_sources``, ``band_nodata``, ``on_gpu_failure``, and the + file-like-source guard reject identically across ``open_geotiff`` / + ``read_geotiff_dask`` / ``read_geotiff_gpu`` / ``read_vrt``. + +Section 2 -- Eager finalization parity (#2179) + ``_finalize_eager_read`` stamps the same nodata / georef attrs on the + eager numpy and eager GPU paths. The matrix walks float / int / + out-of-range sentinels, ``mask_nodata=False``, no-sentinel, + explicit ``dtype=``, windowed reads, MinIsWhite, and multi-band. + +Section 3 -- Lazy finalization parity (#2178) + ``_finalize_lazy_read_attrs`` stamps the same attrs on the two dask + backends (``read_geotiff_dask`` and the dask branch of + ``read_geotiff_gpu``). Covers the five georef states plus the + ``nodata_pixels_present`` / ``nodata_dtype_cast`` lazy contract. + +GPU and dask+GPU rows skip when cupy + CUDA are absent via the shared +``requires_gpu`` marker from ``_helpers/markers.py``. 
+""" +from __future__ import annotations + +import io + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import (open_geotiff, read_geotiff_dask, read_geotiff_gpu, read_vrt, + to_geotiff, write_vrt) +from xrspatial.geotiff._attrs import (GEOREF_STATUS_CRS_ONLY, GEOREF_STATUS_FULL, + GEOREF_STATUS_NONE, GEOREF_STATUS_ROTATED_DROPPED, + GEOREF_STATUS_TRANSFORM_ONLY) +from xrspatial.geotiff._coords import _NO_GEOREF_KEY + +from .._helpers.markers import requires_gpu +# Rotated-TIFF writer relocated to ``read/test_crs.py`` by epic #2390 PR 3. +from ..read.test_crs import _write_rotated_tiff + +# =========================================================================== +# Section 1 -- Dispatcher kwarg parity (#2175) +# =========================================================================== +# +# ``open_geotiff`` used to validate dispatcher kwargs inline; the three +# direct backends skipped most of the checks. ``_validate_dispatch_kwargs`` +# now runs at the top of every public read entry point so the exception +# type and message match across backends for the same invalid input. 
def _build_local_tif(tmp_path, name='src_2175.tif'):
    """Write an 8x8 float32 ramp GeoTIFF under ``tmp_path``; return its path.

    The raster carries ``crs=4326`` and an explicit ``transform`` attr and
    serves as the dispatcher's source for the Section 1 tests.
    """
    pixels = np.arange(8 * 8, dtype=np.float32).reshape(8, 8)
    axes = {'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)}
    georef = {
        'crs': 4326,
        'transform': (1.0, 0, 0.0, 0, -1.0, 8.0),
    }
    raster = xr.DataArray(pixels, dims=['y', 'x'], coords=axes, attrs=georef)
    target = str(tmp_path / name)
    to_geotiff(raster, target)
    return target


def _build_vrt(tmp_path):
    """Write a single-source VRT mosaic; return ``(vrt_path, source_path)``."""
    source_tif = _build_local_tif(tmp_path, name='vrt_src_2175.tif')
    vrt_path = str(tmp_path / 'mosaic_2175.vrt')
    write_vrt(vrt_path, [source_tif])
    return vrt_path, source_tif


def _slurp_to_bytesio(path):
    """Return the bytes of ``path`` wrapped in an in-memory ``io.BytesIO``.

    The file handle is closed before the buffer is handed back, so the
    readers under test only ever see the file-like object.
    """
    with open(path, 'rb') as handle:
        return io.BytesIO(handle.read())


# --- overview_level type rejection through every entry point ---


@pytest.mark.parametrize("value", [True, False])
def test_open_geotiff_overview_level_bool(tmp_path, value):
    """``open_geotiff`` refuses a bool ``overview_level``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(TypeError, match="bool"):
        open_geotiff(tif, overview_level=value)


def test_open_geotiff_overview_level_str(tmp_path):
    """``open_geotiff`` refuses a str ``overview_level``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(TypeError, match="str"):
        open_geotiff(tif, overview_level="0")


def test_open_geotiff_overview_level_float(tmp_path):
    """``open_geotiff`` refuses a float ``overview_level``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(TypeError, match="float"):
        open_geotiff(tif, overview_level=1.0)


@pytest.mark.parametrize("value", [True, False])
def test_dask_overview_level_bool(tmp_path, value):
    """``read_geotiff_dask`` refuses a bool ``overview_level``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(TypeError, match="bool"):
        read_geotiff_dask(tif, overview_level=value)


def test_dask_overview_level_str(tmp_path):
    """``read_geotiff_dask`` refuses a str ``overview_level``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(TypeError, match="str"):
        read_geotiff_dask(tif, overview_level="0")


def test_dask_overview_level_float(tmp_path):
    """``read_geotiff_dask`` refuses a float ``overview_level``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(TypeError, match="float"):
        read_geotiff_dask(tif, overview_level=1.0)


@pytest.mark.parametrize("value", [True, False])
def test_gpu_overview_level_bool(tmp_path, value):
    """``read_geotiff_gpu`` refuses a bool ``overview_level``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(TypeError, match="bool"):
        read_geotiff_gpu(tif, overview_level=value)


def test_gpu_overview_level_str(tmp_path):
    """``read_geotiff_gpu`` refuses a str ``overview_level``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(TypeError, match="str"):
        read_geotiff_gpu(tif, overview_level="0")


def test_gpu_overview_level_float(tmp_path):
    """``read_geotiff_gpu`` refuses a float ``overview_level``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(TypeError, match="float"):
        read_geotiff_gpu(tif, overview_level=1.0)


@pytest.mark.parametrize("value", [True, False])
def test_vrt_overview_level_bool(tmp_path, value):
    """``read_vrt`` refuses a bool ``overview_level``."""
    mosaic, _source = _build_vrt(tmp_path)
    with pytest.raises(TypeError, match="bool"):
        read_vrt(mosaic, overview_level=value)


def test_vrt_overview_level_str(tmp_path):
    """``read_vrt`` refuses a str ``overview_level``."""
    mosaic, _source = _build_vrt(tmp_path)
    with pytest.raises(TypeError, match="str"):
        read_vrt(mosaic, overview_level="0")


def test_vrt_overview_level_float(tmp_path):
    """``read_vrt`` refuses a float ``overview_level``."""
    mosaic, _source = _build_vrt(tmp_path)
    with pytest.raises(TypeError, match="float"):
        read_vrt(mosaic, overview_level=1.0)


# --- max_cloud_bytes incompatibility through every applicable backend ---


def test_open_geotiff_dask_rejects_max_cloud_bytes(tmp_path):
    """``open_geotiff(chunks=...)`` refuses ``max_cloud_bytes``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"max_cloud_bytes"):
        open_geotiff(tif, chunks=4, max_cloud_bytes=8)


def test_open_geotiff_gpu_rejects_max_cloud_bytes(tmp_path):
    """``open_geotiff(gpu=True)`` refuses ``max_cloud_bytes``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"max_cloud_bytes"):
        open_geotiff(tif, gpu=True, max_cloud_bytes=8)


def test_open_geotiff_vrt_rejects_max_cloud_bytes(tmp_path):
    """``open_geotiff`` on a VRT source refuses ``max_cloud_bytes``."""
    mosaic, _source = _build_vrt(tmp_path)
    with pytest.raises(ValueError, match=r"max_cloud_bytes"):
        open_geotiff(mosaic, max_cloud_bytes=8)


def test_dask_rejects_max_cloud_bytes(tmp_path):
    """``read_geotiff_dask`` refuses ``max_cloud_bytes``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"max_cloud_bytes"):
        read_geotiff_dask(tif, max_cloud_bytes=8)


def test_gpu_rejects_max_cloud_bytes(tmp_path):
    """``read_geotiff_gpu`` refuses ``max_cloud_bytes``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"max_cloud_bytes"):
        read_geotiff_gpu(tif, max_cloud_bytes=8)


def test_vrt_rejects_max_cloud_bytes(tmp_path):
    """``read_vrt`` refuses ``max_cloud_bytes``."""
    mosaic, _source = _build_vrt(tmp_path)
    with pytest.raises(ValueError, match=r"max_cloud_bytes"):
        read_vrt(mosaic, max_cloud_bytes=8)


def test_explicit_none_max_cloud_bytes_rejected_on_dask_direct(tmp_path):
    """An explicit ``max_cloud_bytes=None`` is still rejected on dask.

    ``None`` is the documented "disable budget" value on the eager path,
    but the dask path has no consumer for it; only the sentinel default
    passes through without expressing an opinion.
    """
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"max_cloud_bytes"):
        read_geotiff_dask(tif, max_cloud_bytes=None)


def test_explicit_none_max_cloud_bytes_rejected_on_gpu_direct(tmp_path):
    """An explicit ``max_cloud_bytes=None`` is rejected by the GPU reader."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"max_cloud_bytes"):
        read_geotiff_gpu(tif, max_cloud_bytes=None)


def test_explicit_none_max_cloud_bytes_rejected_on_vrt_direct(tmp_path):
    """An explicit ``max_cloud_bytes=None`` is rejected by ``read_vrt``."""
    mosaic, _source = _build_vrt(tmp_path)
    with pytest.raises(ValueError, match=r"max_cloud_bytes"):
        read_vrt(mosaic, max_cloud_bytes=None)


# --- missing_sources on non-VRT sources ---


def test_open_geotiff_rejects_missing_sources_on_tif(tmp_path):
    """``missing_sources`` on a plain TIFF is refused by ``open_geotiff``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"missing_sources only applies"):
        open_geotiff(tif, missing_sources='raise')


def test_dask_rejects_missing_sources_on_tif(tmp_path):
    """``missing_sources`` on a plain TIFF is refused by the dask reader."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"missing_sources only applies"):
        read_geotiff_dask(tif, missing_sources='raise')


def test_gpu_rejects_missing_sources_on_tif(tmp_path):
    """``missing_sources`` on a plain TIFF is refused by the GPU reader."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"missing_sources only applies"):
        read_geotiff_gpu(tif, missing_sources='raise')


# --- band_nodata on non-VRT sources ---


def test_open_geotiff_rejects_band_nodata_on_tif(tmp_path):
    """``band_nodata`` on a plain TIFF is refused by ``open_geotiff``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"band_nodata only applies"):
        open_geotiff(tif, band_nodata='first')


def test_dask_rejects_band_nodata_on_tif(tmp_path):
    """``band_nodata`` on a plain TIFF is refused by the dask reader."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"band_nodata only applies"):
        read_geotiff_dask(tif, band_nodata='first')


def test_gpu_rejects_band_nodata_on_tif(tmp_path):
    """``band_nodata`` on a plain TIFF is refused by the GPU reader."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"band_nodata only applies"):
        read_geotiff_gpu(tif, band_nodata='first')


# --- on_gpu_failure when GPU is disabled ---


def test_open_geotiff_rejects_on_gpu_failure_when_gpu_false(tmp_path):
    """``on_gpu_failure`` without ``gpu=True`` is refused by ``open_geotiff``."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"on_gpu_failure only applies"):
        open_geotiff(tif, on_gpu_failure='strict')


def test_dask_rejects_on_gpu_failure(tmp_path):
    """``on_gpu_failure`` is refused by the dask reader."""
    tif = _build_local_tif(tmp_path)
    with pytest.raises(ValueError, match=r"on_gpu_failure only applies"):
        read_geotiff_dask(tif, on_gpu_failure='strict')


def test_vrt_rejects_on_gpu_failure(tmp_path):
    """``on_gpu_failure`` is refused by ``read_vrt``."""
    mosaic, _source = _build_vrt(tmp_path)
    with pytest.raises(ValueError, match=r"on_gpu_failure only applies"):
        read_vrt(mosaic, on_gpu_failure='strict')


# --- File-like sources reject gpu=True / chunks=... ---


def test_open_geotiff_rejects_file_like_with_chunks(tmp_path):
    """A file-like source combined with ``chunks=`` is refused."""
    stream = _slurp_to_bytesio(_build_local_tif(tmp_path))
    with pytest.raises(
            ValueError,
            match=r"chunks=\.\.\. \(dask\) is not supported for file-like"):
        open_geotiff(stream, chunks=4)


def test_open_geotiff_rejects_file_like_with_gpu(tmp_path):
    """A file-like source combined with ``gpu=True`` is refused."""
    stream = _slurp_to_bytesio(_build_local_tif(tmp_path))
    with pytest.raises(
            ValueError,
            match=r"gpu=True is not supported for file-like"):
        open_geotiff(stream, gpu=True)


def test_dask_rejects_file_like(tmp_path):
    """The dask reader refuses a file-like source."""
    stream = _slurp_to_bytesio(_build_local_tif(tmp_path))
    with pytest.raises(
            ValueError,
            match=r"chunks=\.\.\. \(dask\) is not supported for file-like"):
        read_geotiff_dask(stream)


def test_gpu_rejects_file_like(tmp_path):
    """The GPU reader refuses a file-like source."""
    stream = _slurp_to_bytesio(_build_local_tif(tmp_path))
    with pytest.raises(
            ValueError,
            match=r"gpu=True is not supported for file-like"):
        read_geotiff_gpu(stream)


# --- Path-object sources survive the helper's file-like guard ---


def test_open_geotiff_accepts_path_object(tmp_path):
    """``open_geotiff`` reads from a ``pathlib.Path`` source."""
    import pathlib
    tif = pathlib.Path(_build_local_tif(tmp_path))
    result = open_geotiff(tif)
    assert result.shape == (8, 8)


def test_dask_accepts_path_object(tmp_path):
    """``read_geotiff_dask`` reads from a ``pathlib.Path`` source."""
    import pathlib
    tif = pathlib.Path(_build_local_tif(tmp_path))
    result = read_geotiff_dask(tif, chunks=4)
    assert result.shape == (8, 8)


def test_vrt_accepts_path_object(tmp_path):
    """``read_vrt`` reads from a ``pathlib.Path`` source."""
    import pathlib
    mosaic, _source = _build_vrt(tmp_path)
    result = read_vrt(pathlib.Path(mosaic))
    assert result.shape == (8, 8)


@requires_gpu
def test_gpu_accepts_path_object(tmp_path):
    """``read_geotiff_gpu`` reads from a ``pathlib.Path`` source."""
    import pathlib
    tif = pathlib.Path(_build_local_tif(tmp_path))
    result = read_geotiff_gpu(tif)
    assert result.shape == (8, 8)


def test_gpu_path_object_does_not_raise_file_like_error(tmp_path):
    """A ``Path`` source must never trip the file-like guard, even on CPU.

    The dispatch validator runs ahead of any cupy import, so the old
    behaviour on `main` (classifying ``Path`` as file-like) raised before
    GPU code ran. With the fix the validator coerces ``Path`` to ``str``
    first, and any remaining error comes from the GPU stack itself.
    """
    import pathlib
    tif = pathlib.Path(_build_local_tif(tmp_path))
    # The read may succeed (GPU present) or fail for a genuine GPU
    # reason; the only forbidden outcome is the validator's file-like
    # ValueError.
    try:
        read_geotiff_gpu(tif)
    except ValueError as e:
        assert "file-like" not in str(e), (
            f"validator misclassified Path as file-like: {e}"
        )
    except (ImportError, RuntimeError):
        # ImportError -> cupy is not installed.
        # RuntimeError -> the CUDA preflight failed.
        # Neither has anything to do with the Path-coercion regression.
        pass


# --- Default sentinel pins (no regressions on the happy path) ---


def test_open_geotiff_defaults_round_trip(tmp_path):
    """``open_geotiff`` with default kwargs returns the 8x8 raster."""
    tif = _build_local_tif(tmp_path)
    result = open_geotiff(tif)
    assert result.shape == (8, 8)


def test_dask_defaults_round_trip(tmp_path):
    """``read_geotiff_dask`` with default kwargs returns the 8x8 raster."""
    tif = _build_local_tif(tmp_path)
    result = read_geotiff_dask(tif)
    assert result.shape == (8, 8)


def test_vrt_defaults_round_trip(tmp_path):
    """``read_vrt`` with default kwargs returns the 8x8 raster."""
    mosaic, _source = _build_vrt(tmp_path)
    result = read_vrt(mosaic)
    assert result.shape == (8, 8)


# --- Cross-entry-point message parity ---


def _get_error(callable_, *args, **kwargs):
    """Call ``callable_`` and return ``(type_name, message)`` of its error.

    Type and message are reported separately so a test can notice the
    exception type changing silently while the message text stays put.
    Raises ``AssertionError`` when the call completes without raising.
    """
    try:
        callable_(*args, **kwargs)
    except Exception as e:
        outcome = (type(e).__name__, str(e))
    else:
        raise AssertionError("expected an exception, none raised")
    return outcome


def test_max_cloud_bytes_message_parity(tmp_path):
    """``max_cloud_bytes`` errors agree between dispatcher and direct readers."""
    tif = _build_local_tif(tmp_path)
    mosaic, _ = _build_vrt(tmp_path)

    # dask: both entry points raise ValueError naming the dask conflict.
    via_open_kind, via_open_msg = _get_error(
        open_geotiff, tif, chunks=4, max_cloud_bytes=8)
    direct_kind, direct_msg = _get_error(
        read_geotiff_dask, tif, max_cloud_bytes=8)
    assert via_open_kind == "ValueError"
    assert direct_kind == "ValueError"
    for msg in (via_open_msg, direct_msg):
        assert "max_cloud_bytes" in msg
        assert "dask" in msg

    # gpu
    via_open_kind, via_open_msg = _get_error(
        open_geotiff, tif, gpu=True, max_cloud_bytes=8)
    direct_kind, direct_msg = _get_error(
        read_geotiff_gpu, tif, max_cloud_bytes=8)
    assert via_open_kind == "ValueError"
    assert direct_kind == "ValueError"
    for msg in (via_open_msg, direct_msg):
        assert "max_cloud_bytes" in msg
        assert "gpu" in msg.lower()

    # vrt
    via_open_kind, via_open_msg = _get_error(
        open_geotiff, mosaic, max_cloud_bytes=8)
    direct_kind, direct_msg = _get_error(
        read_vrt, mosaic, max_cloud_bytes=8)
    assert via_open_kind == "ValueError"
    assert direct_kind == "ValueError"
    for msg in (via_open_msg, direct_msg):
        assert "max_cloud_bytes" in msg
        assert "vrt" in msg.lower()


def test_band_nodata_message_parity(tmp_path):
    """``band_nodata`` on a TIFF yields the same error from each reader."""
    tif = _build_local_tif(tmp_path)
    readers = (open_geotiff, read_geotiff_dask, read_geotiff_gpu)
    outcomes = [
        _get_error(reader, tif, band_nodata='first') for reader in readers
    ]
    for kind, msg in outcomes:
        assert kind == "ValueError"
        assert "band_nodata only applies" in msg


def test_missing_sources_message_parity(tmp_path):
    """``missing_sources`` on a TIFF yields the same error from each reader."""
    tif = _build_local_tif(tmp_path)
    readers = (open_geotiff, read_geotiff_dask, read_geotiff_gpu)
    outcomes = [
        _get_error(reader, tif, missing_sources='raise') for reader in readers
    ]
    for kind, msg in outcomes:
        assert kind == "ValueError"
        assert "missing_sources only applies" in msg


def test_on_gpu_failure_message_parity(tmp_path):
    """``on_gpu_failure`` yields the same error from each non-GPU reader."""
    tif = _build_local_tif(tmp_path)
    mosaic, _ = _build_vrt(tmp_path)
    calls = (
        (open_geotiff, tif),
        (read_geotiff_dask, tif),
        (read_vrt, mosaic),
    )
    outcomes = [
        _get_error(reader, source, on_gpu_failure='strict')
        for reader, source in calls
    ]
    for kind, msg in outcomes:
        assert kind == "ValueError"
        assert "on_gpu_failure only applies" in msg


def test_overview_level_message_parity(tmp_path):
    """A str ``overview_level`` yields the same TypeError everywhere."""
    tif = _build_local_tif(tmp_path)
    mosaic, _ = _build_vrt(tmp_path)
    calls = (
        (open_geotiff, tif),
        (read_geotiff_dask, tif),
        (read_geotiff_gpu, tif),
        (read_vrt, mosaic),
    )
    outcomes = [
        _get_error(reader, source, overview_level="bad")
        for reader, source in calls
    ]
    for kind, msg in outcomes:
        assert kind == "TypeError"
        assert "overview_level must be an int or None" in msg
        assert "str" in msg


# ===========================================================================
# Section 2 -- Eager finalization parity (#2179)
# ===========================================================================
#
# ``_finalize_eager_read`` stamps nodata / georef attrs on the eager numpy
# path and the three eager GPU paths. Each case reads the same file via
# ``open_geotiff(path)`` and ``open_geotiff(path, gpu=True)`` and compares
# the helper-stamped attrs across the two reads.


def _write_with_nodata(arr, path, *, nodata=None):
    """Write ``arr`` as a deflate-compressed, 16-px-tiled GeoTIFF.

    ``nodata`` is forwarded to the low-level writer unchanged; ``None``
    means no sentinel is requested.
    """
    from xrspatial.geotiff._writer import write as _write_tiff
    _write_tiff(
        arr,
        path,
        nodata=nodata,
        compression='deflate',
        tiled=True,
        tile_size=16,
    )


def _read_both(path, **kwargs):
    """Open ``path`` eagerly on numpy and on GPU; return ``(cpu_da, gpu_da)``.

    The same ``kwargs`` go to both ``open_geotiff`` calls so each backend
    is exercised under an identical caller contract.
    """
    return open_geotiff(path, **kwargs), open_geotiff(path, gpu=True, **kwargs)


# Subset of attrs ``_finalize_eager_read`` is responsible for; mirrors
# the issue body's parity claim list.
_LIFECYCLE_ATTRS = (
    'nodata',
    'nodata_pixels_present',
    'nodata_dtype_cast',
    'georef_status',
)


def _assert_lifecycle_attrs_match(cpu_da, gpu_da):
    """Assert every ``_LIFECYCLE_ATTRS`` entry is equal across backends.

    A key missing on both sides compares ``None == None`` and passes.
    ``masked_nodata`` is left to the individual tests, which assert on
    its concrete boolean value when a sentinel is declared.
    """
    cpu_attrs, gpu_attrs = cpu_da.attrs, gpu_da.attrs
    for key in _LIFECYCLE_ATTRS:
        cpu_v, gpu_v = cpu_attrs.get(key), gpu_attrs.get(key)
        assert cpu_v == gpu_v, (
            f"attrs[{key!r}] divergence: cpu={cpu_v!r} gpu={gpu_v!r}"
        )


@requires_gpu
def test_float_sentinel_match_and_mask(tmp_path):
    """Float raster with a sentinel: both backends mask and agree on attrs."""
    pixels = np.array(
        [
            [1.0, 2.0, -9999.0],
            [4.0, -9999.0, 6.0],
        ],
        dtype=np.float32,
    )
    tif = str(tmp_path / 'eager_parity_2179_float_sentinel.tif')
    _write_with_nodata(pixels, tif, nodata=-9999.0)

    cpu, gpu = _read_both(tif)

    # Float sources keep their declared dtype on both backends; masking
    # only swaps the sentinel for NaN.
    assert cpu.dtype == gpu.dtype
    assert cpu.attrs.get('masked_nodata') is True
    assert gpu.attrs.get('masked_nodata') is True

    # ``nodata_pixels_present`` has to be a genuine bool on both
    # backends -- the issue body calls this out explicitly.
    _assert_lifecycle_attrs_match(cpu, gpu)
    assert isinstance(cpu.attrs.get('nodata_pixels_present'), bool)
    assert isinstance(gpu.attrs.get('nodata_pixels_present'), bool)
    assert cpu.attrs.get('nodata_pixels_present') is True

    # NaN positions must line up pixel-for-pixel.
    np.testing.assert_array_equal(
        np.isnan(cpu.values), np.isnan(gpu.data.get()))


@requires_gpu
def test_int_in_range_sentinel_promotes_to_float(tmp_path):
    """uint16 with a 65535 sentinel: both backends promote to float64 + NaN."""
    pixels = np.array([[1, 2, 3], [65535, 5, 6]], dtype=np.uint16)
    tif = str(tmp_path / 'eager_parity_2179_int_sentinel.tif')
    _write_with_nodata(pixels, tif, nodata=65535)

    cpu, gpu = _read_both(tif)

    # The integer -> float promotion happens on both backends.
    assert cpu.dtype == np.float64
    assert gpu.dtype == np.float64
    assert cpu.attrs.get('masked_nodata') is True
    assert gpu.attrs.get('masked_nodata') is True

    _assert_lifecycle_attrs_match(cpu, gpu)
    assert cpu.attrs.get('nodata_pixels_present') is True

    np.testing.assert_array_equal(
        np.isnan(cpu.values), np.isnan(gpu.data.get()))


@requires_gpu
def test_int_out_of_range_sentinel_is_no_op(tmp_path):
    """uint8 with a 9999 sentinel: out of range, no promotion, none present."""
    pixels = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8)
    tif = str(tmp_path / 'eager_parity_2179_int_oor.tif')
    # No uint8 pixel can equal 9999. ``_writer.write`` takes the int
    # sentinel without complaint (it refuses only bool / NaN values,
    # not out-of-range ints), so the file carries a literal nodata tag
    # of 9999 that matches nothing.
    _write_with_nodata(pixels, tif, nodata=9999)

    cpu, gpu = _read_both(tif)

    # An out-of-range sentinel triggers no promotion; the uint8 buffer
    # is left as-is on both backends.
    assert cpu.dtype == np.uint8
    assert gpu.dtype == np.uint8
    # The mask never ran and the dtype stayed integer, hence
    # ``masked_nodata`` is False.
    assert cpu.attrs.get('masked_nodata') is False
    assert gpu.attrs.get('masked_nodata') is False

    _assert_lifecycle_attrs_match(cpu, gpu)
    assert cpu.attrs.get('nodata_pixels_present') is False


@requires_gpu
def test_mask_nodata_false_keeps_literal_sentinel(tmp_path):
    """``mask_nodata=False`` leaves the pixel buffer alone on both backends."""
    pixels = np.array(
        [
            [1.0, 2.0, -9999.0],
            [4.0, -9999.0, 6.0],
        ],
        dtype=np.float32,
    )
    tif = str(tmp_path / 'eager_parity_2179_mask_false.tif')
    _write_with_nodata(pixels, tif, nodata=-9999.0)

    cpu, gpu = _read_both(tif, mask_nodata=False)

    # The literal sentinel survives (no NaN substitution) and both
    # backends report ``masked_nodata=False``.
    assert cpu.dtype == np.float32
    assert gpu.dtype == np.float32
    assert cpu.attrs.get('masked_nodata') is False
    assert gpu.attrs.get('masked_nodata') is False

    _assert_lifecycle_attrs_match(cpu, gpu)
    # Presence is still reported by the no-mask scan branch.
    assert cpu.attrs.get('nodata_pixels_present') is True

    np.testing.assert_array_equal(cpu.values, gpu.data.get())


@requires_gpu
def test_no_declared_sentinel_omits_nodata_attrs(tmp_path):
    """No nodata declared in the source: no nodata attrs on either backend."""
    pixels = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8)
    tif = str(tmp_path / 'eager_parity_2179_no_sentinel.tif')
    _write_with_nodata(pixels, tif, nodata=None)

    cpu, gpu = _read_both(tif)

    assert cpu.dtype == np.uint8
    assert gpu.dtype == np.uint8

    # ``_set_nodata_attrs`` in the helper returns early without a
    # declared sentinel, so none of these keys may appear.
    assert 'nodata' not in cpu.attrs
    assert 'nodata' not in gpu.attrs
    assert 'masked_nodata' not in cpu.attrs
    assert 'masked_nodata' not in gpu.attrs
    assert 'nodata_pixels_present' not in cpu.attrs
    assert 'nodata_pixels_present' not in gpu.attrs

    # ``georef_status`` is stamped irrespective of nodata state, so the
    # parity check still exercises that branch.
    _assert_lifecycle_attrs_match(cpu, gpu)


@requires_gpu
def test_dtype_kwarg_records_post_mask_cast(tmp_path):
    """An explicit ``dtype=`` records ``nodata_dtype_cast`` on both backends."""
    pixels = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint16)
    tif = str(tmp_path / 'eager_parity_2179_dtype_cast.tif')
    # With an out-of-range sentinel the mask is a no-op, which leaves
    # the cast attr as the sole signal of the requested dtype change.
    # That separates the ``nodata_dtype_cast`` branch from the
    # mask-driven promotion covered by
    # ``test_int_in_range_sentinel_promotes_to_float``.
    _write_with_nodata(pixels, tif, nodata=9999)

    cpu, gpu = _read_both(tif, dtype=np.float32)

    assert cpu.dtype == np.float32
    assert gpu.dtype == np.float32
    assert cpu.attrs.get('nodata_dtype_cast') == 'float32'
    assert gpu.attrs.get('nodata_dtype_cast') == 'float32'

    _assert_lifecycle_attrs_match(cpu, gpu)


@requires_gpu
def test_windowed_read_presence_matches_window_contents(tmp_path):
    """Windowed read: ``nodata_pixels_present`` describes the window only.

    Pins the slice-before-mask behaviour the GPU local-eager path gained
    in #2179. Before that PR the GPU path masked the whole IFD and then
    sliced, so the flag reflected sentinel presence anywhere in the file;
    afterwards it reflects the requested window, matching what the CPU
    path has always done.
    """
    # 4x4 ramp 1..16 with a single sentinel at (row 2, col 2), i.e. in
    # the bottom half, so the two windows below fall on opposite sides
    # of the presence flag.
    pixels = np.arange(1, 17, dtype=np.float32).reshape(4, 4)
    pixels[2, 2] = -9999.0
    tif = str(tmp_path / 'eager_parity_2179_windowed.tif')
    _write_with_nodata(pixels, tif, nodata=-9999.0)

    # Top-left 2x2 window: the sentinel is out of scope.
    cpu, gpu = _read_both(tif, window=(0, 0, 2, 2))
    _assert_lifecycle_attrs_match(cpu, gpu)
    assert cpu.attrs.get('nodata_pixels_present') is False
    assert gpu.attrs.get('nodata_pixels_present') is False

    # Bottom 2x4 window: the sentinel is in scope.
    cpu, gpu = _read_both(tif, window=(2, 0, 4, 4))
    _assert_lifecycle_attrs_match(cpu, gpu)
    assert cpu.attrs.get('nodata_pixels_present') is True
    assert gpu.attrs.get('nodata_pixels_present') is True


@requires_gpu
def test_miniswhite_post_inversion_sentinel_parity(tmp_path):
    """MinIsWhite raster: the post-inversion sentinel resolves identically.

    Covers the ``_mw_mask_nodata`` branch of the GPU local-eager path.
    The reader inverts the buffer, and the helper's mask block on the GPU
    side compares against the post-MinIsWhite sentinel; the eager numpy
    path obtains the same sentinel from ``geo_info._mask_nodata`` via
    ``read_to_array``. NaN positions and lifecycle attrs should agree.
    """
    import tifffile

    # uint8 with nodata=0: MinIsWhite inverts the stored value to 255
    # before masking, making 255 the post-inversion sentinel.
    stored = np.array([[0, 100, 200], [50, 0, 255]], dtype=np.uint8)
    tif = str(tmp_path / 'eager_parity_2179_miniswhite.tif')
    nodata_tag = ("GDAL_NODATA", "s", 0, "0\0", True)
    tifffile.imwrite(
        tif,
        stored,
        photometric="miniswhite",
        extratags=[nodata_tag],
        tile=(16, 16),
    )

    cpu, gpu = _read_both(tif)

    _assert_lifecycle_attrs_match(cpu, gpu)
    # The MinIsWhite sentinel resolution decides where NaN lands, so the
    # NaN masks must be identical.
    np.testing.assert_array_equal(
        np.isnan(cpu.values), np.isnan(gpu.data.get()))


@requires_gpu
def test_multiband_stripped_parity(tmp_path):
    """3-band stripped read: both backends build the same (y, x, band) array.

    Stripped files land on the GPU CPU-fallback path. Multi-band output
    goes through the helper's ``arr.ndim == 3`` branch on both backends;
    checking ``georef_status`` and the sentinel attrs for this shape keeps
    a later change to the 3-D coord build from diverging unnoticed.
    """
    rng = np.random.RandomState(20260520)
    bands = rng.randint(0, 200, size=(32, 48, 3)).astype(np.uint8)
    source = xr.DataArray(bands, dims=['y', 'x', 'band'])

    tif = str(tmp_path / 'eager_parity_2179_multiband.tif')

    # ``tiled=False`` (stripped) sends the GPU read through the
    # CPU-fallback eager site -- one of the three sites this PR migrated.
    to_geotiff(source, tif, tiled=False)

    cpu, gpu = _read_both(tif)

    # Dims and shape agree across backends.
    assert cpu.dims == gpu.dims
    assert cpu.shape == gpu.shape == (32, 48, 3)

    _assert_lifecycle_attrs_match(cpu, gpu)
    np.testing.assert_array_equal(cpu.values, gpu.data.get())


# ===========================================================================
# Section 3 -- Lazy finalization parity (#2178)
# ===========================================================================
#
# ``_finalize_lazy_read_attrs`` centralises the validate-then-populate-then-
# stamp logic shared by ``read_geotiff_dask`` (CPU+dask) and the dask branch
# of ``read_geotiff_gpu`` (GPU+dask). Each test opens the same fixture
# through both backends and compares the attrs.
+ +tifffile = pytest.importorskip("tifffile") + + +def _open_cpu_dask(path, **kwargs): + return read_geotiff_dask(path, chunks=2, **kwargs) + + +def _open_gpu_dask(path, **kwargs): + return read_geotiff_gpu(path, chunks=2, **kwargs) + + +_BACKENDS = [ + pytest.param(_open_cpu_dask, id="dask+numpy"), + pytest.param(_open_gpu_dask, id="dask+cupy", marks=requires_gpu), +] + + +def _gpu_dask_available() -> bool: + """Runtime GPU probe for the conditional cross-backend assertions. + + The ``requires_gpu`` marker handles the skip on the parametrised GPU + rows; this helper gates the inline ``if`` branches that compare CPU + against GPU inside an otherwise CPU-only test. + """ + from .._helpers.markers import gpu_available + return gpu_available() + + +# --- Fixture builders, mirroring the per-state fixtures in test_georef_status_2136 --- + + +def _make_full_tiff(path): + """Float coords + CRS -> ``full``.""" + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={ + 'y': np.array([200.0, 199.0, 198.0, 197.0]), + 'x': np.array([100.0, 101.0, 102.0, 103.0]), + }, + dims=('y', 'x'), + attrs={'crs': 4326}, + ) + to_geotiff(da, path) + + +def _make_transform_only_tiff(path): + """Float coords, no CRS -> ``transform_only``.""" + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={ + 'y': np.array([200.0, 199.0, 198.0, 197.0]), + 'x': np.array([100.0, 101.0, 102.0, 103.0]), + }, + dims=('y', 'x'), + ) + to_geotiff(da, path) + + +def _make_crs_only_tiff(path): + """No-georef marker + CRS -> ``crs_only``.""" + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={ + 'y': np.arange(4, dtype=np.int64), + 'x': np.arange(4, dtype=np.int64), + }, + dims=('y', 'x'), + attrs={_NO_GEOREF_KEY: True, 'crs': 4326}, + ) + to_geotiff(da, path) + + +def _make_none_tiff(path): + """Bare TIFF with no GeoTIFF tags at all -> ``none``.""" + arr = np.zeros((4, 4), dtype=np.float32) + tifffile.imwrite( + path, arr, photometric='minisblack', 
planarconfig='contig', + metadata=None, + ) + + +def _make_rotated_tiff(path): + """Rotated ``ModelTransformationTag`` (opened with ``allow_rotated``) + -> ``rotated_dropped``. The data is uint16 because the rotated-TIFF + writer in the #2115 test only emits integer pixels; that's fine for + a metadata pin.""" + arr = np.arange(16, dtype='float promotion + branch.""" + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + arr[0, 0] = sentinel + da = xr.DataArray( + arr, + coords={ + 'y': np.array([200.0, 199.0, 198.0, 197.0]), + 'x': np.array([100.0, 101.0, 102.0, 103.0]), + }, + dims=('y', 'x'), + attrs={'crs': 4326, 'nodata': sentinel}, + ) + to_geotiff(da, path) + + +def _make_int_with_nodata_tiff(path, sentinel=30): + """Integer raster carrying a sentinel. Lets the dtype-cast tests + distinguish "graph dtype auto-promoted by masking" from + "caller asked for an explicit cast".""" + arr = np.array([[10, 20, 25], [30, 40, 50]], dtype=np.int16) + da = xr.DataArray( + arr, + coords={ + 'y': np.array([200.0, 199.0]), + 'x': np.array([100.0, 101.0, 102.0]), + }, + dims=('y', 'x'), + attrs={'crs': 4326, 'nodata': sentinel}, + ) + to_geotiff(da, path) + + +_GEOREF_FIXTURES = [ + pytest.param(_make_full_tiff, GEOREF_STATUS_FULL, False, + id="full"), + pytest.param(_make_transform_only_tiff, GEOREF_STATUS_TRANSFORM_ONLY, + False, id="transform_only"), + pytest.param(_make_crs_only_tiff, GEOREF_STATUS_CRS_ONLY, False, + id="crs_only"), + pytest.param(_make_none_tiff, GEOREF_STATUS_NONE, False, + id="none"), + pytest.param(_make_rotated_tiff, GEOREF_STATUS_ROTATED_DROPPED, + True, id="rotated_dropped"), +] + + +@pytest.mark.parametrize("fixture,expected_status,allow_rotated", + _GEOREF_FIXTURES) +def test_georef_status_parity(tmp_path, fixture, expected_status, + allow_rotated): + """Both dask backends emit the same ``georef_status`` for each + of the five reader states.""" + path = str(tmp_path / f"tmp_2178_status_{expected_status}.tif") + fixture(path) + + kwargs = 
{'allow_rotated': True} if allow_rotated else {} + cpu = _open_cpu_dask(path, **kwargs) + assert cpu.attrs.get('georef_status') == expected_status + + if _gpu_dask_available(): + gpu = _open_gpu_dask(path, **kwargs) + assert gpu.attrs.get('georef_status') == expected_status + assert cpu.attrs['georef_status'] == gpu.attrs['georef_status'] + + +@pytest.mark.parametrize("fixture,expected_status,allow_rotated", + _GEOREF_FIXTURES) +def test_attrs_dict_parity(tmp_path, fixture, expected_status, + allow_rotated): + """Both dask backends emit the same attrs dict for each fixture.""" + if not _gpu_dask_available(): + pytest.skip("dask+cupy parity requires CUDA") + path = str(tmp_path / f"tmp_2178_parity_{expected_status}.tif") + fixture(path) + + kwargs = {'allow_rotated': True} if allow_rotated else {} + cpu = _open_cpu_dask(path, **kwargs) + gpu = _open_gpu_dask(path, **kwargs) + + cpu_attrs = dict(cpu.attrs) + gpu_attrs = dict(gpu.attrs) + assert cpu_attrs == gpu_attrs, ( + f"attrs dicts diverged for fixture={expected_status}:\n" + f" cpu only: {set(cpu_attrs) - set(gpu_attrs)}\n" + f" gpu only: {set(gpu_attrs) - set(cpu_attrs)}\n" + f" shared keys with different values: " + f"{[k for k in set(cpu_attrs) & set(gpu_attrs) if cpu_attrs[k] != gpu_attrs[k]]}" + ) + + +@pytest.mark.parametrize("opener", _BACKENDS) +def test_nodata_pixels_present_absent_on_lazy(tmp_path, opener): + """Lazy contract from #2135: ``nodata_pixels_present`` stays unset + on both dask backends.""" + path = str(tmp_path / "tmp_2178_pixels_absent.tif") + _make_float_with_nodata_tiff(path) + out = opener(path) + assert 'nodata_pixels_present' not in out.attrs + + +def test_nodata_pixels_present_cross_backend(tmp_path): + """Both backends agree on the absence of ``nodata_pixels_present`` + when reading the same fixture.""" + if not _gpu_dask_available(): + pytest.skip("dask+cupy parity requires CUDA") + path = str(tmp_path / "tmp_2178_pixels_cross.tif") + _make_float_with_nodata_tiff(path) + cpu = 
_open_cpu_dask(path) + gpu = _open_gpu_dask(path) + assert 'nodata_pixels_present' not in cpu.attrs + assert 'nodata_pixels_present' not in gpu.attrs + + +@pytest.mark.parametrize("opener", _BACKENDS) +def test_dtype_cast_absent_without_caller_dtype(tmp_path, opener): + """No ``dtype=`` kwarg: ``nodata_dtype_cast`` stays unset, even + when masking auto-promotes the graph dtype to float64.""" + path = str(tmp_path / "tmp_2178_no_cast.tif") + _make_int_with_nodata_tiff(path) + out = opener(path) + # Masking promoted the int source to float64 on the graph dtype, + # but the caller did not ask for a cast. + assert out.dtype == np.float64 + assert out.attrs.get('masked_nodata') is True + assert 'nodata_dtype_cast' not in out.attrs + + +@pytest.mark.parametrize("opener", _BACKENDS) +def test_dtype_cast_records_target(tmp_path, opener): + """Explicit ``dtype=`` kwarg: ``nodata_dtype_cast`` records the + requested dtype on both backends.""" + path = str(tmp_path / "tmp_2178_with_cast.tif") + _make_int_with_nodata_tiff(path) + out = opener(path, mask_nodata=False, dtype=np.float64) + assert out.attrs.get('masked_nodata') is False + assert out.attrs.get('nodata_dtype_cast') == 'float64' + assert 'nodata_pixels_present' not in out.attrs + + +def test_dtype_cast_parity_cross_backend(tmp_path): + """Cross-backend: same input + same ``dtype=`` kwarg yields the + same ``nodata_dtype_cast`` value.""" + if not _gpu_dask_available(): + pytest.skip("dask+cupy parity requires CUDA") + path = str(tmp_path / "tmp_2178_cast_cross.tif") + _make_int_with_nodata_tiff(path) + cpu = _open_cpu_dask(path, mask_nodata=False, dtype=np.float64) + gpu = _open_gpu_dask(path, mask_nodata=False, dtype=np.float64) + assert cpu.attrs.get('nodata_dtype_cast') == gpu.attrs.get('nodata_dtype_cast') + assert cpu.attrs.get('nodata_dtype_cast') == 'float64' + + +def test_dtype_cast_absent_parity_cross_backend(tmp_path): + """Cross-backend: same int input without an explicit ``dtype=`` + leaves 
``nodata_dtype_cast`` absent on both backends (the auto- + promoted graph dtype must not leak as a caller cast).""" + if not _gpu_dask_available(): + pytest.skip("dask+cupy parity requires CUDA") + path = str(tmp_path / "tmp_2178_no_cast_cross.tif") + _make_int_with_nodata_tiff(path) + cpu = _open_cpu_dask(path) + gpu = _open_gpu_dask(path) + assert 'nodata_dtype_cast' not in cpu.attrs + assert 'nodata_dtype_cast' not in gpu.attrs + + +@pytest.mark.parametrize("opener", _BACKENDS) +def test_dtype_cast_records_integer_target(tmp_path, opener): + """Caller-supplied integer ``dtype=`` kwarg: ``nodata_dtype_cast`` + records the integer dtype on both backends. Pins the + ``dtype.kind != 'f'`` branch of the call-site fixup (review + follow-up for #2178).""" + path = str(tmp_path / "tmp_2178_int_cast.tif") + _make_int_with_nodata_tiff(path) + # ``mask_nodata=False`` keeps the integer dtype; the caller cast + # then routes the graph dtype to ``int32`` without the masking + # auto-promotion firing. The pre-helper contract emits + # ``nodata_dtype_cast='int32'`` and ``masked_nodata=False`` here. + out = opener(path, mask_nodata=False, dtype=np.int32) + assert out.dtype == np.int32 + assert out.attrs.get('masked_nodata') is False + assert out.attrs.get('nodata_dtype_cast') == 'int32' + assert 'nodata_pixels_present' not in out.attrs diff --git a/xrspatial/geotiff/tests/test_round_trip_parity_rasterio_zarr_1961.py b/xrspatial/geotiff/tests/parity/test_reference.py similarity index 53% rename from xrspatial/geotiff/tests/test_round_trip_parity_rasterio_zarr_1961.py rename to xrspatial/geotiff/tests/parity/test_reference.py index 22270d6f..81e733e2 100644 --- a/xrspatial/geotiff/tests/test_round_trip_parity_rasterio_zarr_1961.py +++ b/xrspatial/geotiff/tests/parity/test_reference.py @@ -1,26 +1,24 @@ -"""Cross-library accuracy test (issue #1961). - -Read the same GeoTIFF three ways and assert the results agree: - -1. 
``rasterio.open(path)`` -> numpy array, transform, CRS, nodata. -2. ``xrspatial.geotiff.open_geotiff(path)`` -> ``xr.DataArray``. -3. Write the xarray-spatial DataArray with ``.to_zarr(...)``, reopen with - ``xr.open_zarr(...)``. - -The point of this file is to pin the GeoTIFF reader against an external -reference (rasterio) and against a round-trip through a different on-disk -format (Zarr). A regression in header parsing, georef extraction, coord -generation, nodata handling, or Zarr metadata propagation can pass every -existing test and only surface when a user files a bug. - -Each input file covers a case that has drifted before: - -- single-band float32 with a non-NaN nodata sentinel -- multi-band uint16 with a single dataset-level nodata sentinel -- north-up and south-up rasters (negative vs positive ``pixel_height``) -- 1xN / Nx1 stripe (#1945) -- tiled COG, no overviews -- no-georef raster with integer coords (#1949) +"""Backend parity for degenerate shapes and external-reference round trips. + +Sibling to ``parity/test_backend_matrix.py``. Two sections, each a former +top-level file: + +Section 1 -- Degenerate shapes and special floats across backends + (2026-05-11 coverage sweep) + The eager numpy path covers 1x1 / 1xN / Nx1 rasters plus all-NaN / + Inf inputs (``test_edge_cases.py``). This section adds the same + coverage for the GPU, dask+numpy, and dask+cupy backends, plus the + GPU writer's degenerate-shape path and the dask float-sentinel mask. + +Section 2 -- rasterio / Zarr round-trip parity (#1961) + Read the same GeoTIFF three ways -- rasterio, ``open_geotiff``, and a + Zarr round trip of the xarray-spatial result -- and assert pixels, + coords, transform, CRS, and nodata agree. Pins the reader against an + external reference and a different on-disk format. + +GPU rows skip when cupy + CUDA are absent via the shared ``requires_gpu`` +marker. The rasterio / Zarr section skips when those optional deps are +missing. 
""" from __future__ import annotations @@ -30,18 +28,364 @@ import pytest import xarray as xr +from xrspatial.geotiff import (open_geotiff, read_geotiff_dask, read_geotiff_gpu, to_geotiff, + write_geotiff_gpu) + +from .._helpers.markers import requires_gpu + +# =========================================================================== +# Section 1 -- Degenerate shapes and special floats across backends +# =========================================================================== +# +# A regression that broke any backend for a 1-pixel row, 1-column strip, +# or all-NaN input would not surface until a user hit the production-traffic +# mosaic that triggered it. This section closes the geometric-edge-case and +# NaN / Inf gaps for the non-eager backends. + + +class TestSinglePixelRead: + """1x1 rasters round-trip through every read backend. + + The eager numpy path covers this in ``test_edge_cases.py``. A 1x1 + raster has degenerate stride and tile geometry; the dask path's + chunk-tile alignment and the GPU path's grid sizing both have + code that assumes >1 pixel without crashing -- but a future + refactor could regress either. + """ + + @pytest.fixture + def single_pixel_path(self, tmp_path): + arr = np.array([[42.0]], dtype=np.float32) + p = tmp_path / "single_pixel.tif" + to_geotiff(arr, str(p)) + return str(p), arr + + def test_dask_numpy_backend(self, single_pixel_path): + path, arr = single_pixel_path + # chunks larger than the raster is the documented behaviour + # (dask collapses to a single chunk that matches the data). 
+ result = open_geotiff(path, chunks=64) + assert result.shape == (1, 1) + computed = result.compute() + np.testing.assert_array_equal(computed.values, arr) + + def test_read_geotiff_dask_direct(self, single_pixel_path): + """The explicit ``read_geotiff_dask`` entry point matches dispatch.""" + path, arr = single_pixel_path + result = read_geotiff_dask(path, chunks=8) + assert result.shape == (1, 1) + np.testing.assert_array_equal(result.compute().values, arr) + + @requires_gpu + def test_gpu_backend(self, single_pixel_path): + path, arr = single_pixel_path + result = open_geotiff(path, gpu=True) + assert result.shape == (1, 1) + np.testing.assert_array_equal(result.data.get(), arr) + + @requires_gpu + def test_read_geotiff_gpu_direct(self, single_pixel_path): + """The explicit ``read_geotiff_gpu`` entry point matches dispatch.""" + path, arr = single_pixel_path + result = read_geotiff_gpu(path) + assert result.shape == (1, 1) + np.testing.assert_array_equal(result.data.get(), arr) + + @requires_gpu + def test_dask_cupy_backend(self, single_pixel_path): + """dask+cupy must also handle a 1x1 raster. + + The dask graph here has exactly one block (chunks larger than + the raster) and that block carries a cupy buffer. + """ + import cupy + path, arr = single_pixel_path + result = open_geotiff(path, gpu=True, chunks=64) + assert result.shape == (1, 1) + computed = result.compute() + assert isinstance(computed.data, cupy.ndarray) + np.testing.assert_array_equal(computed.data.get(), arr) + + +class TestSingleRowRead: + """1xN rasters round-trip through every read backend. + + Single-row tiles trigger the strip-fallback path in the GPU decoder + when there is no tiled layout, and a 1-row chunk in the dask graph. 
+ """ + + @pytest.fixture + def single_row_path(self, tmp_path): + arr = np.arange(10, dtype=np.float32).reshape(1, 10) + p = tmp_path / "single_row.tif" + to_geotiff(arr, str(p)) + return str(p), arr + + def test_dask_numpy_backend(self, single_row_path): + path, arr = single_row_path + result = open_geotiff(path, chunks=4) + assert result.shape == (1, 10) + np.testing.assert_array_equal(result.compute().values, arr) + + @requires_gpu + def test_gpu_backend(self, single_row_path): + path, arr = single_row_path + result = open_geotiff(path, gpu=True) + assert result.shape == (1, 10) + np.testing.assert_array_equal(result.data.get(), arr) + + @requires_gpu + def test_dask_cupy_backend(self, single_row_path): + import cupy + path, arr = single_row_path + result = open_geotiff(path, gpu=True, chunks=4) + assert result.shape == (1, 10) + computed = result.compute() + assert isinstance(computed.data, cupy.ndarray) + np.testing.assert_array_equal(computed.data.get(), arr) + + +class TestSingleColumnRead: + """Nx1 rasters round-trip through every read backend. + + Single-column tiles are the mirror case of single-row, and exercise + the row-major iteration order in the dask block-builder and the + GPU's window-band slice path. 
+ """ + + @pytest.fixture + def single_column_path(self, tmp_path): + arr = np.arange(10, dtype=np.float32).reshape(10, 1) + p = tmp_path / "single_column.tif" + to_geotiff(arr, str(p)) + return str(p), arr + + def test_dask_numpy_backend(self, single_column_path): + path, arr = single_column_path + result = open_geotiff(path, chunks=4) + assert result.shape == (10, 1) + np.testing.assert_array_equal(result.compute().values, arr) + + @requires_gpu + def test_gpu_backend(self, single_column_path): + path, arr = single_column_path + result = open_geotiff(path, gpu=True) + assert result.shape == (10, 1) + np.testing.assert_array_equal(result.data.get(), arr) + + @requires_gpu + def test_dask_cupy_backend(self, single_column_path): + import cupy + path, arr = single_column_path + result = open_geotiff(path, gpu=True, chunks=4) + assert result.shape == (10, 1) + computed = result.compute() + assert isinstance(computed.data, cupy.ndarray) + np.testing.assert_array_equal(computed.data.get(), arr) + + +@requires_gpu +class TestGpuWriterDegenerateShapes: + """``write_geotiff_gpu`` must accept 1-pixel, 1-row, and 1-column inputs. + + The GPU writer's tile-encoding path uses an internal grid sizing + helper that fell back to host code for shapes smaller than the + default tile. The fallback exists but had no regression test that + would catch a future "fast-path only" refactor. 
+ """ + + def test_single_pixel_round_trip(self, tmp_path): + import cupy + arr = cupy.array([[42.0]], dtype=cupy.float32) + da_gpu = xr.DataArray(arr, dims=["y", "x"]) + p = str(tmp_path / "gpu_1x1.tif") + write_geotiff_gpu(da_gpu, p) + + result = open_geotiff(p) + assert result.shape == (1, 1) + assert result.values[0, 0] == 42.0 + + def test_single_row_round_trip(self, tmp_path): + import cupy + arr_np = np.arange(10, dtype=np.float32).reshape(1, 10) + arr = cupy.asarray(arr_np) + da_gpu = xr.DataArray(arr, dims=["y", "x"]) + p = str(tmp_path / "gpu_1xN.tif") + write_geotiff_gpu(da_gpu, p) + + result = open_geotiff(p) + assert result.shape == (1, 10) + np.testing.assert_array_equal(result.values, arr_np) + + def test_single_column_round_trip(self, tmp_path): + import cupy + arr_np = np.arange(10, dtype=np.float32).reshape(10, 1) + arr = cupy.asarray(arr_np) + da_gpu = xr.DataArray(arr, dims=["y", "x"]) + p = str(tmp_path / "gpu_Nx1.tif") + write_geotiff_gpu(da_gpu, p) + + result = open_geotiff(p) + assert result.shape == (10, 1) + np.testing.assert_array_equal(result.values, arr_np) + + +class TestAllNanRead: + """All-NaN raster (boundary of the algorithm) reads cleanly on every + backend. + + The eager path covers this in ``test_edge_cases.TestWriteSpecialValues``. + Without a matching GPU/dask test, a regression in the GPU nodata + masker or dask graph builder would only surface in production. 
+ """ + + @pytest.fixture + def all_nan_path(self, tmp_path): + arr = np.full((8, 8), np.nan, dtype=np.float32) + p = tmp_path / "all_nan.tif" + to_geotiff(arr, str(p), nodata=float("nan")) + return str(p), arr + + def test_dask_numpy_backend(self, all_nan_path): + path, _ = all_nan_path + result = open_geotiff(path, chunks=4) + computed = result.compute() + assert np.all(np.isnan(computed.values)) + + @requires_gpu + def test_gpu_backend(self, all_nan_path): + path, _ = all_nan_path + result = open_geotiff(path, gpu=True) + assert np.all(np.isnan(result.data.get())) + + @requires_gpu + def test_dask_cupy_backend(self, all_nan_path): + path, _ = all_nan_path + result = open_geotiff(path, gpu=True, chunks=4) + computed = result.compute() + assert np.all(np.isnan(computed.data.get())) + + +class TestInfRead: + """+Inf and -Inf are valid float values in TIFF; they must survive + every read backend without being masked or clipped. + + The eager path's ``test_edge_cases.TestWriteSpecialValues::test_nan_and_inf`` + is a write-then-CPU-read test. The GPU and dask backends were + unexercised on Inf input. + """ + + @pytest.fixture + def inf_path(self, tmp_path): + arr = np.array( + [ + [np.inf, -np.inf, 1.0, 2.0], + [3.0, np.inf, -np.inf, 4.0], + [-np.inf, 5.0, 6.0, np.inf], + [7.0, 8.0, np.inf, 9.0], + ], + dtype=np.float32, + ) + p = tmp_path / "inf.tif" + # Do not set nodata: we want Inf to survive, not be remapped. 
+ to_geotiff(arr, str(p)) + return str(p), arr + + def test_dask_numpy_backend(self, inf_path): + path, arr = inf_path + result = open_geotiff(path, chunks=2).compute() + assert np.isposinf(result.values[0, 0]) + assert np.isneginf(result.values[0, 1]) + np.testing.assert_array_equal(result.values, arr) + + @requires_gpu + def test_gpu_backend(self, inf_path): + path, arr = inf_path + result = open_geotiff(path, gpu=True) + host = result.data.get() + assert np.isposinf(host[0, 0]) + assert np.isneginf(host[0, 1]) + np.testing.assert_array_equal(host, arr) + + @requires_gpu + def test_dask_cupy_backend(self, inf_path): + path, arr = inf_path + result = open_geotiff(path, gpu=True, chunks=2) + host = result.compute().data.get() + assert np.isposinf(host[0, 0]) + assert np.isneginf(host[0, 1]) + np.testing.assert_array_equal(host, arr) + + +class TestNanSentinelDaskRead: + """Float raster with a finite ``nodata`` sentinel (``-9999.0``) is + masked to NaN consistently across backends on read. + + The integer-sentinel equivalent is pinned by issue #1597. The + float path has no such per-chunk dtype divergence (the input is + already float), but the dask graph still has to forward the + sentinel substitution. A regression in the float branch of + ``_delayed_read_window`` would silently break this. 
+ """ + + @pytest.fixture + def nan_sentinel_path(self, tmp_path): + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + arr[2:4, 2:4] = -9999.0 + arr[6, 0] = -9999.0 + p = tmp_path / "nan_sentinel_float.tif" + to_geotiff(arr, str(p), nodata=-9999.0) + return str(p), arr + + def test_eager_path_baseline(self, nan_sentinel_path): + """Baseline: eager path replaces the sentinel with NaN.""" + path, _ = nan_sentinel_path + result = open_geotiff(path) + assert np.isnan(result.values[2, 2]) + assert np.isnan(result.values[6, 0]) + assert result.values[0, 0] == 0.0 # non-sentinel survives + + def test_dask_numpy_matches_eager(self, nan_sentinel_path): + """dask compute reproduces the eager mask exactly.""" + path, _ = nan_sentinel_path + eager = open_geotiff(path) + dk = open_geotiff(path, chunks=4).compute() + np.testing.assert_array_equal(np.isnan(dk.values), np.isnan(eager.values)) + finite = ~np.isnan(eager.values) + np.testing.assert_array_equal(dk.values[finite], eager.values[finite]) + + def test_dask_numpy_chunks_smaller_than_sentinel_block(self, nan_sentinel_path): + """Sentinels split across two chunks still mask correctly. + + The 2x2 sentinel block at rows 2-3 cols 2-3 lands in a single + chunk for chunks=4 (rows 0-3) but straddles a chunk boundary + for chunks=2 (rows 2-3 split between chunks 1 and 2). This + exercises the per-block sentinel comparison. + """ + path, _ = nan_sentinel_path + dk = open_geotiff(path, chunks=2).compute() + assert np.isnan(dk.values[2, 2]) + assert np.isnan(dk.values[3, 3]) + assert np.isnan(dk.values[6, 0]) + + +# =========================================================================== +# Section 2 -- rasterio / Zarr round-trip parity (#1961) +# =========================================================================== +# +# Read the same GeoTIFF three ways and assert the results agree: +# 1. ``rasterio.open(path)`` -> numpy array, transform, CRS, nodata. +# 2. ``open_geotiff(path)`` -> ``xr.DataArray``. +# 3. 
Write the DataArray with ``.to_zarr``, reopen with ``xr.open_zarr``. +# Each input file covers a case that has drifted before. + rasterio = pytest.importorskip('rasterio', exc_type=ImportError) zarr = pytest.importorskip('zarr', exc_type=ImportError) from rasterio.transform import Affine, from_origin # noqa: E402 -from xrspatial.geotiff import open_geotiff # noqa: E402 from xrspatial.geotiff._crs import _resolve_crs_to_wkt # noqa: E402 -# --------------------------------------------------------------------------- -# helpers -# --------------------------------------------------------------------------- - ATOL_FLOAT32 = 0.0 # exact equality for float32 round trips ATOL_COORD = 1e-9 # coord tolerance (cells are O(1) - O(1e6)) RTOL_TRANSFORM = 1e-9 @@ -167,10 +511,6 @@ def _build_rasterio_coords(transform: Affine, height: int, width: int): return y, x -# --------------------------------------------------------------------------- -# parity checks shared across cases -# --------------------------------------------------------------------------- - def _parity_check_single_band( path, *, @@ -261,10 +601,6 @@ def _parity_check_single_band( f'{key!r} value drifted: {stored!r} vs {xrs.attrs[key]!r}') -# --------------------------------------------------------------------------- -# test cases -# --------------------------------------------------------------------------- - class TestSingleBandFloat32NodataSentinel: def test_round_trip(self, tmp_path): path = tmp_path / 'tmp_1961_float32_nodata.tif' diff --git a/xrspatial/geotiff/tests/parity/test_signature_contract.py b/xrspatial/geotiff/tests/parity/test_signature_contract.py new file mode 100644 index 00000000..4b5634ef --- /dev/null +++ b/xrspatial/geotiff/tests/parity/test_signature_contract.py @@ -0,0 +1,388 @@ +"""Cross-backend signature, docstring, and release-contract parity. + +Sibling to ``parity/test_backend_matrix.py``. 
These tests pin the +*advertised* contract of the public GeoTIFF surface so the writer trio and +the reader quartet stay consistent with each other and with the docs. +Three sections, each a former top-level file: + +Section 1 -- Writer signature / docstring parity (#1631) + ``write_vrt`` exposes its documented kwargs through an explicit + signature (no ``**kwargs`` catch-all), ``write_geotiff_gpu`` lists + ``'cubic'`` in its ``overview_resampling`` docstring, and its + ``data`` parameter carries the same type hint as ``to_geotiff``. + +Section 2 -- Read entry-point docstring/param parity (#2274) + Every signature kwarg on ``open_geotiff`` / ``read_geotiff_dask`` / + ``read_geotiff_gpu`` / ``read_vrt`` has a matching numpy-style + Parameters entry, and every Parameters entry maps to a real kwarg. + +Section 3 -- Release-contract tier parity (#2389) + ``docs/source/reference/geotiff_release_contract.md`` promises its + tier strings match ``SUPPORTED_FEATURES`` at runtime. This section + parses the contract table and asserts every key/tier pair agrees. + +GPU rows skip when cupy + CUDA are absent via the shared ``requires_gpu`` +marker from ``_helpers/markers.py``. 
+""" +from __future__ import annotations + +import inspect +import os +import re +from pathlib import Path + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import (SUPPORTED_FEATURES, open_geotiff, read_geotiff_dask, + read_geotiff_gpu, read_vrt, to_geotiff, write_geotiff_gpu, write_vrt) + +from .._helpers.markers import requires_gpu + +# =========================================================================== +# Section 1 -- Writer signature / docstring parity (#1631) +# =========================================================================== +# +# Three drifts flagged by the api-consistency sweep on 2026-05-11: +# ``write_vrt`` swallowed every kwarg into ``**kwargs`` so the documented +# ``relative`` / ``crs`` / ``nodata`` were invisible to ``inspect.signature``; +# ``write_geotiff_gpu``'s ``overview_resampling`` docstring omitted +# ``'cubic'``; and ``write_geotiff_gpu(data, ...)`` lacked the type hint +# ``to_geotiff(data, ...)`` carries. + + +def test_write_vrt_signature_exposes_documented_kwargs(): + """``inspect.signature(write_vrt)`` reports the four accepted kwargs. + + Prior to #1631 the public wrapper used ``**kwargs``, so + ``inspect.signature`` only saw ``vrt_path`` and ``source_files``. + Issue #1715 added ``crs`` for parity with ``to_geotiff`` / + ``write_geotiff_gpu`` while keeping the historic ``crs_wkt`` as a + deprecated alias (sentinel default so the deprecation shim can + tell "user passed nothing" from "user passed crs_wkt=None"). + """ + sig = inspect.signature(write_vrt) + params = sig.parameters + assert 'relative' in params + assert 'crs' in params # added in #1715 + assert 'crs_wkt' in params # deprecated alias + assert 'nodata' in params + assert params['relative'].default is True + # ``crs`` is the new canonical kwarg; default None means "pick from + # the first source", matching to_geotiff / write_geotiff_gpu. 
+ assert params['crs'].default is None + # ``crs_wkt`` carries a sentinel default so the deprecation shim + # can distinguish "user passed nothing" (no warning) from "user + # passed crs_wkt=None" (deprecated-but-explicit, warn). The + # sentinel itself is private; check that it is NOT None so a + # future maintainer cannot accidentally drop the sentinel logic. + assert params['crs_wkt'].default is not None + assert params['crs_wkt'].default is not inspect.Parameter.empty + assert params['nodata'].default is None + # No catch-all VAR_KEYWORD + kinds = {p.kind for p in params.values()} + assert inspect.Parameter.VAR_KEYWORD not in kinds + + +def test_write_vrt_unknown_kwarg_rejected_at_public_level(tmp_path): + """A typo'd kwarg now raises ``TypeError`` from the public function + rather than from deep inside ``_vrt.write_vrt``. + """ + arr = np.zeros((8, 8), dtype=np.float32) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)}, + attrs={'crs': 4326, 'transform': (1.0, 0, 0.0, 0, -1.0, 8.0)}, + ) + tif_path = str(tmp_path / 't.tif') + to_geotiff(da, tif_path) + + with pytest.raises(TypeError, match='typo_kwarg'): + write_vrt(str(tmp_path / 't.vrt'), [tif_path], typo_kwarg=1) + + +def test_write_vrt_accepts_documented_kwargs(tmp_path): + """Each documented kwarg round-trips through the explicit signature. + + Uses the new ``crs=None`` kwarg form (issue #1715). The deprecated + ``crs_wkt`` alias is exercised separately in + ``test_write_vrt_crs_1715.py``. 
+ """ + arr = np.zeros((8, 8), dtype=np.float32) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)}, + attrs={'crs': 4326, 'transform': (1.0, 0, 0.0, 0, -1.0, 8.0)}, + ) + tif_path = str(tmp_path / 't.tif') + to_geotiff(da, tif_path) + + vrt_path = str(tmp_path / 't.vrt') + out = write_vrt( + vrt_path, [tif_path], + relative=False, crs=None, nodata=-9999.0, + ) + assert out == vrt_path + assert os.path.exists(vrt_path) + + +def test_write_geotiff_gpu_docstring_lists_cubic(): + """``overview_resampling`` docstring includes ``'cubic'`` so it + matches ``to_geotiff`` and the underlying ``make_overview_gpu``. + """ + doc = write_geotiff_gpu.__doc__ + assert doc is not None + # Find the overview_resampling block + assert 'overview_resampling' in doc + # The block must mention cubic + block_start = doc.index('overview_resampling') + block_end = doc.index('bigtiff', block_start) + block = doc[block_start:block_end] + assert 'cubic' in block + + +def test_write_geotiff_gpu_data_has_type_hint(): + """``data`` parameter is annotated, matching ``to_geotiff(data, ...)``. + + The annotation also covers ``np.ndarray`` because the implementation + accepts numpy inputs (uploaded via ``cupy.asarray(np.asarray(data))``) + and the test suite exercises that path (e.g. + ``test_backend_kwarg_parity_1561.py`` passes a numpy ``dummy``). + """ + sig = inspect.signature(write_geotiff_gpu) + data_param = sig.parameters['data'] + assert data_param.annotation is not inspect.Parameter.empty + # The annotation is a forward reference under ``from __future__ import + # annotations``; just confirm it mentions the documented types. + ann_str = str(data_param.annotation) + assert 'DataArray' in ann_str + assert 'cupy' in ann_str + assert 'ndarray' in ann_str # numpy parity vs to_geotiff + + +@requires_gpu +def test_write_geotiff_gpu_cubic_overview_round_trip(tmp_path): + """``overview_resampling='cubic'`` works on the GPU writer. 
+ + Sanity check that the docstring update is not advertising an + unsupported codec. ``make_overview_gpu`` falls back to the CPU + cubic implementation for parity with the CPU writer. + """ + import cupy + + arr_cpu = np.random.RandomState(0).rand(256, 256).astype(np.float32) + arr_gpu = cupy.asarray(arr_cpu) + da_gpu = xr.DataArray( + arr_gpu, dims=['y', 'x'], + coords={'y': np.arange(256.0, 0, -1), 'x': np.arange(256.0)}, + ) + path = str(tmp_path / 'cog.tif') + write_geotiff_gpu( + da_gpu, path, + cog=True, tile_size=64, overview_resampling='cubic', + ) + # Overview level 1 = 1/2 resolution + ov = open_geotiff(path, overview_level=1) + assert ov.shape == (128, 128) + + +# =========================================================================== +# Section 2 -- Read entry-point docstring / param parity (#2274) +# =========================================================================== +# +# The four read entry points accept ``allow_rotated`` and +# ``allow_unparseable_crs`` plus several gated kwargs whose only purpose is +# to raise ``ValueError`` on the wrong backend so all four readers stay +# error-symmetric. Those kwargs were missing Parameters-section entries on +# the backends that reject them. + +READ_ENTRY_POINTS = ( + open_geotiff, + read_geotiff_dask, + read_geotiff_gpu, + read_vrt, +) + + +# Numpy-style docstring parameter heading pattern. Matches lines like +# `` name : type`` after ``inspect.getdoc`` has normalised the +# leading indentation to column zero. 
+_PARAM_HEADING = re.compile(r"^(\w+) : ", flags=re.MULTILINE) + + +def _signature_params(fn): + return set(inspect.signature(fn).parameters) + + +def _documented_params(fn): + doc = inspect.getdoc(fn) or "" + return set(_PARAM_HEADING.findall(doc)) + + +@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) +def test_read_entry_point_kwargs_have_docstring_entries(fn): + """Every signature kwarg appears in the Parameters section.""" + params = _signature_params(fn) + documented = _documented_params(fn) + missing = sorted(params - documented) + assert missing == [], ( + f"{fn.__name__} has kwargs without Parameters-section entries: " + f"{missing}. Add a numpy-style ``name : type`` heading for each " + f"so the docstring agrees with the signature. The kwargs may be " + f"gated (raise ValueError on the wrong backend) but they are " + f"still on the public surface, and tools that read the " + f"docstring (Sphinx, IDE help) cannot tell the kwarg exists " + f"without an entry. See #2274." + ) + + +@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) +def test_read_entry_point_docstring_does_not_invent_params(fn): + """Every Parameters entry maps to a real signature kwarg. + + Catches the inverse drift: a kwarg removed from the signature but + still listed in the Parameters section. + """ + params = _signature_params(fn) + documented = _documented_params(fn) + extra = sorted(documented - params) + assert extra == [], ( + f"{fn.__name__} has Parameters-section entries that do not " + f"appear in the signature: {extra}. Either remove the entry " + f"or restore the kwarg." + ) + + +@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) +def test_allow_rotated_documented(fn): + """``allow_rotated`` was the load-bearing #2274 gap on the backends. + + Pin it explicitly so a future commit that strips the Parameters + entry while keeping the signature kwarg fails loudly. 
+ """ + assert "allow_rotated" in _signature_params(fn), ( + f"{fn.__name__} unexpectedly dropped allow_rotated from its " + f"signature" + ) + assert "allow_rotated" in _documented_params(fn), ( + f"{fn.__name__} accepts allow_rotated but does not document it " + f"in its Parameters section (#2274)." + ) + + +@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) +def test_allow_unparseable_crs_documented(fn): + """``allow_unparseable_crs`` was the other shared #2274 gap. + + ``open_geotiff`` had the kwarg only in the Tier prose paragraph; + the three backends did not mention it at all. + """ + assert "allow_unparseable_crs" in _signature_params(fn), ( + f"{fn.__name__} unexpectedly dropped allow_unparseable_crs from " + f"its signature" + ) + assert "allow_unparseable_crs" in _documented_params(fn), ( + f"{fn.__name__} accepts allow_unparseable_crs but does not " + f"document it in its Parameters section (#2274)." + ) + + +# =========================================================================== +# Section 3 -- Release-contract tier parity (#2389) +# =========================================================================== +# +# ``docs/source/reference/geotiff_release_contract.md`` lists every public +# GeoTIFF feature with its tier and promises the tier strings match +# ``SUPPORTED_FEATURES`` at runtime. Nothing in CI checked that claim before +# this gate, so the contract drifted twice in two releases (#2381, #2389). + +_HERE = Path(__file__).resolve() +# tests/parity/test_signature_contract.py -> parents: parity, tests, +# geotiff, xrspatial, REPO_ROOT. Matches the depth that +# release_gates/test_stable_features.py uses (parents[4]). +_REPO_ROOT = _HERE.parents[4] +_CONTRACT = ( + _REPO_ROOT / "docs" / "source" / "reference" / "geotiff_release_contract.md" +) + +# Match table rows of the form: +# | `codec.none` | stable | Uncompressed... 
| +# The key column is always in backticks; the tier column is the bare +# tier label that should appear verbatim in SUPPORTED_FEATURES. +_ROW_RE = re.compile( + r"^\|\s*`([a-z_]+\.[a-z0-9_]+)`\s*\|\s*([a-z_]+)\s*\|", + re.MULTILINE, +) + + +def _contract_rows() -> list[tuple[str, tuple[str, str]]]: + """Return ``(line_number_hint, (key, tier))`` for every table row. + + The line-number hint is the 1-based offset of the match inside the + file so assertion failures can point a maintainer at the exact row. + """ + text = _CONTRACT.read_text(encoding="utf-8") + rows: list[tuple[str, tuple[str, str]]] = [] + for match in _ROW_RE.finditer(text): + line_no = text.count("\n", 0, match.start()) + 1 + rows.append((f"{_CONTRACT.name}:{line_no}", (match.group(1), match.group(2)))) + return rows + + +def test_contract_table_parses_into_rows() -> None: + """The regex catches the table rows. If a future doc rewrite breaks + the row shape, fail loudly here instead of silently passing the + tier check on zero rows. + """ + rows = _contract_rows() + assert rows, ( + f"no contract rows parsed from {_CONTRACT}; the markdown table " + "shape may have changed and this test's regex needs to follow." + ) + # Sanity floor: the contract today lists roughly 28 keys. Use a + # conservative lower bound so a sweeping accidental table truncation + # fails the gate. The exact count is not pinned; tiers move. + assert len(rows) >= 20, ( + f"only {len(rows)} contract rows parsed; the table may have been " + "truncated or the row format changed." + ) + + +def test_contract_keys_are_real_supported_features() -> None: + """Every key in the contract table exists in ``SUPPORTED_FEATURES``. + A stray row left behind after a key is removed from ``_attrs.py`` + fails here. 
+ """ + bad: list[tuple[str, str]] = [] + for where, (key, _tier) in _contract_rows(): + if key not in SUPPORTED_FEATURES: + bad.append((where, key)) + assert not bad, ( + "contract table lists keys that are not in SUPPORTED_FEATURES; " + "either the key was removed from _attrs.py and the doc row was " + "left behind, or the row's backticked text is wrong: " + f"{bad}" + ) + + +def test_contract_tiers_match_supported_features() -> None: + """Every row's tier column matches ``SUPPORTED_FEATURES[key]``. + This is the gate that would have caught the #2381 / #2389 drift. + """ + mismatches: list[tuple[str, str, str, str]] = [] + for where, (key, tier) in _contract_rows(): + if key not in SUPPORTED_FEATURES: + # Reported by ``test_contract_keys_are_real_supported_features``; + # skip here to keep this failure focused on tier drift. + continue + expected = SUPPORTED_FEATURES[key] + if tier != expected: + mismatches.append((where, key, tier, expected)) + assert not mismatches, ( + "contract page tier strings disagree with SUPPORTED_FEATURES; " + "the contract page promises the two match verbatim. Update the " + "tier column in geotiff_release_contract.md to the runtime tier " + "(format: (where, key, doc_tier, runtime_tier)): " + f"{mismatches}" + ) diff --git a/xrspatial/geotiff/tests/test_degenerate_shapes_backends_2026_05_11.py b/xrspatial/geotiff/tests/test_degenerate_shapes_backends_2026_05_11.py deleted file mode 100644 index e78cb389..00000000 --- a/xrspatial/geotiff/tests/test_degenerate_shapes_backends_2026_05_11.py +++ /dev/null @@ -1,397 +0,0 @@ -"""Backend coverage for degenerate raster shapes and special float values. - -The eager numpy path (``test_edge_cases.py``) covers 1x1, 1xN, and Nx1 -rasters end-to-end, plus all-NaN, all-Inf, and -Inf inputs. The GPU -(``open_geotiff(gpu=True)``), dask+numpy (``open_geotiff(chunks=N)``), -and dask+cupy (``open_geotiff(gpu=True, chunks=N)``) backends had no -matching coverage. 
A regression that broke any backend for a 1-pixel -row, 1-column strip, or all-NaN input would not surface until a user -hit the production-traffic mosaic that triggered it. - -Test coverage gap sweep 2026-05-11 (pass 5) closes the Cat 3 (geometric -edge case) and Cat 2 (NaN / Inf) gaps for the non-eager backends: - -* 1x1, 1xN, Nx1 reads on every backend (Cat 3 HIGH). -* 1x1, 1xN, and Nx1 writes through ``write_geotiff_gpu`` (Cat 3 HIGH - for the GPU writer's degenerate-shape path). -* All-NaN read on GPU and dask backends (Cat 2 MEDIUM). -* Inf / -Inf read on GPU and dask backends (Cat 2 MEDIUM). -* Finite-sentinel-to-NaN masking on dask read path for a float raster - (Cat 2 MEDIUM; the eager path has it via - ``test_dask_int_nodata_chunks_1597`` for integer nodata but no float - equivalent). -""" -from __future__ import annotations - -import importlib.util - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import (open_geotiff, read_geotiff_dask, read_geotiff_gpu, to_geotiff, - write_geotiff_gpu) - -# --------------------------------------------------------------------------- -# GPU gating: matches the predicate the rest of the geotiff test suite uses. -# --------------------------------------------------------------------------- - - -def _gpu_available() -> bool: - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") - - -# =========================================================================== -# Cat 3: 1x1 single-pixel read across backends -# =========================================================================== - -class TestSinglePixelRead: - """1x1 rasters round-trip through every read backend. - - The eager numpy path covers this in ``test_edge_cases.py``. 
A 1x1 - raster has degenerate stride and tile geometry; the dask path's - chunk-tile alignment and the GPU path's grid sizing both have - code that assumes >1 pixel without crashing -- but a future - refactor could regress either. - """ - - @pytest.fixture - def single_pixel_path(self, tmp_path): - arr = np.array([[42.0]], dtype=np.float32) - p = tmp_path / "single_pixel.tif" - to_geotiff(arr, str(p)) - return str(p), arr - - def test_dask_numpy_backend(self, single_pixel_path): - path, arr = single_pixel_path - # chunks larger than the raster is the documented behaviour - # (dask collapses to a single chunk that matches the data). - result = open_geotiff(path, chunks=64) - assert result.shape == (1, 1) - computed = result.compute() - np.testing.assert_array_equal(computed.values, arr) - - def test_read_geotiff_dask_direct(self, single_pixel_path): - """The explicit ``read_geotiff_dask`` entry point matches dispatch.""" - path, arr = single_pixel_path - result = read_geotiff_dask(path, chunks=8) - assert result.shape == (1, 1) - np.testing.assert_array_equal(result.compute().values, arr) - - @_gpu_only - def test_gpu_backend(self, single_pixel_path): - path, arr = single_pixel_path - result = open_geotiff(path, gpu=True) - assert result.shape == (1, 1) - np.testing.assert_array_equal(result.data.get(), arr) - - @_gpu_only - def test_read_geotiff_gpu_direct(self, single_pixel_path): - """The explicit ``read_geotiff_gpu`` entry point matches dispatch.""" - path, arr = single_pixel_path - result = read_geotiff_gpu(path) - assert result.shape == (1, 1) - np.testing.assert_array_equal(result.data.get(), arr) - - @_gpu_only - def test_dask_cupy_backend(self, single_pixel_path): - """dask+cupy must also handle a 1x1 raster. - - The dask graph here has exactly one block (chunks larger than - the raster) and that block carries a cupy buffer. 
- """ - import cupy - path, arr = single_pixel_path - result = open_geotiff(path, gpu=True, chunks=64) - assert result.shape == (1, 1) - computed = result.compute() - assert isinstance(computed.data, cupy.ndarray) - np.testing.assert_array_equal(computed.data.get(), arr) - - -# =========================================================================== -# Cat 3: 1xN single-row read across backends -# =========================================================================== - -class TestSingleRowRead: - """1xN rasters round-trip through every read backend. - - Single-row tiles trigger the strip-fallback path in the GPU decoder - when there is no tiled layout, and a 1-row chunk in the dask graph. - """ - - @pytest.fixture - def single_row_path(self, tmp_path): - arr = np.arange(10, dtype=np.float32).reshape(1, 10) - p = tmp_path / "single_row.tif" - to_geotiff(arr, str(p)) - return str(p), arr - - def test_dask_numpy_backend(self, single_row_path): - path, arr = single_row_path - result = open_geotiff(path, chunks=4) - assert result.shape == (1, 10) - np.testing.assert_array_equal(result.compute().values, arr) - - @_gpu_only - def test_gpu_backend(self, single_row_path): - path, arr = single_row_path - result = open_geotiff(path, gpu=True) - assert result.shape == (1, 10) - np.testing.assert_array_equal(result.data.get(), arr) - - @_gpu_only - def test_dask_cupy_backend(self, single_row_path): - import cupy - path, arr = single_row_path - result = open_geotiff(path, gpu=True, chunks=4) - assert result.shape == (1, 10) - computed = result.compute() - assert isinstance(computed.data, cupy.ndarray) - np.testing.assert_array_equal(computed.data.get(), arr) - - -# =========================================================================== -# Cat 3: Nx1 single-column read across backends -# =========================================================================== - -class TestSingleColumnRead: - """Nx1 rasters round-trip through every read backend. 
- - Single-column tiles are the mirror case of single-row, and exercise - the row-major iteration order in the dask block-builder and the - GPU's window-band slice path. - """ - - @pytest.fixture - def single_column_path(self, tmp_path): - arr = np.arange(10, dtype=np.float32).reshape(10, 1) - p = tmp_path / "single_column.tif" - to_geotiff(arr, str(p)) - return str(p), arr - - def test_dask_numpy_backend(self, single_column_path): - path, arr = single_column_path - result = open_geotiff(path, chunks=4) - assert result.shape == (10, 1) - np.testing.assert_array_equal(result.compute().values, arr) - - @_gpu_only - def test_gpu_backend(self, single_column_path): - path, arr = single_column_path - result = open_geotiff(path, gpu=True) - assert result.shape == (10, 1) - np.testing.assert_array_equal(result.data.get(), arr) - - @_gpu_only - def test_dask_cupy_backend(self, single_column_path): - import cupy - path, arr = single_column_path - result = open_geotiff(path, gpu=True, chunks=4) - assert result.shape == (10, 1) - computed = result.compute() - assert isinstance(computed.data, cupy.ndarray) - np.testing.assert_array_equal(computed.data.get(), arr) - - -# =========================================================================== -# Cat 3: 1x1, 1xN, and Nx1 writes through write_geotiff_gpu -# =========================================================================== - -@_gpu_only -class TestGpuWriterDegenerateShapes: - """``write_geotiff_gpu`` must accept 1-pixel, 1-row, and 1-column inputs. - - The GPU writer's tile-encoding path uses an internal grid sizing - helper that fell back to host code for shapes smaller than the - default tile. The fallback exists but had no regression test that - would catch a future "fast-path only" refactor. 
- """ - - def test_single_pixel_round_trip(self, tmp_path): - import cupy - arr = cupy.array([[42.0]], dtype=cupy.float32) - da_gpu = xr.DataArray(arr, dims=["y", "x"]) - p = str(tmp_path / "gpu_1x1.tif") - write_geotiff_gpu(da_gpu, p) - - result = open_geotiff(p) - assert result.shape == (1, 1) - assert result.values[0, 0] == 42.0 - - def test_single_row_round_trip(self, tmp_path): - import cupy - arr_np = np.arange(10, dtype=np.float32).reshape(1, 10) - arr = cupy.asarray(arr_np) - da_gpu = xr.DataArray(arr, dims=["y", "x"]) - p = str(tmp_path / "gpu_1xN.tif") - write_geotiff_gpu(da_gpu, p) - - result = open_geotiff(p) - assert result.shape == (1, 10) - np.testing.assert_array_equal(result.values, arr_np) - - def test_single_column_round_trip(self, tmp_path): - import cupy - arr_np = np.arange(10, dtype=np.float32).reshape(10, 1) - arr = cupy.asarray(arr_np) - da_gpu = xr.DataArray(arr, dims=["y", "x"]) - p = str(tmp_path / "gpu_Nx1.tif") - write_geotiff_gpu(da_gpu, p) - - result = open_geotiff(p) - assert result.shape == (10, 1) - np.testing.assert_array_equal(result.values, arr_np) - - -# =========================================================================== -# Cat 2: all-NaN / Inf reads on GPU and dask backends -# =========================================================================== - -class TestAllNanRead: - """All-NaN raster (boundary of the algorithm) reads cleanly on every - backend. - - The eager path covers this in ``test_edge_cases.TestWriteSpecialValues``. - Without a matching GPU/dask test, a regression in the GPU nodata - masker or dask graph builder would only surface in production. 
- """ - - @pytest.fixture - def all_nan_path(self, tmp_path): - arr = np.full((8, 8), np.nan, dtype=np.float32) - p = tmp_path / "all_nan.tif" - to_geotiff(arr, str(p), nodata=float("nan")) - return str(p), arr - - def test_dask_numpy_backend(self, all_nan_path): - path, _ = all_nan_path - result = open_geotiff(path, chunks=4) - computed = result.compute() - assert np.all(np.isnan(computed.values)) - - @_gpu_only - def test_gpu_backend(self, all_nan_path): - path, _ = all_nan_path - result = open_geotiff(path, gpu=True) - assert np.all(np.isnan(result.data.get())) - - @_gpu_only - def test_dask_cupy_backend(self, all_nan_path): - path, _ = all_nan_path - result = open_geotiff(path, gpu=True, chunks=4) - computed = result.compute() - assert np.all(np.isnan(computed.data.get())) - - -class TestInfRead: - """+Inf and -Inf are valid float values in TIFF; they must survive - every read backend without being masked or clipped. - - The eager path's ``test_edge_cases.TestWriteSpecialValues::test_nan_and_inf`` - is a write-then-CPU-read test. The GPU and dask backends were - unexercised on Inf input. - """ - - @pytest.fixture - def inf_path(self, tmp_path): - arr = np.array( - [ - [np.inf, -np.inf, 1.0, 2.0], - [3.0, np.inf, -np.inf, 4.0], - [-np.inf, 5.0, 6.0, np.inf], - [7.0, 8.0, np.inf, 9.0], - ], - dtype=np.float32, - ) - p = tmp_path / "inf.tif" - # Do not set nodata: we want Inf to survive, not be remapped. 
- to_geotiff(arr, str(p)) - return str(p), arr - - def test_dask_numpy_backend(self, inf_path): - path, arr = inf_path - result = open_geotiff(path, chunks=2).compute() - assert np.isposinf(result.values[0, 0]) - assert np.isneginf(result.values[0, 1]) - np.testing.assert_array_equal(result.values, arr) - - @_gpu_only - def test_gpu_backend(self, inf_path): - path, arr = inf_path - result = open_geotiff(path, gpu=True) - host = result.data.get() - assert np.isposinf(host[0, 0]) - assert np.isneginf(host[0, 1]) - np.testing.assert_array_equal(host, arr) - - @_gpu_only - def test_dask_cupy_backend(self, inf_path): - path, arr = inf_path - result = open_geotiff(path, gpu=True, chunks=2) - host = result.compute().data.get() - assert np.isposinf(host[0, 0]) - assert np.isneginf(host[0, 1]) - np.testing.assert_array_equal(host, arr) - - -class TestNanSentinelDaskRead: - """Float raster with a finite ``nodata`` sentinel (``-9999.0``) is - masked to NaN consistently across backends on read. - - The integer-sentinel equivalent is pinned by issue #1597. The - float path has no such per-chunk dtype divergence (the input is - already float), but the dask graph still has to forward the - sentinel substitution. A regression in the float branch of - ``_delayed_read_window`` would silently break this. 
- """ - - @pytest.fixture - def nan_sentinel_path(self, tmp_path): - arr = np.arange(64, dtype=np.float32).reshape(8, 8) - arr[2:4, 2:4] = -9999.0 - arr[6, 0] = -9999.0 - p = tmp_path / "nan_sentinel_float.tif" - to_geotiff(arr, str(p), nodata=-9999.0) - return str(p), arr - - def test_eager_path_baseline(self, nan_sentinel_path): - """Baseline: eager path replaces the sentinel with NaN.""" - path, _ = nan_sentinel_path - result = open_geotiff(path) - assert np.isnan(result.values[2, 2]) - assert np.isnan(result.values[6, 0]) - assert result.values[0, 0] == 0.0 # non-sentinel survives - - def test_dask_numpy_matches_eager(self, nan_sentinel_path): - """dask compute reproduces the eager mask exactly.""" - path, _ = nan_sentinel_path - eager = open_geotiff(path) - dk = open_geotiff(path, chunks=4).compute() - np.testing.assert_array_equal(np.isnan(dk.values), np.isnan(eager.values)) - finite = ~np.isnan(eager.values) - np.testing.assert_array_equal(dk.values[finite], eager.values[finite]) - - def test_dask_numpy_chunks_smaller_than_sentinel_block(self, nan_sentinel_path): - """Sentinels split across two chunks still mask correctly. - - The 2x2 sentinel block at rows 2-3 cols 2-3 lands in a single - chunk for chunks=4 (rows 0-3) but straddles a chunk boundary - for chunks=2 (rows 2-3 split between chunks 1 and 2). This - exercises the per-block sentinel comparison. - """ - path, _ = nan_sentinel_path - dk = open_geotiff(path, chunks=2).compute() - assert np.isnan(dk.values[2, 2]) - assert np.isnan(dk.values[3, 3]) - assert np.isnan(dk.values[6, 0]) diff --git a/xrspatial/geotiff/tests/test_dispatch_validation_parity_2162.py b/xrspatial/geotiff/tests/test_dispatch_validation_parity_2162.py deleted file mode 100644 index 01560489..00000000 --- a/xrspatial/geotiff/tests/test_dispatch_validation_parity_2162.py +++ /dev/null @@ -1,562 +0,0 @@ -"""Dispatcher kwarg parity across GeoTIFF read entry points (issue #2175). 
- -``open_geotiff`` used to validate ``overview_level``, -``on_gpu_failure``, ``missing_sources``, ``band_nodata``, -``max_cloud_bytes``, and the file-like source restrictions inline at -the top of its body. The three direct backends -- ``read_geotiff_dask``, -``read_geotiff_gpu``, and ``read_vrt`` -- each ran their own -``_validate_overview_level_arg`` call but skipped the rest. A caller -who passed an invalid ``band_nodata`` to dask or ``max_cloud_bytes`` -to the GPU reader got no error at all, or got an unrelated -``TypeError`` from the signature. - -This module pins parity. ``_validate_dispatch_kwargs`` lives in -``xrspatial/geotiff/_validation.py`` and is called from the top of -every public read entry point. The matrix below walks each kwarg -through every entry point and asserts that the exception type and -message match. - -See issue #2175 (parent #2162). -""" -from __future__ import annotations - -import importlib.util -import io - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import (open_geotiff, read_geotiff_dask, read_geotiff_gpu, read_vrt, - to_geotiff, write_vrt) - -# -------------------------------------------------------------------------- -# Skip helpers + small fixtures -# -------------------------------------------------------------------------- - - -def _gpu_available() -> bool: - """Return True iff cupy imports cleanly AND a CUDA device is up.""" - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif( - not _HAS_GPU, reason="cupy + CUDA required", -) - - -def _build_local_tif(tmp_path, name='src_2175.tif'): - """Write a small valid GeoTIFF used as the dispatcher's source.""" - arr = np.arange(8 * 8, dtype=np.float32).reshape(8, 8) - da = xr.DataArray( - arr, - dims=['y', 'x'], - coords={'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)}, - 
attrs={ - 'crs': 4326, - 'transform': (1.0, 0, 0.0, 0, -1.0, 8.0), - }, - ) - path = str(tmp_path / name) - to_geotiff(da, path) - return path - - -def _build_vrt(tmp_path): - """Build a 1-source VRT mosaic referencing a small local GeoTIFF.""" - src = _build_local_tif(tmp_path, name='vrt_src_2175.tif') - vrt = str(tmp_path / 'mosaic_2175.vrt') - write_vrt(vrt, [src]) - return vrt, src - - -# -------------------------------------------------------------------------- -# overview_level type rejection through every entry point -# -------------------------------------------------------------------------- -# -# ``_validate_overview_level_arg`` rejects bool / str / float with a -# ``TypeError`` whose message names the offending type. The helper runs -# this check first across all four entry points (issue #2074, #2160). -# The cupy gate skips GPU cases when CUDA is unavailable. The GPU and -# VRT entry points also reach the validator before any GPU / source -# parse, so the bad-input path raises on CPU-only hosts. 
- - -@pytest.mark.parametrize("value", [True, False]) -def test_open_geotiff_overview_level_bool(tmp_path, value): - path = _build_local_tif(tmp_path) - with pytest.raises(TypeError, match="bool"): - open_geotiff(path, overview_level=value) - - -def test_open_geotiff_overview_level_str(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(TypeError, match="str"): - open_geotiff(path, overview_level="0") - - -def test_open_geotiff_overview_level_float(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(TypeError, match="float"): - open_geotiff(path, overview_level=1.0) - - -@pytest.mark.parametrize("value", [True, False]) -def test_dask_overview_level_bool(tmp_path, value): - path = _build_local_tif(tmp_path) - with pytest.raises(TypeError, match="bool"): - read_geotiff_dask(path, overview_level=value) - - -def test_dask_overview_level_str(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(TypeError, match="str"): - read_geotiff_dask(path, overview_level="0") - - -def test_dask_overview_level_float(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(TypeError, match="float"): - read_geotiff_dask(path, overview_level=1.0) - - -@pytest.mark.parametrize("value", [True, False]) -def test_gpu_overview_level_bool(tmp_path, value): - path = _build_local_tif(tmp_path) - with pytest.raises(TypeError, match="bool"): - read_geotiff_gpu(path, overview_level=value) - - -def test_gpu_overview_level_str(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(TypeError, match="str"): - read_geotiff_gpu(path, overview_level="0") - - -def test_gpu_overview_level_float(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(TypeError, match="float"): - read_geotiff_gpu(path, overview_level=1.0) - - -@pytest.mark.parametrize("value", [True, False]) -def test_vrt_overview_level_bool(tmp_path, value): - vrt, _src = _build_vrt(tmp_path) - with pytest.raises(TypeError, match="bool"): - read_vrt(vrt, 
overview_level=value) - - -def test_vrt_overview_level_str(tmp_path): - vrt, _src = _build_vrt(tmp_path) - with pytest.raises(TypeError, match="str"): - read_vrt(vrt, overview_level="0") - - -def test_vrt_overview_level_float(tmp_path): - vrt, _src = _build_vrt(tmp_path) - with pytest.raises(TypeError, match="float"): - read_vrt(vrt, overview_level=1.0) - - -# -------------------------------------------------------------------------- -# max_cloud_bytes incompatibility through every applicable backend -# -------------------------------------------------------------------------- -# -# Only the eager non-VRT non-GPU non-dask branch in ``open_geotiff`` -# consumes ``max_cloud_bytes``. The three direct backends never look -# at it and would silently drop the budget before #2175. Parity means -# each backend rejects an explicit value with a ``ValueError`` whose -# message names the kwarg. - - -def test_open_geotiff_dask_rejects_max_cloud_bytes(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"max_cloud_bytes"): - open_geotiff(path, chunks=4, max_cloud_bytes=8) - - -def test_open_geotiff_gpu_rejects_max_cloud_bytes(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"max_cloud_bytes"): - open_geotiff(path, gpu=True, max_cloud_bytes=8) - - -def test_open_geotiff_vrt_rejects_max_cloud_bytes(tmp_path): - vrt, _src = _build_vrt(tmp_path) - with pytest.raises(ValueError, match=r"max_cloud_bytes"): - open_geotiff(vrt, max_cloud_bytes=8) - - -def test_dask_rejects_max_cloud_bytes(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"max_cloud_bytes"): - read_geotiff_dask(path, max_cloud_bytes=8) - - -def test_gpu_rejects_max_cloud_bytes(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"max_cloud_bytes"): - read_geotiff_gpu(path, max_cloud_bytes=8) - - -def test_vrt_rejects_max_cloud_bytes(tmp_path): - vrt, _src = _build_vrt(tmp_path) - 
with pytest.raises(ValueError, match=r"max_cloud_bytes"): - read_vrt(vrt, max_cloud_bytes=8) - - -def test_explicit_none_max_cloud_bytes_rejected_on_dask_direct(tmp_path): - """``max_cloud_bytes=None`` is the documented "disable budget" value - on the eager path. On the dask path it has no consumer, so an - explicit ``None`` is still rejected -- the sentinel default is the - only way to pass through without setting an opinion. - """ - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"max_cloud_bytes"): - read_geotiff_dask(path, max_cloud_bytes=None) - - -def test_explicit_none_max_cloud_bytes_rejected_on_gpu_direct(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"max_cloud_bytes"): - read_geotiff_gpu(path, max_cloud_bytes=None) - - -def test_explicit_none_max_cloud_bytes_rejected_on_vrt_direct(tmp_path): - vrt, _src = _build_vrt(tmp_path) - with pytest.raises(ValueError, match=r"max_cloud_bytes"): - read_vrt(vrt, max_cloud_bytes=None) - - -# -------------------------------------------------------------------------- -# missing_sources on non-VRT sources -# -------------------------------------------------------------------------- -# -# ``missing_sources`` controls the VRT mosaic loop. On a plain GeoTIFF -# the kwarg has no meaning. Every non-VRT entry point rejects an -# explicit value with a ``ValueError`` whose message names the kwarg. 
- - -def test_open_geotiff_rejects_missing_sources_on_tif(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"missing_sources only applies"): - open_geotiff(path, missing_sources='raise') - - -def test_dask_rejects_missing_sources_on_tif(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"missing_sources only applies"): - read_geotiff_dask(path, missing_sources='raise') - - -def test_gpu_rejects_missing_sources_on_tif(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"missing_sources only applies"): - read_geotiff_gpu(path, missing_sources='raise') - - -# -------------------------------------------------------------------------- -# band_nodata on non-VRT sources -# -------------------------------------------------------------------------- -# -# ``band_nodata`` is the #1987 PR 5 opt-out for the VRT mixed-band -# metadata check. On a plain GeoTIFF the kwarg has no meaning -- each -# non-VRT entry point rejects an explicit value. - - -def test_open_geotiff_rejects_band_nodata_on_tif(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"band_nodata only applies"): - open_geotiff(path, band_nodata='first') - - -def test_dask_rejects_band_nodata_on_tif(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"band_nodata only applies"): - read_geotiff_dask(path, band_nodata='first') - - -def test_gpu_rejects_band_nodata_on_tif(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"band_nodata only applies"): - read_geotiff_gpu(path, band_nodata='first') - - -# -------------------------------------------------------------------------- -# on_gpu_failure when GPU is disabled -# -------------------------------------------------------------------------- -# -# ``on_gpu_failure`` is the GPU pipeline's strict/auto fallback policy. 
-# The CPU and dask paths have no such concept, and ``read_vrt`` does -# not route through the GPU decoder. Each non-GPU entry point rejects -# an explicit value with a ``ValueError`` whose message names the kwarg. - - -def test_open_geotiff_rejects_on_gpu_failure_when_gpu_false(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"on_gpu_failure only applies"): - open_geotiff(path, on_gpu_failure='strict') - - -def test_dask_rejects_on_gpu_failure(tmp_path): - path = _build_local_tif(tmp_path) - with pytest.raises(ValueError, match=r"on_gpu_failure only applies"): - read_geotiff_dask(path, on_gpu_failure='strict') - - -def test_vrt_rejects_on_gpu_failure(tmp_path): - vrt, _src = _build_vrt(tmp_path) - with pytest.raises(ValueError, match=r"on_gpu_failure only applies"): - read_vrt(vrt, on_gpu_failure='strict') - - -# -------------------------------------------------------------------------- -# File-like sources reject gpu=True / chunks=... -# -------------------------------------------------------------------------- -# -# The GPU and dask paths re-open the source by path from worker tasks, -# so a file-like buffer cannot survive the trip. ``open_geotiff`` and -# the direct backends share the rejection now via the helper. - - -def test_open_geotiff_rejects_file_like_with_chunks(tmp_path): - path = _build_local_tif(tmp_path) - with open(path, 'rb') as f: - buf = io.BytesIO(f.read()) - with pytest.raises( - ValueError, - match=r"chunks=\.\.\. 
\(dask\) is not supported for file-like"): - open_geotiff(buf, chunks=4) - - -def test_open_geotiff_rejects_file_like_with_gpu(tmp_path): - path = _build_local_tif(tmp_path) - with open(path, 'rb') as f: - buf = io.BytesIO(f.read()) - with pytest.raises( - ValueError, - match=r"gpu=True is not supported for file-like"): - open_geotiff(buf, gpu=True) - - -def test_dask_rejects_file_like(tmp_path): - path = _build_local_tif(tmp_path) - with open(path, 'rb') as f: - buf = io.BytesIO(f.read()) - with pytest.raises( - ValueError, - match=r"chunks=\.\.\. \(dask\) is not supported for file-like"): - read_geotiff_dask(buf) - - -def test_gpu_rejects_file_like(tmp_path): - path = _build_local_tif(tmp_path) - with open(path, 'rb') as f: - buf = io.BytesIO(f.read()) - with pytest.raises( - ValueError, - match=r"gpu=True is not supported for file-like"): - read_geotiff_gpu(buf) - - -# -------------------------------------------------------------------------- -# Path-object sources survive the helper's file-like guard -# -------------------------------------------------------------------------- -# -# ``pathlib.Path`` is an ``os.PathLike`` instance, not a string. Each -# entry point coerces ``Path`` to ``str`` via ``_coerce_path`` before the -# dispatch validator runs so the file-like guard -# (``not isinstance(source, str)``) does not misclassify a Path as a -# file-like buffer. Regression for the review feedback on the original -# #2175 PR (the GPU entry point was coercing AFTER the validator). 
- - -def test_open_geotiff_accepts_path_object(tmp_path): - from pathlib import Path - path = _build_local_tif(tmp_path) - out = open_geotiff(Path(path)) - assert out.shape == (8, 8) - - -def test_dask_accepts_path_object(tmp_path): - from pathlib import Path - path = _build_local_tif(tmp_path) - out = read_geotiff_dask(Path(path), chunks=4) - assert out.shape == (8, 8) - - -def test_vrt_accepts_path_object(tmp_path): - from pathlib import Path - vrt, _src = _build_vrt(tmp_path) - out = read_vrt(Path(vrt)) - assert out.shape == (8, 8) - - -@_gpu_only -def test_gpu_accepts_path_object(tmp_path): - from pathlib import Path - path = _build_local_tif(tmp_path) - out = read_geotiff_gpu(Path(path)) - assert out.shape == (8, 8) - - -def test_gpu_path_object_does_not_raise_file_like_error(tmp_path): - """Even on a CPU-only host the validator must accept a Path object. - - The dispatch validator runs before any cupy import, so the bad - behaviour on `main` (treating Path as file-like) raises before any - GPU code executes. With the fix the validator coerces Path to str - first and the error only surfaces (if at all) from the GPU stack. - """ - from pathlib import Path - path = _build_local_tif(tmp_path) - # Either the call succeeds (GPU available) or it fails for a real - # GPU reason. The one thing it must NOT raise is the file-like - # ValueError introduced by the validator misclassifying Path. - try: - read_geotiff_gpu(Path(path)) - except ValueError as e: - assert "file-like" not in str(e), ( - f"validator misclassified Path as file-like: {e}" - ) - except (ImportError, RuntimeError): - # ImportError: cupy not installed. - # RuntimeError: CUDA preflight failed. - # Both are unrelated to the Path-coercion regression. 
- pass - - -# -------------------------------------------------------------------------- -# Default sentinel pins (no regressions on the happy path) -# -------------------------------------------------------------------------- -# -# Every entry point must accept its sentinel defaults without raising. -# A regression on a default would break every call site that omits the -# kwarg. - - -def test_open_geotiff_defaults_round_trip(tmp_path): - path = _build_local_tif(tmp_path) - out = open_geotiff(path) - assert out.shape == (8, 8) - - -def test_dask_defaults_round_trip(tmp_path): - path = _build_local_tif(tmp_path) - out = read_geotiff_dask(path) - assert out.shape == (8, 8) - - -def test_vrt_defaults_round_trip(tmp_path): - vrt, _src = _build_vrt(tmp_path) - out = read_vrt(vrt) - assert out.shape == (8, 8) - - -# -------------------------------------------------------------------------- -# Cross-entry-point message parity. The same invalid input through -# different entry points should produce the same exception text so -# callers can match on a single regex regardless of which backend they -# hit. -# -------------------------------------------------------------------------- - - -def _get_error(callable_, *args, **kwargs): - """Invoke ``callable_`` and return the (type_name, message) of the - exception it raises. Asserting on the type and message separately - catches a regression where the exception type changes silently - while the message stays the same. - """ - try: - callable_(*args, **kwargs) - except Exception as e: - return type(e).__name__, str(e) - raise AssertionError("expected an exception, none raised") - - -def test_max_cloud_bytes_message_parity(tmp_path): - path = _build_local_tif(tmp_path) - vrt, _ = _build_vrt(tmp_path) - open_dask = _get_error(open_geotiff, path, chunks=4, max_cloud_bytes=8) - direct_dask = _get_error(read_geotiff_dask, path, max_cloud_bytes=8) - # Both raise ValueError with the same dask-incompatibility message. 
- assert open_dask[0] == "ValueError" - assert direct_dask[0] == "ValueError" - for _, msg in (open_dask, direct_dask): - assert "max_cloud_bytes" in msg - assert "dask" in msg - - open_gpu = _get_error(open_geotiff, path, gpu=True, max_cloud_bytes=8) - direct_gpu = _get_error(read_geotiff_gpu, path, max_cloud_bytes=8) - assert open_gpu[0] == "ValueError" - assert direct_gpu[0] == "ValueError" - for _, msg in (open_gpu, direct_gpu): - assert "max_cloud_bytes" in msg - assert "gpu" in msg.lower() - - open_vrt = _get_error(open_geotiff, vrt, max_cloud_bytes=8) - direct_vrt = _get_error(read_vrt, vrt, max_cloud_bytes=8) - assert open_vrt[0] == "ValueError" - assert direct_vrt[0] == "ValueError" - for _, msg in (open_vrt, direct_vrt): - assert "max_cloud_bytes" in msg - assert "vrt" in msg.lower() - - -def test_band_nodata_message_parity(tmp_path): - path = _build_local_tif(tmp_path) - results = [ - _get_error(open_geotiff, path, band_nodata='first'), - _get_error(read_geotiff_dask, path, band_nodata='first'), - _get_error(read_geotiff_gpu, path, band_nodata='first'), - ] - for kind, msg in results: - assert kind == "ValueError" - assert "band_nodata only applies" in msg - - -def test_missing_sources_message_parity(tmp_path): - path = _build_local_tif(tmp_path) - results = [ - _get_error(open_geotiff, path, missing_sources='raise'), - _get_error(read_geotiff_dask, path, missing_sources='raise'), - _get_error(read_geotiff_gpu, path, missing_sources='raise'), - ] - for kind, msg in results: - assert kind == "ValueError" - assert "missing_sources only applies" in msg - - -def test_on_gpu_failure_message_parity(tmp_path): - path = _build_local_tif(tmp_path) - vrt, _ = _build_vrt(tmp_path) - results = [ - _get_error(open_geotiff, path, on_gpu_failure='strict'), - _get_error(read_geotiff_dask, path, on_gpu_failure='strict'), - _get_error(read_vrt, vrt, on_gpu_failure='strict'), - ] - for kind, msg in results: - assert kind == "ValueError" - assert "on_gpu_failure only 
applies" in msg - - -def test_overview_level_message_parity(tmp_path): - path = _build_local_tif(tmp_path) - vrt, _ = _build_vrt(tmp_path) - results = [ - _get_error(open_geotiff, path, overview_level="bad"), - _get_error(read_geotiff_dask, path, overview_level="bad"), - _get_error(read_geotiff_gpu, path, overview_level="bad"), - _get_error(read_vrt, vrt, overview_level="bad"), - ] - for kind, msg in results: - assert kind == "TypeError" - assert "overview_level must be an int or None" in msg - assert "str" in msg diff --git a/xrspatial/geotiff/tests/test_eager_finalization_parity_2162.py b/xrspatial/geotiff/tests/test_eager_finalization_parity_2162.py deleted file mode 100644 index 5549bed6..00000000 --- a/xrspatial/geotiff/tests/test_eager_finalization_parity_2162.py +++ /dev/null @@ -1,407 +0,0 @@ -"""Cross-backend parity for the eager finalization pipeline (issue #2179). - -Wave 2 of #2162 routed the eager numpy path and the three eager GPU -paths in ``_backends/gpu.py`` through the shared -``_finalize_eager_read`` helper introduced in #2177. The four sites -previously inlined the same validate / populate-attrs / mask / cast / -``_set_nodata_attrs`` block; this file pins parity for the attrs the -helper now stamps on both backends so a future change in one branch -cannot silently diverge from the other. - -The matrix walks: - -* Float source with a sentinel value (mask promotes via NaN). -* Integer source with an in-range sentinel (mask promotes int -> float64). -* Integer source with an out-of-range sentinel (mask is a no-op). -* ``mask_nodata=False`` left-alone semantics. -* Source with no declared sentinel (helper short-circuits both - ``nodata`` and ``masked_nodata`` attrs). -* Explicit ``dtype=`` kwarg (records ``nodata_dtype_cast``). -* Windowed read (pins the slice-before-mask behaviour on the GPU - local-eager path so ``nodata_pixels_present`` reflects the - window, not the full IFD). 
-* MinIsWhite photometric (exercises the post-inversion sentinel - branch of the GPU local-eager ``mask_sentinel`` resolution). -* Stripped multi-band file (exercises the 3-D output branch on - both backends through the GPU CPU-fallback eager site). - -For each case the test reads the file via the eager numpy backend -(``open_geotiff(path)``) and the eager GPU backend -(``open_geotiff(path, gpu=True)``) and compares the four -``_finalize_eager_read``-stamped attrs across the two reads: -``nodata``, ``nodata_pixels_present``, ``nodata_dtype_cast``, and -``georef_status``. The masked-pixel locations are also compared so a -divergence in the mask step would surface here. -""" -from __future__ import annotations - -import importlib.util - -import numpy as np -import pytest - - -def _gpu_available() -> bool: - """True if cupy is importable and CUDA is initialised.""" - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif( - not _HAS_GPU, - reason="cupy + CUDA required", -) - - -def _write_with_nodata(arr, path, *, nodata=None): - """Helper: write a 2-D array to a tiled GeoTIFF with an optional sentinel.""" - from xrspatial.geotiff._writer import write - write(arr, path, nodata=nodata, compression='deflate', - tiled=True, tile_size=16) - - -def _read_both(path, **kwargs): - """Read the same file via the eager numpy and eager GPU backends. - - Returns ``(cpu_da, gpu_da)``. ``kwargs`` are forwarded to both - ``open_geotiff`` calls so each backend sees the same caller - contract. - """ - from xrspatial.geotiff import open_geotiff - cpu = open_geotiff(path, **kwargs) - gpu = open_geotiff(path, gpu=True, **kwargs) - return cpu, gpu - - -# Subset of attrs ``_finalize_eager_read`` is responsible for; mirrors -# the issue body's parity claim list. 
-_LIFECYCLE_ATTRS = ( - 'nodata', - 'nodata_pixels_present', - 'nodata_dtype_cast', - 'georef_status', -) - - -def _assert_lifecycle_attrs_match(cpu_da, gpu_da): - """Assert the four lifecycle attrs match across backends. - - ``masked_nodata`` is checked separately because the test suite - asserts on its boolean value when a sentinel is declared. - """ - for key in _LIFECYCLE_ATTRS: - cpu_v = cpu_da.attrs.get(key) - gpu_v = gpu_da.attrs.get(key) - assert cpu_v == gpu_v, ( - f"attrs[{key!r}] divergence: cpu={cpu_v!r} gpu={gpu_v!r}" - ) - - -# --------------------------------------------------------------------------- -# Float source with in-buffer sentinel -# --------------------------------------------------------------------------- - - -@_gpu_only -def test_float_sentinel_match_and_mask(tmp_path): - """Float source + sentinel: both backends mask in place, attrs match.""" - arr = np.array( - [[1.0, 2.0, -9999.0], [4.0, -9999.0, 6.0]], dtype=np.float32) - path = str(tmp_path / 'eager_parity_2179_float_sentinel.tif') - _write_with_nodata(arr, path, nodata=-9999.0) - - cpu, gpu = _read_both(path) - - # dtype + masked_nodata first: float source stays at its declared - # dtype on both backends; the mask substitutes NaN. - assert cpu.dtype == gpu.dtype - assert cpu.attrs.get('masked_nodata') is True - assert gpu.attrs.get('masked_nodata') is True - - # Lifecycle attrs proper. ``nodata_pixels_present`` must surface - # as a real bool on both backends (the issue body calls this out - # explicitly). - _assert_lifecycle_attrs_match(cpu, gpu) - assert isinstance(cpu.attrs.get('nodata_pixels_present'), bool) - assert isinstance(gpu.attrs.get('nodata_pixels_present'), bool) - assert cpu.attrs.get('nodata_pixels_present') is True - - # And the NaN locations agree pixel-for-pixel. 
- cpu_arr = cpu.values - gpu_arr = gpu.data.get() - np.testing.assert_array_equal(np.isnan(cpu_arr), np.isnan(gpu_arr)) - - -# --------------------------------------------------------------------------- -# Integer source with in-range sentinel -# --------------------------------------------------------------------------- - - -@_gpu_only -def test_int_in_range_sentinel_promotes_to_float(tmp_path): - """uint16 + 65535 sentinel: both backends promote to float64 with NaN.""" - arr = np.array([[1, 2, 3], [65535, 5, 6]], dtype=np.uint16) - path = str(tmp_path / 'eager_parity_2179_int_sentinel.tif') - _write_with_nodata(arr, path, nodata=65535) - - cpu, gpu = _read_both(path) - - # Integer promotion fires on both backends. - assert cpu.dtype == np.float64 - assert gpu.dtype == np.float64 - assert cpu.attrs.get('masked_nodata') is True - assert gpu.attrs.get('masked_nodata') is True - - _assert_lifecycle_attrs_match(cpu, gpu) - assert cpu.attrs.get('nodata_pixels_present') is True - - cpu_arr = cpu.values - gpu_arr = gpu.data.get() - np.testing.assert_array_equal(np.isnan(cpu_arr), np.isnan(gpu_arr)) - - -# --------------------------------------------------------------------------- -# Integer source with out-of-range sentinel -# --------------------------------------------------------------------------- - - -@_gpu_only -def test_int_out_of_range_sentinel_is_no_op(tmp_path): - """uint8 + 9999 sentinel: out-of-range, no promotion, presence=False.""" - arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8) - path = str(tmp_path / 'eager_parity_2179_int_oor.tif') - # 9999 cannot match any uint8 pixel. ``_writer.write`` accepts an - # int sentinel here without complaining (the writer only refuses - # bool / NaN values, not out-of-range ints), so we get a file with - # the literal nodata tag set to 9999 and no pixel matching it. - _write_with_nodata(arr, path, nodata=9999) - - cpu, gpu = _read_both(path) - - # No promotion when the sentinel is out of range. 
Both backends - # leave the uint8 buffer alone. - assert cpu.dtype == np.uint8 - assert gpu.dtype == np.uint8 - # ``masked_nodata`` is False because the mask did not run; the - # final dtype is still int. - assert cpu.attrs.get('masked_nodata') is False - assert gpu.attrs.get('masked_nodata') is False - - _assert_lifecycle_attrs_match(cpu, gpu) - assert cpu.attrs.get('nodata_pixels_present') is False - - -# --------------------------------------------------------------------------- -# mask_nodata=False -# --------------------------------------------------------------------------- - - -@_gpu_only -def test_mask_nodata_false_keeps_literal_sentinel(tmp_path): - """mask_nodata=False leaves the buffer untouched on both backends.""" - arr = np.array( - [[1.0, 2.0, -9999.0], [4.0, -9999.0, 6.0]], dtype=np.float32) - path = str(tmp_path / 'eager_parity_2179_mask_false.tif') - _write_with_nodata(arr, path, nodata=-9999.0) - - cpu, gpu = _read_both(path, mask_nodata=False) - - # No NaN substitution; the literal sentinel survives on both - # backends with ``masked_nodata=False``. - assert cpu.dtype == np.float32 - assert gpu.dtype == np.float32 - assert cpu.attrs.get('masked_nodata') is False - assert gpu.attrs.get('masked_nodata') is False - - _assert_lifecycle_attrs_match(cpu, gpu) - # The no-mask scan branch still surfaces presence. 
- assert cpu.attrs.get('nodata_pixels_present') is True - - cpu_arr = cpu.values - gpu_arr = gpu.data.get() - np.testing.assert_array_equal(cpu_arr, gpu_arr) - - -# --------------------------------------------------------------------------- -# No declared sentinel -# --------------------------------------------------------------------------- - - -@_gpu_only -def test_no_declared_sentinel_omits_nodata_attrs(tmp_path): - """Source without nodata declaration: no lifecycle attrs on either side.""" - arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8) - path = str(tmp_path / 'eager_parity_2179_no_sentinel.tif') - _write_with_nodata(arr, path, nodata=None) - - cpu, gpu = _read_both(path) - - assert cpu.dtype == np.uint8 - assert gpu.dtype == np.uint8 - - # The helper's ``_set_nodata_attrs`` early-returns when there is no - # declared sentinel, so neither ``nodata`` nor ``masked_nodata`` - # appear on either backend. - assert 'nodata' not in cpu.attrs - assert 'nodata' not in gpu.attrs - assert 'masked_nodata' not in cpu.attrs - assert 'masked_nodata' not in gpu.attrs - assert 'nodata_pixels_present' not in cpu.attrs - assert 'nodata_pixels_present' not in gpu.attrs - - # ``georef_status`` still rides on the helper regardless of nodata - # state, so the parity assertion exercises that branch too. 
- _assert_lifecycle_attrs_match(cpu, gpu) - - -# --------------------------------------------------------------------------- -# nodata_dtype_cast on explicit dtype= -# --------------------------------------------------------------------------- - - -@_gpu_only -def test_dtype_kwarg_records_post_mask_cast(tmp_path): - """Explicit dtype= records ``nodata_dtype_cast`` on both backends.""" - arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint16) - path = str(tmp_path / 'eager_parity_2179_dtype_cast.tif') - # Out-of-range sentinel keeps the mask a no-op so the cast attr is - # the only signal that the user asked for a dtype change; this - # isolates the ``nodata_dtype_cast`` branch from the mask-driven - # promotion exercised in ``test_int_in_range_sentinel_promotes_to_float``. - _write_with_nodata(arr, path, nodata=9999) - - cpu, gpu = _read_both(path, dtype=np.float32) - - assert cpu.dtype == np.float32 - assert gpu.dtype == np.float32 - assert cpu.attrs.get('nodata_dtype_cast') == 'float32' - assert gpu.attrs.get('nodata_dtype_cast') == 'float32' - - _assert_lifecycle_attrs_match(cpu, gpu) - - -# --------------------------------------------------------------------------- -# Windowed reads -# --------------------------------------------------------------------------- - - -@_gpu_only -def test_windowed_read_presence_matches_window_contents(tmp_path): - """Windowed read: nodata_pixels_present reflects the window, not the IFD. - - Pins the slice-before-mask behaviour the GPU local-eager path - picked up in #2179. Pre-PR the GPU path masked the full IFD then - sliced, so ``nodata_pixels_present`` reported sentinel presence - anywhere in the file; post-PR it reports presence within the - requested window. The CPU path has always behaved this way, so - the two now agree. - """ - # 4x4 raster with the sentinel only in the bottom half so the two - # windows below land on opposite sides of the presence bool. 
- arr = np.array( - [ - [1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [9.0, 10.0, -9999.0, 12.0], - [13.0, 14.0, 15.0, 16.0], - ], - dtype=np.float32, - ) - path = str(tmp_path / 'eager_parity_2179_windowed.tif') - _write_with_nodata(arr, path, nodata=-9999.0) - - # Top-left 2x2 window: no sentinel in scope. - cpu, gpu = _read_both(path, window=(0, 0, 2, 2)) - _assert_lifecycle_attrs_match(cpu, gpu) - assert cpu.attrs.get('nodata_pixels_present') is False - assert gpu.attrs.get('nodata_pixels_present') is False - - # Bottom 2x4 window: covers the sentinel. - cpu, gpu = _read_both(path, window=(2, 0, 4, 4)) - _assert_lifecycle_attrs_match(cpu, gpu) - assert cpu.attrs.get('nodata_pixels_present') is True - assert gpu.attrs.get('nodata_pixels_present') is True - - -# --------------------------------------------------------------------------- -# MinIsWhite + nodata -# --------------------------------------------------------------------------- - - -@_gpu_only -def test_miniswhite_post_inversion_sentinel_parity(tmp_path): - """MinIsWhite raster: post-inversion sentinel resolves identically on both backends. - - Exercises the ``_mw_mask_nodata`` branch in the GPU local-eager - path. The reader inverts the buffer and the post-MinIsWhite - sentinel is what the helper's mask block compares against on the - GPU side; the eager numpy path takes the same sentinel off - ``geo_info._mask_nodata`` through ``read_to_array``. Both should - land on the same NaN positions and the same lifecycle attrs. - """ - import tifffile - - # uint8 + nodata=0; MinIsWhite inverts the stored value to 255 - # before masking, and 255 is the post-inversion sentinel. 
- stored = np.array([[0, 100, 200], [50, 0, 255]], dtype=np.uint8) - path = str(tmp_path / 'eager_parity_2179_miniswhite.tif') - extratags = [("GDAL_NODATA", "s", 0, "0\0", True)] - tifffile.imwrite( - path, stored, photometric="miniswhite", - extratags=extratags, tile=(16, 16), - ) - - cpu, gpu = _read_both(path) - - _assert_lifecycle_attrs_match(cpu, gpu) - cpu_arr = cpu.values - gpu_arr = gpu.data.get() - # NaN positions must agree pixel-for-pixel; the MinIsWhite - # sentinel resolution drives this. - np.testing.assert_array_equal(np.isnan(cpu_arr), np.isnan(gpu_arr)) - - -# --------------------------------------------------------------------------- -# Multi-band (3D) -# --------------------------------------------------------------------------- - - -@_gpu_only -def test_multiband_stripped_parity(tmp_path): - """3-band stripped read: helper builds (y, x, band) DataArray on both backends. - - The GPU CPU-fallback path lands on stripped files. Multi-band - output goes through the helper's ``arr.ndim == 3`` branch on - both backends; the parity assertion covers ``georef_status`` and - sentinel-related attrs for the multi-band shape so a future - change to the 3-D coord build cannot silently diverge. - """ - import xarray as xr - rng = np.random.RandomState(20260520) - data = rng.randint(0, 200, size=(32, 48, 3)).astype(np.uint8) - da_in = xr.DataArray(data, dims=['y', 'x', 'band']) - - path = str(tmp_path / 'eager_parity_2179_multiband.tif') - from xrspatial.geotiff import to_geotiff - - # Stripped (tiled=False) routes the GPU read through the - # CPU-fallback eager site, which is one of the three sites this - # PR migrated. - to_geotiff(da_in, path, tiled=False) - - cpu, gpu = _read_both(path) - - # Shape and dims line up across backends. 
- assert cpu.dims == gpu.dims - assert cpu.shape == gpu.shape == (32, 48, 3) - - _assert_lifecycle_attrs_match(cpu, gpu) - cpu_arr = cpu.values - gpu_arr = gpu.data.get() - np.testing.assert_array_equal(cpu_arr, gpu_arr) diff --git a/xrspatial/geotiff/tests/test_lazy_finalization_parity_2162.py b/xrspatial/geotiff/tests/test_lazy_finalization_parity_2162.py deleted file mode 100644 index fc51dfbd..00000000 --- a/xrspatial/geotiff/tests/test_lazy_finalization_parity_2162.py +++ /dev/null @@ -1,332 +0,0 @@ -"""Lazy-read finalization parity between the two dask backends (PR C of #2162). - -Wave 2 of issue #2162 migrates ``read_geotiff_dask`` (the CPU+dask -backend) and the dask branch of ``read_geotiff_gpu`` (the GPU+dask -backend) onto the shared :func:`_finalize_lazy_read_attrs` helper from -#2177. Both sites had ~25 lines of validate-then-populate-then-stamp -code that produced the same attrs surface; the helper centralises that -logic so a single bug fix lands in both backends at once. - -The tests in this module pin the lazy-attrs contract across the two -backends so a future change to the helper (or to one backend's call -site) cannot drift them apart without a visible failure. Each test -opens the same fixture through ``read_geotiff_dask`` and -``read_geotiff_gpu(chunks=...)`` and compares the attrs dicts. - -Pins per the issue body: - -* ``attrs['nodata_pixels_present']`` is absent on both backends (the - per-chunk reduction would force eager compute; #2135 contract). -* ``attrs['nodata_dtype_cast']`` matches when the caller forced a cast. -* ``attrs['georef_status']`` matches across the five reader states - (full, transform_only, crs_only, none, rotated_dropped). - -GPU tests skip when CUDA is unavailable using the project's standard -``cupy + CUDA`` gate. 
-""" -from __future__ import annotations - -import importlib.util - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import read_geotiff_dask, to_geotiff -from xrspatial.geotiff._attrs import (GEOREF_STATUS_CRS_ONLY, GEOREF_STATUS_FULL, - GEOREF_STATUS_NONE, GEOREF_STATUS_ROTATED_DROPPED, - GEOREF_STATUS_TRANSFORM_ONLY) -from xrspatial.geotiff._coords import _NO_GEOREF_KEY - -tifffile = pytest.importorskip("tifffile") - -# Rotated-TIFF writer relocated to ``read/test_crs.py`` by epic #2390 PR 3. -from xrspatial.geotiff.tests.read.test_crs import \ - _write_rotated_tiff # noqa: E402 - - -def _gpu_available() -> bool: - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") - - -def _open_cpu_dask(path, **kwargs): - return read_geotiff_dask(path, chunks=2, **kwargs) - - -def _open_gpu_dask(path, **kwargs): - # Lazy import so the module loads under CPU-only sandboxes. 
- from xrspatial.geotiff import read_geotiff_gpu - return read_geotiff_gpu(path, chunks=2, **kwargs) - - -_BACKENDS = [ - pytest.param(_open_cpu_dask, id="dask+numpy"), - pytest.param(_open_gpu_dask, id="dask+cupy", marks=_gpu_only), -] - - -# --------------------------------------------------------------------------- -# Fixture builders, mirroring the per-state fixtures in test_georef_status_2136 -# --------------------------------------------------------------------------- - - -def _make_full_tiff(path): - """Float coords + CRS -> ``full``.""" - da = xr.DataArray( - np.zeros((4, 4), dtype=np.float32), - coords={ - 'y': np.array([200.0, 199.0, 198.0, 197.0]), - 'x': np.array([100.0, 101.0, 102.0, 103.0]), - }, - dims=('y', 'x'), - attrs={'crs': 4326}, - ) - to_geotiff(da, path) - - -def _make_transform_only_tiff(path): - """Float coords, no CRS -> ``transform_only``.""" - da = xr.DataArray( - np.zeros((4, 4), dtype=np.float32), - coords={ - 'y': np.array([200.0, 199.0, 198.0, 197.0]), - 'x': np.array([100.0, 101.0, 102.0, 103.0]), - }, - dims=('y', 'x'), - ) - to_geotiff(da, path) - - -def _make_crs_only_tiff(path): - """No-georef marker + CRS -> ``crs_only``.""" - da = xr.DataArray( - np.zeros((4, 4), dtype=np.float32), - coords={ - 'y': np.arange(4, dtype=np.int64), - 'x': np.arange(4, dtype=np.int64), - }, - dims=('y', 'x'), - attrs={_NO_GEOREF_KEY: True, 'crs': 4326}, - ) - to_geotiff(da, path) - - -def _make_none_tiff(path): - """Bare TIFF with no GeoTIFF tags at all -> ``none``.""" - arr = np.zeros((4, 4), dtype=np.float32) - tifffile.imwrite( - path, arr, photometric='minisblack', planarconfig='contig', - metadata=None, - ) - - -def _make_rotated_tiff(path): - """Rotated ``ModelTransformationTag`` (opened with ``allow_rotated``) - -> ``rotated_dropped``. 
The data is uint16 because the rotated-TIFF - writer in the #2115 test only emits integer pixels; that's fine for - a metadata pin.""" - arr = np.arange(16, dtype='float promotion - branch.""" - arr = np.arange(16, dtype=np.float32).reshape(4, 4) - arr[0, 0] = sentinel - da = xr.DataArray( - arr, - coords={ - 'y': np.array([200.0, 199.0, 198.0, 197.0]), - 'x': np.array([100.0, 101.0, 102.0, 103.0]), - }, - dims=('y', 'x'), - attrs={'crs': 4326, 'nodata': sentinel}, - ) - to_geotiff(da, path) - - -def _make_int_with_nodata_tiff(path, sentinel=30): - """Integer raster carrying a sentinel. Lets the dtype-cast tests - distinguish "graph dtype auto-promoted by masking" from - "caller asked for an explicit cast".""" - arr = np.array([[10, 20, 25], [30, 40, 50]], dtype=np.int16) - da = xr.DataArray( - arr, - coords={ - 'y': np.array([200.0, 199.0]), - 'x': np.array([100.0, 101.0, 102.0]), - }, - dims=('y', 'x'), - attrs={'crs': 4326, 'nodata': sentinel}, - ) - to_geotiff(da, path) - - -# --------------------------------------------------------------------------- -# Cross-backend parity tests -# --------------------------------------------------------------------------- - - -_GEOREF_FIXTURES = [ - pytest.param(_make_full_tiff, GEOREF_STATUS_FULL, False, - id="full"), - pytest.param(_make_transform_only_tiff, GEOREF_STATUS_TRANSFORM_ONLY, - False, id="transform_only"), - pytest.param(_make_crs_only_tiff, GEOREF_STATUS_CRS_ONLY, False, - id="crs_only"), - pytest.param(_make_none_tiff, GEOREF_STATUS_NONE, False, - id="none"), - pytest.param(_make_rotated_tiff, GEOREF_STATUS_ROTATED_DROPPED, - True, id="rotated_dropped"), -] - - -@pytest.mark.parametrize("fixture,expected_status,allow_rotated", - _GEOREF_FIXTURES) -def test_georef_status_parity(tmp_path, fixture, expected_status, - allow_rotated): - """Both dask backends emit the same ``georef_status`` for each - of the five reader states.""" - path = str(tmp_path / f"tmp_2178_status_{expected_status}.tif") - fixture(path) - - 
kwargs = {'allow_rotated': True} if allow_rotated else {} - cpu = _open_cpu_dask(path, **kwargs) - assert cpu.attrs.get('georef_status') == expected_status - - if _HAS_GPU: - gpu = _open_gpu_dask(path, **kwargs) - assert gpu.attrs.get('georef_status') == expected_status - assert cpu.attrs['georef_status'] == gpu.attrs['georef_status'] - - -@pytest.mark.parametrize("fixture,expected_status,allow_rotated", - _GEOREF_FIXTURES) -def test_attrs_dict_parity(tmp_path, fixture, expected_status, - allow_rotated): - """Both dask backends emit the same attrs dict for each fixture.""" - if not _HAS_GPU: - pytest.skip("dask+cupy parity requires CUDA") - path = str(tmp_path / f"tmp_2178_parity_{expected_status}.tif") - fixture(path) - - kwargs = {'allow_rotated': True} if allow_rotated else {} - cpu = _open_cpu_dask(path, **kwargs) - gpu = _open_gpu_dask(path, **kwargs) - - cpu_attrs = dict(cpu.attrs) - gpu_attrs = dict(gpu.attrs) - assert cpu_attrs == gpu_attrs, ( - f"attrs dicts diverged for fixture={expected_status}:\n" - f" cpu only: {set(cpu_attrs) - set(gpu_attrs)}\n" - f" gpu only: {set(gpu_attrs) - set(cpu_attrs)}\n" - f" shared keys with different values: " - f"{[k for k in set(cpu_attrs) & set(gpu_attrs) if cpu_attrs[k] != gpu_attrs[k]]}" - ) - - -@pytest.mark.parametrize("opener", _BACKENDS) -def test_nodata_pixels_present_absent_on_lazy(tmp_path, opener): - """Lazy contract from #2135: ``nodata_pixels_present`` stays unset - on both dask backends.""" - path = str(tmp_path / "tmp_2178_pixels_absent.tif") - _make_float_with_nodata_tiff(path) - out = opener(path) - assert 'nodata_pixels_present' not in out.attrs - - -def test_nodata_pixels_present_cross_backend(tmp_path): - """Both backends agree on the absence of ``nodata_pixels_present`` - when reading the same fixture.""" - if not _HAS_GPU: - pytest.skip("dask+cupy parity requires CUDA") - path = str(tmp_path / "tmp_2178_pixels_cross.tif") - _make_float_with_nodata_tiff(path) - cpu = _open_cpu_dask(path) - gpu = 
_open_gpu_dask(path) - assert 'nodata_pixels_present' not in cpu.attrs - assert 'nodata_pixels_present' not in gpu.attrs - - -@pytest.mark.parametrize("opener", _BACKENDS) -def test_dtype_cast_absent_without_caller_dtype(tmp_path, opener): - """No ``dtype=`` kwarg: ``nodata_dtype_cast`` stays unset, even - when masking auto-promotes the graph dtype to float64.""" - path = str(tmp_path / "tmp_2178_no_cast.tif") - _make_int_with_nodata_tiff(path) - out = opener(path) - # Masking promoted the int source to float64 on the graph dtype, - # but the caller did not ask for a cast. - assert out.dtype == np.float64 - assert out.attrs.get('masked_nodata') is True - assert 'nodata_dtype_cast' not in out.attrs - - -@pytest.mark.parametrize("opener", _BACKENDS) -def test_dtype_cast_records_target(tmp_path, opener): - """Explicit ``dtype=`` kwarg: ``nodata_dtype_cast`` records the - requested dtype on both backends.""" - path = str(tmp_path / "tmp_2178_with_cast.tif") - _make_int_with_nodata_tiff(path) - out = opener(path, mask_nodata=False, dtype=np.float64) - assert out.attrs.get('masked_nodata') is False - assert out.attrs.get('nodata_dtype_cast') == 'float64' - assert 'nodata_pixels_present' not in out.attrs - - -def test_dtype_cast_parity_cross_backend(tmp_path): - """Cross-backend: same input + same ``dtype=`` kwarg yields the - same ``nodata_dtype_cast`` value.""" - if not _HAS_GPU: - pytest.skip("dask+cupy parity requires CUDA") - path = str(tmp_path / "tmp_2178_cast_cross.tif") - _make_int_with_nodata_tiff(path) - cpu = _open_cpu_dask(path, mask_nodata=False, dtype=np.float64) - gpu = _open_gpu_dask(path, mask_nodata=False, dtype=np.float64) - assert cpu.attrs.get('nodata_dtype_cast') == gpu.attrs.get('nodata_dtype_cast') - assert cpu.attrs.get('nodata_dtype_cast') == 'float64' - - -def test_dtype_cast_absent_parity_cross_backend(tmp_path): - """Cross-backend: same int input without an explicit ``dtype=`` - leaves ``nodata_dtype_cast`` absent on both backends (the auto- 
- promoted graph dtype must not leak as a caller cast).""" - if not _HAS_GPU: - pytest.skip("dask+cupy parity requires CUDA") - path = str(tmp_path / "tmp_2178_no_cast_cross.tif") - _make_int_with_nodata_tiff(path) - cpu = _open_cpu_dask(path) - gpu = _open_gpu_dask(path) - assert 'nodata_dtype_cast' not in cpu.attrs - assert 'nodata_dtype_cast' not in gpu.attrs - - -@pytest.mark.parametrize("opener", _BACKENDS) -def test_dtype_cast_records_integer_target(tmp_path, opener): - """Caller-supplied integer ``dtype=`` kwarg: ``nodata_dtype_cast`` - records the integer dtype on both backends. Pins the - ``dtype.kind != 'f'`` branch of the call-site fixup (review - follow-up for #2178).""" - path = str(tmp_path / "tmp_2178_int_cast.tif") - _make_int_with_nodata_tiff(path) - # ``mask_nodata=False`` keeps the integer dtype; the caller cast - # then routes the graph dtype to ``int32`` without the masking - # auto-promotion firing. The pre-helper contract emits - # ``nodata_dtype_cast='int32'`` and ``masked_nodata=False`` here. - out = opener(path, mask_nodata=False, dtype=np.int32) - assert out.dtype == np.int32 - assert out.attrs.get('masked_nodata') is False - assert out.attrs.get('nodata_dtype_cast') == 'int32' - assert 'nodata_pixels_present' not in out.attrs diff --git a/xrspatial/geotiff/tests/test_read_entry_points_doc_param_parity_2274.py b/xrspatial/geotiff/tests/test_read_entry_points_doc_param_parity_2274.py deleted file mode 100644 index 3a52abd8..00000000 --- a/xrspatial/geotiff/tests/test_read_entry_points_doc_param_parity_2274.py +++ /dev/null @@ -1,115 +0,0 @@ -"""Regression test for #2274: every kwarg on the public read entry -points has a Parameters-section docstring entry. 
- -The original gap: the four read entry points (``open_geotiff``, -``read_geotiff_dask``, ``read_geotiff_gpu``, ``read_vrt``) accept -``allow_rotated`` and ``allow_unparseable_crs``, but those kwargs were -only documented on ``open_geotiff`` (and only inline in the Tier prose -paragraph for ``allow_unparseable_crs``). The three direct backends -also accept several gated kwargs whose only purpose is to raise -``ValueError`` on the wrong backend so all four readers stay -error-symmetric; those kwargs had no Parameters entry on the backends -that reject them. - -This test pins the fix and catches any future addition of a signature -kwarg without a matching Parameters entry on any of the four read -entry points. -""" -from __future__ import annotations - -import inspect -import re - -import pytest - -from xrspatial.geotiff import open_geotiff, read_geotiff_dask, read_geotiff_gpu, read_vrt - -READ_ENTRY_POINTS = ( - open_geotiff, - read_geotiff_dask, - read_geotiff_gpu, - read_vrt, -) - - -# Numpy-style docstring parameter heading pattern. Matches lines like -# `` name : type`` after ``inspect.getdoc`` has normalised the -# leading indentation to column zero. -_PARAM_HEADING = re.compile(r"^(\w+) : ", flags=re.MULTILINE) - - -def _signature_params(fn): - return set(inspect.signature(fn).parameters) - - -def _documented_params(fn): - doc = inspect.getdoc(fn) or "" - return set(_PARAM_HEADING.findall(doc)) - - -@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) -def test_read_entry_point_kwargs_have_docstring_entries(fn): - """Every signature kwarg appears in the Parameters section.""" - params = _signature_params(fn) - documented = _documented_params(fn) - missing = sorted(params - documented) - assert missing == [], ( - f"{fn.__name__} has kwargs without Parameters-section entries: " - f"{missing}. Add a numpy-style ``name : type`` heading for each " - f"so the docstring agrees with the signature. 
The kwargs may be " - f"gated (raise ValueError on the wrong backend) but they are " - f"still on the public surface, and tools that read the " - f"docstring (Sphinx, IDE help) cannot tell the kwarg exists " - f"without an entry. See #2274." - ) - - -@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) -def test_read_entry_point_docstring_does_not_invent_params(fn): - """Every Parameters entry maps to a real signature kwarg. - - Catches the inverse drift: a kwarg removed from the signature but - still listed in the Parameters section. - """ - params = _signature_params(fn) - documented = _documented_params(fn) - extra = sorted(documented - params) - assert extra == [], ( - f"{fn.__name__} has Parameters-section entries that do not " - f"appear in the signature: {extra}. Either remove the entry " - f"or restore the kwarg." - ) - - -@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) -def test_allow_rotated_documented(fn): - """``allow_rotated`` was the load-bearing #2274 gap on the backends. - - Pin it explicitly so a future commit that strips the Parameters - entry while keeping the signature kwarg fails loudly. - """ - assert "allow_rotated" in _signature_params(fn), ( - f"{fn.__name__} unexpectedly dropped allow_rotated from its " - f"signature" - ) - assert "allow_rotated" in _documented_params(fn), ( - f"{fn.__name__} accepts allow_rotated but does not document it " - f"in its Parameters section (#2274)." - ) - - -@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) -def test_allow_unparseable_crs_documented(fn): - """``allow_unparseable_crs`` was the other shared #2274 gap. - - ``open_geotiff`` had the kwarg only in the Tier prose paragraph; - the three backends did not mention it at all. 
- """ - assert "allow_unparseable_crs" in _signature_params(fn), ( - f"{fn.__name__} unexpectedly dropped allow_unparseable_crs from " - f"its signature" - ) - assert "allow_unparseable_crs" in _documented_params(fn), ( - f"{fn.__name__} accepts allow_unparseable_crs but does not " - f"document it in its Parameters section (#2274)." - ) diff --git a/xrspatial/geotiff/tests/test_release_contract_parity_2389.py b/xrspatial/geotiff/tests/test_release_contract_parity_2389.py deleted file mode 100644 index be79826c..00000000 --- a/xrspatial/geotiff/tests/test_release_contract_parity_2389.py +++ /dev/null @@ -1,127 +0,0 @@ -"""Lock ``geotiff_release_contract.md`` against ``SUPPORTED_FEATURES``. - -Background ----------- -``docs/source/reference/geotiff_release_contract.md`` lists every public -GeoTIFF feature with its tier and claims: - - The tier strings here match the strings in - ``xrspatial.geotiff.SUPPORTED_FEATURES`` at runtime. - -Before this test, nothing in CI checked that claim. The sibling release -gate registry (``release_gates/test_stable_features.py``, -``Cross-cutting meta-gates`` section) parses -``release_gate_geotiff.rst``, not this ``.md`` contract page, so the -contract could (and did) silently drift the next time a key was -re-tiered in ``_attrs.py`` -- twice in two releases (#2381 and #2389). - -What this test pins -------------------- -* Every row in the feature tier table parses cleanly into a - ``(key, tier)`` pair. -* The key on every row is a real key in ``SUPPORTED_FEATURES``. -* The tier column on every row matches ``SUPPORTED_FEATURES[key]`` - byte-for-byte (so a future ``internal-only`` vs ``internal_only`` - drift fails the gate before the doc lands). - -Out of scope ------------- -* Locking the section-heading prose (``### experimental`` etc.) -- - those are human-readable labels, not runtime tier strings. 
-* Locking the contract page against ``release_gate_geotiff.rst`` -- - the gate page only enumerates ``stable`` and ``advanced`` tiers - (``release_gates/test_stable_features.py`` already covers that side). -""" -from __future__ import annotations - -import re -from pathlib import Path - -from xrspatial.geotiff import SUPPORTED_FEATURES - -_HERE = Path(__file__).resolve() -_REPO_ROOT = _HERE.parents[3] -_CONTRACT = ( - _REPO_ROOT / "docs" / "source" / "reference" / "geotiff_release_contract.md" -) - -# Match table rows of the form: -# | `codec.none` | stable | Uncompressed... | -# The key column is always in backticks; the tier column is the bare -# tier label that should appear verbatim in SUPPORTED_FEATURES. -_ROW_RE = re.compile( - r"^\|\s*`([a-z_]+\.[a-z0-9_]+)`\s*\|\s*([a-z_]+)\s*\|", - re.MULTILINE, -) - - -def _contract_rows() -> list[tuple[str, tuple[str, str]]]: - """Return ``(line_number_hint, (key, tier))`` for every table row. - - The line-number hint is the 1-based offset of the match inside the - file so assertion failures can point a maintainer at the exact row. - """ - text = _CONTRACT.read_text(encoding="utf-8") - rows: list[tuple[str, tuple[str, str]]] = [] - for match in _ROW_RE.finditer(text): - line_no = text.count("\n", 0, match.start()) + 1 - rows.append((f"{_CONTRACT.name}:{line_no}", (match.group(1), match.group(2)))) - return rows - - -def test_contract_table_parses_into_rows() -> None: - """The regex catches the table rows. If a future doc rewrite breaks - the row shape, fail loudly here instead of silently passing the - tier check on zero rows. - """ - rows = _contract_rows() - assert rows, ( - f"no contract rows parsed from {_CONTRACT}; the markdown table " - "shape may have changed and this test's regex needs to follow." - ) - # Sanity floor: the contract today lists roughly 28 keys. Use a - # conservative lower bound so a sweeping accidental table truncation - # fails the gate. The exact count is not pinned; tiers move. 
- assert len(rows) >= 20, ( - f"only {len(rows)} contract rows parsed; the table may have been " - "truncated or the row format changed." - ) - - -def test_contract_keys_are_real_supported_features() -> None: - """Every key in the contract table exists in ``SUPPORTED_FEATURES``. - A stray row left behind after a key is removed from ``_attrs.py`` - fails here. - """ - bad: list[tuple[str, str]] = [] - for where, (key, _tier) in _contract_rows(): - if key not in SUPPORTED_FEATURES: - bad.append((where, key)) - assert not bad, ( - "contract table lists keys that are not in SUPPORTED_FEATURES; " - "either the key was removed from _attrs.py and the doc row was " - "left behind, or the row's backticked text is wrong: " - f"{bad}" - ) - - -def test_contract_tiers_match_supported_features() -> None: - """Every row's tier column matches ``SUPPORTED_FEATURES[key]``. - This is the gate that would have caught the #2381 / #2389 drift. - """ - mismatches: list[tuple[str, str, str, str]] = [] - for where, (key, tier) in _contract_rows(): - if key not in SUPPORTED_FEATURES: - # Reported by ``test_contract_keys_are_real_supported_features``; - # skip here to keep this failure focused on tier drift. - continue - expected = SUPPORTED_FEATURES[key] - if tier != expected: - mismatches.append((where, key, tier, expected)) - assert not mismatches, ( - "contract page tier strings disagree with SUPPORTED_FEATURES; " - "the contract page promises the two match verbatim. 
Update the " - "tier column in geotiff_release_contract.md to the runtime tier " - "(format: (where, key, doc_tier, runtime_tier)): " - f"{mismatches}" - ) diff --git a/xrspatial/geotiff/tests/test_signature_parity_1631.py b/xrspatial/geotiff/tests/test_signature_parity_1631.py deleted file mode 100644 index 37e563b0..00000000 --- a/xrspatial/geotiff/tests/test_signature_parity_1631.py +++ /dev/null @@ -1,180 +0,0 @@ -"""Regression test for #1631: public write_vrt / write_geotiff_gpu -signature and docstring parity vs to_geotiff. - -Three drifts were flagged by the api-consistency sweep on 2026-05-11: - -1. ``write_vrt(vrt_path, source_files, **kwargs)`` swallowed every kwarg - into ``**kwargs``. The docstring documented ``relative``, ``crs_wkt``, - ``nodata``, but ``inspect.signature`` and IDE autocomplete saw nothing. -2. ``write_geotiff_gpu``'s ``overview_resampling`` docstring omitted - ``'cubic'``; ``to_geotiff`` lists it and ``make_overview_gpu`` accepts - it (falling back to CPU). -3. ``write_geotiff_gpu(data, ...)`` lacked the type hint that - ``to_geotiff(data, ...)`` has. - -This module pins each of those three guarantees against future drift. -""" -from __future__ import annotations - -import importlib.util -import inspect -import os - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff, write_geotiff_gpu, write_vrt - - -def _gpu_available() -> bool: - """True when cupy imports and CUDA is initialised.""" - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif( - not _HAS_GPU, reason="cupy + CUDA required", -) - - -def test_write_vrt_signature_exposes_documented_kwargs(): - """``inspect.signature(write_vrt)`` reports the four accepted kwargs. 
- - Prior to #1631 the public wrapper used ``**kwargs``, so - ``inspect.signature`` only saw ``vrt_path`` and ``source_files``. - Issue #1715 added ``crs`` for parity with ``to_geotiff`` / - ``write_geotiff_gpu`` while keeping the historic ``crs_wkt`` as a - deprecated alias (sentinel default so the deprecation shim can - tell "user passed nothing" from "user passed crs_wkt=None"). - """ - sig = inspect.signature(write_vrt) - params = sig.parameters - assert 'relative' in params - assert 'crs' in params # added in #1715 - assert 'crs_wkt' in params # deprecated alias - assert 'nodata' in params - assert params['relative'].default is True - # ``crs`` is the new canonical kwarg; default None means "pick from - # the first source", matching to_geotiff / write_geotiff_gpu. - assert params['crs'].default is None - # ``crs_wkt`` carries a sentinel default so the deprecation shim - # can distinguish "user passed nothing" (no warning) from "user - # passed crs_wkt=None" (deprecated-but-explicit, warn). The - # sentinel itself is private; check that it is NOT None so a - # future maintainer cannot accidentally drop the sentinel logic. - assert params['crs_wkt'].default is not None - assert params['crs_wkt'].default is not inspect.Parameter.empty - assert params['nodata'].default is None - # No catch-all VAR_KEYWORD - kinds = {p.kind for p in params.values()} - assert inspect.Parameter.VAR_KEYWORD not in kinds - - -def test_write_vrt_unknown_kwarg_rejected_at_public_level(tmp_path): - """A typo'd kwarg now raises ``TypeError`` from the public function - rather than from deep inside ``_vrt.write_vrt``. 
- """ - arr = np.zeros((8, 8), dtype=np.float32) - da = xr.DataArray( - arr, dims=['y', 'x'], - coords={'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)}, - attrs={'crs': 4326, 'transform': (1.0, 0, 0.0, 0, -1.0, 8.0)}, - ) - tif_path = str(tmp_path / 't.tif') - to_geotiff(da, tif_path) - - with pytest.raises(TypeError, match='typo_kwarg'): - write_vrt(str(tmp_path / 't.vrt'), [tif_path], typo_kwarg=1) - - -def test_write_vrt_accepts_documented_kwargs(tmp_path): - """Each documented kwarg round-trips through the explicit signature. - - Uses the new ``crs=None`` kwarg form (issue #1715). The deprecated - ``crs_wkt`` alias is exercised separately in - ``test_write_vrt_crs_1715.py``. - """ - arr = np.zeros((8, 8), dtype=np.float32) - da = xr.DataArray( - arr, dims=['y', 'x'], - coords={'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)}, - attrs={'crs': 4326, 'transform': (1.0, 0, 0.0, 0, -1.0, 8.0)}, - ) - tif_path = str(tmp_path / 't.tif') - to_geotiff(da, tif_path) - - vrt_path = str(tmp_path / 't.vrt') - out = write_vrt( - vrt_path, [tif_path], - relative=False, crs=None, nodata=-9999.0, - ) - assert out == vrt_path - assert os.path.exists(vrt_path) - - -def test_write_geotiff_gpu_docstring_lists_cubic(): - """``overview_resampling`` docstring includes ``'cubic'`` so it - matches ``to_geotiff`` and the underlying ``make_overview_gpu``. - """ - doc = write_geotiff_gpu.__doc__ - assert doc is not None - # Find the overview_resampling block - assert 'overview_resampling' in doc - # The block must mention cubic - block_start = doc.index('overview_resampling') - block_end = doc.index('bigtiff', block_start) - block = doc[block_start:block_end] - assert 'cubic' in block - - -def test_write_geotiff_gpu_data_has_type_hint(): - """``data`` parameter is annotated, matching ``to_geotiff(data, ...)``. 
- - The annotation also covers ``np.ndarray`` because the implementation - accepts numpy inputs (uploaded via ``cupy.asarray(np.asarray(data))``) - and the test suite exercises that path (e.g. - ``test_backend_kwarg_parity_1561.py`` passes a numpy ``dummy``). - """ - sig = inspect.signature(write_geotiff_gpu) - data_param = sig.parameters['data'] - assert data_param.annotation is not inspect.Parameter.empty - # The annotation is a forward reference under ``from __future__ import - # annotations``; just confirm it mentions the documented types. - ann_str = str(data_param.annotation) - assert 'DataArray' in ann_str - assert 'cupy' in ann_str - assert 'ndarray' in ann_str # numpy parity vs to_geotiff - - -@_gpu_only -def test_write_geotiff_gpu_cubic_overview_round_trip(tmp_path): - """``overview_resampling='cubic'`` works on the GPU writer. - - Sanity check that the docstring update is not advertising an - unsupported codec. ``make_overview_gpu`` falls back to the CPU - cubic implementation for parity with the CPU writer. - """ - import cupy - - arr_cpu = np.random.RandomState(0).rand(256, 256).astype(np.float32) - arr_gpu = cupy.asarray(arr_cpu) - da_gpu = xr.DataArray( - arr_gpu, dims=['y', 'x'], - coords={'y': np.arange(256.0, 0, -1), 'x': np.arange(256.0)}, - ) - path = str(tmp_path / 'cog.tif') - write_geotiff_gpu( - da_gpu, path, - cog=True, tile_size=64, overview_resampling='cubic', - ) - # Overview level 1 = 1/2 resolution - ov = open_geotiff(path, overview_level=1) - assert ov.shape == (128, 128)