diff --git a/docs/source/reference/release_gate_geotiff.rst b/docs/source/reference/release_gate_geotiff.rst index 9f01a0812..dfee88e17 100644 --- a/docs/source/reference/release_gate_geotiff.rst +++ b/docs/source/reference/release_gate_geotiff.rst @@ -170,7 +170,7 @@ Local GeoTIFF read and write and zero-area windows raise rather than silently clamp; coords on georeferenced inputs match the eager full-read slice. - ``xrspatial/geotiff/tests/unit/test_input_validation.py``, - ``xrspatial/geotiff/tests/test_no_georef_windowed_coords_1710.py`` + ``xrspatial/geotiff/tests/read/test_georef.py`` - `#2340`_ * - ``reader.windowed`` -- shifted-transform parity (eager + dask) - stable @@ -492,29 +492,25 @@ VRT supported subset - advanced - VRT over compatible GeoTIFF sources returns the same pixels and attrs through eager and dask paths. - - ``xrspatial/geotiff/tests/test_vrt_backend_coverage_2026_05_11.py``, - ``xrspatial/geotiff/tests/test_golden_corpus_vrt_1930.py``, - ``xrspatial/geotiff/tests/test_vrt_finalization_parity_2162.py`` + - ``xrspatial/geotiff/tests/vrt/test_parity.py``, + ``xrspatial/geotiff/tests/test_golden_corpus_vrt_1930.py`` - `#2342`_ * - VRT default ``missing_sources='raise'`` - stable - Missing source files fail at construction, not at compute. - - ``xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py``, - ``xrspatial/geotiff/tests/test_read_vrt_default_missing_sources_1860.py``, - ``xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py`` + - ``xrspatial/geotiff/tests/vrt/test_missing_sources.py`` - `#2342`_ * - VRT ``missing_sources='warn'`` opt-in - advanced - Holes surface as the band sentinel, ``attrs['vrt_holes']`` is set, and a :class:`GeoTIFFFallbackWarning` is emitted. 
- ``xrspatial/geotiff/tests/vrt/test_metadata.py``, - ``xrspatial/geotiff/tests/vrt/test_missing_sources.py``, - ``xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py`` + ``xrspatial/geotiff/tests/vrt/test_missing_sources.py`` - `#2342`_ * - VRT source / dest rectangle validation - stable - Out-of-bounds source or destination rectangles raise at construction. - - ``xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py``, + - ``xrspatial/geotiff/tests/vrt/test_validation.py``, ``xrspatial/geotiff/tests/vrt/test_window.py`` - `#2342`_ * - VRT path containment @@ -541,8 +537,7 @@ VRT supported subset - advanced - Chunked VRT reads return the same shape, coords, attrs, and values as eager reads on the supported subset. - - ``xrspatial/geotiff/tests/vrt/test_window.py``, - ``xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py`` + - ``xrspatial/geotiff/tests/vrt/test_window.py`` - `#2342`_ * - VRT single-parse contract - stable @@ -566,7 +561,7 @@ VRT supported subset * - ``write_vrt`` - advanced - Writer rejects source-incompatibility cases at the writer boundary. - - ``xrspatial/geotiff/tests/test_to_geotiff_vrt_tiled_validation_1862.py`` + - ``xrspatial/geotiff/tests/vrt/test_validation.py`` - `#2342`_ Sidecar and overview interactions diff --git a/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py b/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py deleted file mode 100644 index 07aa487af..000000000 --- a/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py +++ /dev/null @@ -1,165 +0,0 @@ -"""VRT ``SrcRect`` must reject negative sizes and offsets up front. - -The ``DstRect`` validation added for issue #1737 only covers one half of the -SimpleSource rectangle pair. 
A malformed ```` (or -negative offset) reaches ``read_to_array`` as a bad window, raises -``ValueError`` for the out-of-range window, and is then swallowed by the -lenient source-read ``try/except`` that is meant to handle *missing or -unreadable source files* -- not malformed XML rectangles. - -Net effect before this fix: malformed XML becomes a single warning plus a -zero-filled hole in the mosaic. In strict mode the same condition surfaces -the swallowed error inside the try. Either way, the caller cannot tell the -malformed-VRT case from a legitimate missing tile. - -Regression test for issue #1784: ``read_vrt`` should refuse the read with a -``ValueError`` that names the offending SrcRect field, in both lenient and -strict modes. -""" -from __future__ import annotations - -import os -import tempfile -import warnings - -import numpy as np -import pytest - -from xrspatial.geotiff import to_geotiff -from xrspatial.geotiff._vrt import read_vrt - - -def _write_source(td: str, name: str = 'src.tif') -> str: - """Write a 10x10 uint8 source GeoTIFF and return its path.""" - src_path = os.path.join(td, name) - to_geotiff(np.zeros((10, 10), dtype=np.uint8), src_path, - compression='none') - return src_path - - -def _write_vrt(td: str, *, - src_x_off: int = 0, src_y_off: int = 0, - src_x_size: int = 10, src_y_size: int = 10, - src_filename: str = 'src.tif', - raster_x: int = 100, raster_y: int = 100) -> str: - """Write a VRT with a single SimpleSource using the given SrcRect.""" - vrt_path = os.path.join(td, 'mosaic.vrt') - vrt_xml = ( - f'\n' - f' \n' - f' \n' - f' {src_filename}' - f'\n' - f' 1\n' - f' \n' - f' \n' - f' \n' - f' \n' - f'\n' - ) - with open(vrt_path, 'w') as f: - f.write(vrt_xml) - return vrt_path - - -def test_negative_srcrect_x_size_rejected(): - """Negative ``SrcRect xSize`` surfaces as ``ValueError`` rather than - being swallowed by the missing-source fallback.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: - 
_write_source(td) - vrt_path = _write_vrt(td, src_x_size=-50) - with pytest.raises(ValueError, match=r"SrcRect.*negative size"): - read_vrt(vrt_path) - - -def test_negative_srcrect_y_size_rejected(): - """Negative ``SrcRect ySize`` surfaces as ``ValueError``.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: - _write_source(td) - vrt_path = _write_vrt(td, src_y_size=-50) - with pytest.raises(ValueError, match=r"SrcRect.*negative size"): - read_vrt(vrt_path) - - -def test_negative_srcrect_x_off_rejected(): - """Negative ``SrcRect xOff`` surfaces as ``ValueError``.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: - _write_source(td) - vrt_path = _write_vrt(td, src_x_off=-10) - with pytest.raises(ValueError, match=r"SrcRect.*negative offset"): - read_vrt(vrt_path) - - -def test_negative_srcrect_y_off_rejected(): - """Negative ``SrcRect yOff`` surfaces as ``ValueError``.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: - _write_source(td) - vrt_path = _write_vrt(td, src_y_off=-10) - with pytest.raises(ValueError, match=r"SrcRect.*negative offset"): - read_vrt(vrt_path) - - -def test_negative_srcrect_message_names_bad_values(): - """The error message must name the malformed field and its value so the - caller can find the offending ```` in the VRT.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: - _write_source(td) - vrt_path = _write_vrt(td, src_x_size=-7, src_y_size=-3) - with pytest.raises(ValueError) as excinfo: - read_vrt(vrt_path) - msg = str(excinfo.value) - assert "SrcRect" in msg - assert "-7" in msg - assert "-3" in msg - - -def test_missing_source_still_takes_lenient_warning_path(): - """A *valid* SrcRect with a missing source file must still hit the - lenient warning path -- the new SrcRect check must not swallow the - missing-file case that PR #1675 narrowed. 
- - Issue #1843 flipped the default to ``missing_sources='raise'`` so - this test now passes ``'warn'`` explicitly to exercise the opt-in - lenient branch. - """ - from xrspatial.geotiff import GeoTIFFFallbackWarning - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: - # No source file written; SrcRect itself is well-formed. - vrt_path = _write_vrt(td, src_filename='does_not_exist.tif') - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter('always') - arr, _ = read_vrt(vrt_path, missing_sources='warn') - # The lenient path must produce a fallback warning and a result - # array (zero-filled at the hole), not raise. - fallback = [w for w in caught - if issubclass(w.category, GeoTIFFFallbackWarning)] - assert fallback, ( - "expected a GeoTIFFFallbackWarning for the missing source" - ) - assert arr.shape == (100, 100) - - -def test_valid_srcrect_reads_normally(): - """A well-formed SrcRect with a real source must succeed -- no false - positives on valid VRTs.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: - _write_source(td) - vrt_path = _write_vrt(td, raster_x=10, raster_y=10) - arr, _ = read_vrt(vrt_path) - assert arr.shape == (10, 10) - # Source is all zeros and DstRect covers the full VRT raster, so - # the entire output must be zero. - assert np.all(arr == 0) - - -def test_negative_srcrect_raises_under_strict_mode(monkeypatch): - """The check runs *before* the lenient try/except, so strict mode and - lenient mode must both raise. 
Pinning strict mode here prevents a - regression where the check accidentally moves back inside the try.""" - monkeypatch.setenv('XRSPATIAL_GEOTIFF_STRICT', '1') - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: - _write_source(td) - vrt_path = _write_vrt(td, src_x_size=-50) - with pytest.raises(ValueError, match=r"SrcRect.*negative size"): - read_vrt(vrt_path) diff --git a/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py b/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py deleted file mode 100644 index ae2dc996c..000000000 --- a/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Regression test for #1685: ``open_geotiff`` silently dropped -``overview_level`` and ``on_gpu_failure`` when the source was a VRT. - -The api-consistency sweep on 2026-05-12 flagged that ``open_geotiff`` -documents both kwargs as supported, but the VRT dispatch branch routes -to ``read_vrt`` whose signature accepts neither. Calls like -``open_geotiff('mosaic.vrt', overview_level=2)`` returned full-resolution -data with no warning. Issue #1561 fixed the same class of bug for the -dask and GPU dispatch branches; this one closes the remaining gap by -refusing the unsupported combinations up front. 
-""" -from __future__ import annotations - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff, write_vrt - - -@pytest.fixture -def small_vrt(tmp_path): - """Two-tile uint16 VRT we can hand to ``open_geotiff``.""" - arr_a = np.arange(16, dtype=np.uint16).reshape(4, 4) - da_a = xr.DataArray( - arr_a, - dims=["y", "x"], - coords={ - "y": np.array([0.5, 1.5, 2.5, 3.5]), - "x": np.array([0.5, 1.5, 2.5, 3.5]), - }, - attrs={"crs": 4326}, - ) - tile_a = tmp_path / "tile_a.tif" - to_geotiff(da_a, str(tile_a)) - - arr_b = np.arange(16, 32, dtype=np.uint16).reshape(4, 4) - da_b = xr.DataArray( - arr_b, - dims=["y", "x"], - coords={ - "y": np.array([0.5, 1.5, 2.5, 3.5]), - "x": np.array([4.5, 5.5, 6.5, 7.5]), - }, - attrs={"crs": 4326}, - ) - tile_b = tmp_path / "tile_b.tif" - to_geotiff(da_b, str(tile_b)) - - vrt_path = tmp_path / "mosaic.vrt" - write_vrt(str(vrt_path), [str(tile_a), str(tile_b)]) - return str(vrt_path) - - -def test_open_geotiff_vrt_rejects_overview_level(small_vrt): - """``overview_level`` plus ``.vrt`` raises ValueError, not a silent drop.""" - with pytest.raises(ValueError, match="overview_level is not supported"): - open_geotiff(small_vrt, overview_level=1) - - -def test_open_geotiff_vrt_accepts_overview_level_zero(small_vrt): - """``overview_level=0`` is documented as full resolution (the default), - so passing it on a VRT is semantically equivalent to omitting the kwarg - and must not raise. Only non-zero overview levels are rejected. - """ - da = open_geotiff(small_vrt, overview_level=0) - # Same shape as the no-kwarg case: two 4x4 tiles side-by-side. - assert da.shape == (4, 8) - - -def test_open_geotiff_vrt_rejects_on_gpu_failure_with_gpu_true(small_vrt): - """``on_gpu_failure='strict'`` plus ``.vrt`` (gpu=True) is refused.""" - # The check fires before any GPU code runs; no CUDA needed. 
- with pytest.raises(ValueError, match="on_gpu_failure is not supported"): - open_geotiff(small_vrt, gpu=True, on_gpu_failure="strict") - - -def test_open_geotiff_vrt_without_unsupported_kwargs_still_works(small_vrt): - """The previously-accepted kwargs still flow through to ``read_vrt``.""" - da = open_geotiff(small_vrt) - # Two 4x4 tiles side-by-side; result is 4x8. - assert da.shape == (4, 8) - - -def test_open_geotiff_vrt_with_window_still_works(small_vrt): - """``window`` was already forwarded; this regression should not break it.""" - da = open_geotiff(small_vrt, window=(0, 1, 4, 5)) - assert da.shape == (4, 4) - - -def test_open_geotiff_non_vrt_still_accepts_overview_level(tmp_path): - """The fix is VRT-specific; ``.tif`` sources keep accepting overview_level.""" - # Build a single COG with one overview so overview_level=0 round-trips. - arr = np.arange(64, dtype=np.uint16).reshape(8, 8) - da = xr.DataArray( - arr, - dims=["y", "x"], - coords={ - "y": np.arange(8, dtype=np.float64), - "x": np.arange(8, dtype=np.float64), - }, - attrs={"crs": 4326}, - ) - tif_path = tmp_path / "with_ovr.tif" - to_geotiff(da, str(tif_path), cog=True, tile_size=16, overview_levels=[2]) - # Either overview_level value must be accepted without raising. 
- open_geotiff(str(tif_path), overview_level=0) - open_geotiff(str(tif_path), overview_level=1) diff --git a/xrspatial/geotiff/tests/test_read_geotiff_dask_vrt_kwargs_1795.py b/xrspatial/geotiff/tests/test_read_geotiff_dask_vrt_kwargs_1795.py deleted file mode 100644 index a24887d0d..000000000 --- a/xrspatial/geotiff/tests/test_read_geotiff_dask_vrt_kwargs_1795.py +++ /dev/null @@ -1,56 +0,0 @@ -"""Direct read_geotiff_dask(.vrt) must forward VRT kwargs (#1795).""" -from __future__ import annotations - -import os - -import numpy as np -import pytest - -from xrspatial.geotiff import read_geotiff_dask, to_geotiff - - -def _write_vrt(vrt_path, source_name, *, bands=1): - band_xml = [] - for i in range(bands): - band_xml.append( - f' \n' - ' \n' - f' {source_name}' - '\n' - f' {i + 1}\n' - ' \n' - ' \n' - ' \n' - ' \n' - ) - vrt_path.write_text( - '\n' - + ''.join(band_xml) - + '\n' - ) - - -def test_direct_read_geotiff_dask_vrt_forwards_window_and_band(tmp_path): - arr = np.arange(4 * 6 * 2, dtype=np.float32).reshape(4, 6, 2) - src = tmp_path / "tmp_1797_source.tif" - to_geotiff(arr, str(src), compression='none') - vrt = tmp_path / "tmp_1797_source.vrt" - _write_vrt(vrt, os.path.basename(src), bands=2) - - got = read_geotiff_dask( - str(vrt), chunks=2, window=(1, 2, 4, 6), band=1, - ) - - assert got.shape == (3, 4) - np.testing.assert_array_equal(got.values, arr[1:4, 2:6, 1]) - - -def test_direct_read_geotiff_dask_vrt_forwards_max_pixels(tmp_path): - arr = np.arange(24, dtype=np.float32).reshape(4, 6) - src = tmp_path / "tmp_1797_source_cap.tif" - to_geotiff(arr, str(src), compression='none') - vrt = tmp_path / "tmp_1797_source_cap.vrt" - _write_vrt(vrt, os.path.basename(src)) - - with pytest.raises(ValueError, match="exceed"): - read_geotiff_dask(str(vrt), chunks=2, max_pixels=10) diff --git a/xrspatial/geotiff/tests/test_read_vrt_default_missing_sources_1860.py b/xrspatial/geotiff/tests/test_read_vrt_default_missing_sources_1860.py deleted file mode 100644 index 
7f278915f..000000000 --- a/xrspatial/geotiff/tests/test_read_vrt_default_missing_sources_1860.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Regression test for #1860: the public ``read_vrt`` and -``open_geotiff(.vrt)`` default ``missing_sources`` to ``'raise'``, matching -the internal ``_vrt.read_vrt`` default set in #1843. - -Before #1860 the public wrapper defaulted to ``'warn'``, which silently -overrode the internal ``'raise'`` default and let unreadable backing -sources produce zero-fill holes on integer rasters with no exception. -Callers that want the lenient partial-mosaic behaviour pass -``missing_sources='warn'`` explicitly. -""" -from __future__ import annotations - -import pytest - -from xrspatial.geotiff import GeoTIFFFallbackWarning, open_geotiff, read_vrt - - -def _write_missing_source_vrt(path): - path.write_text( - '\n' - ' \n' - ' \n' - ' missing_1860.tif' - '\n' - ' 1\n' - ' \n' - ' \n' - ' \n' - ' \n' - '\n' - ) - - -def test_public_read_vrt_default_raises_on_unreadable_source(tmp_path): - """Public ``read_vrt`` with no ``missing_sources`` kwarg must raise. - - Before #1860 the default was ``'warn'`` and the call returned a - partial mosaic with ``attrs['vrt_holes']`` instead of raising. With - the default aligned to the internal ``_vrt.read_vrt`` default of - ``'raise'``, the unreadable source must now halt the call. - """ - vrt = tmp_path / "tmp_1860_public_default_raise.vrt" - _write_missing_source_vrt(vrt) - - with pytest.raises((OSError, ValueError)): - read_vrt(str(vrt)) - - -def test_open_geotiff_vrt_default_raises_on_unreadable_source(tmp_path): - """``open_geotiff(vrt_path)`` with no ``missing_sources`` kwarg must - raise on an unreadable backing source. - - ``open_geotiff`` forwards ``missing_sources`` to ``read_vrt`` only - when the caller passed it explicitly; otherwise the public - ``read_vrt`` default applies. With that default now ``'raise'``, the - silent-degradation path is closed for ``open_geotiff`` callers too. 
- """ - vrt = tmp_path / "tmp_1860_open_geotiff_default_raise.vrt" - _write_missing_source_vrt(vrt) - - with pytest.raises((OSError, ValueError)): - open_geotiff(str(vrt)) - - -def test_public_read_vrt_explicit_warn_preserves_lenient_behaviour(tmp_path): - """``missing_sources='warn'`` is still the escape hatch for partial - mosaics on the public ``read_vrt`` API. - - The warning fires, the call returns, and ``attrs['vrt_holes']`` is - populated with the skipped source record. Pinning this keeps the - historical contract available to callers that opt in. - """ - vrt = tmp_path / "tmp_1860_public_explicit_warn.vrt" - _write_missing_source_vrt(vrt) - - with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): - da = read_vrt(str(vrt), missing_sources='warn') - - assert 'vrt_holes' in da.attrs - assert da.attrs['vrt_holes'][0]['source'].endswith('missing_1860.tif') - - -def test_open_geotiff_vrt_explicit_warn_preserves_lenient_behaviour(tmp_path): - """``open_geotiff(vrt_path, missing_sources='warn')`` still produces - a partial mosaic with the hole record on the DataArray attrs. - - The forwarding branch in ``open_geotiff`` only runs when the caller - explicitly passes ``missing_sources``; this test pins that branch - against regressions. - """ - vrt = tmp_path / "tmp_1860_open_geotiff_explicit_warn.vrt" - _write_missing_source_vrt(vrt) - - with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): - da = open_geotiff(str(vrt), missing_sources='warn') - - assert 'vrt_holes' in da.attrs - assert da.attrs['vrt_holes'][0]['source'].endswith('missing_1860.tif') diff --git a/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py b/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py deleted file mode 100644 index f60162b5a..000000000 --- a/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py +++ /dev/null @@ -1,78 +0,0 @@ -"""read_vrt(chunks=...) 
should build lazy window tasks (#1798).""" -from __future__ import annotations - -import os -import warnings - -import numpy as np -import pytest - -from xrspatial.geotiff import read_vrt, to_geotiff - - -def _write_vrt(vrt_path, source_name): - vrt_path.write_text( - '\n' - ' \n' - ' \n' - f' {source_name}' - '\n' - ' 1\n' - ' \n' - ' \n' - ' \n' - ' \n' - '\n' - ) - - -def test_read_vrt_chunks_matches_eager_values(tmp_path): - arr = np.arange(24, dtype=np.float32).reshape(4, 6) - src = tmp_path / "tmp_1798_source.tif" - to_geotiff(arr, str(src), compression='none') - vrt = tmp_path / "tmp_1798_source.vrt" - _write_vrt(vrt, os.path.basename(src)) - - eager = read_vrt(str(vrt)) - lazy = read_vrt(str(vrt), chunks=2) - - assert lazy.data.chunks == ((2, 2), (2, 2, 2)) - np.testing.assert_array_equal(lazy.compute().values, eager.values) - - -def test_read_vrt_chunks_does_not_read_sources_during_construction(tmp_path): - """The chunked path must not eagerly decode sources at build. - - Construction does run a cheap ``os.path.exists`` sweep over each - source (to populate ``vrt_holes`` and to fail-fast under the - default ``missing_sources='raise'``), but it must not open or - decode any source file. This test pairs the missing source with - the lenient ``missing_sources='warn'`` opt-in so the build - succeeds; the assertion is that no decode-time warnings (which - would only fire if the source were actually read) leak out - during construction. - """ - vrt = tmp_path / "tmp_1798_missing_source.vrt" - _write_vrt(vrt, "missing.tif") - - with warnings.catch_warnings(record=True) as caught: - lazy = read_vrt(str(vrt), chunks=2, missing_sources="warn") - - # Build-time warnings from the decode codecs should be absent. - # ``missing_sources='warn'`` does not warn at build time either; the - # per-task ``GeoTIFFFallbackWarning`` only fires when a chunk - # actually decodes the missing tile during ``compute()``. 
- assert caught == [] - assert hasattr(lazy.data, 'compute') - - -def test_read_vrt_chunks_rejects_excessive_task_count(tmp_path): - vrt = tmp_path / "tmp_1798_huge_extent.vrt" - vrt.write_text( - '\n' - ' \n' - '\n' - ) - - with pytest.raises(ValueError, match="task cap"): - read_vrt(str(vrt), chunks=1, max_pixels=20_000_000_000) diff --git a/xrspatial/geotiff/tests/test_to_geotiff_vrt_tiled_validation_1862.py b/xrspatial/geotiff/tests/test_to_geotiff_vrt_tiled_validation_1862.py deleted file mode 100644 index ff160b9ad..000000000 --- a/xrspatial/geotiff/tests/test_to_geotiff_vrt_tiled_validation_1862.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Regression tests for issue #1862. - -``to_geotiff(..., '.vrt', tiled=False, tile_size=0)`` previously warned that -``tile_size`` was ignored, then crashed with ``ZeroDivisionError`` inside -``_write_vrt_tiled`` because the VRT writer always tiles. The ``tiled=False`` -flag was never honored on the VRT path, and ``tile_size`` was only validated -when ``tiled=True``, so an invalid ``tile_size=0`` slipped through. - -``to_geotiff`` now refuses ``tiled=False`` for ``.vrt`` paths up front with a -``ValueError``, and validates ``tile_size`` unconditionally on the VRT -branch so callers get a clear error before the writer divides by it. 
-""" -from __future__ import annotations - -import os - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import to_geotiff - - -def _make_da(shape=(64, 64)): - arr = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) - return xr.DataArray(arr, dims=['y', 'x']) - - -def test_vrt_rejects_tiled_false_1862(tmp_path): - """``tiled=False`` is not a valid request for VRT output.""" - da = _make_da() - out = os.path.join(str(tmp_path), 'vrt_tiled_false_1862.vrt') - with pytest.raises(ValueError, match='tiled=False is not compatible'): - to_geotiff(da, out, tiled=False) - - -def test_vrt_tiled_false_zero_tile_size_raises_value_error_1862(tmp_path): - """``tiled=False`` plus ``tile_size=0`` must raise ``ValueError``, - not the previous ``ZeroDivisionError`` from inside the writer.""" - da = _make_da() - out = os.path.join( - str(tmp_path), 'vrt_tiled_false_zero_1862.vrt') - with pytest.raises(ValueError) as exc: - to_geotiff(da, out, tiled=False, tile_size=0) - # Either the tiled=False guard or the tile_size validator may fire - # first; both produce ValueError, never ZeroDivisionError. - assert not isinstance(exc.value, ZeroDivisionError) - - -def test_vrt_zero_tile_size_default_tiled_raises_value_error_1862(tmp_path): - """With the default ``tiled=True``, ``tile_size=0`` must surface from - the shared ``_validate_tile_size`` check, not a deep ``ZeroDivisionError``. 
- """ - da = _make_da() - out = os.path.join( - str(tmp_path), 'vrt_default_tiled_zero_1862.vrt') - with pytest.raises(ValueError, match='tile_size'): - to_geotiff(da, out, tile_size=0) - - -def test_vrt_default_args_still_succeeds_1862(tmp_path): - """Sanity: the default-args VRT write path is unaffected by the fix.""" - da = _make_da() - out = os.path.join(str(tmp_path), 'vrt_default_1862.vrt') - to_geotiff(da, out) - assert os.path.exists(out) diff --git a/xrspatial/geotiff/tests/test_vrt_backend_coverage_2026_05_11.py b/xrspatial/geotiff/tests/test_vrt_backend_coverage_2026_05_11.py deleted file mode 100644 index 00b604b07..000000000 --- a/xrspatial/geotiff/tests/test_vrt_backend_coverage_2026_05_11.py +++ /dev/null @@ -1,243 +0,0 @@ -"""Backend / parameter coverage for the VRT read path. - -The non-VRT read backends (``open_geotiff`` / ``read_geotiff_dask`` / -``read_geotiff_gpu``) all have dedicated multi-backend coverage; the -VRT route through ``read_vrt`` historically lacked it. The eager -numpy path has dense coverage, but the GPU and dask+GPU paths the -``read_vrt`` body explicitly handles (the ``if gpu: cupy.asarray`` -and trailing ``result.chunk(...)`` blocks) were only reachable -indirectly via ``open_geotiff('.vrt', gpu=True)`` / ``..., chunks=N)`` -and went untested. - -The error-rejection branches for file-like sources combined with -``gpu=True`` / ``chunks=N`` on ``open_geotiff`` were likewise covered -only by inspection. - -Test coverage gap sweep 2026-05-11 (pass 3): close the VRT backend -coverage gap and the file-like-rejection parameter gaps. 
-""" -from __future__ import annotations - -import importlib.util -import io -import os - -import numpy as np -import pytest - -from xrspatial.geotiff import open_geotiff, read_vrt, to_geotiff -from xrspatial.geotiff._vrt import write_vrt as _write_vrt_internal - -# --------------------------------------------------------------------------- -# GPU gating: matches the ``_gpu_available`` / ``_HAS_GPU`` predicate that -# the rest of the geotiff test suite (e.g. test_backend_kwarg_parity_1561, -# test_attrs_parity_1548) uses, so future GPU tests stay greppable. -# --------------------------------------------------------------------------- - - -def _gpu_available() -> bool: - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") - - -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - -@pytest.fixture -def single_tile_vrt(tmp_path): - """A trivial single-tile VRT plus its source array. - - Float32 source so the VRT band advertises Float32 and the eager - numpy read returns float32 (lets dtype-cast tests assert a real - type change). - """ - arr = np.arange(16, dtype=np.float32).reshape(4, 4) - tile_path = str(tmp_path / 'tile.tif') - to_geotiff(arr, tile_path) - vrt_path = str(tmp_path / 'mosaic.vrt') - _write_vrt_internal(vrt_path, [tile_path]) - return vrt_path, arr - - -# --------------------------------------------------------------------------- -# Cat 1: read_vrt backend coverage (GPU + dask+GPU) -# --------------------------------------------------------------------------- - -@_gpu_only -class TestReadVrtGpuBackend: - """``read_vrt(gpu=True)`` returns a CuPy-backed DataArray. 
- - The eager VRT decode runs on the CPU (the VRT internal reader - walks SimpleSources and assembles via windowed reads) then the - final ``if gpu: arr = cupy.asarray(arr)`` block transfers to GPU. - A regression that dropped the transfer block would have shipped - a numpy DataArray instead of a CuPy one; this test pins that. - """ - - def test_read_vrt_gpu_returns_cupy(self, single_tile_vrt): - import cupy - - vrt_path, arr = single_tile_vrt - da = read_vrt(vrt_path, gpu=True) - assert isinstance(da.data, cupy.ndarray), ( - f"expected cupy.ndarray, got {type(da.data).__name__}" - ) - np.testing.assert_array_equal(da.data.get(), arr) - - def test_read_vrt_gpu_chunks_returns_dask_cupy(self, single_tile_vrt): - """``read_vrt(gpu=True, chunks=N)`` is the documented dask+cupy - VRT entry point. The trailing ``result.chunk(...)`` block has - to wrap the cupy backing without falling back to numpy. - """ - import cupy - import dask.array as da_mod - - vrt_path, arr = single_tile_vrt - result = read_vrt(vrt_path, gpu=True, chunks=2) - - assert isinstance(result.data, da_mod.Array), ( - f"expected dask Array, got {type(result.data).__name__}" - ) - # _meta tells distributed Dask the underlying array is cupy. - # A numpy meta here would cause optimizers to silently move - # data back to host. - assert isinstance(result.data._meta, cupy.ndarray), ( - f"expected cupy._meta, got " - f"{type(result.data._meta).__module__}." - f"{type(result.data._meta).__name__}" - ) - # Chunks honour the spatial spec; the band axis (absent here) - # would chunk as a single block. - assert result.data.chunks == ((2, 2), (2, 2)) - - computed = result.compute() - assert isinstance(computed.data, cupy.ndarray) - np.testing.assert_array_equal(computed.data.get(), arr) - - def test_open_geotiff_vrt_gpu_routes_through(self, single_tile_vrt): - """``open_geotiff('.vrt', gpu=True)`` dispatches to ``read_vrt`` - and surfaces the cupy data unchanged. 
The dispatcher branch - is one line in ``open_geotiff`` but a refactor that dropped - ``gpu=gpu`` from the forwarded kwargs would silently produce - a numpy DataArray. - """ - import cupy - - vrt_path, arr = single_tile_vrt - da = open_geotiff(vrt_path, gpu=True) - assert isinstance(da.data, cupy.ndarray) - np.testing.assert_array_equal(da.data.get(), arr) - - def test_open_geotiff_vrt_gpu_chunks(self, single_tile_vrt): - """``open_geotiff('.vrt', gpu=True, chunks=N)`` is the combined - dask+cupy entry point. Same dispatch test as the gpu-only - variant but also pins the chunk forwarding. - """ - import cupy - import dask.array as da_mod - - vrt_path, arr = single_tile_vrt - result = open_geotiff(vrt_path, gpu=True, chunks=2) - - assert isinstance(result.data, da_mod.Array) - assert isinstance(result.data._meta, cupy.ndarray) - assert result.data.chunks == ((2, 2), (2, 2)) - - computed = result.compute() - np.testing.assert_array_equal(computed.data.get(), arr) - - -# --------------------------------------------------------------------------- -# Cat 4: read_vrt parameter coverage (dtype / name) -# --------------------------------------------------------------------------- - -class TestReadVrtDtypeKwarg: - """``read_vrt(dtype=...)`` casts after decode and validates the cast.""" - - def test_safe_widening_cast(self, single_tile_vrt): - """float32 -> float64 is permitted; values survive bit-for-bit.""" - vrt_path, arr = single_tile_vrt - da = read_vrt(vrt_path, dtype='float64') - assert da.dtype == np.float64 - np.testing.assert_array_equal(da.values, arr.astype(np.float64)) - - def test_float_to_int_rejected(self, single_tile_vrt): - """Float-to-int is lossy and refused with a descriptive error. - Mirrors ``open_geotiff(dtype=...)`` behaviour so callers see the - same gate on both entry points. 
- """ - vrt_path, _ = single_tile_vrt - with pytest.raises(ValueError, match="Cannot cast float"): - read_vrt(vrt_path, dtype='int32') - - -class TestReadVrtNameKwarg: - """``read_vrt(name='custom')`` overrides the file-stem derivation.""" - - def test_explicit_name_used(self, single_tile_vrt): - vrt_path, _ = single_tile_vrt - da = read_vrt(vrt_path, name='custom_name') - assert da.name == 'custom_name' - - def test_default_name_from_stem(self, single_tile_vrt): - vrt_path, _ = single_tile_vrt - da = read_vrt(vrt_path) - # mosaic.vrt -> mosaic - assert da.name == os.path.splitext(os.path.basename(vrt_path))[0] - - -# --------------------------------------------------------------------------- -# Cat 4: open_geotiff file-like + backend kwarg rejection -# --------------------------------------------------------------------------- - -class TestOpenGeotiffFileLikeKwargRejection: - """File-like sources reject ``gpu=True`` and ``chunks=N`` up front. - - The check sits in ``open_geotiff`` (not the underlying readers) - because both downstream paths re-open the source by path from - worker tasks. A buffer passed through would either raise deep - inside dask graph construction or silently behave as if the - buffer were a string path. 
- """ - - @staticmethod - def _buf_with_tiff(tmp_path): - arr = np.zeros((4, 4), dtype=np.float32) - path = str(tmp_path / 'src.tif') - to_geotiff(arr, path) - with open(path, 'rb') as fh: - return io.BytesIO(fh.read()) - - def test_gpu_with_file_like_raises(self, tmp_path): - buf = self._buf_with_tiff(tmp_path) - with pytest.raises(ValueError, match="gpu=True is not supported"): - open_geotiff(buf, gpu=True) - - def test_chunks_with_file_like_raises(self, tmp_path): - buf = self._buf_with_tiff(tmp_path) - with pytest.raises(ValueError, match="chunks=.*file-like"): - open_geotiff(buf, chunks=64) - - def test_chunks_with_pathlib_path_still_works(self, tmp_path): - """Sanity-check: pathlib.Path is not file-like and must keep - working through the dask path. Otherwise the file-like gate - would also lock out Path inputs. - """ - arr = np.arange(16, dtype=np.float32).reshape(4, 4) - path = tmp_path / 'sample.tif' - to_geotiff(arr, str(path)) - - import dask.array as da_mod - result = open_geotiff(path, chunks=2) - assert isinstance(result.data, da_mod.Array) - np.testing.assert_array_equal(np.asarray(result.data), arr) diff --git a/xrspatial/geotiff/tests/test_vrt_backend_parity_2321.py b/xrspatial/geotiff/tests/test_vrt_backend_parity_2321.py deleted file mode 100644 index 39c42b7fe..000000000 --- a/xrspatial/geotiff/tests/test_vrt_backend_parity_2321.py +++ /dev/null @@ -1,632 +0,0 @@ -"""Backend parity for VRT reads with sidecar/overview interactions (#2321 sub-task 4). 
- -Sub-task 4 of issue #2321 (parent) locks down the VRT support contract by -asserting eager / dask parity on the surface that is most likely to drift: - -* metadata (``attrs['transform']``, ``attrs['crs']``, ``attrs['crs_wkt']``, - ``attrs['georef_status']``), not just pixel values; -* coords -- a windowed read must shift the ``y`` / ``x`` arrays consistently - between the eager and the lazy code paths; -* sidecar interactions -- a VRT whose backing source is a GeoTIFF with an - external ``.tif.ovr`` pyramid must surface the same georef attrs (and - pixel values) as an equivalent VRT over the inline-overview fixture. - -The shape mirrors ``test_backend_parity_matrix.py``: a small declarative -fixture / backend matrix, one ``assert_parity`` helper, and a single -parametrised test. We do not re-invent helpers -- the materialisation -and pixel-comparison primitives match the matrix file so a future move -to the shared parity harness is mechanical. - -VRT fixtures use the existing ``write_vrt`` writer (``xrspatial.geotiff -._vrt.write_vrt``) on top of ``to_geotiff`` source tiles, plus the -bundled ``overview_external_ovr_uint16.tif`` / ``.tif.ovr`` sidecar pair -from ``golden_corpus/fixtures/`` (and its inline-overview counterpart). - -Acceptance per the parent issue: the VRT path cannot pass by returning -correct pixel values with wrong georeferencing attrs. Windowed eager -and lazy VRT reads agree on shape, coords, attrs, and values. -""" -from __future__ import annotations - -import pathlib -import shutil -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Callable - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff -from xrspatial.geotiff._vrt import write_vrt as _write_vrt_internal - - -# --------------------------------------------------------------------------- -# Fixture paths shipped under golden_corpus. 
-# --------------------------------------------------------------------------- - -_GOLDEN = ( - pathlib.Path(__file__).resolve().parent - / "golden_corpus" - / "fixtures" -) -_SIDECAR_TIF = _GOLDEN / "overview_external_ovr_uint16.tif" -_SIDECAR_OVR = _GOLDEN / "overview_external_ovr_uint16.tif.ovr" -_INLINE_OVR_TIF = _GOLDEN / "overview_internal_uint16.tif" - - -def _sidecar_fixture_or_skip() -> Path: - """Return the bundled sidecar TIFF or skip if absent.""" - if not _SIDECAR_TIF.exists() or not _SIDECAR_OVR.exists(): - pytest.skip("sidecar overview fixture not present in golden_corpus") - return _SIDECAR_TIF - - -def _inline_overview_fixture_or_skip() -> Path: - if not _INLINE_OVR_TIF.exists(): - pytest.skip("inline overview fixture not present in golden_corpus") - return _INLINE_OVR_TIF - - -# --------------------------------------------------------------------------- -# Materialisation + comparison helpers -# (mirrors ``test_backend_parity_matrix.py`` so cross-test parity reads -# the same way). 
-# --------------------------------------------------------------------------- - -def _materialise(da: xr.DataArray) -> np.ndarray: - raw = da.data - if hasattr(raw, "compute"): - raw = raw.compute() - if hasattr(raw, "get"): - raw = raw.get() - return np.asarray(raw) - - -def _coord_view(da: xr.DataArray, name: str) -> np.ndarray: - return np.asarray(da.coords[name].values) - - -def _assert_pixels_equal(ref: np.ndarray, actual: np.ndarray, - *, label: str) -> None: - """Pixel equality, dtype-aware (mirrors test_backend_parity_matrix.py).""" - assert ref.dtype == actual.dtype, ( - f"{label}: dtype differs ref={ref.dtype} actual={actual.dtype}" - ) - assert ref.shape == actual.shape, ( - f"{label}: shape differs ref={ref.shape} actual={actual.shape}" - ) - if ref.dtype.kind == "f": - assert np.array_equal(ref, actual, equal_nan=True), ( - f"{label}: float pixels differ (NaN-aware)" - ) - else: - assert ref.tobytes() == actual.tobytes(), ( - f"{label}: integer pixel bytes differ" - ) - - -def _assert_metadata_parity( - ref: xr.DataArray, - actual: xr.DataArray, - *, - label: str, - expected_dims: tuple[str, ...], -) -> None: - """Fail if any of the parity-critical attrs / coords drift between two reads. - - The acceptance bar for this PR: the VRT path cannot pass by returning - correct pixel values with wrong georeferencing attrs. Every field - checked here is part of the VRT contract that downstream code relies - on, so a backend that ships the right bytes with the wrong attrs - still fails the cell. - """ - # Dims and order. - assert actual.dims == expected_dims, ( - f"{label}: dims {actual.dims!r} != expected {expected_dims!r}" - ) - assert ref.dims == expected_dims, ( - f"{label}: ref dims {ref.dims!r} != expected {expected_dims!r}" - ) - - # Coord values + coord dtype per axis. A windowed read that decoded - # the right pixels but shifted the coords inconsistently would - # surface here, not in the pixel check above. 
- for axis in expected_dims: - if axis not in ref.coords: - continue - ref_c = _coord_view(ref, axis) - actual_c = _coord_view(actual, axis) - assert ref_c.dtype == actual_c.dtype, ( - f"{label}: coord {axis!r} dtype " - f"ref={ref_c.dtype} actual={actual_c.dtype}" - ) - assert ref_c.shape == actual_c.shape, ( - f"{label}: coord {axis!r} shape " - f"ref={ref_c.shape} actual={actual_c.shape}" - ) - assert ref_c.tobytes() == actual_c.tobytes(), ( - f"{label}: coord {axis!r} bytes differ " - f"(ref[:3]={ref_c[:3].tolist()!r}, " - f"actual[:3]={actual_c[:3].tolist()!r})" - ) - - # Transform tuple. ``rasterio.Affine`` (if used) compares equal to - # a 6-tuple via ``__eq__`` so this works for both surface forms. - ref_t = ref.attrs.get("transform") - actual_t = actual.attrs.get("transform") - assert ref_t == actual_t, ( - f"{label}: transform tuple differs " - f"ref={ref_t!r} actual={actual_t!r}" - ) - - # CRS attrs. The contract: ``attrs['crs']`` carries the EPSG int when - # one is recognised, ``attrs['crs_wkt']`` always carries the WKT. - assert ref.attrs.get("crs") == actual.attrs.get("crs"), ( - f"{label}: attrs['crs'] differs " - f"ref={ref.attrs.get('crs')!r} actual={actual.attrs.get('crs')!r}" - ) - assert ref.attrs.get("crs_wkt") == actual.attrs.get("crs_wkt"), ( - f"{label}: crs_wkt differs" - ) - - # georef_status: lazy / eager / GPU all populate this from the same - # helper (#2136 / #2162). A drift here means the dask graph builder - # is using a different finalization path than the eager reader, - # which is exactly the kind of regression this matrix should catch. - assert ref.attrs.get("georef_status") == actual.attrs.get( - "georef_status" - ), ( - f"{label}: georef_status differs " - f"ref={ref.attrs.get('georef_status')!r} " - f"actual={actual.attrs.get('georef_status')!r}" - ) - - -# --------------------------------------------------------------------------- -# VRT fixture builders. 
-# Each builder writes its files inside a fresh ``tmp_path`` and returns a -# (vrt_path, expected_dtype) pair. The harness then calls open_geotiff + -# read_vrt with the four backend cells and compares them. -# --------------------------------------------------------------------------- - -def _build_two_tile_float32_vrt(tmp_path: Path) -> tuple[Path, np.dtype]: - """Two 16x16 float32 tiles laid out side-by-side as a 16x32 mosaic. - - Differentiated values per tile so the windowed cells exercise both - halves of the mosaic without colliding with the sidecar fixture. - """ - tile_h, tile_w = 16, 16 - paths: list[str] = [] - for c in range(2): - arr = np.full( - (tile_h, tile_w), float(c + 1) * 1000.0, dtype=np.float32 - ) - # Sprinkle distinct values so a swap between tiles surfaces. - arr[0, 0] = -7.0 + c - arr[tile_h - 1, tile_w - 1] = 9000.0 + c - origin_x = float(c * tile_w) - da = xr.DataArray( - arr, dims=["y", "x"], - coords={ - "y": np.arange(tile_h - 1, -1, -1, dtype=np.float64), - "x": np.arange( - origin_x, origin_x + tile_w, dtype=np.float64), - }, - attrs={"crs": 4326}, - ) - tile_path = tmp_path / f"tile_2321_{c}.tif" - to_geotiff(da, str(tile_path), compression="none", tiled=False) - paths.append(str(tile_path)) - vrt_path = tmp_path / "two_tile_2321_.vrt" - _write_vrt_internal(str(vrt_path), paths, relative=False) - return vrt_path, np.dtype("float32") - - -def _build_sidecar_vrt(tmp_path: Path) -> tuple[Path, np.dtype]: - """VRT over a copy of the bundled sidecar TIFF + its ``.ovr`` partner. - - Copying the pair into ``tmp_path`` keeps the original golden corpus - file untouched and ensures the ``.ovr`` lookup is resolved at the - VRT read site (not via a cached path on the original fixture). 
- """ - src = _sidecar_fixture_or_skip() - base = tmp_path / "sidecar_2321_.tif" - shutil.copy(src, base) - shutil.copy(str(src) + ".ovr", str(base) + ".ovr") - vrt_path = tmp_path / "sidecar_2321_.vrt" - _write_vrt_internal(str(vrt_path), [str(base)], relative=False) - return vrt_path, np.dtype("uint16") - - -def _build_inline_overview_vrt(tmp_path: Path) -> tuple[Path, np.dtype]: - """VRT over a copy of the inline-overview fixture (no sidecar). - - Used as the comparison source for ``test_sidecar_vrt_attrs_match_inline``: - both fixtures share their base IFD (same dtype, transform, CRS, and - bytes at level 0), so the VRT contract requires that the eager read - surfaces identical georef attrs regardless of where the pyramid - physically lives. - """ - src = _inline_overview_fixture_or_skip() - base = tmp_path / "inline_2321_.tif" - shutil.copy(src, base) - vrt_path = tmp_path / "inline_2321_.vrt" - _write_vrt_internal(str(vrt_path), [str(base)], relative=False) - return vrt_path, np.dtype("uint16") - - -# --------------------------------------------------------------------------- -# Backend matrix: eager (numpy), dask+numpy. -# GPU is intentionally omitted -- the VRT read path goes through the -# CPU decoder regardless of ``gpu=True`` for the pieces under test here, -# and ``read_vrt(gpu=True, chunks=...)`` already has dedicated coverage -# in ``test_vrt_lazy_chunks_1814.py``. -# --------------------------------------------------------------------------- - -@dataclass(frozen=True) -class _BackendSpec: - backend_id: str - kwargs: dict[str, Any] - - -_BACKENDS: tuple[_BackendSpec, ...] = ( - _BackendSpec(backend_id="eager", kwargs={}), - _BackendSpec(backend_id="dask", kwargs={"chunks": (16, 16)}), -) - - -def _backend_params() -> list: - return [pytest.param(b, id=b.backend_id) for b in _BACKENDS] - - -# --------------------------------------------------------------------------- -# Fixture matrix: each entry is one (builder, label, expected_dims, window). 
-# The window column lets us reuse the same builder for the full-extent -# and the windowed cells without doubling the fixture surface. -# --------------------------------------------------------------------------- - -@dataclass(frozen=True) -class _FixtureSpec: - fix_id: str - builder: Callable[[Path], tuple[Path, np.dtype]] - expected_dims: tuple[str, ...] - # Window passed to open_geotiff / read_vrt; None means full extent. - window: tuple[int, int, int, int] | None - - -# ``fix_id`` is unique per (builder, window); the ``vrt_fixture`` resolver -# below caches one on-disk layout per *builder*, so two specs that share -# a builder (e.g. the full-extent and windowed cells over the same VRT) -# reuse a single set of source TIFFs and a single ``.vrt`` file. -_FIXTURES: tuple[_FixtureSpec, ...] = ( - _FixtureSpec( - fix_id="two-tile-float32-full", - builder=_build_two_tile_float32_vrt, - expected_dims=("y", "x"), - window=None, - ), - _FixtureSpec( - # The windowed cell straddles the seam between the two tiles - # (col 8..24 spans tile 0's right half + tile 1's left half). - # That makes the dask path actually read both backing sources, - # not just one, so a windowed dask graph that only re-reads the - # first source would surface here. - fix_id="two-tile-float32-window-spans-seam", - builder=_build_two_tile_float32_vrt, - expected_dims=("y", "x"), - window=(4, 8, 12, 24), - ), - _FixtureSpec( - fix_id="sidecar-uint16-full", - builder=_build_sidecar_vrt, - expected_dims=("y", "x"), - window=None, - ), - _FixtureSpec( - fix_id="sidecar-uint16-window", - builder=_build_sidecar_vrt, - expected_dims=("y", "x"), - window=(8, 8, 56, 56), - ), -) - - -def _fixture_params() -> list: - return [pytest.param(f, id=f.fix_id) for f in _FIXTURES] - - -# --------------------------------------------------------------------------- -# Cached fixture builds: one VRT layout per fix_id per session. 
-# --------------------------------------------------------------------------- - -@pytest.fixture(scope="session") -def _vrt_parity_dir(tmp_path_factory): - return tmp_path_factory.mktemp("vrt_parity_2321_") - - -@pytest.fixture(scope="session") -def _vrt_parity_cache() -> dict[str, tuple[Path, np.dtype]]: - """Session-scoped (path, dtype) cache shared across every cell. - - The cache must outlive a single test function. A function-scoped - cache would be reset between cells, causing every cell to rebuild - the same VRT and its source TIFFs. On POSIX a rebuild is just - inefficient; on Windows it surfaces as PermissionError / OSError - because ``to_geotiff`` writes through a ``.tmp`` file and then - renames over the existing target while another cell may still - hold the previous file mapped (issue surfaced in CI on - ``windows-latest`` for #2330). - """ - return {} - - -@pytest.fixture -def vrt_fixture(_vrt_parity_dir, _vrt_parity_cache): - """Resolve a :class:`_FixtureSpec` to a (vrt_path, dtype) pair on disk. - - Each builder gets its own subdirectory so the on-disk layout (vrt + - sources + any sidecar) is isolated from neighbouring builders. Builds - are cached at session scope so the four cells that share a builder - (e.g. full-extent + windowed over the same VRT) reuse one set of - source TIFFs and one ``.vrt`` file. - """ - base = _vrt_parity_dir - cache = _vrt_parity_cache - - def _resolve(spec: _FixtureSpec) -> tuple[Path, np.dtype]: - # The fix_id encodes both the builder and the window; collapse to - # the builder so we do not rebuild identical layouts. 
- key = spec.builder.__name__ - if key in cache: - return cache[key] - sub = base / key - sub.mkdir(exist_ok=True) - result = spec.builder(sub) - cache[key] = result - return result - return _resolve - - -# --------------------------------------------------------------------------- -# Tests -# --------------------------------------------------------------------------- - -@pytest.mark.parametrize("spec", _fixture_params()) -@pytest.mark.parametrize("backend", _backend_params()) -def test_vrt_backend_parity(spec, backend, vrt_fixture): - """One cell per (fixture, backend). Asserts pixels + metadata parity. - - Reference is always the eager numpy read with the same window kwarg. - The cell compares the current backend's output against that - reference. Eager-vs-eager is the identity case and locks in the - parity-helper contract. - """ - vrt_path, expected_dtype = vrt_fixture(spec) - - open_kwargs: dict[str, Any] = {} - if spec.window is not None: - open_kwargs["window"] = spec.window - - ref = open_geotiff(str(vrt_path), **open_kwargs) - - actual = open_geotiff( - str(vrt_path), **open_kwargs, **backend.kwargs, - ) - - label = ( - f"fixture={spec.fix_id} backend={backend.backend_id} " - f"window={spec.window!r}" - ) - - ref_arr = _materialise(ref) - actual_arr = _materialise(actual) - - # Dtype against the explicit spec, not just against the reference. - # A silent upcast that the reference also exhibits would still fail - # here (the spec dtype is the contract). 
- assert ref_arr.dtype == expected_dtype, ( - f"{label}: reference dtype {ref_arr.dtype} != " - f"expected {expected_dtype}" - ) - assert actual_arr.dtype == expected_dtype, ( - f"{label}: actual dtype {actual_arr.dtype} != " - f"expected {expected_dtype}" - ) - - _assert_pixels_equal(ref_arr, actual_arr, label=label) - _assert_metadata_parity( - ref, actual, label=label, expected_dims=spec.expected_dims, - ) - - -# --------------------------------------------------------------------------- -# Cross-fixture parity: sidecar pyramid vs inline pyramid. -# Both backing TIFFs share their base IFD bytes (same uint16 raster, same -# transform, same CRS), so a VRT wrapping each must report identical -# georef attrs at level 0. The check guards against the sidecar lookup -# accidentally rewriting (or dropping) any of the contract-named attrs. -# --------------------------------------------------------------------------- - -@pytest.mark.parametrize("backend", _backend_params()) -def test_sidecar_vrt_attrs_match_inline(backend, tmp_path): - """Sidecar-backed VRT and inline-overview-backed VRT report identical - georef attrs and pixels at the base level. - - Acceptance criterion straight from the parent issue: the sidecar - ``.ovr`` lookup must produce the same georef status and CRS attrs - as an inline-overview source. The check runs on each backend so a - drift introduced only on the dask path still surfaces. 
- """ - side_sub = tmp_path / "sidecar" - inline_sub = tmp_path / "inline" - side_sub.mkdir() - inline_sub.mkdir() - side_vrt, side_dtype = _build_sidecar_vrt(side_sub) - inline_vrt, inline_dtype = _build_inline_overview_vrt(inline_sub) - - assert side_dtype == inline_dtype, ( - f"sidecar dtype {side_dtype} != inline dtype {inline_dtype}; " - f"the golden_corpus fixtures should share a base IFD" - ) - - side = open_geotiff(str(side_vrt), **backend.kwargs) - inline = open_geotiff(str(inline_vrt), **backend.kwargs) - - label = ( - f"sidecar-vs-inline backend={backend.backend_id}" - ) - - # Shape parity is the precondition for the pixel comparison. - assert side.shape == inline.shape, ( - f"{label}: shape differs side={side.shape} inline={inline.shape}" - ) - - # Pixel parity at the base level. Both fixtures share their level-0 - # bytes (the sidecar only adds an external pyramid), so the read-back - # arrays should match byte-for-byte. - _assert_pixels_equal( - _materialise(inline), _materialise(side), label=label, - ) - - # Metadata parity: the read paths must surface identical georef - # attrs across the two physical layouts. - _assert_metadata_parity( - inline, side, label=label, expected_dims=("y", "x"), - ) - - -# --------------------------------------------------------------------------- -# Windowed-coord shift parity: an eager windowed read and a chunked -# windowed read of the same VRT must report the same shifted coords -# AND the same shifted transform. Pixel equality alone is not enough -- -# we want to catch the regression where the dask graph computes correct -# pixels but the assembled DataArray keeps the full-extent coords or -# transform. -# --------------------------------------------------------------------------- - -def test_windowed_vrt_shifts_coords_and_transform_consistently(tmp_path): - """Eager and lazy windowed VRT reads agree on shape, coords, attrs, - and values. - - Per the parent issue's acceptance criterion. 
The cell is split out - from the parametrised matrix above so a coord/transform drift on - the dask path produces a single, named failure rather than a - matrix-wide flag. - """ - vrt_path, _ = _build_two_tile_float32_vrt(tmp_path) - # Window deliberately straddles the tile seam (col 16) and trims - # the y-axis on both ends, so both axes get shifted. - window = (3, 5, 13, 27) - - eager = open_geotiff(str(vrt_path), window=window) - lazy = open_geotiff(str(vrt_path), window=window, chunks=(5, 11)) - - # Shape parity (precondition). - assert eager.shape == (10, 22) - assert lazy.shape == (10, 22) - - # Coord shift: the eager read's y/x arrays should match the lazy - # read's exactly (same shape, same dtype, same bytes). - np.testing.assert_array_equal(eager["y"].values, lazy["y"].values) - np.testing.assert_array_equal(eager["x"].values, lazy["x"].values) - assert eager["y"].dtype == lazy["y"].dtype - assert eager["x"].dtype == lazy["x"].dtype - - # The window cuts the leading 3 rows and the leading 5 columns of - # the full-extent grid (which goes from y=15..0 and x=0..31), so - # the windowed first y is 12.0 and the windowed first x is 5.0. - # The check pins the absolute shift, not just the eager/lazy - # equality, so a regression that drifts BOTH backends the same - # way still surfaces. - assert eager["y"].values[0] == 12.0 - assert eager["x"].values[0] == 5.0 - - # Transform must shift consistently: the rasterio 6-tuple's c - # (origin_x) and f (origin_y) entries should reflect the window - # offset, while pixel sizes (a, e) stay constant. - eager_t = eager.attrs.get("transform") - lazy_t = lazy.attrs.get("transform") - assert eager_t == lazy_t, ( - f"transform differs eager={eager_t!r} lazy={lazy_t!r}" - ) - # Pixel size unchanged by the window. - assert eager_t[0] == 1.0 and eager_t[4] == -1.0, ( - f"pixel size mismatch in windowed transform {eager_t!r}" - ) - - # Pixel parity. 
- np.testing.assert_array_equal(eager.values, lazy.compute().values) - - # CRS attrs parity. - assert eager.attrs.get("crs") == lazy.attrs.get("crs") - assert eager.attrs.get("crs_wkt") == lazy.attrs.get("crs_wkt") - assert eager.attrs.get("georef_status") == lazy.attrs.get( - "georef_status" - ) - - -# --------------------------------------------------------------------------- -# Absolute-shift parity for the sidecar windowed cell. The parametrised -# matrix only checks eager-vs-dask equality; pin the actual shifted -# coords and transform here so a regression that drifts BOTH backends -# the same way still surfaces. The bundled sidecar fixture has a known -# pixel size of 0.001 and origin (-120.0, 45.0). -# --------------------------------------------------------------------------- - -def test_sidecar_window_shifts_to_known_coords(tmp_path): - """The sidecar VRT, read with ``window=(8, 8, 56, 56)``, should land - on the same coords / transform an absolute calculation would predict. - - The bundled fixture is 64x64 at pixel size 0.001 with origin - (-120.0, 45.0). Trimming rows 8..56 / cols 8..56 yields a 48x48 - window whose x-coord array starts at -120.0 + 8 * 0.001 + half-pixel - centre offset, and whose transform's c/f entries shift by the same - 8-pixel offsets. - """ - vrt_path, _ = _build_sidecar_vrt(tmp_path) - window = (8, 8, 56, 56) - - eager = open_geotiff(str(vrt_path), window=window) - - assert eager.shape == (48, 48) - # Pixel size column (a, e) of the rasterio 6-tuple stays constant. - t = eager.attrs.get("transform") - assert t is not None, "windowed sidecar VRT dropped attrs['transform']" - assert t[0] == pytest.approx(0.001) - assert t[4] == pytest.approx(-0.001) - # Origin shifts by 8 pixels: c += 8 * a, f += 8 * e. - # Full-extent origin is c=-120.0, f=45.0. 
- assert t[2] == pytest.approx(-120.0 + 8 * 0.001) - assert t[5] == pytest.approx(45.0 + 8 * -0.001) - - -# --------------------------------------------------------------------------- -# Sanity check: the matrix harness itself flags a metadata regression. -# --------------------------------------------------------------------------- - -def test_assert_metadata_parity_flags_transform_drift(tmp_path): - """Locks the harness behaviour: a transform-only drift between two - otherwise-identical DataArrays fails the parity helper. - - Without this, a regression that silently dropped the transform check - inside ``_assert_metadata_parity`` would let the rest of the matrix - pass with empty assertions. - """ - vrt_path, _ = _build_two_tile_float32_vrt(tmp_path) - da_ref = open_geotiff(str(vrt_path)) - da_bad = da_ref.copy() - da_bad.attrs = dict(da_ref.attrs) - # Mutate the transform's origin_x. The pixels and coords remain - # untouched; only the attr drifts. - old_t = da_bad.attrs["transform"] - da_bad.attrs["transform"] = ( - old_t[0], old_t[1], old_t[2] + 1.0, - old_t[3], old_t[4], old_t[5], - ) - with pytest.raises(AssertionError, match="transform"): - _assert_metadata_parity( - da_ref, da_bad, label="harness-sanity", - expected_dims=("y", "x"), - ) diff --git a/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py b/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py deleted file mode 100644 index 2be7946bf..000000000 --- a/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py +++ /dev/null @@ -1,325 +0,0 @@ -"""Issue #2265: chunked VRT ``missing_sources='raise'`` must raise at build. - -The public docstring on ``read_vrt`` says ``missing_sources='raise'`` (the -public default since #1860) "fails immediately on an unreadable backing -source so a partial mosaic never surfaces silently". 
Before #2265 the -chunked path only honoured that contract at compute time: it ran a -static ``os.path.exists`` sweep at build, recorded misses into -``attrs['vrt_holes']``, and only the per-chunk delayed decode raised -- -which meant a windowed downstream slice past the bad tile could ship a -partial mosaic silently. This module pins the "raise at build" behaviour -and the related scoping invariants: - -* a missing source intersecting the requested window raises at build, -* a missing source outside the requested window does not raise, -* a missing source on a band the caller did not select does not raise, -* ``XRSPATIAL_GEOTIFF_STRICT=1`` forces the raise regardless of kwarg, -* ``missing_sources='warn'`` keeps the existing record-and-warn path. -""" -from __future__ import annotations - -import os -import warnings - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import GeoTIFFFallbackWarning, read_vrt, to_geotiff - - -def _write_present_source(tmp_path: str, name: str, fill: float) -> str: - """Write a 4x4 float32 GeoTIFF source for use in a multi-source VRT.""" - src = os.path.join(tmp_path, name) - arr = np.full((4, 4), fill, dtype=np.float32) - da = xr.DataArray( - arr, dims=("y", "x"), - attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)}, - ) - to_geotiff(da, src) - return src - - -def _make_horizontal_partial_vrt(tmp_path: str) -> str: - """2-source VRT: present source on the left, missing source on the right. - - Layout (rows x cols = 4 x 8): - ``[ present | missing ]``. Used for the basic - ``raise at build`` and window-scoping assertions. 
- """ - src = _write_present_source(tmp_path, "src_2265_h_present.tif", 7.0) - missing = os.path.join(tmp_path, "missing_2265_h.tif") - vrt_path = os.path.join(tmp_path, "partial_2265_h.vrt") - with open(vrt_path, "w") as f: - f.write( - f'\n' - '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' - '\n' - '\n' - f'{src}\n' - '1\n' - '\n' - '\n' - '\n' - '\n' - f'{missing}\n' - '1\n' - '\n' - '\n' - '\n' - '\n' - '\n' - ) - return vrt_path - - -def _make_multiband_partial_vrt(tmp_path: str) -> str: - """2-band VRT where band 1 has a missing source and band 2 is intact. - - Both bands cover the full 4x4 extent with one source each. A - ``band=1`` (0-based, the second band) read should not raise because - the per-chunk decode never touches band 1's missing source. Reading - without a band restriction or with ``band=0`` should raise. - """ - src_b1 = _write_present_source(tmp_path, "src_2265_mb_b1.tif", 11.0) - src_b2 = _write_present_source(tmp_path, "src_2265_mb_b2.tif", 22.0) - missing_b1 = os.path.join(tmp_path, "missing_2265_mb_b1.tif") - vrt_path = os.path.join(tmp_path, "partial_2265_multiband.vrt") - with open(vrt_path, "w") as f: - f.write( - f'\n' - '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' - # Band 1: one present source + one missing source covering - # the same extent. The missing source intersects every - # chunk window so the build must raise when band 1 is in - # scope. - '\n' - '\n' - f'{src_b1}\n' - '1\n' - '\n' - '\n' - '\n' - '\n' - f'{missing_b1}\n' - '1\n' - '\n' - '\n' - '\n' - '\n' - # Band 2: a single present source. ``band=1`` (0-based) on - # the chunked read should pick this band only and skip - # band 1's missing source. 
- '\n' - '\n' - f'{src_b2}\n' - '1\n' - '\n' - '\n' - '\n' - '\n' - '\n' - ) - return vrt_path - - -class TestRaiseAtBuild: - """``missing_sources='raise'`` raises during construction, not compute.""" - - def test_build_raises_immediately(self, tmp_path): - vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError, match="missing_2265_h"): - read_vrt(vrt_path, chunks=4, missing_sources="raise") - - def test_default_raises_at_build(self, tmp_path): - """The public default is ``'raise'`` so dropping the kwarg - must hit the same fast-fail path.""" - vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError): - read_vrt(vrt_path, chunks=4) - - def test_error_message_mentions_opt_in(self, tmp_path): - """The exception text should tell the caller how to opt into - the lenient path. A regression that drops this guidance would - leave callers debugging a bare ``FileNotFoundError`` without - knowing the kwarg toggle exists.""" - vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError) as excinfo: - read_vrt(vrt_path, chunks=4, missing_sources="raise") - msg = str(excinfo.value) - assert "missing_sources='warn'" in msg - assert "partial mosaic" in msg - - -class TestWindowScoping: - """The raise honours the requested window.""" - - def test_window_past_missing_does_not_raise(self, tmp_path): - """A window that touches only the present source still builds - and computes. 
Without this scoping the static raise would be - overzealous compared to the eager path (which decodes only - sources that intersect the window).""" - vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) - result = read_vrt( - vrt_path, chunks=4, window=(0, 0, 4, 4), - missing_sources="raise", - ) - computed = result.compute() - np.testing.assert_array_equal( - np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32), - ) - - def test_window_intersecting_missing_raises(self, tmp_path): - """A window that overlaps the missing tile still raises at build.""" - vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError): - read_vrt( - vrt_path, chunks=4, window=(0, 4, 4, 8), - missing_sources="raise", - ) - - -class TestBandScoping: - """The raise honours ``band=`` restriction.""" - - def test_band_select_skips_other_bands_missing_source(self, tmp_path): - """``band=1`` reads band 2 only; band 1's missing source is - irrelevant to the graph, so the build must not raise.""" - vrt_path = _make_multiband_partial_vrt(str(tmp_path)) - result = read_vrt( - vrt_path, chunks=4, band=1, missing_sources="raise", - ) - computed = result.compute() - np.testing.assert_array_equal( - np.asarray(computed), np.full((4, 4), 22.0, dtype=np.float32), - ) - - def test_band_select_on_missing_band_raises(self, tmp_path): - """``band=0`` selects the band with the missing source so the - build must raise (mirror of the unselected-band test above).""" - vrt_path = _make_multiband_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError): - read_vrt( - vrt_path, chunks=4, band=0, missing_sources="raise", - ) - - def test_no_band_restriction_raises(self, tmp_path): - """Without a ``band=`` restriction, both bands' sources are in - scope and the missing source on band 1 raises at build.""" - vrt_path = _make_multiband_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError): - read_vrt(vrt_path, chunks=4, missing_sources="raise") - - 
-class TestWarnPreserved: - """``missing_sources='warn'`` keeps the record-and-warn behaviour.""" - - def test_warn_records_holes_at_build(self, tmp_path): - """The lenient path must not regress to a build-time raise.""" - vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) - result = read_vrt(vrt_path, chunks=4, missing_sources="warn") - assert "vrt_holes" in result.attrs - assert len(result.attrs["vrt_holes"]) == 1 - assert result.attrs["vrt_holes"][0]["source"].endswith( - "missing_2265_h.tif" - ) - - def test_warn_compute_emits_per_task_warning(self, tmp_path): - """The compute step still warns per task on the lenient path.""" - vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always") - result = read_vrt(vrt_path, chunks=4, missing_sources="warn") - computed = result.compute() - messages = [str(w.message) for w in caught - if isinstance(w.message, GeoTIFFFallbackWarning)] - assert any("missing_2265_h" in msg for msg in messages) - # Present side decodes to 7.0; missing side decodes to NaN. - np.testing.assert_array_equal( - np.asarray(computed)[:, :4], - np.full((4, 4), 7.0, dtype=np.float32), - ) - assert np.all(np.isnan(np.asarray(computed)[:, 4:])) - - -def _make_multi_missing_vrt(tmp_path: str, n_missing: int) -> str: - """VRT with ``n_missing`` missing sources tiling the destination. - - Each missing source covers a distinct 4x4 dst block laid out - horizontally; the VRT's full extent is sized to hold all of them. - Used to pin the multi-source preview behavior of the build-time - raise message. 
- """ - vrt_path = os.path.join(tmp_path, f"partial_2265_multi_{n_missing}.vrt") - width = 4 * n_missing - src_xml = [] - for i in range(n_missing): - missing = os.path.join(tmp_path, f"missing_2265_multi_{i}.tif") - src_xml.append( - '\n' - f'{missing}\n' - '1\n' - '\n' - f'\n' - '\n' - ) - with open(vrt_path, "w") as f: - f.write( - f'\n' - '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' - '\n' - + ''.join(src_xml) + - '\n' - '\n' - ) - return vrt_path - - -class TestMultipleMissingSources: - """The error message previews multiple holes and reports the total.""" - - def test_two_missing_sources_listed_with_count(self, tmp_path): - """All missing sources fit in the preview (n=2 <= preview cap).""" - vrt_path = _make_multi_missing_vrt(str(tmp_path), n_missing=2) - with pytest.raises(FileNotFoundError) as excinfo: - read_vrt(vrt_path, chunks=4, missing_sources="raise") - msg = str(excinfo.value) - assert "missing_2265_multi_0" in msg - assert "missing_2265_multi_1" in msg - assert "2 missing source(s) total" in msg - # Preview cap kicks in only above 3 holes; no "and N more" tail - # should appear for n_missing=2. - assert "more" not in msg.lower() or "and 0 more" not in msg - - def test_many_missing_sources_truncated_with_more_suffix(self, tmp_path): - """Above the preview cap, the message says 'and N more'.""" - n = 5 - vrt_path = _make_multi_missing_vrt(str(tmp_path), n_missing=n) - with pytest.raises(FileNotFoundError) as excinfo: - read_vrt(vrt_path, chunks=4, missing_sources="raise") - msg = str(excinfo.value) - # First few names are listed; the rest collapse into "and N more". - assert "missing_2265_multi_0" in msg - # The last source should NOT be in the preview (it's past the cap). - assert f"missing_2265_multi_{n - 1}" not in msg - # Total count is reported regardless of truncation. - assert f"{n} missing source(s) total" in msg - # The truncation tail names how many more there are. 
- assert "and 2 more" in msg - - -class TestStrictMode: - """``XRSPATIAL_GEOTIFF_STRICT=1`` forces the raise even with ``'warn'``.""" - - def test_strict_overrides_warn_kwarg(self, tmp_path, monkeypatch): - monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1") - vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError): - read_vrt(vrt_path, chunks=4, missing_sources="warn") - - def test_strict_off_warn_still_warns(self, tmp_path, monkeypatch): - """Sanity: without strict mode, ``'warn'`` keeps warning.""" - monkeypatch.delenv("XRSPATIAL_GEOTIFF_STRICT", raising=False) - vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) - result = read_vrt(vrt_path, chunks=4, missing_sources="warn") - assert "vrt_holes" in result.attrs diff --git a/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py b/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py deleted file mode 100644 index eaa1e347b..000000000 --- a/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Chunked-VRT coverage for ``missing_sources`` (issue #1799). - -``test_vrt_missing_sources_policy_1799`` covers the eager (non-chunked) -``read_vrt`` path. The chunked path (``read_vrt(chunks=N)``, dispatching -through ``_read_vrt_chunked``) plumbs ``missing_sources`` separately: - -* Parse-time approximation: a static ``os.path.exists`` sweep over every - source populates ``attrs['vrt_holes']`` on the returned DataArray - before any decode work starts (docstring in ``_backends/vrt.py:344``). -* Decode-time: each per-chunk task receives ``missing_sources`` and the - internal reader applies the same warn/raise policy as the eager path. 
- -A regression dropping either the parse-time sweep or the per-chunk -forward would silently change the contract: - -* ``vrt_holes`` would disappear from the lazy build, breaking callers - that branch on ``"vrt_holes" in da.attrs`` to detect partial mosaics - before scheduling a compute (the contract documented in #1734). -* ``missing_sources='raise'`` could silently degrade to ``'warn'`` (or - vice versa) on the chunked path while the eager path stays correct. - -This module pins both invariants. Tests use a 2-source mosaic where one -source is missing on disk; the present source covers one chunk window -and the missing source covers another, so the warn/raise policy is -exercised against a non-trivial graph. -""" -from __future__ import annotations - -import os -import warnings - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import GeoTIFFFallbackWarning, read_vrt, to_geotiff - - -def _make_partial_vrt(tmp_path) -> tuple[str, str]: - """Build a 2-source VRT with one present + one missing source. - - Returns ``(vrt_path, present_src_path)``. The VRT references the - present source for the left half and a non-existent file for the - right half, so chunked reads against the right half hit the - missing-source decode path. - """ - src = os.path.join(tmp_path, "src_present.tif") - arr = np.full((4, 4), 7.0, dtype=np.float32) - da = xr.DataArray( - arr, dims=("y", "x"), - attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)}, - ) - to_geotiff(da, src) - - missing = os.path.join(tmp_path, "missing.tif") - vrt_path = os.path.join(tmp_path, "partial.vrt") - with open(vrt_path, "w") as f: - f.write( - f'\n' - '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' - '\n' - '\n' - f'{src}\n' - '1\n' - '\n' - '\n' - '\n' - '\n' - f'{missing}\n' - '1\n' - '\n' - '\n' - '\n' - '\n' - '\n' - ) - return vrt_path, src - - -class TestChunkedMissingSourcesWarn: - """``read_vrt(chunks=N, missing_sources='warn')`` records holes at build. 
- - The eager path scans every source at decode time. The chunked path - cannot afford that sweep up front (it would defeat the lazy graph), - so it uses ``os.path.exists`` to populate ``vrt_holes`` at build - time. The compute step still emits per-task warnings for any - missing source that survives. - """ - - def test_vrt_holes_populated_at_build(self, tmp_path): - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - result = read_vrt(vrt_path, chunks=4, missing_sources="warn") - assert "vrt_holes" in result.attrs, ( - "Chunked path must populate vrt_holes at build time so " - "callers can detect partial mosaics without forcing a " - "compute (issue #1734)." - ) - holes = result.attrs["vrt_holes"] - assert len(holes) == 1 - # Pin the full record schema (see ``_backends/vrt.py:608``) so a - # regression in either path that drops or renames a key is - # caught here. - assert set(holes[0].keys()) == {"source", "band", "dst_rect", "error"} - assert holes[0]["source"].endswith("missing.tif") - assert holes[0]["band"] == 1 - assert holes[0]["dst_rect"] == (4, 0, 4, 4) - - def test_compute_emits_per_task_warning(self, tmp_path): - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always") - result = read_vrt(vrt_path, chunks=4, missing_sources="warn") - computed = result.compute() - messages = [str(w.message) for w in caught - if isinstance(w.message, GeoTIFFFallbackWarning)] - assert any("missing.tif" in msg for msg in messages), ( - f"Expected GeoTIFFFallbackWarning naming the missing " - f"source after compute, got messages: {messages!r}" - ) - # Present-source chunk decodes its 7.0 fill; missing-source - # chunk decodes to NaN under the lenient policy on float32. - # Pin both halves so a regression in the lenient path that - # wiped the present side or changed the missing-side fill would - # surface. 
- np.testing.assert_array_equal( - np.asarray(computed)[:, :4], np.full((4, 4), 7.0, dtype=np.float32), - ) - assert np.all(np.isnan(np.asarray(computed)[:, 4:])) - - def test_chunks_tuple_form(self, tmp_path): - """Tuple ``chunks=(h, w)`` threads through identically.""" - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - result = read_vrt( - vrt_path, chunks=(2, 4), missing_sources="warn", - ) - assert "vrt_holes" in result.attrs - # 2 chunks vertically * 2 chunks horizontally = 4 tasks. - # The missing source is in column 1 (cols 4-7); only the right - # half should produce warning records, but vrt_holes is a - # parse-time sweep so it records the source once regardless. - assert len(result.attrs["vrt_holes"]) == 1 - - -class TestChunkedMissingSourcesRaise: - """``read_vrt(chunks=N, missing_sources='raise')`` fails at build. - - The docstring on ``read_vrt`` promises that the default - ``'raise'`` "fails immediately on an unreadable backing source so a - partial mosaic never surfaces silently". Issue #2265 closes the - chunked-path gap: the static ``os.path.exists`` sweep that already - runs to populate ``vrt_holes`` now also raises up front when the - policy is ``'raise'`` and the sweep finds any hole intersecting the - requested window. Without this guard the build would succeed and - only ``result.compute()`` on a hole-touching chunk would raise, so - a downstream pipeline that windowed past the bad tile could ship a - partial mosaic silently. - """ - - def test_build_raises_immediately(self, tmp_path): - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError, match="missing.tif"): - read_vrt(vrt_path, chunks=4, missing_sources="raise") - - def test_build_raise_message_mentions_policy_kwarg(self, tmp_path): - """The raise tells the caller how to opt into the lenient path. 
- - Lock in the kwarg-naming guidance in the error string so a - future refactor that drops or renames the suggestion regresses - the user-facing message rather than silently churning it. - """ - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError) as excinfo: - read_vrt(vrt_path, chunks=4, missing_sources="raise") - assert "missing_sources='warn'" in str(excinfo.value) - - def test_window_past_missing_succeeds_under_raise(self, tmp_path): - """A window that does not touch a missing source still builds. - - The static sweep is scoped to the windowed extent. If the - window covers only present sources, the chunked graph has - nothing to raise about and ``compute()`` returns the present - tile. This preserves the contract that ``missing_sources`` - only fires when the requested region actually depends on a - missing source. - """ - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - # Window covers only the present source (cols 0-4). - result = read_vrt( - vrt_path, chunks=4, window=(0, 0, 4, 4), - missing_sources="raise", - ) - computed = result.compute() - np.testing.assert_array_equal( - np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32), - ) - - def test_band_selection_skips_other_bands_holes(self, tmp_path): - """A ``band=`` restriction scopes the static raise to that band. - - Mirrors the eager path: only sources on the selected band get - decoded, so a missing source on an unselected band should not - block the build. The partial VRT in this module is single-band - so the only way to exercise this is to confirm that the - single-band default still raises (sanity gate) -- the - cross-band gating is covered indirectly by the broader VRT - test matrix. - """ - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - # Selecting band 0 (the only band) still touches the missing - # source so the build raises. 
The negative case (a missing - # source on a different band than the selected one) is hard to - # build without a multi-band VRT helper; the band_num gate in - # ``_read_vrt_chunked`` is exercised by the standalone test - # ``test_chunked_band_selection_skips_other_bands_holes`` below. - with pytest.raises(FileNotFoundError): - read_vrt( - vrt_path, chunks=4, band=0, missing_sources="raise", - ) - - -class TestChunkedMissingSourcesDefault: - """The default ``missing_sources`` on chunked reads is ``'raise'``. - - The public ``read_vrt`` default flipped to ``'raise'`` in #1843 / - #1860 and the chunked path now honours it at build time (#2265). - A regression flipping the chunked default to ``'warn'`` would - silently produce partial mosaics for callers who don't pass the - kwarg. - """ - - def test_chunked_default_raises_at_build(self, tmp_path): - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - with pytest.raises(FileNotFoundError, match="missing.tif"): - read_vrt(vrt_path, chunks=4) - - -class TestChunkedMissingSourcesValidation: - """Invalid ``missing_sources`` policies are rejected at entry.""" - - def test_invalid_policy_raises_at_build(self, tmp_path): - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - with pytest.raises(ValueError, match="missing_sources"): - read_vrt(vrt_path, chunks=4, missing_sources="ignore") - - def test_invalid_policy_raises_without_chunks_too(self, tmp_path): - """Sanity: the eager path also rejects the bad value. 
Pinning - cross-mode parity means callers see the same error whether or - not they pass ``chunks=``.""" - vrt_path, _ = _make_partial_vrt(str(tmp_path)) - with pytest.raises(ValueError, match="missing_sources"): - read_vrt(vrt_path, missing_sources="ignore") diff --git a/xrspatial/geotiff/tests/test_vrt_finalization_parity_2162.py b/xrspatial/geotiff/tests/test_vrt_finalization_parity_2162.py deleted file mode 100644 index 294d836c5..000000000 --- a/xrspatial/geotiff/tests/test_vrt_finalization_parity_2162.py +++ /dev/null @@ -1,667 +0,0 @@ -"""Cross-backend parity for the VRT finalization pipeline (issue #2180). - -Wave 3 of #2162 routed the VRT eager and chunked paths through -``_finalize_lazy_read_attrs`` from #2177. Before the migration the two -sites built ``GeoTIFFMetadata`` from VRT internals by hand and called -``metadata_to_attrs`` directly, bypassing the shared -``_validate_read_geo_info`` / ``_populate_attrs_from_geo_info`` block -the other backends share. - -The tests below pin parity for the attrs the helper now stamps: - -* VRT eager attrs match eager numpy attrs (``open_geotiff``) for - single-source VRTs that mirror a plain TIFF. -* VRT chunked attrs match dask numpy attrs (``read_geotiff_dask``) for - the same single-source VRTs. -* ``band_nodata='first'`` paths still produce the per-band attrs - pinned by ``test_vrt_band_nodata_1598``. -* ``missing_sources='warn'`` still surfaces ``attrs['vrt_holes']`` on - the eager VRT path (the chunked path's parse-time hole scan is - covered by ``test_open_geotiff_missing_sources_1810``). -* ``attrs['georef_status']`` matches across VRT and non-VRT paths for - the five canonical states (``full``, ``transform_only``, - ``crs_only``, ``none``, ``rotated_dropped``). - -VRT-only attrs that the non-VRT path cannot produce (e.g. -``vrt_holes``) and the windowed-transform shift are not part of the -parity assertion -- they are pinned by the regression tests cited -above. 
A few attrs the non-VRT path emits (``extra_tags``, -``gdal_metadata``, resolution tags) are likewise dropped from the -comparison because the VRT path intentionally omits them; the test -filters those keys explicitly. -""" -from __future__ import annotations - -import warnings - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, read_geotiff_dask, read_vrt, to_geotiff -from xrspatial.geotiff._attrs import (GEOREF_STATUS_CRS_ONLY, GEOREF_STATUS_FULL, - GEOREF_STATUS_NONE, GEOREF_STATUS_ROTATED_DROPPED, - GEOREF_STATUS_TRANSFORM_ONLY) -from xrspatial.geotiff._coords import _NO_GEOREF_KEY -from xrspatial.geotiff._writer import write - -tifffile = pytest.importorskip("tifffile") - - -# Attrs the VRT path is documented to omit when the non-VRT path emits -# them. The parity comparisons drop these keys before checking equality -# so the per-backend documented surface stays in scope. -_NON_VRT_ONLY_KEYS = frozenset({ - 'extra_tags', - 'image_description', - 'extra_samples', - 'gdal_metadata', - 'gdal_metadata_xml', - 'x_resolution', - 'y_resolution', - 'resolution_unit', - 'colormap', -}) - - -# Attrs that differ in textual representation between the GeoTIFF writer -# and the literal VRT XML even when they encode the same logical value. -# ``crs_wkt`` carries pyproj's expanded WKT in the TIFF path but the -# verbatim VRT XML body in the VRT path; ``transform`` shifts by a -# half-pixel between the two writers' AREA_OR_POINT conventions. The -# parity test compares them separately via EPSG / origin checks rather -# than insisting on byte-identical strings. -_REPRESENTATION_KEYS = frozenset({'crs_wkt', 'transform'}) - - -def _shared_canonical_attrs(attrs: dict) -> dict: - """Return the helper-emitted attrs that should match across writers. - - Drops: - * The non-VRT TIFF-tag attrs the VRT path intentionally omits. 
- * The representation-sensitive attrs (``crs_wkt``, ``transform``) - that differ in literal form but encode the same logical value. - ``crs`` (EPSG integer) carries the same information for the WKT - comparison; the transform half-pixel shift is exercised by the - regression tests for the underlying readers. - """ - return { - k: v for k, v in attrs.items() - if k not in _NON_VRT_ONLY_KEYS and k not in _REPRESENTATION_KEYS - } - - -def _strip_non_vrt_keys(attrs: dict) -> dict: - return {k: v for k, v in attrs.items() if k not in _NON_VRT_ONLY_KEYS} - - -def _write_single_source_vrt(tiff_path, vrt_path, *, width, height, - dtype='Float32', nodata=None, - geo_transform='0.0, 1.0, 0.0, 0.0, 0.0, -1.0', - srs=None): - """Write a one-band VRT pointing at ``tiff_path``. - - Mirrors the writer in ``test_vrt_band_nodata_1598`` but parameterises - the geo bits so the same helper can produce ``full`` / - ``transform_only`` / ``crs_only`` / ``none`` / ``rotated_dropped`` - VRTs. - """ - nodata_xml = ( - f" {nodata}\n" if nodata is not None - else '' - ) - srs_xml = ( - f' {srs}\n' if srs is not None - else '' - ) - gt_xml = ( - f' {geo_transform}\n' - if geo_transform is not None - else '' - ) - vrt_xml = ( - f'\n' - f'{gt_xml}' - f'{srs_xml}' - f' \n' - f'{nodata_xml}' - f' \n' - f' {tiff_path}\n' - f' 1\n' - f' \n' - f' \n' - f' \n' - f' \n' - f'\n' - ) - with open(vrt_path, 'w') as f: - f.write(vrt_xml) - - -# --------------------------------------------------------------------------- -# Fixture builders for the five georef states. -# --------------------------------------------------------------------------- -# -# Each builder writes a backing TIFF and a single-source VRT that wraps -# it with the same transform / CRS, then returns both paths. The VRT -# path's ``georef_status`` should match the TIFF path's because the VRT -# shares the same geometry. 
- -_WGS84_WKT = ( - 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,' - 'AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,' - 'AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,' - 'AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]' -) - - -def _make_full_pair(tmp_path, name): - """Full georef: float coords + CRS.""" - tiff = str(tmp_path / f'{name}_tiff.tif') - vrt = str(tmp_path / f'{name}.vrt') - da = xr.DataArray( - np.zeros((4, 4), dtype=np.float32), - coords={ - 'y': np.array([200.0, 199.0, 198.0, 197.0]), - 'x': np.array([100.0, 101.0, 102.0, 103.0]), - }, - dims=('y', 'x'), - attrs={'crs': 4326}, - ) - to_geotiff(da, tiff) - _write_single_source_vrt( - tiff, vrt, width=4, height=4, - geo_transform='100.0, 1.0, 0.0, 200.0, 0.0, -1.0', - srs=_WGS84_WKT, - ) - return tiff, vrt - - -def _make_transform_only_pair(tmp_path, name): - """Float coords, no CRS.""" - tiff = str(tmp_path / f'{name}_tiff.tif') - vrt = str(tmp_path / f'{name}.vrt') - da = xr.DataArray( - np.zeros((4, 4), dtype=np.float32), - coords={ - 'y': np.array([200.0, 199.0, 198.0, 197.0]), - 'x': np.array([100.0, 101.0, 102.0, 103.0]), - }, - dims=('y', 'x'), - ) - to_geotiff(da, tiff) - _write_single_source_vrt( - tiff, vrt, width=4, height=4, - geo_transform='100.0, 1.0, 0.0, 200.0, 0.0, -1.0', - srs=None, - ) - return tiff, vrt - - -def _make_crs_only_pair(tmp_path, name): - """No-georef marker + CRS.""" - tiff = str(tmp_path / f'{name}_tiff.tif') - vrt = str(tmp_path / f'{name}.vrt') - da = xr.DataArray( - np.zeros((4, 4), dtype=np.float32), - coords={ - 'y': np.arange(4, dtype=np.int64), - 'x': np.arange(4, dtype=np.int64), - }, - dims=('y', 'x'), - attrs={_NO_GEOREF_KEY: True, 'crs': 4326}, - ) - to_geotiff(da, tiff) - _write_single_source_vrt( - tiff, vrt, width=4, height=4, - geo_transform=None, - srs=_WGS84_WKT, - ) - return tiff, vrt - - -def _make_none_pair(tmp_path, name): - """No CRS, no transform.""" - tiff = str(tmp_path / 
f'{name}_tiff.tif') - vrt = str(tmp_path / f'{name}.vrt') - arr = np.zeros((4, 4), dtype=np.float32) - tifffile.imwrite( - tiff, arr, photometric='minisblack', planarconfig='contig', - metadata=None, - ) - _write_single_source_vrt( - tiff, vrt, width=4, height=4, - geo_transform=None, - srs=None, - ) - return tiff, vrt - - -def _make_rotated_pair(tmp_path, name): - """Rotated VRT with ``allow_rotated=True``: lands at - ``rotated_dropped``.""" - tiff = str(tmp_path / f'{name}_tiff.tif') - vrt = str(tmp_path / f'{name}.vrt') - arr = np.arange(16, dtype=np.uint16).reshape(4, 4) - write(arr, tiff, compression='none', tiled=False) - _write_single_source_vrt( - tiff, vrt, width=4, height=4, dtype='UInt16', - geo_transform='0.0, 1.0, 0.5, 0.0, 0.5, -1.0', - srs=None, - ) - return tiff, vrt - - -# --------------------------------------------------------------------------- -# Parity tests: VRT eager attrs vs eager numpy attrs. -# --------------------------------------------------------------------------- - - -def test_vrt_eager_full_matches_open_geotiff(tmp_path): - """A single-source VRT wrapping a ``full`` TIFF emits the same - canonical helper-stamped attrs as the underlying TIFF read via - ``open_geotiff``. - - The helper-emitted attrs (``crs`` / ``georef_status`` / contract - version / nodata lifecycle) must match. ``crs_wkt`` and - ``transform`` differ in textual representation between the two - writers and are compared separately via EPSG / origin checks - below; pinning byte-identical strings would test the writer, not - the helper migration. 
- """ - tiff, vrt = _make_full_pair(tmp_path, 'full_2180') - tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs)) - vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs)) - assert tiff_attrs == vrt_attrs, ( - f"TIFF/VRT attrs diverged:\n" - f" tiff only: {set(tiff_attrs) - set(vrt_attrs)}\n" - f" vrt only: {set(vrt_attrs) - set(tiff_attrs)}\n" - f" shared keys with different values: " - f"{[k for k in set(tiff_attrs) & set(vrt_attrs) if tiff_attrs[k] != vrt_attrs[k]]}" - ) - # Logical CRS equality across the two writers (different WKT text, - # same EPSG code). - full_tiff_attrs = dict(open_geotiff(tiff).attrs) - full_vrt_attrs = dict(read_vrt(vrt).attrs) - assert full_tiff_attrs['crs'] == full_vrt_attrs['crs'] == 4326 - # Both paths emit a 6-tuple transform with the same length. - assert len(full_tiff_attrs['transform']) == 6 - assert len(full_vrt_attrs['transform']) == 6 - - -def test_vrt_eager_transform_only_matches_open_geotiff(tmp_path): - tiff, vrt = _make_transform_only_pair(tmp_path, 'tonly_2180') - tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs)) - vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs)) - assert tiff_attrs == vrt_attrs - assert tiff_attrs['georef_status'] == GEOREF_STATUS_TRANSFORM_ONLY - - -def test_vrt_eager_crs_only_matches_open_geotiff(tmp_path): - tiff, vrt = _make_crs_only_pair(tmp_path, 'crsonly_2180') - tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs)) - vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs)) - assert tiff_attrs == vrt_attrs - assert tiff_attrs['georef_status'] == GEOREF_STATUS_CRS_ONLY - - -def test_vrt_eager_none_matches_open_geotiff(tmp_path): - tiff, vrt = _make_none_pair(tmp_path, 'none_2180') - tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs)) - vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs)) - assert tiff_attrs == vrt_attrs - assert tiff_attrs['georef_status'] == GEOREF_STATUS_NONE - - -def 
test_vrt_eager_rotated_dropped_matches_open_geotiff(tmp_path): - """The rotated branch is the VRT-specific path: a non-zero skew on - the GDAL geotransform lands in ``rotated_dropped`` and the helper - drops ``crs`` / ``transform`` / ``crs_wkt`` while emitting - ``rotated_affine`` plus the no-georef marker. The non-VRT side does - not have a way to write a rotated TIFF cleanly through ``to_geotiff`` - (axis-aligned only); the assertions here pin the attrs surface - against the canonical ``georef_status`` values rather than a - non-VRT TIFF parity check. - """ - _, vrt = _make_rotated_pair(tmp_path, 'rot_2180') - attrs = dict(read_vrt(vrt, allow_rotated=True).attrs) - assert attrs['georef_status'] == GEOREF_STATUS_ROTATED_DROPPED - assert attrs.get(_NO_GEOREF_KEY) is True - assert 'rotated_affine' in attrs - assert attrs.get('crs') is None - assert attrs.get('crs_wkt') is None - assert 'transform' not in attrs - - -# --------------------------------------------------------------------------- -# Parity tests: VRT chunked attrs vs dask numpy attrs. 
-# --------------------------------------------------------------------------- - - -def test_vrt_chunked_full_matches_dask(tmp_path): - tiff, vrt = _make_full_pair(tmp_path, 'full_chunked_2180') - tiff_attrs = _shared_canonical_attrs( - dict(read_geotiff_dask(tiff, chunks=2).attrs) - ) - vrt_attrs = _shared_canonical_attrs( - dict(read_vrt(vrt, chunks=2).attrs) - ) - assert tiff_attrs == vrt_attrs - - -def test_vrt_chunked_transform_only_matches_dask(tmp_path): - tiff, vrt = _make_transform_only_pair(tmp_path, 'tonly_chunked_2180') - tiff_attrs = _shared_canonical_attrs( - dict(read_geotiff_dask(tiff, chunks=2).attrs) - ) - vrt_attrs = _shared_canonical_attrs( - dict(read_vrt(vrt, chunks=2).attrs) - ) - assert tiff_attrs == vrt_attrs - - -def test_vrt_chunked_crs_only_matches_dask(tmp_path): - tiff, vrt = _make_crs_only_pair(tmp_path, 'crsonly_chunked_2180') - tiff_attrs = _shared_canonical_attrs( - dict(read_geotiff_dask(tiff, chunks=2).attrs) - ) - vrt_attrs = _shared_canonical_attrs( - dict(read_vrt(vrt, chunks=2).attrs) - ) - assert tiff_attrs == vrt_attrs - - -def test_vrt_chunked_none_matches_dask(tmp_path): - tiff, vrt = _make_none_pair(tmp_path, 'none_chunked_2180') - tiff_attrs = _shared_canonical_attrs( - dict(read_geotiff_dask(tiff, chunks=2).attrs) - ) - vrt_attrs = _shared_canonical_attrs( - dict(read_vrt(vrt, chunks=2).attrs) - ) - assert tiff_attrs == vrt_attrs - - -def test_vrt_chunked_rotated_dropped(tmp_path): - _, vrt = _make_rotated_pair(tmp_path, 'rot_chunked_2180') - attrs = dict(read_vrt(vrt, allow_rotated=True, chunks=2).attrs) - assert attrs['georef_status'] == GEOREF_STATUS_ROTATED_DROPPED - assert attrs.get(_NO_GEOREF_KEY) is True - assert 'rotated_affine' in attrs - - -# --------------------------------------------------------------------------- -# band_nodata paths: the ``'first'`` opt-out keeps the legacy -# flatten-to-band-0 semantics. Pin per-band attrs on a mixed VRT. 
-# --------------------------------------------------------------------------- - - -def _write_two_band_per_band_nodata_vrt(tmp_path): - band0 = np.array([[1, 2], [3, 65535]], dtype=np.uint16) - band1 = np.array([[7, 8], [9, 65000]], dtype=np.uint16) - p0 = str(tmp_path / 'vrt_band0_2180.tif') - p1 = str(tmp_path / 'vrt_band1_2180.tif') - write(band0, p0, nodata=65535, compression='none', tiled=False) - write(band1, p1, nodata=65000, compression='none', tiled=False) - - vrt_path = str(tmp_path / 'two_band_per_band_nodata_2180.vrt') - vrt_xml = f""" - 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 - - 65535 - - {p0} - 1 - - - - - - 65000 - - {p1} - 1 - - - - -""" - with open(vrt_path, 'w') as f: - f.write(vrt_xml) - return vrt_path - - -def test_band_nodata_first_band_attrs(tmp_path): - """``band=1`` with ``band_nodata='first'`` surfaces band 1's - sentinel on attrs and masks against it. Pins the per-band selection - survives the migration.""" - vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path) - r = read_vrt(vrt_path, band=1, band_nodata='first') - assert r.attrs['nodata'] == 65000.0 - assert r.attrs['masked_nodata'] is True - assert np.isnan(r.values[1, 1]) - assert r.attrs.get('nodata_pixels_present') is True - - -def test_band_nodata_chunked_first_band_attrs(tmp_path): - """The chunked path threads the same per-band sentinel onto attrs.""" - vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path) - r = read_vrt(vrt_path, band=1, band_nodata='first', chunks=2) - assert r.attrs['nodata'] == 65000.0 - assert r.attrs['masked_nodata'] is True - # Chunked path leaves ``nodata_pixels_present`` unset by contract. - assert 'nodata_pixels_present' not in r.attrs - - -def _make_no_sentinel_vrt(tmp_path, name): - """A single-band float VRT with no ````. 
Used to pin the - ``dtype=`` + no-sentinel branch of ``_finalize_lazy_read_attrs`` - (``caller_dtype`` set, ``nodata`` is None -> attr stays absent).""" - tiff = str(tmp_path / f'{name}_tiff.tif') - vrt = str(tmp_path / f'{name}.vrt') - arr = np.arange(16, dtype=np.float32).reshape(4, 4) - write(arr, tiff, compression='none', tiled=False) - _write_single_source_vrt( - tiff, vrt, width=4, height=4, - geo_transform='0.0, 1.0, 0.0, 0.0, 0.0, -1.0', - nodata=None, - ) - return vrt - - -def test_dtype_cast_no_sentinel_omits_attr_eager(tmp_path): - """Eager VRT with ``dtype=`` and no declared sentinel: the helper - receives ``caller_dtype=np.float64`` but ``nodata is None``, so - ``nodata_dtype_cast`` stays absent. Pins the symmetric branch the - dask parity test covers for non-VRT.""" - vrt = _make_no_sentinel_vrt(tmp_path, 'no_sentinel_eager_2180') - r = read_vrt(vrt, dtype=np.float64) - assert r.dtype == np.float64 - assert 'nodata' not in r.attrs - assert 'masked_nodata' not in r.attrs - assert 'nodata_dtype_cast' not in r.attrs - - -def test_dtype_cast_no_sentinel_omits_attr_chunked(tmp_path): - """Chunked VRT with ``dtype=`` and no declared sentinel: same - ``nodata_dtype_cast`` pop as the eager branch.""" - vrt = _make_no_sentinel_vrt(tmp_path, 'no_sentinel_chunked_2180') - r = read_vrt(vrt, dtype=np.float64, chunks=2) - assert r.dtype == np.float64 - assert 'nodata' not in r.attrs - assert 'masked_nodata' not in r.attrs - assert 'nodata_dtype_cast' not in r.attrs - - -# --------------------------------------------------------------------------- -# missing_sources paths: ``warn`` surfaces ``vrt_holes`` on the eager -# path; the chunked parse-time scan also surfaces it. 
-# --------------------------------------------------------------------------- - - -def test_missing_sources_eager_surfaces_vrt_holes(tmp_path): - """The eager VRT path keeps populating ``attrs['vrt_holes']`` after - the migration, even though the field rides outside the synthesised - ``GeoInfo`` and through ``attrs_in`` on the helper.""" - tiff_path = str(tmp_path / 'present_2180.tif') - arr = np.arange(16, dtype=np.float32).reshape(4, 4) - write(arr, tiff_path, compression='none', tiled=False) - - missing_path = str(tmp_path / 'missing_2180.tif') # never created - vrt_path = str(tmp_path / 'mosaic_2180.vrt') - vrt_xml = f""" - 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 - - - {tiff_path} - 1 - - - - - {missing_path} - 1 - - - - -""" - with open(vrt_path, 'w') as f: - f.write(vrt_xml) - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - r = read_vrt(vrt_path, missing_sources='warn') - assert 'vrt_holes' in r.attrs - holes = r.attrs['vrt_holes'] - assert isinstance(holes, list) and len(holes) >= 1 - # Each hole entry has the documented shape. 
- for hole in holes: - assert 'source' in hole - assert 'band' in hole - assert 'dst_rect' in hole - assert 'error' in hole - - -def test_missing_sources_chunked_surfaces_vrt_holes(tmp_path): - """Chunked path's parse-time existence sweep still populates - ``attrs['vrt_holes']`` after the migration.""" - tiff_path = str(tmp_path / 'present_chunked_2180.tif') - arr = np.arange(16, dtype=np.float32).reshape(4, 4) - write(arr, tiff_path, compression='none', tiled=False) - - missing_path = str(tmp_path / 'missing_chunked_2180.tif') - vrt_path = str(tmp_path / 'mosaic_chunked_2180.vrt') - vrt_xml = f""" - 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 - - - {tiff_path} - 1 - - - - - {missing_path} - 1 - - - - -""" - with open(vrt_path, 'w') as f: - f.write(vrt_xml) - r = read_vrt(vrt_path, missing_sources='warn', chunks=2) - assert 'vrt_holes' in r.attrs - holes = r.attrs['vrt_holes'] - assert isinstance(holes, list) and len(holes) >= 1 - - -# --------------------------------------------------------------------------- -# georef_status parity across the five states between VRT eager, -# VRT chunked, non-VRT eager, and non-VRT chunked. -# --------------------------------------------------------------------------- - - -_STATUS_PAIRS = [ - pytest.param(_make_full_pair, GEOREF_STATUS_FULL, False, id="full"), - pytest.param( - _make_transform_only_pair, GEOREF_STATUS_TRANSFORM_ONLY, - False, id="transform_only", - ), - pytest.param( - _make_crs_only_pair, GEOREF_STATUS_CRS_ONLY, - False, id="crs_only", - ), - pytest.param(_make_none_pair, GEOREF_STATUS_NONE, False, id="none"), - pytest.param( - _make_rotated_pair, GEOREF_STATUS_ROTATED_DROPPED, True, - id="rotated_dropped", - ), -] - - -@pytest.mark.parametrize("pair_factory,expected_status,allow_rotated", - _STATUS_PAIRS) -def test_georef_status_eager_parity(tmp_path, pair_factory, expected_status, - allow_rotated): - """VRT eager and (where applicable) non-VRT eager agree on - ``georef_status``. 
The rotated VRT case has no non-VRT counterpart - through ``to_geotiff``, so the test pins the VRT value alone.""" - tiff, vrt = pair_factory(tmp_path, f'georef_eager_{expected_status}') - kwargs = {'allow_rotated': True} if allow_rotated else {} - vrt_status = read_vrt(vrt, **kwargs).attrs.get('georef_status') - assert vrt_status == expected_status - if not allow_rotated: - tiff_status = open_geotiff(tiff, **kwargs).attrs.get('georef_status') - assert tiff_status == expected_status - assert vrt_status == tiff_status - - -@pytest.mark.parametrize("pair_factory,expected_status,allow_rotated", - _STATUS_PAIRS) -def test_georef_status_chunked_parity(tmp_path, pair_factory, expected_status, - allow_rotated): - """VRT chunked and non-VRT chunked agree on ``georef_status``.""" - tiff, vrt = pair_factory(tmp_path, f'georef_chunked_{expected_status}') - kwargs = {'allow_rotated': True} if allow_rotated else {} - vrt_status = read_vrt(vrt, chunks=2, **kwargs).attrs.get('georef_status') - assert vrt_status == expected_status - if not allow_rotated: - tiff_status = read_geotiff_dask( - tiff, chunks=2, **kwargs - ).attrs.get('georef_status') - assert tiff_status == expected_status - assert vrt_status == tiff_status - - -# --------------------------------------------------------------------------- -# Eager/chunked VRT internal parity: the same VRT read eagerly and -# chunked should agree on the canonical attrs (modulo the documented -# absence of ``nodata_pixels_present`` on lazy reads). 
-# --------------------------------------------------------------------------- - - -_VRT_FACTORIES = [ - pytest.param(_make_full_pair, False, id="full"), - pytest.param(_make_transform_only_pair, False, id="transform_only"), - pytest.param(_make_crs_only_pair, False, id="crs_only"), - pytest.param(_make_none_pair, False, id="none"), - pytest.param(_make_rotated_pair, True, id="rotated_dropped"), -] - - -@pytest.mark.parametrize("pair_factory,allow_rotated", _VRT_FACTORIES) -def test_vrt_eager_chunked_internal_parity(tmp_path, pair_factory, - allow_rotated): - """Eager and chunked VRT reads of the same fixture agree on the - shared canonical attrs (``crs`` / ``crs_wkt`` / ``transform`` / - ``georef_status`` / contract version). The lazy contract from - #2135 leaves ``nodata_pixels_present`` unset on chunked output, so - the comparison drops that key.""" - _, vrt = pair_factory(tmp_path, 'internal_parity_2180') - kwargs = {'allow_rotated': True} if allow_rotated else {} - eager_attrs = dict(read_vrt(vrt, **kwargs).attrs) - chunked_attrs = dict(read_vrt(vrt, chunks=2, **kwargs).attrs) - eager_attrs.pop('nodata_pixels_present', None) - chunked_attrs.pop('nodata_pixels_present', None) - assert eager_attrs == chunked_attrs diff --git a/xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py b/xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py deleted file mode 100644 index a43f94a45..000000000 --- a/xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py +++ /dev/null @@ -1,83 +0,0 @@ -"""Regression test for #1843: the internal ``read_vrt`` in ``_vrt.py`` -defaults to ``missing_sources='raise'`` so an unreadable source halts -the call instead of leaving a silent zero-fill hole on integer rasters. - -Callers wanting the historical lenient behaviour pass -``missing_sources='warn'`` explicitly. 
The strict-mode env var -``XRSPATIAL_GEOTIFF_STRICT=1`` continues to force-raise across the -whole module (orthogonal axis, not affected by this change). -""" -from __future__ import annotations - -import pytest - -from xrspatial.geotiff import GeoTIFFFallbackWarning -from xrspatial.geotiff._vrt import read_vrt as _read_vrt_internal - - -def _write_missing_source_vrt(path): - path.write_text( - '\n' - ' \n' - ' \n' - ' missing_1843.tif' - '\n' - ' 1\n' - ' \n' - ' \n' - ' \n' - ' \n' - '\n' - ) - - -def test_read_vrt_default_raises_on_unreadable_source(tmp_path): - """Without an explicit ``missing_sources`` kwarg, an unreadable - backing source must raise rather than silently zero-fill. - - This is the behaviour change from #1843. Before this commit the - default was ``'warn'`` and a missing ``Byte`` tile produced a hole - of zero pixels that was indistinguishable from real data unless - the caller checked ``attrs['vrt_holes']``. - """ - vrt = tmp_path / "tmp_1843_default_raise.vrt" - _write_missing_source_vrt(vrt) - - with pytest.raises((OSError, ValueError)): - _read_vrt_internal(str(vrt)) - - -def test_read_vrt_explicit_warn_preserves_lenient_behaviour(tmp_path): - """``missing_sources='warn'`` is still the escape hatch for callers - that want partial mosaics with ``vrt.holes`` populated. - - Pinning the lenient path here keeps the historical contract - available to callers who opt in. The warning and the hole record - must both still surface. 
- """ - vrt = tmp_path / "tmp_1843_explicit_warn.vrt" - _write_missing_source_vrt(vrt) - - with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): - arr, parsed = _read_vrt_internal(str(vrt), missing_sources='warn') - - assert arr.shape == (2, 2) - assert len(parsed.holes) == 1 - assert parsed.holes[0]['source'].endswith('missing_1843.tif') - - -def test_read_vrt_strict_env_still_raises_under_warn(monkeypatch, tmp_path): - """``XRSPATIAL_GEOTIFF_STRICT=1`` continues to force-raise even - when the caller explicitly asks for the lenient ``'warn'`` policy. - - The strict env var is a module-wide override (see #1662); it must - still win over per-call ``missing_sources='warn'`` so CI runs with - strict mode catch partial mosaics regardless of caller settings. - """ - vrt = tmp_path / "tmp_1843_strict_env.vrt" - _write_missing_source_vrt(vrt) - - monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1") - - with pytest.raises((OSError, ValueError)): - _read_vrt_internal(str(vrt), missing_sources='warn') diff --git a/xrspatial/geotiff/tests/vrt/test_missing_sources.py b/xrspatial/geotiff/tests/vrt/test_missing_sources.py index 0e41826ec..a0399e158 100644 --- a/xrspatial/geotiff/tests/vrt/test_missing_sources.py +++ b/xrspatial/geotiff/tests/vrt/test_missing_sources.py @@ -16,14 +16,22 @@ * Any other value raises ``ValueError`` naming the bad kwarg and echoing the bad value via ``repr()``. -The companion file ``test_vrt_missing_sources_default_raise_1843.py`` -stays in place for now: it exercises the internal -``xrspatial.geotiff._vrt.read_vrt`` entry point and the -``XRSPATIAL_GEOTIFF_STRICT=1`` env-var override, neither of which is in -this module's surface. +Also folds the VRT-tail missing-sources residue (cluster 13, #2437): + +* Internal ``_vrt.read_vrt`` entry point default-raise + explicit-warn + + ``XRSPATIAL_GEOTIFF_STRICT=1`` override (was + ``test_vrt_missing_sources_default_raise_1843.py``). 
+* Public ``read_vrt`` / ``open_geotiff('.vrt')`` default-raise + + explicit-warn (was ``test_read_vrt_default_missing_sources_1860.py``). +* Chunked-path missing-source policy: ``vrt_holes`` at build, + raise-at-build, per-task compute warnings, window / band scoping, + multi-source error preview (was + ``test_vrt_chunked_missing_sources_1799.py`` and + ``test_vrt_chunked_missing_raise_at_build_2265.py``). """ from __future__ import annotations +import os import warnings import numpy as np @@ -36,6 +44,7 @@ read_vrt, to_geotiff, ) +from xrspatial.geotiff._vrt import read_vrt as _internal_read_vrt PRESENT_FILL = 7.0 @@ -315,3 +324,571 @@ def test_eager_byte_invalid_policy(self, tmp_path): vrt = _write_byte_missing_vrt(tmp_path) with pytest.raises(ValueError, match="missing_sources"): read_vrt(vrt, missing_sources="ignore") + + +# =========================================================================== +# Internal ``_vrt.read_vrt`` entry point (was +# test_vrt_missing_sources_default_raise_1843.py). +# +# The public matrix above exercises the package-level ``read_vrt`` / +# ``open_geotiff`` surface. These cases pin the internal +# ``xrspatial.geotiff._vrt.read_vrt`` entry point directly, including the +# ``XRSPATIAL_GEOTIFF_STRICT=1`` module-wide override that wins over a +# per-call ``missing_sources='warn'``. +# =========================================================================== + + +def _write_internal_missing_source_vrt(path): + """All-missing 2x2 Byte VRT for the internal-entry-point cases.""" + path.write_text( + '\n' + ' \n' + ' \n' + ' missing_1843.tif' + '\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ) + + +class TestInternalEntryPointMissingSources: + """``xrspatial.geotiff._vrt.read_vrt`` default + opt-in behaviour.""" + + def test_internal_default_raises_on_unreadable_source(self, tmp_path): + """Without an explicit ``missing_sources`` kwarg, an unreadable + backing source must raise rather than silently zero-fill. 
+ + Before the default flipped to ``'raise'`` a missing ``Byte`` tile + produced a hole of zero pixels indistinguishable from real data + unless the caller checked ``attrs['vrt_holes']``. + """ + vrt = tmp_path / "tmp_1843_default_raise.vrt" + _write_internal_missing_source_vrt(vrt) + with pytest.raises((OSError, ValueError)): + _internal_read_vrt(str(vrt)) + + def test_internal_explicit_warn_preserves_lenient_behaviour(self, tmp_path): + """``missing_sources='warn'`` is still the escape hatch for callers + that want partial mosaics with ``parsed.holes`` populated.""" + vrt = tmp_path / "tmp_1843_explicit_warn.vrt" + _write_internal_missing_source_vrt(vrt) + with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): + arr, parsed = _internal_read_vrt(str(vrt), missing_sources='warn') + assert arr.shape == (2, 2) + assert len(parsed.holes) == 1 + assert parsed.holes[0]['source'].endswith('missing_1843.tif') + + def test_internal_strict_env_still_raises_under_warn( + self, monkeypatch, tmp_path, + ): + """``XRSPATIAL_GEOTIFF_STRICT=1`` continues to force-raise even + when the caller explicitly asks for the lenient ``'warn'`` policy. + + The strict env var is a module-wide override; it must still win + over per-call ``missing_sources='warn'`` so CI runs with strict + mode catch partial mosaics regardless of caller settings. + """ + vrt = tmp_path / "tmp_1843_strict_env.vrt" + _write_internal_missing_source_vrt(vrt) + monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1") + with pytest.raises((OSError, ValueError)): + _internal_read_vrt(str(vrt), missing_sources='warn') + + +# =========================================================================== +# Public default ``missing_sources='raise'`` on read_vrt + open_geotiff +# (was test_read_vrt_default_missing_sources_1860.py). +# +# Pins that the public wrapper's default matches the internal +# ``_vrt.read_vrt`` default rather than silently overriding it with the +# old lenient ``'warn'`` behaviour. 
+# =========================================================================== + + +def _write_public_missing_source_vrt(path): + path.write_text( + '\n' + ' \n' + ' \n' + ' missing_1860.tif' + '\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ) + + +class TestPublicDefaultMissingSources: + """Public ``read_vrt`` / ``open_geotiff('.vrt')`` default to ``'raise'``.""" + + def test_public_read_vrt_default_raises(self, tmp_path): + """Public ``read_vrt`` with no ``missing_sources`` kwarg must raise. + + The default is aligned to the internal ``_vrt.read_vrt`` default + of ``'raise'`` so the unreadable source halts the call instead of + returning a partial mosaic with ``attrs['vrt_holes']``. + """ + vrt = tmp_path / "tmp_1860_public_default_raise.vrt" + _write_public_missing_source_vrt(vrt) + with pytest.raises((OSError, ValueError)): + read_vrt(str(vrt)) + + def test_open_geotiff_vrt_default_raises(self, tmp_path): + """``open_geotiff(vrt_path)`` with no ``missing_sources`` kwarg must + raise on an unreadable backing source. + + ``open_geotiff`` forwards ``missing_sources`` to ``read_vrt`` only + when the caller passed it explicitly; otherwise the public + ``read_vrt`` default applies. 
+ """ + vrt = tmp_path / "tmp_1860_open_geotiff_default_raise.vrt" + _write_public_missing_source_vrt(vrt) + with pytest.raises((OSError, ValueError)): + open_geotiff(str(vrt)) + + def test_public_read_vrt_explicit_warn_preserves_lenient_behaviour( + self, tmp_path, + ): + """``missing_sources='warn'`` is still the escape hatch for partial + mosaics on the public ``read_vrt`` API.""" + vrt = tmp_path / "tmp_1860_public_explicit_warn.vrt" + _write_public_missing_source_vrt(vrt) + with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): + da = read_vrt(str(vrt), missing_sources='warn') + assert 'vrt_holes' in da.attrs + assert da.attrs['vrt_holes'][0]['source'].endswith('missing_1860.tif') + + def test_open_geotiff_vrt_explicit_warn_preserves_lenient_behaviour( + self, tmp_path, + ): + """``open_geotiff(vrt_path, missing_sources='warn')`` still produces + a partial mosaic with the hole record on the DataArray attrs.""" + vrt = tmp_path / "tmp_1860_open_geotiff_explicit_warn.vrt" + _write_public_missing_source_vrt(vrt) + with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): + da = open_geotiff(str(vrt), missing_sources='warn') + assert 'vrt_holes' in da.attrs + assert da.attrs['vrt_holes'][0]['source'].endswith('missing_1860.tif') + + +# =========================================================================== +# Chunked-path missing-source policy (was +# test_vrt_chunked_missing_sources_1799.py). +# +# The eager path scans every source at decode time. The chunked path +# uses a parse-time ``os.path.exists`` sweep to populate ``vrt_holes`` at +# build, and threads ``missing_sources`` through to the per-chunk decode. +# =========================================================================== + + +def _chunked_make_partial_vrt(tmp_path) -> tuple[str, str]: + """2-source VRT: present source on the left, missing on the right. + + Returns ``(vrt_path, present_src_path)``. 
+ """ + src = os.path.join(tmp_path, "src_present.tif") + arr = np.full((4, 4), 7.0, dtype=np.float32) + da = xr.DataArray( + arr, dims=("y", "x"), + attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)}, + ) + to_geotiff(da, src) + + missing = os.path.join(tmp_path, "missing.tif") + vrt_path = os.path.join(tmp_path, "partial.vrt") + with open(vrt_path, "w") as f: + f.write( + '\n' + '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + '\n' + '\n' + f'{src}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + f'{missing}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + '\n' + ) + return vrt_path, src + + +class TestChunkedMissingSourcesWarn: + """``read_vrt(chunks=N, missing_sources='warn')`` records holes at build.""" + + def test_vrt_holes_populated_at_build(self, tmp_path): + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + result = read_vrt(vrt_path, chunks=4, missing_sources="warn") + assert "vrt_holes" in result.attrs, ( + "Chunked path must populate vrt_holes at build time so " + "callers can detect partial mosaics without forcing a compute." 
+ ) + holes = result.attrs["vrt_holes"] + assert len(holes) == 1 + assert set(holes[0].keys()) == {"source", "band", "dst_rect", "error"} + assert holes[0]["source"].endswith("missing.tif") + assert holes[0]["band"] == 1 + assert holes[0]["dst_rect"] == (4, 0, 4, 4) + + def test_compute_emits_per_task_warning(self, tmp_path): + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + result = read_vrt(vrt_path, chunks=4, missing_sources="warn") + computed = result.compute() + messages = [str(w.message) for w in caught + if isinstance(w.message, GeoTIFFFallbackWarning)] + assert any("missing.tif" in msg for msg in messages), ( + f"Expected GeoTIFFFallbackWarning naming the missing source " + f"after compute, got messages: {messages!r}" + ) + np.testing.assert_array_equal( + np.asarray(computed)[:, :4], np.full((4, 4), 7.0, dtype=np.float32), + ) + assert np.all(np.isnan(np.asarray(computed)[:, 4:])) + + def test_chunks_tuple_form(self, tmp_path): + """Tuple ``chunks=(h, w)`` threads through identically.""" + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + result = read_vrt(vrt_path, chunks=(2, 4), missing_sources="warn") + assert "vrt_holes" in result.attrs + assert len(result.attrs["vrt_holes"]) == 1 + + +class TestChunkedMissingSourcesRaiseSmoke: + """``read_vrt(chunks=N, missing_sources='raise')`` fails at build. + + The detailed raise-at-build matrix (window / band scoping, multi-source + preview, strict env) lives in the 2265 section below; this keeps the + 1799 smoke assertions alongside the warn cases they were paired with. 
+ """ + + def test_build_raises_immediately(self, tmp_path): + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError, match="missing.tif"): + read_vrt(vrt_path, chunks=4, missing_sources="raise") + + def test_build_raise_message_mentions_policy_kwarg(self, tmp_path): + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError) as excinfo: + read_vrt(vrt_path, chunks=4, missing_sources="raise") + assert "missing_sources='warn'" in str(excinfo.value) + + def test_window_past_missing_succeeds_under_raise(self, tmp_path): + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + result = read_vrt( + vrt_path, chunks=4, window=(0, 0, 4, 4), + missing_sources="raise", + ) + computed = result.compute() + np.testing.assert_array_equal( + np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32), + ) + + def test_band_selection_single_band_still_raises(self, tmp_path): + """Selecting band 0 (the only band) still touches the missing + source so the build raises. 
Cross-band gating is exercised by the + multiband cases in the 2265 section below.""" + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt(vrt_path, chunks=4, band=0, missing_sources="raise") + + +class TestChunkedMissingSourcesDefault: + """The default ``missing_sources`` on chunked reads is ``'raise'``.""" + + def test_chunked_default_raises_at_build(self, tmp_path): + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError, match="missing.tif"): + read_vrt(vrt_path, chunks=4) + + +class TestChunkedMissingSourcesValidation: + """Invalid ``missing_sources`` policies are rejected at entry.""" + + def test_invalid_policy_raises_at_build(self, tmp_path): + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + with pytest.raises(ValueError, match="missing_sources"): + read_vrt(vrt_path, chunks=4, missing_sources="ignore") + + def test_invalid_policy_raises_without_chunks_too(self, tmp_path): + """The eager path also rejects the bad value; callers see the same + error whether or not they pass ``chunks=``.""" + vrt_path, _ = _chunked_make_partial_vrt(str(tmp_path)) + with pytest.raises(ValueError, match="missing_sources"): + read_vrt(vrt_path, missing_sources="ignore") + + +# =========================================================================== +# Chunked raise-at-build matrix (was +# test_vrt_chunked_missing_raise_at_build_2265.py). +# +# The chunked path now honours ``missing_sources='raise'`` at build time: +# the static ``os.path.exists`` sweep raises up front when a hole +# intersects the requested window / selected band, instead of only the +# per-chunk delayed decode raising at compute. 
+# =========================================================================== + + +def _raise_write_present_source(tmp_path: str, name: str, fill: float) -> str: + """Write a 4x4 float32 GeoTIFF source for a multi-source VRT.""" + src = os.path.join(tmp_path, name) + arr = np.full((4, 4), fill, dtype=np.float32) + da = xr.DataArray( + arr, dims=("y", "x"), + attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)}, + ) + to_geotiff(da, src) + return src + + +def _raise_make_horizontal_partial_vrt(tmp_path: str) -> str: + """2-source VRT: ``[ present | missing ]`` laid out 4x8.""" + src = _raise_write_present_source(tmp_path, "src_2265_h_present.tif", 7.0) + missing = os.path.join(tmp_path, "missing_2265_h.tif") + vrt_path = os.path.join(tmp_path, "partial_2265_h.vrt") + with open(vrt_path, "w") as f: + f.write( + '\n' + '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + '\n' + '\n' + f'{src}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + f'{missing}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + '\n' + ) + return vrt_path + + +def _raise_make_multiband_partial_vrt(tmp_path: str) -> str: + """2-band VRT where band 1 has a missing source and band 2 is intact.""" + src_b1 = _raise_write_present_source(tmp_path, "src_2265_mb_b1.tif", 11.0) + src_b2 = _raise_write_present_source(tmp_path, "src_2265_mb_b2.tif", 22.0) + missing_b1 = os.path.join(tmp_path, "missing_2265_mb_b1.tif") + vrt_path = os.path.join(tmp_path, "partial_2265_multiband.vrt") + with open(vrt_path, "w") as f: + f.write( + '\n' + '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + '\n' + '\n' + f'{src_b1}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + f'{missing_b1}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + f'{src_b2}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + '\n' + ) + return vrt_path + + +def _raise_make_multi_missing_vrt(tmp_path: str, n_missing: int) -> str: + """VRT with ``n_missing`` missing sources tiling the destination.""" + vrt_path = os.path.join(tmp_path, f"partial_2265_multi_{n_missing}.vrt") + width = 4 * n_missing + src_xml = [] 
+ for i in range(n_missing): + missing = os.path.join(tmp_path, f"missing_2265_multi_{i}.tif") + src_xml.append( + '\n' + f'{missing}\n' + '1\n' + '\n' + f'\n' + '\n' + ) + with open(vrt_path, "w") as f: + f.write( + f'\n' + '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + '\n' + + ''.join(src_xml) + + '\n' + '\n' + ) + return vrt_path + + +class TestRaiseAtBuild: + """``missing_sources='raise'`` raises during construction, not compute.""" + + def test_build_raises_immediately(self, tmp_path): + vrt_path = _raise_make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError, match="missing_2265_h"): + read_vrt(vrt_path, chunks=4, missing_sources="raise") + + def test_default_raises_at_build(self, tmp_path): + """The public default is ``'raise'`` so dropping the kwarg hits + the same fast-fail path.""" + vrt_path = _raise_make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt(vrt_path, chunks=4) + + def test_error_message_mentions_opt_in(self, tmp_path): + """The exception text tells the caller how to opt into the lenient + path.""" + vrt_path = _raise_make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError) as excinfo: + read_vrt(vrt_path, chunks=4, missing_sources="raise") + msg = str(excinfo.value) + assert "missing_sources='warn'" in msg + assert "partial mosaic" in msg + + +class TestRaiseAtBuildWindowScoping: + """The raise honours the requested window.""" + + def test_window_past_missing_does_not_raise(self, tmp_path): + vrt_path = _raise_make_horizontal_partial_vrt(str(tmp_path)) + result = read_vrt( + vrt_path, chunks=4, window=(0, 0, 4, 4), + missing_sources="raise", + ) + computed = result.compute() + np.testing.assert_array_equal( + np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32), + ) + + def test_window_intersecting_missing_raises(self, tmp_path): + vrt_path = _raise_make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt( + 
vrt_path, chunks=4, window=(0, 4, 4, 8), + missing_sources="raise", + ) + + +class TestRaiseAtBuildBandScoping: + """The raise honours ``band=`` restriction.""" + + def test_band_select_skips_other_bands_missing_source(self, tmp_path): + """``band=1`` reads band 2 only; band 1's missing source is + irrelevant to the graph, so the build must not raise.""" + vrt_path = _raise_make_multiband_partial_vrt(str(tmp_path)) + result = read_vrt( + vrt_path, chunks=4, band=1, missing_sources="raise", + ) + computed = result.compute() + np.testing.assert_array_equal( + np.asarray(computed), np.full((4, 4), 22.0, dtype=np.float32), + ) + + def test_band_select_on_missing_band_raises(self, tmp_path): + vrt_path = _raise_make_multiband_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt(vrt_path, chunks=4, band=0, missing_sources="raise") + + def test_no_band_restriction_raises(self, tmp_path): + vrt_path = _raise_make_multiband_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt(vrt_path, chunks=4, missing_sources="raise") + + +class TestRaiseAtBuildWarnPreserved: + """``missing_sources='warn'`` keeps the record-and-warn behaviour.""" + + def test_warn_records_holes_at_build(self, tmp_path): + vrt_path = _raise_make_horizontal_partial_vrt(str(tmp_path)) + result = read_vrt(vrt_path, chunks=4, missing_sources="warn") + assert "vrt_holes" in result.attrs + assert len(result.attrs["vrt_holes"]) == 1 + assert result.attrs["vrt_holes"][0]["source"].endswith( + "missing_2265_h.tif" + ) + + def test_warn_compute_emits_per_task_warning(self, tmp_path): + vrt_path = _raise_make_horizontal_partial_vrt(str(tmp_path)) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + result = read_vrt(vrt_path, chunks=4, missing_sources="warn") + computed = result.compute() + messages = [str(w.message) for w in caught + if isinstance(w.message, GeoTIFFFallbackWarning)] + assert any("missing_2265_h" in msg for msg 
in messages) + np.testing.assert_array_equal( + np.asarray(computed)[:, :4], + np.full((4, 4), 7.0, dtype=np.float32), + ) + assert np.all(np.isnan(np.asarray(computed)[:, 4:])) + + +class TestRaiseAtBuildMultipleMissingSources: + """The error message previews multiple holes and reports the total.""" + + def test_two_missing_sources_listed_with_count(self, tmp_path): + """All missing sources fit in the preview (n=2 <= preview cap).""" + vrt_path = _raise_make_multi_missing_vrt(str(tmp_path), n_missing=2) + with pytest.raises(FileNotFoundError) as excinfo: + read_vrt(vrt_path, chunks=4, missing_sources="raise") + msg = str(excinfo.value) + assert "missing_2265_multi_0" in msg + assert "missing_2265_multi_1" in msg + assert "2 missing source(s) total" in msg + assert "more" not in msg.lower() or "and 0 more" not in msg + + def test_many_missing_sources_truncated_with_more_suffix(self, tmp_path): + """Above the preview cap, the message says 'and N more'.""" + n = 5 + vrt_path = _raise_make_multi_missing_vrt(str(tmp_path), n_missing=n) + with pytest.raises(FileNotFoundError) as excinfo: + read_vrt(vrt_path, chunks=4, missing_sources="raise") + msg = str(excinfo.value) + assert "missing_2265_multi_0" in msg + assert f"missing_2265_multi_{n - 1}" not in msg + assert f"{n} missing source(s) total" in msg + assert "and 2 more" in msg + + +class TestRaiseAtBuildStrictMode: + """``XRSPATIAL_GEOTIFF_STRICT=1`` forces the raise even with ``'warn'``.""" + + def test_strict_overrides_warn_kwarg(self, tmp_path, monkeypatch): + monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1") + vrt_path = _raise_make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt(vrt_path, chunks=4, missing_sources="warn") + + def test_strict_off_warn_still_warns(self, tmp_path, monkeypatch): + """Without strict mode, ``'warn'`` keeps warning.""" + monkeypatch.delenv("XRSPATIAL_GEOTIFF_STRICT", raising=False) + vrt_path = 
_raise_make_horizontal_partial_vrt(str(tmp_path)) + result = read_vrt(vrt_path, chunks=4, missing_sources="warn") + assert "vrt_holes" in result.attrs diff --git a/xrspatial/geotiff/tests/vrt/test_parity.py b/xrspatial/geotiff/tests/vrt/test_parity.py new file mode 100644 index 000000000..6f57e544d --- /dev/null +++ b/xrspatial/geotiff/tests/vrt/test_parity.py @@ -0,0 +1,1156 @@ +"""Cross-backend parity and backend-coverage for the VRT read path. + +Consolidates the VRT-tail parity / coverage residue (cluster 13, #2437): + +* Backend parity for VRT reads with sidecar / overview interactions: + eager-vs-dask pixel + metadata (coords, transform, CRS, + ``georef_status``) parity, sidecar-vs-inline-overview attrs, and the + windowed coord / transform shift (was + ``test_vrt_backend_parity_2321.py``). +* Cross-backend parity for the VRT finalization pipeline: VRT eager vs + ``open_geotiff`` and VRT chunked vs ``read_geotiff_dask`` for the five + canonical georef states, ``band_nodata='first'`` per-band attrs, + ``dtype=`` no-sentinel branch, ``missing_sources='warn'`` vrt_holes, + and eager/chunked internal parity (was + ``test_vrt_finalization_parity_2162.py``). +* Backend / parameter coverage for ``read_vrt``: the GPU and dask+GPU + decode paths, ``dtype=`` / ``name=`` kwargs, and the file-like + + backend-kwarg rejection on ``open_geotiff`` (was + ``test_vrt_backend_coverage_2026_05_11.py``). + +The parity helpers (``_materialise`` / ``_assert_pixels_equal`` / +``_assert_metadata_parity``) mirror ``parity/test_backend_matrix.py`` so +cross-test parity reads the same way; this file keeps them VRT-local +rather than re-homing the shared harness. 
+""" +from __future__ import annotations + +import importlib.util +import io +import os +import pathlib +import shutil +import warnings +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Callable + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import ( + open_geotiff, + read_geotiff_dask, + read_vrt, + to_geotiff, +) +from xrspatial.geotiff._attrs import ( + GEOREF_STATUS_CRS_ONLY, + GEOREF_STATUS_FULL, + GEOREF_STATUS_NONE, + GEOREF_STATUS_ROTATED_DROPPED, + GEOREF_STATUS_TRANSFORM_ONLY, +) +from xrspatial.geotiff._coords import _NO_GEOREF_KEY +from xrspatial.geotiff._vrt import write_vrt as _write_vrt_internal +from xrspatial.geotiff._writer import write + +tifffile = pytest.importorskip("tifffile") + + +# =========================================================================== +# GPU gating (matches the rest of the geotiff test suite's predicate). +# =========================================================================== + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +# =========================================================================== +# Backend parity with sidecar / overview interactions +# (was test_vrt_backend_parity_2321.py). +# =========================================================================== +# +# Asserts eager / dask parity on the surface most likely to drift: +# metadata (transform, crs, crs_wkt, georef_status), windowed coords, +# and sidecar (.tif.ovr) interactions. Acceptance: the VRT path cannot +# pass by returning correct pixel values with wrong georeferencing attrs. 
def _assert_pixels_equal(ref: np.ndarray, actual: np.ndarray,
                         *, label: str) -> None:
    """Assert that two pixel arrays agree on dtype, shape, and values.

    Float arrays are compared element-wise with NaN equal to NaN; every
    other dtype is compared through its raw bytes. ``label`` prefixes
    each failure message (mirrors test_backend_matrix.py).
    """
    assert ref.dtype == actual.dtype, (
        f"{label}: dtype differs ref={ref.dtype} actual={actual.dtype}"
    )
    assert ref.shape == actual.shape, (
        f"{label}: shape differs ref={ref.shape} actual={actual.shape}"
    )
    # dtype and shape already match, so only the values remain to check.
    if ref.dtype.kind == "f":
        values_match = np.array_equal(ref, actual, equal_nan=True)
        failure = f"{label}: float pixels differ (NaN-aware)"
    else:
        values_match = ref.tobytes() == actual.tobytes()
        failure = f"{label}: integer pixel bytes differ"
    assert values_match, failure
expected_dims, ( + f"{label}: ref dims {ref.dims!r} != expected {expected_dims!r}" + ) + + for axis in expected_dims: + if axis not in ref.coords: + continue + ref_c = _coord_view(ref, axis) + actual_c = _coord_view(actual, axis) + assert ref_c.dtype == actual_c.dtype, ( + f"{label}: coord {axis!r} dtype " + f"ref={ref_c.dtype} actual={actual_c.dtype}" + ) + assert ref_c.shape == actual_c.shape, ( + f"{label}: coord {axis!r} shape " + f"ref={ref_c.shape} actual={actual_c.shape}" + ) + assert ref_c.tobytes() == actual_c.tobytes(), ( + f"{label}: coord {axis!r} bytes differ " + f"(ref[:3]={ref_c[:3].tolist()!r}, " + f"actual[:3]={actual_c[:3].tolist()!r})" + ) + + ref_t = ref.attrs.get("transform") + actual_t = actual.attrs.get("transform") + assert ref_t == actual_t, ( + f"{label}: transform tuple differs " + f"ref={ref_t!r} actual={actual_t!r}" + ) + + assert ref.attrs.get("crs") == actual.attrs.get("crs"), ( + f"{label}: attrs['crs'] differs " + f"ref={ref.attrs.get('crs')!r} actual={actual.attrs.get('crs')!r}" + ) + assert ref.attrs.get("crs_wkt") == actual.attrs.get("crs_wkt"), ( + f"{label}: crs_wkt differs" + ) + assert ref.attrs.get("georef_status") == actual.attrs.get( + "georef_status" + ), ( + f"{label}: georef_status differs " + f"ref={ref.attrs.get('georef_status')!r} " + f"actual={actual.attrs.get('georef_status')!r}" + ) + + +def _build_two_tile_float32_vrt(tmp_path: Path) -> tuple[Path, np.dtype]: + """Two 16x16 float32 tiles laid out side-by-side as a 16x32 mosaic.""" + tile_h, tile_w = 16, 16 + paths: list[str] = [] + for c in range(2): + arr = np.full( + (tile_h, tile_w), float(c + 1) * 1000.0, dtype=np.float32 + ) + arr[0, 0] = -7.0 + c + arr[tile_h - 1, tile_w - 1] = 9000.0 + c + origin_x = float(c * tile_w) + da = xr.DataArray( + arr, dims=["y", "x"], + coords={ + "y": np.arange(tile_h - 1, -1, -1, dtype=np.float64), + "x": np.arange( + origin_x, origin_x + tile_w, dtype=np.float64), + }, + attrs={"crs": 4326}, + ) + tile_path = tmp_path / 
@dataclass(frozen=True)
class _FixtureSpec:
    """Immutable description of one VRT fixture used by the parity matrix."""

    # Identifier used as the pytest parametrization id for this fixture.
    fix_id: str
    # Factory called with a directory; builds the VRT there and returns
    # the ``(vrt_path, dtype)`` pair.
    builder: Callable[[Path], tuple[Path, np.dtype]]
    # Dimension names the opened DataArray is expected to carry.
    expected_dims: tuple[str, ...]
    # Value forwarded as ``window=`` when opening the VRT; ``None`` means
    # no ``window`` argument is passed.
    window: tuple[int, int, int, int] | None
@pytest.fixture
def vrt_fixture(_vrt_parity_dir, _vrt_parity_cache):
    """Provide a resolver mapping a :class:`_FixtureSpec` to its built VRT.

    The returned callable memoises results in ``_vrt_parity_cache`` under
    the builder's function name, so specs that share a builder reuse one
    ``(vrt_path, dtype)`` pair. Each builder writes into its own
    sub-directory of ``_vrt_parity_dir``.
    """

    def _resolve(spec: _FixtureSpec) -> tuple[Path, np.dtype]:
        builder_name = spec.builder.__name__
        if builder_name not in _vrt_parity_cache:
            workdir = _vrt_parity_dir / builder_name
            workdir.mkdir(exist_ok=True)
            _vrt_parity_cache[builder_name] = spec.builder(workdir)
        return _vrt_parity_cache[builder_name]

    return _resolve
@pytest.mark.parametrize("backend", _backend_params())
def test_sidecar_vrt_attrs_match_inline(backend, tmp_path):
    """A VRT over the sidecar-overview TIFF and a VRT over the
    inline-overview TIFF must agree on pixels and georef attrs when opened
    through the same backend."""
    sidecar_dir = tmp_path / "sidecar"
    inline_dir = tmp_path / "inline"
    for directory in (sidecar_dir, inline_dir):
        directory.mkdir()
    side_vrt, side_dtype = _build_sidecar_vrt(sidecar_dir)
    inline_vrt, inline_dtype = _build_inline_overview_vrt(inline_dir)

    assert side_dtype == inline_dtype, (
        f"sidecar dtype {side_dtype} != inline dtype {inline_dtype}; "
        f"the golden_corpus fixtures should share a base IFD"
    )

    label = f"sidecar-vs-inline backend={backend.backend_id}"
    side, inline = (
        open_geotiff(str(path), **backend.kwargs)
        for path in (side_vrt, inline_vrt)
    )

    assert side.shape == inline.shape, (
        f"{label}: shape differs side={side.shape} inline={inline.shape}"
    )

    # The inline-overview read is the reference; the sidecar read is checked.
    inline_pixels = _materialise(inline)
    side_pixels = _materialise(side)
    _assert_pixels_equal(inline_pixels, side_pixels, label=label)
    _assert_metadata_parity(
        inline, side, label=label, expected_dims=("y", "x"),
    )
def test_sidecar_window_shifts_to_known_coords(tmp_path):
    """Reading the sidecar VRT with ``window=(8, 8, 56, 56)`` yields a
    48x48 result whose transform matches the hand-computed values."""
    vrt_path, _dtype = _build_sidecar_vrt(tmp_path)

    eager = open_geotiff(str(vrt_path), window=(8, 8, 56, 56))
    assert eager.shape == (48, 48)

    t = eager.attrs.get("transform")
    assert t is not None, "windowed sidecar VRT dropped attrs['transform']"

    # Transform index -> expected value, checked in the listed order.
    expected_terms = (
        (0, 0.001),
        (4, -0.001),
        (2, -120.0 + 8 * 0.001),
        (5, 45.0 + 8 * -0.001),
    )
    for index, expected in expected_terms:
        assert t[index] == pytest.approx(expected)
test_assert_metadata_parity_flags_transform_drift(tmp_path): + """A transform-only drift between two otherwise-identical DataArrays + fails the parity helper (locks the harness behaviour).""" + vrt_path, _ = _build_two_tile_float32_vrt(tmp_path) + da_ref = open_geotiff(str(vrt_path)) + da_bad = da_ref.copy() + da_bad.attrs = dict(da_ref.attrs) + old_t = da_bad.attrs["transform"] + da_bad.attrs["transform"] = ( + old_t[0], old_t[1], old_t[2] + 1.0, + old_t[3], old_t[4], old_t[5], + ) + with pytest.raises(AssertionError, match="transform"): + _assert_metadata_parity( + da_ref, da_bad, label="harness-sanity", + expected_dims=("y", "x"), + ) + + +# =========================================================================== +# VRT finalization-pipeline parity +# (was test_vrt_finalization_parity_2162.py). +# =========================================================================== +# +# The VRT eager and chunked paths route through the shared +# ``_finalize_lazy_read_attrs`` helper. These tests pin parity for the +# attrs the helper stamps against the non-VRT eager / dask readers. 
def _write_single_source_vrt(tiff_path, vrt_path, *, width, height,
                             dtype='Float32', nodata=None,
                             geo_transform='0.0, 1.0, 0.0, 0.0, 0.0, -1.0',
                             srs=None):
    """Write a one-band VRT pointing at ``tiff_path``.

    The file holds a single ``VRTRasterBand`` with one ``SimpleSource``
    that maps the whole ``width`` x ``height`` source onto the same
    rectangle of the destination.

    Parameters
    ----------
    tiff_path : path of the source raster, written verbatim
        (``relativeToVRT="0"``) into ``SourceFilename``.
    vrt_path : destination path of the ``.vrt`` file.
    width, height : raster size in pixels; used for the dataset size and
        for both source and destination rectangles.
    dtype : value of the band's ``dataType`` attribute.
    nodata : when not ``None``, emitted as the band's ``NoDataValue``.
    geo_transform : text of the ``GeoTransform`` element; ``None`` omits
        the element.
    srs : text of the ``SRS`` element; ``None`` omits the element.
    """
    # Local import: keeps this helper self-contained.
    from xml.sax.saxutils import escape

    rect = f'xOff="0" yOff="0" xSize="{width}" ySize="{height}"'

    lines = [f'<VRTDataset rasterXSize="{width}" rasterYSize="{height}">']
    if geo_transform is not None:
        lines.append(f'  <GeoTransform>{geo_transform}</GeoTransform>')
    if srs is not None:
        # WKT / paths are free text: escape &, <, > so the file stays
        # well-formed; XML parsers undo the escaping on read.
        lines.append(f'  <SRS>{escape(str(srs))}</SRS>')
    lines.append(f'  <VRTRasterBand dataType="{dtype}" band="1">')
    if nodata is not None:
        lines.append(f'    <NoDataValue>{nodata}</NoDataValue>')
    lines.extend([
        '    <SimpleSource>',
        '      <SourceFilename relativeToVRT="0">'
        f'{escape(str(tiff_path))}</SourceFilename>',
        '      <SourceBand>1</SourceBand>',
        f'      <SrcRect {rect}/>',
        f'      <DstRect {rect}/>',
        '    </SimpleSource>',
        '  </VRTRasterBand>',
        '</VRTDataset>',
    ])

    # Explicit encoding: do not depend on the platform default.
    with open(vrt_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines) + '\n')
def _make_rotated_pair(tmp_path, name):
    """Build a 4x4 uint16 TIFF plus a VRT whose GeoTransform has non-zero
    skew terms, and return ``(tiff, vrt)`` as path strings."""
    tiff, vrt = (
        str(tmp_path / filename)
        for filename in (f'{name}_tiff.tif', f'{name}.vrt')
    )
    side = 4
    pixels = np.arange(side * side, dtype=np.uint16).reshape(side, side)
    write(pixels, tiff, compression='none', tiled=False)
    vrt_options = {
        'width': side,
        'height': side,
        'dtype': 'UInt16',
        # Third and fifth terms (0.5) are the skew that makes this rotated.
        'geo_transform': '0.0, 1.0, 0.5, 0.0, 0.5, -1.0',
        'srs': None,
    }
    _write_single_source_vrt(tiff, vrt, **vrt_options)
    return tiff, vrt
def test_vrt_eager_transform_only_matches_open_geotiff(tmp_path):
    """Eager TIFF and eager VRT reads of the transform-only pair share the
    same canonical attrs and report ``transform_only`` status."""
    tiff, vrt = _make_transform_only_pair(tmp_path, 'tonly_2180')
    # TIFF is read first, then the VRT.
    tiff_attrs, vrt_attrs = (
        _shared_canonical_attrs(dict(reader(path).attrs))
        for reader, path in ((open_geotiff, tiff), (read_vrt, vrt))
    )
    assert tiff_attrs == vrt_attrs
    assert tiff_attrs['georef_status'] == GEOREF_STATUS_TRANSFORM_ONLY
def test_vrt_eager_rotated_dropped_matches_open_geotiff(tmp_path):
    """Eager read of the rotated VRT with ``allow_rotated=True``: status
    is ``rotated_dropped``, the no-georef marker and ``rotated_affine``
    are present, and crs / crs_wkt / transform are absent."""
    _tiff, vrt = _make_rotated_pair(tmp_path, 'rot_2180')
    rotated = read_vrt(vrt, allow_rotated=True)
    attrs = dict(rotated.attrs)

    assert attrs['georef_status'] == GEOREF_STATUS_ROTATED_DROPPED
    assert attrs.get(_NO_GEOREF_KEY) is True
    assert 'rotated_affine' in attrs
    for dropped in ('crs', 'crs_wkt'):
        assert attrs.get(dropped) is None
    assert 'transform' not in attrs
def test_band_nodata_first_band_attrs(tmp_path):
    """Eager ``band=1`` read with ``band_nodata='first'``: attrs carry the
    65000 sentinel, the masked flag is set, and pixel ``[1, 1]`` is NaN."""
    vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
    r = read_vrt(vrt_path, band=1, band_nodata='first')
    attrs = r.attrs
    assert attrs['nodata'] == 65000.0
    assert attrs['masked_nodata'] is True
    corner = r.values[1, 1]
    assert np.isnan(corner)
    assert attrs.get('nodata_pixels_present') is True
def test_dtype_cast_no_sentinel_omits_attr_eager(tmp_path):
    """Eager read with ``dtype=np.float64`` of a VRT that declares no
    nodata: the result is float64 and no nodata-related attr appears."""
    vrt = _make_no_sentinel_vrt(tmp_path, 'no_sentinel_eager_2180')
    r = read_vrt(vrt, dtype=np.float64)
    assert r.dtype == np.float64
    for absent in ('nodata', 'masked_nodata', 'nodata_dtype_cast'):
        assert absent not in r.attrs
hole + assert 'dst_rect' in hole + assert 'error' in hole + + +def test_missing_sources_chunked_surfaces_vrt_holes(tmp_path): + """Chunked path's parse-time existence sweep still populates + ``attrs['vrt_holes']`` after the migration.""" + tiff_path = str(tmp_path / 'present_chunked_2180.tif') + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + write(arr, tiff_path, compression='none', tiled=False) + + missing_path = str(tmp_path / 'missing_chunked_2180.tif') + vrt_path = str(tmp_path / 'mosaic_chunked_2180.vrt') + vrt_xml = f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {tiff_path} + 1 + + + + + {missing_path} + 1 + + + + +""" + with open(vrt_path, 'w') as f: + f.write(vrt_xml) + r = read_vrt(vrt_path, missing_sources='warn', chunks=2) + assert 'vrt_holes' in r.attrs + holes = r.attrs['vrt_holes'] + assert isinstance(holes, list) and len(holes) >= 1 + + +_STATUS_PAIRS = [ + pytest.param(_make_full_pair, GEOREF_STATUS_FULL, False, id="full"), + pytest.param( + _make_transform_only_pair, GEOREF_STATUS_TRANSFORM_ONLY, + False, id="transform_only", + ), + pytest.param( + _make_crs_only_pair, GEOREF_STATUS_CRS_ONLY, + False, id="crs_only", + ), + pytest.param(_make_none_pair, GEOREF_STATUS_NONE, False, id="none"), + pytest.param( + _make_rotated_pair, GEOREF_STATUS_ROTATED_DROPPED, True, + id="rotated_dropped", + ), +] + + +@pytest.mark.parametrize("pair_factory,expected_status,allow_rotated", + _STATUS_PAIRS) +def test_georef_status_eager_parity(tmp_path, pair_factory, expected_status, + allow_rotated): + """VRT eager and (where applicable) non-VRT eager agree on + ``georef_status``.""" + tiff, vrt = pair_factory(tmp_path, f'georef_eager_{expected_status}') + kwargs = {'allow_rotated': True} if allow_rotated else {} + vrt_status = read_vrt(vrt, **kwargs).attrs.get('georef_status') + assert vrt_status == expected_status + if not allow_rotated: + tiff_status = open_geotiff(tiff, **kwargs).attrs.get('georef_status') + assert tiff_status == expected_status + assert 
@pytest.mark.parametrize("pair_factory,allow_rotated", _VRT_FACTORIES)
def test_vrt_eager_chunked_internal_parity(tmp_path, pair_factory,
                                           allow_rotated):
    """Eager and ``chunks=2`` reads of the same VRT carry identical attrs
    once ``nodata_pixels_present`` is removed from both sides."""
    _tiff, vrt = pair_factory(tmp_path, 'internal_parity_2180')
    kwargs = {}
    if allow_rotated:
        kwargs['allow_rotated'] = True

    def _attrs_without_lazy_key(da):
        # ``nodata_pixels_present`` is excluded from the comparison.
        return {
            key: value for key, value in da.attrs.items()
            if key != 'nodata_pixels_present'
        }

    eager_attrs = _attrs_without_lazy_key(read_vrt(vrt, **kwargs))
    chunked_attrs = _attrs_without_lazy_key(
        read_vrt(vrt, chunks=2, **kwargs)
    )
    assert eager_attrs == chunked_attrs
@pytest.fixture
def single_tile_vrt(tmp_path):
    """Write a 4x4 float32 tile and a VRT over it; return
    ``(vrt_path, source_array)``."""
    tile_path, vrt_path = (
        str(tmp_path / filename) for filename in ('tile.tif', 'mosaic.vrt')
    )
    source = np.arange(16, dtype=np.float32).reshape(4, 4)
    to_geotiff(source, tile_path)
    _write_vrt_internal(vrt_path, [tile_path])
    return vrt_path, source
class TestReadVrtDtypeKwarg:
    """Behaviour of the ``dtype=`` keyword of ``read_vrt``."""

    def test_safe_widening_cast(self, single_tile_vrt):
        """Requesting float64 from the float32 tile yields float64 data
        equal to the widened source array."""
        vrt_path, source = single_tile_vrt
        widened = source.astype(np.float64)
        da = read_vrt(vrt_path, dtype='float64')
        assert da.dtype == np.float64
        np.testing.assert_array_equal(da.values, widened)

    def test_float_to_int_rejected(self, single_tile_vrt):
        """Requesting int32 from the float32 tile raises ``ValueError``."""
        vrt_path = single_tile_vrt[0]
        with pytest.raises(ValueError, match="Cannot cast float"):
            read_vrt(vrt_path, dtype='int32')
read_vrt(vrt_path, name='custom_name') + assert da.name == 'custom_name' + + def test_default_name_from_stem(self, single_tile_vrt): + vrt_path, _ = single_tile_vrt + da = read_vrt(vrt_path) + assert da.name == os.path.splitext(os.path.basename(vrt_path))[0] + + +class TestOpenGeotiffFileLikeKwargRejection: + """File-like sources reject ``gpu=True`` and ``chunks=N`` up front.""" + + @staticmethod + def _buf_with_tiff(tmp_path): + arr = np.zeros((4, 4), dtype=np.float32) + path = str(tmp_path / 'src.tif') + to_geotiff(arr, path) + with open(path, 'rb') as fh: + return io.BytesIO(fh.read()) + + def test_gpu_with_file_like_raises(self, tmp_path): + buf = self._buf_with_tiff(tmp_path) + with pytest.raises(ValueError, match="gpu=True is not supported"): + open_geotiff(buf, gpu=True) + + def test_chunks_with_file_like_raises(self, tmp_path): + buf = self._buf_with_tiff(tmp_path) + with pytest.raises(ValueError, match="chunks=.*file-like"): + open_geotiff(buf, chunks=64) + + def test_chunks_with_pathlib_path_still_works(self, tmp_path): + """pathlib.Path is not file-like and must keep working through the + dask path.""" + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + path = tmp_path / 'sample.tif' + to_geotiff(arr, str(path)) + + import dask.array as da_mod + result = open_geotiff(path, chunks=2) + assert isinstance(result.data, da_mod.Array) + np.testing.assert_array_equal(np.asarray(result.data), arr) diff --git a/xrspatial/geotiff/tests/vrt/test_validation.py b/xrspatial/geotiff/tests/vrt/test_validation.py index b81c6d0a9..f1852fb61 100644 --- a/xrspatial/geotiff/tests/vrt/test_validation.py +++ b/xrspatial/geotiff/tests/vrt/test_validation.py @@ -1570,3 +1570,300 @@ def test_empty_entries_ignored(self, tmp_path, monkeypatch): monkeypatch.setenv('XRSPATIAL_VRT_ALLOWED_ROOTS', value) arr, _ = _internal_read_vrt(vrt_path) assert arr.shape == (4, 4) + + +# =========================================================================== +# VRT-tail validation folds 
(cluster 13, #2437) +# =========================================================================== +# +# Three originally-standalone validation files folded here: +# +# * SrcRect negative-size / negative-offset rejection (was +# ``test_geotiff_vrt_srcrect_validation_1784.py``). +# * ``open_geotiff('.vrt')`` rejecting kwargs it silently dropped: +# ``overview_level`` and ``on_gpu_failure`` (was +# ``test_open_geotiff_vrt_kwarg_drop_1685.py``). +# * ``to_geotiff(..., '.vrt')`` rejecting ``tiled=False`` and validating +# ``tile_size`` up front instead of crashing in the writer (was +# ``test_to_geotiff_vrt_tiled_validation_1862.py``). + + +# --------------------------------------------------------------------------- +# SrcRect negative-size / negative-offset rejection (#1784) +# --------------------------------------------------------------------------- +# +# A malformed ``<SrcRect>`` size (or negative offset) must surface +# as a ``ValueError`` naming the offending field, in both lenient and +# strict modes -- never get swallowed by the missing-source fallback. 
+ + +def _srcrect_write_source(td: str, name: str = 'src.tif') -> str: + """Write a 10x10 uint8 source GeoTIFF and return its path.""" + src_path = os.path.join(td, name) + to_geotiff(np.zeros((10, 10), dtype=np.uint8), src_path, + compression='none') + return src_path + + +def _srcrect_write_vrt(td: str, *, + src_x_off: int = 0, src_y_off: int = 0, + src_x_size: int = 10, src_y_size: int = 10, + src_filename: str = 'src.tif', + raster_x: int = 100, raster_y: int = 100) -> str: + """Write a VRT with a single SimpleSource using the given SrcRect.""" + vrt_path = os.path.join(td, 'mosaic.vrt') + vrt_xml = ( + f'\n' + f' \n' + f' \n' + f' {src_filename}' + f'\n' + f' 1\n' + f' \n' + f' \n' + f' \n' + f' \n' + f'\n' + ) + with open(vrt_path, 'w') as f: + f.write(vrt_xml) + return vrt_path + + +class TestSrcRectRejection: + """Malformed ```` geometry rejected before the lenient + missing-source fallback can swallow it.""" + + def test_negative_x_size_rejected(self, tmp_path): + td = str(tmp_path) + _srcrect_write_source(td) + vrt_path = _srcrect_write_vrt(td, src_x_size=-50) + with pytest.raises(ValueError, match=r"SrcRect.*negative size"): + _internal_read_vrt(vrt_path) + + def test_negative_y_size_rejected(self, tmp_path): + td = str(tmp_path) + _srcrect_write_source(td) + vrt_path = _srcrect_write_vrt(td, src_y_size=-50) + with pytest.raises(ValueError, match=r"SrcRect.*negative size"): + _internal_read_vrt(vrt_path) + + def test_negative_x_off_rejected(self, tmp_path): + td = str(tmp_path) + _srcrect_write_source(td) + vrt_path = _srcrect_write_vrt(td, src_x_off=-10) + with pytest.raises(ValueError, match=r"SrcRect.*negative offset"): + _internal_read_vrt(vrt_path) + + def test_negative_y_off_rejected(self, tmp_path): + td = str(tmp_path) + _srcrect_write_source(td) + vrt_path = _srcrect_write_vrt(td, src_y_off=-10) + with pytest.raises(ValueError, match=r"SrcRect.*negative offset"): + _internal_read_vrt(vrt_path) + + def test_message_names_bad_values(self, 
tmp_path): + """The error message names the malformed field and its value so + the caller can find the offending ```` in the VRT.""" + td = str(tmp_path) + _srcrect_write_source(td) + vrt_path = _srcrect_write_vrt(td, src_x_size=-7, src_y_size=-3) + with pytest.raises(ValueError) as excinfo: + _internal_read_vrt(vrt_path) + msg = str(excinfo.value) + assert "SrcRect" in msg + assert "-7" in msg + assert "-3" in msg + + def test_missing_source_still_takes_lenient_warning_path(self, tmp_path): + """A *valid* SrcRect with a missing source file still hits the + lenient warning path -- the SrcRect check must not swallow the + missing-file case. ``missing_sources='warn'`` opts into the + lenient branch since the default is now ``'raise'``.""" + td = str(tmp_path) + # No source file written; SrcRect itself is well-formed. + vrt_path = _srcrect_write_vrt(td, src_filename='does_not_exist.tif') + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + arr, _ = _internal_read_vrt(vrt_path, missing_sources='warn') + fallback = [w for w in caught + if issubclass(w.category, GeoTIFFFallbackWarning)] + assert fallback, ( + "expected a GeoTIFFFallbackWarning for the missing source" + ) + assert arr.shape == (100, 100) + + def test_valid_srcrect_reads_normally(self, tmp_path): + """A well-formed SrcRect with a real source succeeds -- no false + positives on valid VRTs.""" + td = str(tmp_path) + _srcrect_write_source(td) + vrt_path = _srcrect_write_vrt(td, raster_x=10, raster_y=10) + arr, _ = _internal_read_vrt(vrt_path) + assert arr.shape == (10, 10) + assert np.all(arr == 0) + + def test_negative_srcrect_raises_under_strict_mode( + self, tmp_path, monkeypatch, + ): + """The check runs before the lenient try/except, so strict mode + and lenient mode both raise.""" + monkeypatch.setenv('XRSPATIAL_GEOTIFF_STRICT', '1') + td = str(tmp_path) + _srcrect_write_source(td) + vrt_path = _srcrect_write_vrt(td, src_x_size=-50) + with pytest.raises(ValueError, 
match=r"SrcRect.*negative size"): + _internal_read_vrt(vrt_path) + + +# --------------------------------------------------------------------------- +# open_geotiff('.vrt') kwarg-drop rejection (#1685) +# --------------------------------------------------------------------------- +# +# ``open_geotiff`` documents ``overview_level`` and ``on_gpu_failure`` but +# the VRT dispatch branch routes to ``read_vrt`` whose signature accepts +# neither, so the kwargs were silently dropped. The fix refuses the +# unsupported combinations up front. + + +@pytest.fixture +def _kwarg_drop_small_vrt(tmp_path): + """Two-tile uint16 VRT for the kwarg-drop rejection cases.""" + arr_a = np.arange(16, dtype=np.uint16).reshape(4, 4) + da_a = xr.DataArray( + arr_a, dims=["y", "x"], + coords={ + "y": np.array([0.5, 1.5, 2.5, 3.5]), + "x": np.array([0.5, 1.5, 2.5, 3.5]), + }, + attrs={"crs": 4326}, + ) + tile_a = tmp_path / "tile_a.tif" + to_geotiff(da_a, str(tile_a)) + + arr_b = np.arange(16, 32, dtype=np.uint16).reshape(4, 4) + da_b = xr.DataArray( + arr_b, dims=["y", "x"], + coords={ + "y": np.array([0.5, 1.5, 2.5, 3.5]), + "x": np.array([4.5, 5.5, 6.5, 7.5]), + }, + attrs={"crs": 4326}, + ) + tile_b = tmp_path / "tile_b.tif" + to_geotiff(da_b, str(tile_b)) + + from xrspatial.geotiff import write_vrt + vrt_path = tmp_path / "mosaic.vrt" + write_vrt(str(vrt_path), [str(tile_a), str(tile_b)]) + return str(vrt_path) + + +class TestOpenGeotiffVrtKwargRejection: + """``open_geotiff('.vrt')`` rejects kwargs it used to silently drop.""" + + def test_rejects_overview_level(self, _kwarg_drop_small_vrt): + """``overview_level`` plus ``.vrt`` raises, not a silent drop.""" + with pytest.raises( + ValueError, match="overview_level is not supported", + ): + open_geotiff(_kwarg_drop_small_vrt, overview_level=1) + + def test_accepts_overview_level_zero(self, _kwarg_drop_small_vrt): + """``overview_level=0`` is full resolution (the default), so it is + equivalent to omitting the kwarg and must not 
raise.""" + da = open_geotiff(_kwarg_drop_small_vrt, overview_level=0) + assert da.shape == (4, 8) + + def test_rejects_on_gpu_failure_with_gpu_true(self, _kwarg_drop_small_vrt): + """``on_gpu_failure='strict'`` plus ``.vrt`` (gpu=True) is refused. + + The check fires before any GPU code runs; no CUDA needed.""" + with pytest.raises( + ValueError, match="on_gpu_failure is not supported", + ): + open_geotiff( + _kwarg_drop_small_vrt, gpu=True, on_gpu_failure="strict", + ) + + def test_without_unsupported_kwargs_still_works(self, _kwarg_drop_small_vrt): + """The previously-accepted kwargs still flow through to + ``read_vrt``.""" + da = open_geotiff(_kwarg_drop_small_vrt) + assert da.shape == (4, 8) + + def test_with_window_still_works(self, _kwarg_drop_small_vrt): + """``window`` was already forwarded; the fix must not break it.""" + da = open_geotiff(_kwarg_drop_small_vrt, window=(0, 1, 4, 5)) + assert da.shape == (4, 4) + + def test_non_vrt_still_accepts_overview_level(self, tmp_path): + """The fix is VRT-specific; ``.tif`` sources keep accepting + ``overview_level``.""" + arr = np.arange(64, dtype=np.uint16).reshape(8, 8) + da = xr.DataArray( + arr, dims=["y", "x"], + coords={ + "y": np.arange(8, dtype=np.float64), + "x": np.arange(8, dtype=np.float64), + }, + attrs={"crs": 4326}, + ) + tif_path = tmp_path / "with_ovr.tif" + to_geotiff( + da, str(tif_path), cog=True, tile_size=16, overview_levels=[2], + ) + open_geotiff(str(tif_path), overview_level=0) + open_geotiff(str(tif_path), overview_level=1) + + +# --------------------------------------------------------------------------- +# to_geotiff('.vrt') tiled / tile_size validation (#1862) +# --------------------------------------------------------------------------- +# +# ``to_geotiff(..., '.vrt', tiled=False)`` used to warn then crash with +# ``ZeroDivisionError`` inside the always-tiling VRT writer. 
The fix +# refuses ``tiled=False`` for ``.vrt`` and validates ``tile_size`` +# unconditionally so callers get a clear ``ValueError`` up front. + + +def _tiled_validation_make_da(shape=(64, 64)): + arr = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) + return xr.DataArray(arr, dims=['y', 'x']) + + +class TestVrtTiledValidation: + """VRT writer rejects ``tiled=False`` and bad ``tile_size`` up front.""" + + def test_rejects_tiled_false(self, tmp_path): + """``tiled=False`` is not a valid request for VRT output.""" + da = _tiled_validation_make_da() + out = os.path.join(str(tmp_path), 'vrt_tiled_false.vrt') + with pytest.raises(ValueError, match='tiled=False is not compatible'): + to_geotiff(da, out, tiled=False) + + def test_tiled_false_zero_tile_size_raises_value_error(self, tmp_path): + """``tiled=False`` plus ``tile_size=0`` raises ``ValueError``, not + the previous ``ZeroDivisionError`` from inside the writer.""" + da = _tiled_validation_make_da() + out = os.path.join(str(tmp_path), 'vrt_tiled_false_zero.vrt') + with pytest.raises(ValueError) as exc: + to_geotiff(da, out, tiled=False, tile_size=0) + assert not isinstance(exc.value, ZeroDivisionError) + + def test_zero_tile_size_default_tiled_raises_value_error(self, tmp_path): + """With the default ``tiled=True``, ``tile_size=0`` surfaces from + the shared ``_validate_tile_size`` check, not a deep + ``ZeroDivisionError``.""" + da = _tiled_validation_make_da() + out = os.path.join(str(tmp_path), 'vrt_default_tiled_zero.vrt') + with pytest.raises(ValueError, match='tile_size'): + to_geotiff(da, out, tile_size=0) + + def test_default_args_still_succeeds(self, tmp_path): + """The default-args VRT write path is unaffected by the fix.""" + da = _tiled_validation_make_da() + out = os.path.join(str(tmp_path), 'vrt_default.vrt') + to_geotiff(da, out) + assert os.path.exists(out) diff --git a/xrspatial/geotiff/tests/vrt/test_window.py b/xrspatial/geotiff/tests/vrt/test_window.py index d5975d86e..78939bc5f 100644 
--- a/xrspatial/geotiff/tests/vrt/test_window.py +++ b/xrspatial/geotiff/tests/vrt/test_window.py @@ -29,6 +29,7 @@ import pytest import tempfile import uuid +import warnings import xarray as xr from pathlib import Path from unittest import mock @@ -1148,3 +1149,132 @@ def _write_and_collect(vrt_path: str) -> dict[str, bytes]: assert set(tiles1) == set(tiles2), f'Tile file set differs between runs: {set(tiles1) ^ set(tiles2)}' for name, blob1 in tiles1.items(): assert blob1 == tiles2[name], f'Tile {name} differs between runs (race condition?)' + + +# --------------------------------------------------------------------------- +# VRT-tail window / chunking folds (cluster 13, #2437) +# --------------------------------------------------------------------------- +# +# Two originally-standalone files folded here, both exercising the +# windowed / chunked read paths this module already covers: +# +# * read_vrt(chunks=...) lazy-window construction (#1798): chunk layout +# matches eager values, build does not decode sources, and an +# excessive task count is rejected. +# * read_geotiff_dask('.vrt') kwarg forwarding (#1795): the direct dask +# entry point forwards window / band / max_pixels through to read_vrt. + + +def _vrttail_write_single_band_vrt(vrt_path, source_name): + """One-band 6x4 Float32 VRT wrapping ``source_name`` (relative).""" + vrt_path.write_text( + '\n' + ' \n' + ' \n' + f' {source_name}' + '\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ) + + +def _vrttail_write_multi_band_vrt(vrt_path, source_name, *, bands): + """``bands``-band 6x4 Float32 VRT wrapping ``source_name`` (relative).""" + band_xml = [] + for i in range(bands): + band_xml.append( + f' \n' + ' \n' + f' {source_name}' + '\n' + f' {i + 1}\n' + ' \n' + ' \n' + ' \n' + ' \n' + ) + vrt_path.write_text( + '\n' + + ''.join(band_xml) + + '\n' + ) + + +class TestVrtTailLazyChunks: + """read_vrt(chunks=...) 
builds lazy window tasks (#1798).""" + + def test_chunks_matches_eager_values(self, tmp_path): + arr = np.arange(24, dtype=np.float32).reshape(4, 6) + src = tmp_path / "tmp_1798_source.tif" + to_geotiff(arr, str(src), compression='none') + vrt = tmp_path / "tmp_1798_source.vrt" + _vrttail_write_single_band_vrt(vrt, os.path.basename(src)) + + eager = read_vrt(str(vrt)) + lazy = read_vrt(str(vrt), chunks=2) + + assert lazy.data.chunks == ((2, 2), (2, 2, 2)) + np.testing.assert_array_equal(lazy.compute().values, eager.values) + + def test_chunks_does_not_read_sources_during_construction(self, tmp_path): + """The chunked path runs a cheap ``os.path.exists`` sweep at build + but must not open or decode any source file. + + Pairing the missing source with ``missing_sources='warn'`` lets + the build succeed; the assertion is that no decode-time warnings + (which would only fire if a source were actually read) leak out + during construction. + """ + vrt = tmp_path / "tmp_1798_missing_source.vrt" + _vrttail_write_single_band_vrt(vrt, "missing.tif") + + with warnings.catch_warnings(record=True) as caught: + lazy = read_vrt(str(vrt), chunks=2, missing_sources="warn") + + assert caught == [] + assert hasattr(lazy.data, 'compute') + + def test_chunks_rejects_excessive_task_count(self, tmp_path): + vrt = tmp_path / "tmp_1798_huge_extent.vrt" + vrt.write_text( + '\n' + ' \n' + '\n' + ) + with pytest.raises(ValueError, match="task cap"): + read_vrt(str(vrt), chunks=1, max_pixels=20_000_000_000) + + +class TestVrtTailDirectDaskKwargs: + """read_geotiff_dask('.vrt') forwards VRT kwargs (#1795).""" + + def test_forwards_window_and_band(self, tmp_path): + from xrspatial.geotiff import read_geotiff_dask + + arr = np.arange(4 * 6 * 2, dtype=np.float32).reshape(4, 6, 2) + src = tmp_path / "tmp_1797_source.tif" + to_geotiff(arr, str(src), compression='none') + vrt = tmp_path / "tmp_1797_source.vrt" + _vrttail_write_multi_band_vrt(vrt, os.path.basename(src), bands=2) + + got = 
read_geotiff_dask( + str(vrt), chunks=2, window=(1, 2, 4, 6), band=1, + ) + assert got.shape == (3, 4) + np.testing.assert_array_equal(got.values, arr[1:4, 2:6, 1]) + + def test_forwards_max_pixels(self, tmp_path): + from xrspatial.geotiff import read_geotiff_dask + + arr = np.arange(24, dtype=np.float32).reshape(4, 6) + src = tmp_path / "tmp_1797_source_cap.tif" + to_geotiff(arr, str(src), compression='none') + vrt = tmp_path / "tmp_1797_source_cap.vrt" + _vrttail_write_single_band_vrt(vrt, os.path.basename(src)) + + with pytest.raises(ValueError, match="exceed"): + read_geotiff_dask(str(vrt), chunks=2, max_pixels=10)