diff --git a/docs/source/reference/release_gate_geotiff.rst b/docs/source/reference/release_gate_geotiff.rst
index 9f01a0812..dfee88e17 100644
--- a/docs/source/reference/release_gate_geotiff.rst
+++ b/docs/source/reference/release_gate_geotiff.rst
@@ -170,7 +170,7 @@ Local GeoTIFF read and write
and zero-area windows raise rather than silently clamp; coords
on georeferenced inputs match the eager full-read slice.
- ``xrspatial/geotiff/tests/unit/test_input_validation.py``,
- ``xrspatial/geotiff/tests/test_no_georef_windowed_coords_1710.py``
+ ``xrspatial/geotiff/tests/read/test_georef.py``
- `#2340`_
* - ``reader.windowed`` -- shifted-transform parity (eager + dask)
- stable
@@ -492,29 +492,25 @@ VRT supported subset
- advanced
- VRT over compatible GeoTIFF sources returns the same pixels and
attrs through eager and dask paths.
- - ``xrspatial/geotiff/tests/test_vrt_backend_coverage_2026_05_11.py``,
- ``xrspatial/geotiff/tests/test_golden_corpus_vrt_1930.py``,
- ``xrspatial/geotiff/tests/test_vrt_finalization_parity_2162.py``
+ - ``xrspatial/geotiff/tests/vrt/test_parity.py``,
+ ``xrspatial/geotiff/tests/test_golden_corpus_vrt_1930.py``
- `#2342`_
* - VRT default ``missing_sources='raise'``
- stable
- Missing source files fail at construction, not at compute.
- - ``xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py``,
- ``xrspatial/geotiff/tests/test_read_vrt_default_missing_sources_1860.py``,
- ``xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py``
+ - ``xrspatial/geotiff/tests/vrt/test_missing_sources.py``
- `#2342`_
* - VRT ``missing_sources='warn'`` opt-in
- advanced
- Holes surface as the band sentinel, ``attrs['vrt_holes']`` is set,
and a :class:`GeoTIFFFallbackWarning` is emitted.
- ``xrspatial/geotiff/tests/vrt/test_metadata.py``,
- ``xrspatial/geotiff/tests/vrt/test_missing_sources.py``,
- ``xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py``
+ ``xrspatial/geotiff/tests/vrt/test_missing_sources.py``
- `#2342`_
* - VRT source / dest rectangle validation
- stable
- Out-of-bounds source or destination rectangles raise at construction.
- - ``xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py``,
+ - ``xrspatial/geotiff/tests/vrt/test_validation.py``,
``xrspatial/geotiff/tests/vrt/test_window.py``
- `#2342`_
* - VRT path containment
@@ -541,8 +537,7 @@ VRT supported subset
- advanced
- Chunked VRT reads return the same shape, coords, attrs, and values
as eager reads on the supported subset.
- - ``xrspatial/geotiff/tests/vrt/test_window.py``,
- ``xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py``
+ - ``xrspatial/geotiff/tests/vrt/test_window.py``
- `#2342`_
* - VRT single-parse contract
- stable
@@ -566,7 +561,7 @@ VRT supported subset
* - ``write_vrt``
- advanced
- Writer rejects source-incompatibility cases at the writer boundary.
- - ``xrspatial/geotiff/tests/test_to_geotiff_vrt_tiled_validation_1862.py``
+ - ``xrspatial/geotiff/tests/vrt/test_validation.py``
- `#2342`_
Sidecar and overview interactions
diff --git a/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py b/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py
deleted file mode 100644
index 07aa487af..000000000
--- a/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py
+++ /dev/null
@@ -1,165 +0,0 @@
-"""VRT ``SrcRect`` must reject negative sizes and offsets up front.
-
-The ``DstRect`` validation added for issue #1737 only covers one half of the
-SimpleSource rectangle pair. A malformed ```` (or
-negative offset) reaches ``read_to_array`` as a bad window, raises
-``ValueError`` for the out-of-range window, and is then swallowed by the
-lenient source-read ``try/except`` that is meant to handle *missing or
-unreadable source files* -- not malformed XML rectangles.
-
-Net effect before this fix: malformed XML becomes a single warning plus a
-zero-filled hole in the mosaic. In strict mode the same condition surfaces
-the swallowed error inside the try. Either way, the caller cannot tell the
-malformed-VRT case from a legitimate missing tile.
-
-Regression test for issue #1784: ``read_vrt`` should refuse the read with a
-``ValueError`` that names the offending SrcRect field, in both lenient and
-strict modes.
-"""
-from __future__ import annotations
-
-import os
-import tempfile
-import warnings
-
-import numpy as np
-import pytest
-
-from xrspatial.geotiff import to_geotiff
-from xrspatial.geotiff._vrt import read_vrt
-
-
-def _write_source(td: str, name: str = 'src.tif') -> str:
- """Write a 10x10 uint8 source GeoTIFF and return its path."""
- src_path = os.path.join(td, name)
- to_geotiff(np.zeros((10, 10), dtype=np.uint8), src_path,
- compression='none')
- return src_path
-
-
-def _write_vrt(td: str, *,
- src_x_off: int = 0, src_y_off: int = 0,
- src_x_size: int = 10, src_y_size: int = 10,
- src_filename: str = 'src.tif',
- raster_x: int = 100, raster_y: int = 100) -> str:
- """Write a VRT with a single SimpleSource using the given SrcRect."""
- vrt_path = os.path.join(td, 'mosaic.vrt')
- vrt_xml = (
- f'\n'
- f' \n'
- f' \n'
- f' {src_filename}'
- f'\n'
- f' 1\n'
- f' \n'
- f' \n'
- f' \n'
- f' \n'
- f'\n'
- )
- with open(vrt_path, 'w') as f:
- f.write(vrt_xml)
- return vrt_path
-
-
-def test_negative_srcrect_x_size_rejected():
- """Negative ``SrcRect xSize`` surfaces as ``ValueError`` rather than
- being swallowed by the missing-source fallback."""
- with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
- _write_source(td)
- vrt_path = _write_vrt(td, src_x_size=-50)
- with pytest.raises(ValueError, match=r"SrcRect.*negative size"):
- read_vrt(vrt_path)
-
-
-def test_negative_srcrect_y_size_rejected():
- """Negative ``SrcRect ySize`` surfaces as ``ValueError``."""
- with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
- _write_source(td)
- vrt_path = _write_vrt(td, src_y_size=-50)
- with pytest.raises(ValueError, match=r"SrcRect.*negative size"):
- read_vrt(vrt_path)
-
-
-def test_negative_srcrect_x_off_rejected():
- """Negative ``SrcRect xOff`` surfaces as ``ValueError``."""
- with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
- _write_source(td)
- vrt_path = _write_vrt(td, src_x_off=-10)
- with pytest.raises(ValueError, match=r"SrcRect.*negative offset"):
- read_vrt(vrt_path)
-
-
-def test_negative_srcrect_y_off_rejected():
- """Negative ``SrcRect yOff`` surfaces as ``ValueError``."""
- with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
- _write_source(td)
- vrt_path = _write_vrt(td, src_y_off=-10)
- with pytest.raises(ValueError, match=r"SrcRect.*negative offset"):
- read_vrt(vrt_path)
-
-
-def test_negative_srcrect_message_names_bad_values():
- """The error message must name the malformed field and its value so the
- caller can find the offending ```` in the VRT."""
- with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
- _write_source(td)
- vrt_path = _write_vrt(td, src_x_size=-7, src_y_size=-3)
- with pytest.raises(ValueError) as excinfo:
- read_vrt(vrt_path)
- msg = str(excinfo.value)
- assert "SrcRect" in msg
- assert "-7" in msg
- assert "-3" in msg
-
-
-def test_missing_source_still_takes_lenient_warning_path():
- """A *valid* SrcRect with a missing source file must still hit the
- lenient warning path -- the new SrcRect check must not swallow the
- missing-file case that PR #1675 narrowed.
-
- Issue #1843 flipped the default to ``missing_sources='raise'`` so
- this test now passes ``'warn'`` explicitly to exercise the opt-in
- lenient branch.
- """
- from xrspatial.geotiff import GeoTIFFFallbackWarning
- with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
- # No source file written; SrcRect itself is well-formed.
- vrt_path = _write_vrt(td, src_filename='does_not_exist.tif')
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter('always')
- arr, _ = read_vrt(vrt_path, missing_sources='warn')
- # The lenient path must produce a fallback warning and a result
- # array (zero-filled at the hole), not raise.
- fallback = [w for w in caught
- if issubclass(w.category, GeoTIFFFallbackWarning)]
- assert fallback, (
- "expected a GeoTIFFFallbackWarning for the missing source"
- )
- assert arr.shape == (100, 100)
-
-
-def test_valid_srcrect_reads_normally():
- """A well-formed SrcRect with a real source must succeed -- no false
- positives on valid VRTs."""
- with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
- _write_source(td)
- vrt_path = _write_vrt(td, raster_x=10, raster_y=10)
- arr, _ = read_vrt(vrt_path)
- assert arr.shape == (10, 10)
- # Source is all zeros and DstRect covers the full VRT raster, so
- # the entire output must be zero.
- assert np.all(arr == 0)
-
-
-def test_negative_srcrect_raises_under_strict_mode(monkeypatch):
- """The check runs *before* the lenient try/except, so strict mode and
- lenient mode must both raise. Pinning strict mode here prevents a
- regression where the check accidentally moves back inside the try."""
- monkeypatch.setenv('XRSPATIAL_GEOTIFF_STRICT', '1')
- with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
- _write_source(td)
- vrt_path = _write_vrt(td, src_x_size=-50)
- with pytest.raises(ValueError, match=r"SrcRect.*negative size"):
- read_vrt(vrt_path)
diff --git a/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py b/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py
deleted file mode 100644
index ae2dc996c..000000000
--- a/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py
+++ /dev/null
@@ -1,108 +0,0 @@
-"""Regression test for #1685: ``open_geotiff`` silently dropped
-``overview_level`` and ``on_gpu_failure`` when the source was a VRT.
-
-The api-consistency sweep on 2026-05-12 flagged that ``open_geotiff``
-documents both kwargs as supported, but the VRT dispatch branch routes
-to ``read_vrt`` whose signature accepts neither. Calls like
-``open_geotiff('mosaic.vrt', overview_level=2)`` returned full-resolution
-data with no warning. Issue #1561 fixed the same class of bug for the
-dask and GPU dispatch branches; this one closes the remaining gap by
-refusing the unsupported combinations up front.
-"""
-from __future__ import annotations
-
-import numpy as np
-import pytest
-import xarray as xr
-
-from xrspatial.geotiff import open_geotiff, to_geotiff, write_vrt
-
-
-@pytest.fixture
-def small_vrt(tmp_path):
- """Two-tile uint16 VRT we can hand to ``open_geotiff``."""
- arr_a = np.arange(16, dtype=np.uint16).reshape(4, 4)
- da_a = xr.DataArray(
- arr_a,
- dims=["y", "x"],
- coords={
- "y": np.array([0.5, 1.5, 2.5, 3.5]),
- "x": np.array([0.5, 1.5, 2.5, 3.5]),
- },
- attrs={"crs": 4326},
- )
- tile_a = tmp_path / "tile_a.tif"
- to_geotiff(da_a, str(tile_a))
-
- arr_b = np.arange(16, 32, dtype=np.uint16).reshape(4, 4)
- da_b = xr.DataArray(
- arr_b,
- dims=["y", "x"],
- coords={
- "y": np.array([0.5, 1.5, 2.5, 3.5]),
- "x": np.array([4.5, 5.5, 6.5, 7.5]),
- },
- attrs={"crs": 4326},
- )
- tile_b = tmp_path / "tile_b.tif"
- to_geotiff(da_b, str(tile_b))
-
- vrt_path = tmp_path / "mosaic.vrt"
- write_vrt(str(vrt_path), [str(tile_a), str(tile_b)])
- return str(vrt_path)
-
-
-def test_open_geotiff_vrt_rejects_overview_level(small_vrt):
- """``overview_level`` plus ``.vrt`` raises ValueError, not a silent drop."""
- with pytest.raises(ValueError, match="overview_level is not supported"):
- open_geotiff(small_vrt, overview_level=1)
-
-
-def test_open_geotiff_vrt_accepts_overview_level_zero(small_vrt):
- """``overview_level=0`` is documented as full resolution (the default),
- so passing it on a VRT is semantically equivalent to omitting the kwarg
- and must not raise. Only non-zero overview levels are rejected.
- """
- da = open_geotiff(small_vrt, overview_level=0)
- # Same shape as the no-kwarg case: two 4x4 tiles side-by-side.
- assert da.shape == (4, 8)
-
-
-def test_open_geotiff_vrt_rejects_on_gpu_failure_with_gpu_true(small_vrt):
- """``on_gpu_failure='strict'`` plus ``.vrt`` (gpu=True) is refused."""
- # The check fires before any GPU code runs; no CUDA needed.
- with pytest.raises(ValueError, match="on_gpu_failure is not supported"):
- open_geotiff(small_vrt, gpu=True, on_gpu_failure="strict")
-
-
-def test_open_geotiff_vrt_without_unsupported_kwargs_still_works(small_vrt):
- """The previously-accepted kwargs still flow through to ``read_vrt``."""
- da = open_geotiff(small_vrt)
- # Two 4x4 tiles side-by-side; result is 4x8.
- assert da.shape == (4, 8)
-
-
-def test_open_geotiff_vrt_with_window_still_works(small_vrt):
- """``window`` was already forwarded; this regression should not break it."""
- da = open_geotiff(small_vrt, window=(0, 1, 4, 5))
- assert da.shape == (4, 4)
-
-
-def test_open_geotiff_non_vrt_still_accepts_overview_level(tmp_path):
- """The fix is VRT-specific; ``.tif`` sources keep accepting overview_level."""
- # Build a single COG with one overview so overview_level=0 round-trips.
- arr = np.arange(64, dtype=np.uint16).reshape(8, 8)
- da = xr.DataArray(
- arr,
- dims=["y", "x"],
- coords={
- "y": np.arange(8, dtype=np.float64),
- "x": np.arange(8, dtype=np.float64),
- },
- attrs={"crs": 4326},
- )
- tif_path = tmp_path / "with_ovr.tif"
- to_geotiff(da, str(tif_path), cog=True, tile_size=16, overview_levels=[2])
- # Either overview_level value must be accepted without raising.
- open_geotiff(str(tif_path), overview_level=0)
- open_geotiff(str(tif_path), overview_level=1)
diff --git a/xrspatial/geotiff/tests/test_read_geotiff_dask_vrt_kwargs_1795.py b/xrspatial/geotiff/tests/test_read_geotiff_dask_vrt_kwargs_1795.py
deleted file mode 100644
index a24887d0d..000000000
--- a/xrspatial/geotiff/tests/test_read_geotiff_dask_vrt_kwargs_1795.py
+++ /dev/null
@@ -1,56 +0,0 @@
-"""Direct read_geotiff_dask(.vrt) must forward VRT kwargs (#1795)."""
-from __future__ import annotations
-
-import os
-
-import numpy as np
-import pytest
-
-from xrspatial.geotiff import read_geotiff_dask, to_geotiff
-
-
-def _write_vrt(vrt_path, source_name, *, bands=1):
- band_xml = []
- for i in range(bands):
- band_xml.append(
- f' \n'
- ' \n'
- f' {source_name}'
- '\n'
- f' {i + 1}\n'
- ' \n'
- ' \n'
- ' \n'
- ' \n'
- )
- vrt_path.write_text(
- '\n'
- + ''.join(band_xml)
- + '\n'
- )
-
-
-def test_direct_read_geotiff_dask_vrt_forwards_window_and_band(tmp_path):
- arr = np.arange(4 * 6 * 2, dtype=np.float32).reshape(4, 6, 2)
- src = tmp_path / "tmp_1797_source.tif"
- to_geotiff(arr, str(src), compression='none')
- vrt = tmp_path / "tmp_1797_source.vrt"
- _write_vrt(vrt, os.path.basename(src), bands=2)
-
- got = read_geotiff_dask(
- str(vrt), chunks=2, window=(1, 2, 4, 6), band=1,
- )
-
- assert got.shape == (3, 4)
- np.testing.assert_array_equal(got.values, arr[1:4, 2:6, 1])
-
-
-def test_direct_read_geotiff_dask_vrt_forwards_max_pixels(tmp_path):
- arr = np.arange(24, dtype=np.float32).reshape(4, 6)
- src = tmp_path / "tmp_1797_source_cap.tif"
- to_geotiff(arr, str(src), compression='none')
- vrt = tmp_path / "tmp_1797_source_cap.vrt"
- _write_vrt(vrt, os.path.basename(src))
-
- with pytest.raises(ValueError, match="exceed"):
- read_geotiff_dask(str(vrt), chunks=2, max_pixels=10)
diff --git a/xrspatial/geotiff/tests/test_read_vrt_default_missing_sources_1860.py b/xrspatial/geotiff/tests/test_read_vrt_default_missing_sources_1860.py
deleted file mode 100644
index 7f278915f..000000000
--- a/xrspatial/geotiff/tests/test_read_vrt_default_missing_sources_1860.py
+++ /dev/null
@@ -1,98 +0,0 @@
-"""Regression test for #1860: the public ``read_vrt`` and
-``open_geotiff(.vrt)`` default ``missing_sources`` to ``'raise'``, matching
-the internal ``_vrt.read_vrt`` default set in #1843.
-
-Before #1860 the public wrapper defaulted to ``'warn'``, which silently
-overrode the internal ``'raise'`` default and let unreadable backing
-sources produce zero-fill holes on integer rasters with no exception.
-Callers that want the lenient partial-mosaic behaviour pass
-``missing_sources='warn'`` explicitly.
-"""
-from __future__ import annotations
-
-import pytest
-
-from xrspatial.geotiff import GeoTIFFFallbackWarning, open_geotiff, read_vrt
-
-
-def _write_missing_source_vrt(path):
- path.write_text(
- '\n'
- ' \n'
- ' \n'
- ' missing_1860.tif'
- '\n'
- ' 1\n'
- ' \n'
- ' \n'
- ' \n'
- ' \n'
- '\n'
- )
-
-
-def test_public_read_vrt_default_raises_on_unreadable_source(tmp_path):
- """Public ``read_vrt`` with no ``missing_sources`` kwarg must raise.
-
- Before #1860 the default was ``'warn'`` and the call returned a
- partial mosaic with ``attrs['vrt_holes']`` instead of raising. With
- the default aligned to the internal ``_vrt.read_vrt`` default of
- ``'raise'``, the unreadable source must now halt the call.
- """
- vrt = tmp_path / "tmp_1860_public_default_raise.vrt"
- _write_missing_source_vrt(vrt)
-
- with pytest.raises((OSError, ValueError)):
- read_vrt(str(vrt))
-
-
-def test_open_geotiff_vrt_default_raises_on_unreadable_source(tmp_path):
- """``open_geotiff(vrt_path)`` with no ``missing_sources`` kwarg must
- raise on an unreadable backing source.
-
- ``open_geotiff`` forwards ``missing_sources`` to ``read_vrt`` only
- when the caller passed it explicitly; otherwise the public
- ``read_vrt`` default applies. With that default now ``'raise'``, the
- silent-degradation path is closed for ``open_geotiff`` callers too.
- """
- vrt = tmp_path / "tmp_1860_open_geotiff_default_raise.vrt"
- _write_missing_source_vrt(vrt)
-
- with pytest.raises((OSError, ValueError)):
- open_geotiff(str(vrt))
-
-
-def test_public_read_vrt_explicit_warn_preserves_lenient_behaviour(tmp_path):
- """``missing_sources='warn'`` is still the escape hatch for partial
- mosaics on the public ``read_vrt`` API.
-
- The warning fires, the call returns, and ``attrs['vrt_holes']`` is
- populated with the skipped source record. Pinning this keeps the
- historical contract available to callers that opt in.
- """
- vrt = tmp_path / "tmp_1860_public_explicit_warn.vrt"
- _write_missing_source_vrt(vrt)
-
- with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
- da = read_vrt(str(vrt), missing_sources='warn')
-
- assert 'vrt_holes' in da.attrs
- assert da.attrs['vrt_holes'][0]['source'].endswith('missing_1860.tif')
-
-
-def test_open_geotiff_vrt_explicit_warn_preserves_lenient_behaviour(tmp_path):
- """``open_geotiff(vrt_path, missing_sources='warn')`` still produces
- a partial mosaic with the hole record on the DataArray attrs.
-
- The forwarding branch in ``open_geotiff`` only runs when the caller
- explicitly passes ``missing_sources``; this test pins that branch
- against regressions.
- """
- vrt = tmp_path / "tmp_1860_open_geotiff_explicit_warn.vrt"
- _write_missing_source_vrt(vrt)
-
- with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
- da = open_geotiff(str(vrt), missing_sources='warn')
-
- assert 'vrt_holes' in da.attrs
- assert da.attrs['vrt_holes'][0]['source'].endswith('missing_1860.tif')
diff --git a/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py b/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py
deleted file mode 100644
index f60162b5a..000000000
--- a/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py
+++ /dev/null
@@ -1,78 +0,0 @@
-"""read_vrt(chunks=...) should build lazy window tasks (#1798)."""
-from __future__ import annotations
-
-import os
-import warnings
-
-import numpy as np
-import pytest
-
-from xrspatial.geotiff import read_vrt, to_geotiff
-
-
-def _write_vrt(vrt_path, source_name):
- vrt_path.write_text(
- '\n'
- ' \n'
- ' \n'
- f' {source_name}'
- '\n'
- ' 1\n'
- ' \n'
- ' \n'
- ' \n'
- ' \n'
- '\n'
- )
-
-
-def test_read_vrt_chunks_matches_eager_values(tmp_path):
- arr = np.arange(24, dtype=np.float32).reshape(4, 6)
- src = tmp_path / "tmp_1798_source.tif"
- to_geotiff(arr, str(src), compression='none')
- vrt = tmp_path / "tmp_1798_source.vrt"
- _write_vrt(vrt, os.path.basename(src))
-
- eager = read_vrt(str(vrt))
- lazy = read_vrt(str(vrt), chunks=2)
-
- assert lazy.data.chunks == ((2, 2), (2, 2, 2))
- np.testing.assert_array_equal(lazy.compute().values, eager.values)
-
-
-def test_read_vrt_chunks_does_not_read_sources_during_construction(tmp_path):
- """The chunked path must not eagerly decode sources at build.
-
- Construction does run a cheap ``os.path.exists`` sweep over each
- source (to populate ``vrt_holes`` and to fail-fast under the
- default ``missing_sources='raise'``), but it must not open or
- decode any source file. This test pairs the missing source with
- the lenient ``missing_sources='warn'`` opt-in so the build
- succeeds; the assertion is that no decode-time warnings (which
- would only fire if the source were actually read) leak out
- during construction.
- """
- vrt = tmp_path / "tmp_1798_missing_source.vrt"
- _write_vrt(vrt, "missing.tif")
-
- with warnings.catch_warnings(record=True) as caught:
- lazy = read_vrt(str(vrt), chunks=2, missing_sources="warn")
-
- # Build-time warnings from the decode codecs should be absent.
- # ``missing_sources='warn'`` does not warn at build time either; the
- # per-task ``GeoTIFFFallbackWarning`` only fires when a chunk
- # actually decodes the missing tile during ``compute()``.
- assert caught == []
- assert hasattr(lazy.data, 'compute')
-
-
-def test_read_vrt_chunks_rejects_excessive_task_count(tmp_path):
- vrt = tmp_path / "tmp_1798_huge_extent.vrt"
- vrt.write_text(
- '\n'
- ' \n'
- '\n'
- )
-
- with pytest.raises(ValueError, match="task cap"):
- read_vrt(str(vrt), chunks=1, max_pixels=20_000_000_000)
diff --git a/xrspatial/geotiff/tests/test_to_geotiff_vrt_tiled_validation_1862.py b/xrspatial/geotiff/tests/test_to_geotiff_vrt_tiled_validation_1862.py
deleted file mode 100644
index ff160b9ad..000000000
--- a/xrspatial/geotiff/tests/test_to_geotiff_vrt_tiled_validation_1862.py
+++ /dev/null
@@ -1,66 +0,0 @@
-"""Regression tests for issue #1862.
-
-``to_geotiff(..., '.vrt', tiled=False, tile_size=0)`` previously warned that
-``tile_size`` was ignored, then crashed with ``ZeroDivisionError`` inside
-``_write_vrt_tiled`` because the VRT writer always tiles. The ``tiled=False``
-flag was never honored on the VRT path, and ``tile_size`` was only validated
-when ``tiled=True``, so an invalid ``tile_size=0`` slipped through.
-
-``to_geotiff`` now refuses ``tiled=False`` for ``.vrt`` paths up front with a
-``ValueError``, and validates ``tile_size`` unconditionally on the VRT
-branch so callers get a clear error before the writer divides by it.
-"""
-from __future__ import annotations
-
-import os
-
-import numpy as np
-import pytest
-import xarray as xr
-
-from xrspatial.geotiff import to_geotiff
-
-
-def _make_da(shape=(64, 64)):
- arr = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)
- return xr.DataArray(arr, dims=['y', 'x'])
-
-
-def test_vrt_rejects_tiled_false_1862(tmp_path):
- """``tiled=False`` is not a valid request for VRT output."""
- da = _make_da()
- out = os.path.join(str(tmp_path), 'vrt_tiled_false_1862.vrt')
- with pytest.raises(ValueError, match='tiled=False is not compatible'):
- to_geotiff(da, out, tiled=False)
-
-
-def test_vrt_tiled_false_zero_tile_size_raises_value_error_1862(tmp_path):
- """``tiled=False`` plus ``tile_size=0`` must raise ``ValueError``,
- not the previous ``ZeroDivisionError`` from inside the writer."""
- da = _make_da()
- out = os.path.join(
- str(tmp_path), 'vrt_tiled_false_zero_1862.vrt')
- with pytest.raises(ValueError) as exc:
- to_geotiff(da, out, tiled=False, tile_size=0)
- # Either the tiled=False guard or the tile_size validator may fire
- # first; both produce ValueError, never ZeroDivisionError.
- assert not isinstance(exc.value, ZeroDivisionError)
-
-
-def test_vrt_zero_tile_size_default_tiled_raises_value_error_1862(tmp_path):
- """With the default ``tiled=True``, ``tile_size=0`` must surface from
- the shared ``_validate_tile_size`` check, not a deep ``ZeroDivisionError``.
- """
- da = _make_da()
- out = os.path.join(
- str(tmp_path), 'vrt_default_tiled_zero_1862.vrt')
- with pytest.raises(ValueError, match='tile_size'):
- to_geotiff(da, out, tile_size=0)
-
-
-def test_vrt_default_args_still_succeeds_1862(tmp_path):
- """Sanity: the default-args VRT write path is unaffected by the fix."""
- da = _make_da()
- out = os.path.join(str(tmp_path), 'vrt_default_1862.vrt')
- to_geotiff(da, out)
- assert os.path.exists(out)
diff --git a/xrspatial/geotiff/tests/test_vrt_backend_coverage_2026_05_11.py b/xrspatial/geotiff/tests/test_vrt_backend_coverage_2026_05_11.py
deleted file mode 100644
index 00b604b07..000000000
--- a/xrspatial/geotiff/tests/test_vrt_backend_coverage_2026_05_11.py
+++ /dev/null
@@ -1,243 +0,0 @@
-"""Backend / parameter coverage for the VRT read path.
-
-The non-VRT read backends (``open_geotiff`` / ``read_geotiff_dask`` /
-``read_geotiff_gpu``) all have dedicated multi-backend coverage; the
-VRT route through ``read_vrt`` historically lacked it. The eager
-numpy path has dense coverage, but the GPU and dask+GPU paths the
-``read_vrt`` body explicitly handles (the ``if gpu: cupy.asarray``
-and trailing ``result.chunk(...)`` blocks) were only reachable
-indirectly via ``open_geotiff('.vrt', gpu=True)`` / ``..., chunks=N)``
-and went untested.
-
-The error-rejection branches for file-like sources combined with
-``gpu=True`` / ``chunks=N`` on ``open_geotiff`` were likewise covered
-only by inspection.
-
-Test coverage gap sweep 2026-05-11 (pass 3): close the VRT backend
-coverage gap and the file-like-rejection parameter gaps.
-"""
-from __future__ import annotations
-
-import importlib.util
-import io
-import os
-
-import numpy as np
-import pytest
-
-from xrspatial.geotiff import open_geotiff, read_vrt, to_geotiff
-from xrspatial.geotiff._vrt import write_vrt as _write_vrt_internal
-
-# ---------------------------------------------------------------------------
-# GPU gating: matches the ``_gpu_available`` / ``_HAS_GPU`` predicate that
-# the rest of the geotiff test suite (e.g. test_backend_kwarg_parity_1561,
-# test_attrs_parity_1548) uses, so future GPU tests stay greppable.
-# ---------------------------------------------------------------------------
-
-
-def _gpu_available() -> bool:
- if importlib.util.find_spec("cupy") is None:
- return False
- try:
- import cupy
- return bool(cupy.cuda.is_available())
- except Exception:
- return False
-
-
-_HAS_GPU = _gpu_available()
-_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
-
-
-# ---------------------------------------------------------------------------
-# Fixtures
-# ---------------------------------------------------------------------------
-
-@pytest.fixture
-def single_tile_vrt(tmp_path):
- """A trivial single-tile VRT plus its source array.
-
- Float32 source so the VRT band advertises Float32 and the eager
- numpy read returns float32 (lets dtype-cast tests assert a real
- type change).
- """
- arr = np.arange(16, dtype=np.float32).reshape(4, 4)
- tile_path = str(tmp_path / 'tile.tif')
- to_geotiff(arr, tile_path)
- vrt_path = str(tmp_path / 'mosaic.vrt')
- _write_vrt_internal(vrt_path, [tile_path])
- return vrt_path, arr
-
-
-# ---------------------------------------------------------------------------
-# Cat 1: read_vrt backend coverage (GPU + dask+GPU)
-# ---------------------------------------------------------------------------
-
-@_gpu_only
-class TestReadVrtGpuBackend:
- """``read_vrt(gpu=True)`` returns a CuPy-backed DataArray.
-
- The eager VRT decode runs on the CPU (the VRT internal reader
- walks SimpleSources and assembles via windowed reads) then the
- final ``if gpu: arr = cupy.asarray(arr)`` block transfers to GPU.
- A regression that dropped the transfer block would have shipped
- a numpy DataArray instead of a CuPy one; this test pins that.
- """
-
- def test_read_vrt_gpu_returns_cupy(self, single_tile_vrt):
- import cupy
-
- vrt_path, arr = single_tile_vrt
- da = read_vrt(vrt_path, gpu=True)
- assert isinstance(da.data, cupy.ndarray), (
- f"expected cupy.ndarray, got {type(da.data).__name__}"
- )
- np.testing.assert_array_equal(da.data.get(), arr)
-
- def test_read_vrt_gpu_chunks_returns_dask_cupy(self, single_tile_vrt):
- """``read_vrt(gpu=True, chunks=N)`` is the documented dask+cupy
- VRT entry point. The trailing ``result.chunk(...)`` block has
- to wrap the cupy backing without falling back to numpy.
- """
- import cupy
- import dask.array as da_mod
-
- vrt_path, arr = single_tile_vrt
- result = read_vrt(vrt_path, gpu=True, chunks=2)
-
- assert isinstance(result.data, da_mod.Array), (
- f"expected dask Array, got {type(result.data).__name__}"
- )
- # _meta tells distributed Dask the underlying array is cupy.
- # A numpy meta here would cause optimizers to silently move
- # data back to host.
- assert isinstance(result.data._meta, cupy.ndarray), (
- f"expected cupy._meta, got "
- f"{type(result.data._meta).__module__}."
- f"{type(result.data._meta).__name__}"
- )
- # Chunks honour the spatial spec; the band axis (absent here)
- # would chunk as a single block.
- assert result.data.chunks == ((2, 2), (2, 2))
-
- computed = result.compute()
- assert isinstance(computed.data, cupy.ndarray)
- np.testing.assert_array_equal(computed.data.get(), arr)
-
- def test_open_geotiff_vrt_gpu_routes_through(self, single_tile_vrt):
- """``open_geotiff('.vrt', gpu=True)`` dispatches to ``read_vrt``
- and surfaces the cupy data unchanged. The dispatcher branch
- is one line in ``open_geotiff`` but a refactor that dropped
- ``gpu=gpu`` from the forwarded kwargs would silently produce
- a numpy DataArray.
- """
- import cupy
-
- vrt_path, arr = single_tile_vrt
- da = open_geotiff(vrt_path, gpu=True)
- assert isinstance(da.data, cupy.ndarray)
- np.testing.assert_array_equal(da.data.get(), arr)
-
- def test_open_geotiff_vrt_gpu_chunks(self, single_tile_vrt):
- """``open_geotiff('.vrt', gpu=True, chunks=N)`` is the combined
- dask+cupy entry point. Same dispatch test as the gpu-only
- variant but also pins the chunk forwarding.
- """
- import cupy
- import dask.array as da_mod
-
- vrt_path, arr = single_tile_vrt
- result = open_geotiff(vrt_path, gpu=True, chunks=2)
-
- assert isinstance(result.data, da_mod.Array)
- assert isinstance(result.data._meta, cupy.ndarray)
- assert result.data.chunks == ((2, 2), (2, 2))
-
- computed = result.compute()
- np.testing.assert_array_equal(computed.data.get(), arr)
-
-
-# ---------------------------------------------------------------------------
-# Cat 4: read_vrt parameter coverage (dtype / name)
-# ---------------------------------------------------------------------------
-
-class TestReadVrtDtypeKwarg:
- """``read_vrt(dtype=...)`` casts after decode and validates the cast."""
-
- def test_safe_widening_cast(self, single_tile_vrt):
- """float32 -> float64 is permitted; values survive bit-for-bit."""
- vrt_path, arr = single_tile_vrt
- da = read_vrt(vrt_path, dtype='float64')
- assert da.dtype == np.float64
- np.testing.assert_array_equal(da.values, arr.astype(np.float64))
-
- def test_float_to_int_rejected(self, single_tile_vrt):
- """Float-to-int is lossy and refused with a descriptive error.
- Mirrors ``open_geotiff(dtype=...)`` behaviour so callers see the
- same gate on both entry points.
- """
- vrt_path, _ = single_tile_vrt
- with pytest.raises(ValueError, match="Cannot cast float"):
- read_vrt(vrt_path, dtype='int32')
-
-
-class TestReadVrtNameKwarg:
- """``read_vrt(name='custom')`` overrides the file-stem derivation."""
-
- def test_explicit_name_used(self, single_tile_vrt):
- vrt_path, _ = single_tile_vrt
- da = read_vrt(vrt_path, name='custom_name')
- assert da.name == 'custom_name'
-
- def test_default_name_from_stem(self, single_tile_vrt):
- vrt_path, _ = single_tile_vrt
- da = read_vrt(vrt_path)
- # mosaic.vrt -> mosaic
- assert da.name == os.path.splitext(os.path.basename(vrt_path))[0]
-
-
-# ---------------------------------------------------------------------------
-# Cat 4: open_geotiff file-like + backend kwarg rejection
-# ---------------------------------------------------------------------------
-
-class TestOpenGeotiffFileLikeKwargRejection:
- """File-like sources reject ``gpu=True`` and ``chunks=N`` up front.
-
- The check sits in ``open_geotiff`` (not the underlying readers)
- because both downstream paths re-open the source by path from
- worker tasks. A buffer passed through would either raise deep
- inside dask graph construction or silently behave as if the
- buffer were a string path.
- """
-
- @staticmethod
- def _buf_with_tiff(tmp_path):
- arr = np.zeros((4, 4), dtype=np.float32)
- path = str(tmp_path / 'src.tif')
- to_geotiff(arr, path)
- with open(path, 'rb') as fh:
- return io.BytesIO(fh.read())
-
- def test_gpu_with_file_like_raises(self, tmp_path):
- buf = self._buf_with_tiff(tmp_path)
- with pytest.raises(ValueError, match="gpu=True is not supported"):
- open_geotiff(buf, gpu=True)
-
- def test_chunks_with_file_like_raises(self, tmp_path):
- buf = self._buf_with_tiff(tmp_path)
- with pytest.raises(ValueError, match="chunks=.*file-like"):
- open_geotiff(buf, chunks=64)
-
- def test_chunks_with_pathlib_path_still_works(self, tmp_path):
- """Sanity-check: pathlib.Path is not file-like and must keep
- working through the dask path. Otherwise the file-like gate
- would also lock out Path inputs.
- """
- arr = np.arange(16, dtype=np.float32).reshape(4, 4)
- path = tmp_path / 'sample.tif'
- to_geotiff(arr, str(path))
-
- import dask.array as da_mod
- result = open_geotiff(path, chunks=2)
- assert isinstance(result.data, da_mod.Array)
- np.testing.assert_array_equal(np.asarray(result.data), arr)
diff --git a/xrspatial/geotiff/tests/test_vrt_backend_parity_2321.py b/xrspatial/geotiff/tests/test_vrt_backend_parity_2321.py
deleted file mode 100644
index 39c42b7fe..000000000
--- a/xrspatial/geotiff/tests/test_vrt_backend_parity_2321.py
+++ /dev/null
@@ -1,632 +0,0 @@
-"""Backend parity for VRT reads with sidecar/overview interactions (#2321 sub-task 4).
-
-Sub-task 4 of issue #2321 (parent) locks down the VRT support contract by
-asserting eager / dask parity on the surface that is most likely to drift:
-
-* metadata (``attrs['transform']``, ``attrs['crs']``, ``attrs['crs_wkt']``,
- ``attrs['georef_status']``), not just pixel values;
-* coords -- a windowed read must shift the ``y`` / ``x`` arrays consistently
- between the eager and the lazy code paths;
-* sidecar interactions -- a VRT whose backing source is a GeoTIFF with an
- external ``.tif.ovr`` pyramid must surface the same georef attrs (and
- pixel values) as an equivalent VRT over the inline-overview fixture.
-
-The shape mirrors ``test_backend_parity_matrix.py``: a small declarative
-fixture / backend matrix, one ``assert_parity`` helper, and a single
-parametrised test. We do not re-invent helpers -- the materialisation
-and pixel-comparison primitives match the matrix file so a future move
-to the shared parity harness is mechanical.
-
-VRT fixtures use the existing ``write_vrt`` writer (``xrspatial.geotiff
-._vrt.write_vrt``) on top of ``to_geotiff`` source tiles, plus the
-bundled ``overview_external_ovr_uint16.tif`` / ``.tif.ovr`` sidecar pair
-from ``golden_corpus/fixtures/`` (and its inline-overview counterpart).
-
-Acceptance per the parent issue: the VRT path cannot pass by returning
-correct pixel values with wrong georeferencing attrs. Windowed eager
-and lazy VRT reads agree on shape, coords, attrs, and values.
-"""
-from __future__ import annotations
-
-import pathlib
-import shutil
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Callable
-
-import numpy as np
-import pytest
-import xarray as xr
-
-from xrspatial.geotiff import open_geotiff, to_geotiff
-from xrspatial.geotiff._vrt import write_vrt as _write_vrt_internal
-
-
-# ---------------------------------------------------------------------------
-# Fixture paths shipped under golden_corpus.
-# ---------------------------------------------------------------------------
-
-_GOLDEN = (
- pathlib.Path(__file__).resolve().parent
- / "golden_corpus"
- / "fixtures"
-)
-_SIDECAR_TIF = _GOLDEN / "overview_external_ovr_uint16.tif"
-_SIDECAR_OVR = _GOLDEN / "overview_external_ovr_uint16.tif.ovr"
-_INLINE_OVR_TIF = _GOLDEN / "overview_internal_uint16.tif"
-
-
-def _sidecar_fixture_or_skip() -> Path:
- """Return the bundled sidecar TIFF or skip if absent."""
- if not _SIDECAR_TIF.exists() or not _SIDECAR_OVR.exists():
- pytest.skip("sidecar overview fixture not present in golden_corpus")
- return _SIDECAR_TIF
-
-
-def _inline_overview_fixture_or_skip() -> Path:
- if not _INLINE_OVR_TIF.exists():
- pytest.skip("inline overview fixture not present in golden_corpus")
- return _INLINE_OVR_TIF
-
-
-# ---------------------------------------------------------------------------
-# Materialisation + comparison helpers
-# (mirrors ``test_backend_parity_matrix.py`` so cross-test parity reads
-# the same way).
-# ---------------------------------------------------------------------------
-
-def _materialise(da: xr.DataArray) -> np.ndarray:
- raw = da.data
- if hasattr(raw, "compute"):
- raw = raw.compute()
- if hasattr(raw, "get"):
- raw = raw.get()
- return np.asarray(raw)
-
-
-def _coord_view(da: xr.DataArray, name: str) -> np.ndarray:
- return np.asarray(da.coords[name].values)
-
-
-def _assert_pixels_equal(ref: np.ndarray, actual: np.ndarray,
- *, label: str) -> None:
- """Pixel equality, dtype-aware (mirrors test_backend_parity_matrix.py)."""
- assert ref.dtype == actual.dtype, (
- f"{label}: dtype differs ref={ref.dtype} actual={actual.dtype}"
- )
- assert ref.shape == actual.shape, (
- f"{label}: shape differs ref={ref.shape} actual={actual.shape}"
- )
- if ref.dtype.kind == "f":
- assert np.array_equal(ref, actual, equal_nan=True), (
- f"{label}: float pixels differ (NaN-aware)"
- )
- else:
- assert ref.tobytes() == actual.tobytes(), (
- f"{label}: integer pixel bytes differ"
- )
-
-
-def _assert_metadata_parity(
- ref: xr.DataArray,
- actual: xr.DataArray,
- *,
- label: str,
- expected_dims: tuple[str, ...],
-) -> None:
- """Fail if any of the parity-critical attrs / coords drift between two reads.
-
- The acceptance bar for this PR: the VRT path cannot pass by returning
- correct pixel values with wrong georeferencing attrs. Every field
- checked here is part of the VRT contract that downstream code relies
- on, so a backend that ships the right bytes with the wrong attrs
- still fails the cell.
- """
- # Dims and order.
- assert actual.dims == expected_dims, (
- f"{label}: dims {actual.dims!r} != expected {expected_dims!r}"
- )
- assert ref.dims == expected_dims, (
- f"{label}: ref dims {ref.dims!r} != expected {expected_dims!r}"
- )
-
- # Coord values + coord dtype per axis. A windowed read that decoded
- # the right pixels but shifted the coords inconsistently would
- # surface here, not in the pixel check above.
- for axis in expected_dims:
- if axis not in ref.coords:
- continue
- ref_c = _coord_view(ref, axis)
- actual_c = _coord_view(actual, axis)
- assert ref_c.dtype == actual_c.dtype, (
- f"{label}: coord {axis!r} dtype "
- f"ref={ref_c.dtype} actual={actual_c.dtype}"
- )
- assert ref_c.shape == actual_c.shape, (
- f"{label}: coord {axis!r} shape "
- f"ref={ref_c.shape} actual={actual_c.shape}"
- )
- assert ref_c.tobytes() == actual_c.tobytes(), (
- f"{label}: coord {axis!r} bytes differ "
- f"(ref[:3]={ref_c[:3].tolist()!r}, "
- f"actual[:3]={actual_c[:3].tolist()!r})"
- )
-
- # Transform tuple. ``rasterio.Affine`` (if used) compares equal to
- # a 6-tuple via ``__eq__`` so this works for both surface forms.
- ref_t = ref.attrs.get("transform")
- actual_t = actual.attrs.get("transform")
- assert ref_t == actual_t, (
- f"{label}: transform tuple differs "
- f"ref={ref_t!r} actual={actual_t!r}"
- )
-
- # CRS attrs. The contract: ``attrs['crs']`` carries the EPSG int when
- # one is recognised, ``attrs['crs_wkt']`` always carries the WKT.
- assert ref.attrs.get("crs") == actual.attrs.get("crs"), (
- f"{label}: attrs['crs'] differs "
- f"ref={ref.attrs.get('crs')!r} actual={actual.attrs.get('crs')!r}"
- )
- assert ref.attrs.get("crs_wkt") == actual.attrs.get("crs_wkt"), (
- f"{label}: crs_wkt differs"
- )
-
- # georef_status: lazy / eager / GPU all populate this from the same
- # helper (#2136 / #2162). A drift here means the dask graph builder
- # is using a different finalization path than the eager reader,
- # which is exactly the kind of regression this matrix should catch.
- assert ref.attrs.get("georef_status") == actual.attrs.get(
- "georef_status"
- ), (
- f"{label}: georef_status differs "
- f"ref={ref.attrs.get('georef_status')!r} "
- f"actual={actual.attrs.get('georef_status')!r}"
- )
-
-
-# ---------------------------------------------------------------------------
-# VRT fixture builders.
-# Each builder writes its files inside a fresh ``tmp_path`` and returns a
-# (vrt_path, expected_dtype) pair. The harness then calls open_geotiff +
-# read_vrt with the four backend cells and compares them.
-# ---------------------------------------------------------------------------
-
-def _build_two_tile_float32_vrt(tmp_path: Path) -> tuple[Path, np.dtype]:
- """Two 16x16 float32 tiles laid out side-by-side as a 16x32 mosaic.
-
- Differentiated values per tile so the windowed cells exercise both
- halves of the mosaic without colliding with the sidecar fixture.
- """
- tile_h, tile_w = 16, 16
- paths: list[str] = []
- for c in range(2):
- arr = np.full(
- (tile_h, tile_w), float(c + 1) * 1000.0, dtype=np.float32
- )
- # Sprinkle distinct values so a swap between tiles surfaces.
- arr[0, 0] = -7.0 + c
- arr[tile_h - 1, tile_w - 1] = 9000.0 + c
- origin_x = float(c * tile_w)
- da = xr.DataArray(
- arr, dims=["y", "x"],
- coords={
- "y": np.arange(tile_h - 1, -1, -1, dtype=np.float64),
- "x": np.arange(
- origin_x, origin_x + tile_w, dtype=np.float64),
- },
- attrs={"crs": 4326},
- )
- tile_path = tmp_path / f"tile_2321_{c}.tif"
- to_geotiff(da, str(tile_path), compression="none", tiled=False)
- paths.append(str(tile_path))
- vrt_path = tmp_path / "two_tile_2321_.vrt"
- _write_vrt_internal(str(vrt_path), paths, relative=False)
- return vrt_path, np.dtype("float32")
-
-
-def _build_sidecar_vrt(tmp_path: Path) -> tuple[Path, np.dtype]:
- """VRT over a copy of the bundled sidecar TIFF + its ``.ovr`` partner.
-
- Copying the pair into ``tmp_path`` keeps the original golden corpus
- file untouched and ensures the ``.ovr`` lookup is resolved at the
- VRT read site (not via a cached path on the original fixture).
- """
- src = _sidecar_fixture_or_skip()
- base = tmp_path / "sidecar_2321_.tif"
- shutil.copy(src, base)
- shutil.copy(str(src) + ".ovr", str(base) + ".ovr")
- vrt_path = tmp_path / "sidecar_2321_.vrt"
- _write_vrt_internal(str(vrt_path), [str(base)], relative=False)
- return vrt_path, np.dtype("uint16")
-
-
-def _build_inline_overview_vrt(tmp_path: Path) -> tuple[Path, np.dtype]:
- """VRT over a copy of the inline-overview fixture (no sidecar).
-
- Used as the comparison source for ``test_sidecar_vrt_attrs_match_inline``:
- both fixtures share their base IFD (same dtype, transform, CRS, and
- bytes at level 0), so the VRT contract requires that the eager read
- surfaces identical georef attrs regardless of where the pyramid
- physically lives.
- """
- src = _inline_overview_fixture_or_skip()
- base = tmp_path / "inline_2321_.tif"
- shutil.copy(src, base)
- vrt_path = tmp_path / "inline_2321_.vrt"
- _write_vrt_internal(str(vrt_path), [str(base)], relative=False)
- return vrt_path, np.dtype("uint16")
-
-
-# ---------------------------------------------------------------------------
-# Backend matrix: eager (numpy), dask+numpy.
-# GPU is intentionally omitted -- the VRT read path goes through the
-# CPU decoder regardless of ``gpu=True`` for the pieces under test here,
-# and ``read_vrt(gpu=True, chunks=...)`` already has dedicated coverage
-# in ``test_vrt_lazy_chunks_1814.py``.
-# ---------------------------------------------------------------------------
-
-@dataclass(frozen=True)
-class _BackendSpec:
- backend_id: str
- kwargs: dict[str, Any]
-
-
-_BACKENDS: tuple[_BackendSpec, ...] = (
- _BackendSpec(backend_id="eager", kwargs={}),
- _BackendSpec(backend_id="dask", kwargs={"chunks": (16, 16)}),
-)
-
-
-def _backend_params() -> list:
- return [pytest.param(b, id=b.backend_id) for b in _BACKENDS]
-
-
-# ---------------------------------------------------------------------------
-# Fixture matrix: each entry is one (builder, label, expected_dims, window).
-# The window column lets us reuse the same builder for the full-extent
-# and the windowed cells without doubling the fixture surface.
-# ---------------------------------------------------------------------------
-
-@dataclass(frozen=True)
-class _FixtureSpec:
- fix_id: str
- builder: Callable[[Path], tuple[Path, np.dtype]]
- expected_dims: tuple[str, ...]
- # Window passed to open_geotiff / read_vrt; None means full extent.
- window: tuple[int, int, int, int] | None
-
-
-# ``fix_id`` is unique per (builder, window); the ``vrt_fixture`` resolver
-# below caches one on-disk layout per *builder*, so two specs that share
-# a builder (e.g. the full-extent and windowed cells over the same VRT)
-# reuse a single set of source TIFFs and a single ``.vrt`` file.
-_FIXTURES: tuple[_FixtureSpec, ...] = (
- _FixtureSpec(
- fix_id="two-tile-float32-full",
- builder=_build_two_tile_float32_vrt,
- expected_dims=("y", "x"),
- window=None,
- ),
- _FixtureSpec(
- # The windowed cell straddles the seam between the two tiles
- # (col 8..24 spans tile 0's right half + tile 1's left half).
- # That makes the dask path actually read both backing sources,
- # not just one, so a windowed dask graph that only re-reads the
- # first source would surface here.
- fix_id="two-tile-float32-window-spans-seam",
- builder=_build_two_tile_float32_vrt,
- expected_dims=("y", "x"),
- window=(4, 8, 12, 24),
- ),
- _FixtureSpec(
- fix_id="sidecar-uint16-full",
- builder=_build_sidecar_vrt,
- expected_dims=("y", "x"),
- window=None,
- ),
- _FixtureSpec(
- fix_id="sidecar-uint16-window",
- builder=_build_sidecar_vrt,
- expected_dims=("y", "x"),
- window=(8, 8, 56, 56),
- ),
-)
-
-
-def _fixture_params() -> list:
- return [pytest.param(f, id=f.fix_id) for f in _FIXTURES]
-
-
-# ---------------------------------------------------------------------------
-# Cached fixture builds: one VRT layout per fix_id per session.
-# ---------------------------------------------------------------------------
-
-@pytest.fixture(scope="session")
-def _vrt_parity_dir(tmp_path_factory):
- return tmp_path_factory.mktemp("vrt_parity_2321_")
-
-
-@pytest.fixture(scope="session")
-def _vrt_parity_cache() -> dict[str, tuple[Path, np.dtype]]:
- """Session-scoped (path, dtype) cache shared across every cell.
-
- The cache must outlive a single test function. A function-scoped
- cache would be reset between cells, causing every cell to rebuild
- the same VRT and its source TIFFs. On POSIX a rebuild is just
- inefficient; on Windows it surfaces as PermissionError / OSError
- because ``to_geotiff`` writes through a ``.tmp`` file and then
- renames over the existing target while another cell may still
- hold the previous file mapped (issue surfaced in CI on
- ``windows-latest`` for #2330).
- """
- return {}
-
-
-@pytest.fixture
-def vrt_fixture(_vrt_parity_dir, _vrt_parity_cache):
- """Resolve a :class:`_FixtureSpec` to a (vrt_path, dtype) pair on disk.
-
- Each builder gets its own subdirectory so the on-disk layout (vrt +
- sources + any sidecar) is isolated from neighbouring builders. Builds
- are cached at session scope so the four cells that share a builder
- (e.g. full-extent + windowed over the same VRT) reuse one set of
- source TIFFs and one ``.vrt`` file.
- """
- base = _vrt_parity_dir
- cache = _vrt_parity_cache
-
- def _resolve(spec: _FixtureSpec) -> tuple[Path, np.dtype]:
- # The fix_id encodes both the builder and the window; collapse to
- # the builder so we do not rebuild identical layouts.
- key = spec.builder.__name__
- if key in cache:
- return cache[key]
- sub = base / key
- sub.mkdir(exist_ok=True)
- result = spec.builder(sub)
- cache[key] = result
- return result
- return _resolve
-
-
-# ---------------------------------------------------------------------------
-# Tests
-# ---------------------------------------------------------------------------
-
-@pytest.mark.parametrize("spec", _fixture_params())
-@pytest.mark.parametrize("backend", _backend_params())
-def test_vrt_backend_parity(spec, backend, vrt_fixture):
- """One cell per (fixture, backend). Asserts pixels + metadata parity.
-
- Reference is always the eager numpy read with the same window kwarg.
- The cell compares the current backend's output against that
- reference. Eager-vs-eager is the identity case and locks in the
- parity-helper contract.
- """
- vrt_path, expected_dtype = vrt_fixture(spec)
-
- open_kwargs: dict[str, Any] = {}
- if spec.window is not None:
- open_kwargs["window"] = spec.window
-
- ref = open_geotiff(str(vrt_path), **open_kwargs)
-
- actual = open_geotiff(
- str(vrt_path), **open_kwargs, **backend.kwargs,
- )
-
- label = (
- f"fixture={spec.fix_id} backend={backend.backend_id} "
- f"window={spec.window!r}"
- )
-
- ref_arr = _materialise(ref)
- actual_arr = _materialise(actual)
-
- # Dtype against the explicit spec, not just against the reference.
- # A silent upcast that the reference also exhibits would still fail
- # here (the spec dtype is the contract).
- assert ref_arr.dtype == expected_dtype, (
- f"{label}: reference dtype {ref_arr.dtype} != "
- f"expected {expected_dtype}"
- )
- assert actual_arr.dtype == expected_dtype, (
- f"{label}: actual dtype {actual_arr.dtype} != "
- f"expected {expected_dtype}"
- )
-
- _assert_pixels_equal(ref_arr, actual_arr, label=label)
- _assert_metadata_parity(
- ref, actual, label=label, expected_dims=spec.expected_dims,
- )
-
-
-# ---------------------------------------------------------------------------
-# Cross-fixture parity: sidecar pyramid vs inline pyramid.
-# Both backing TIFFs share their base IFD bytes (same uint16 raster, same
-# transform, same CRS), so a VRT wrapping each must report identical
-# georef attrs at level 0. The check guards against the sidecar lookup
-# accidentally rewriting (or dropping) any of the contract-named attrs.
-# ---------------------------------------------------------------------------
-
-@pytest.mark.parametrize("backend", _backend_params())
-def test_sidecar_vrt_attrs_match_inline(backend, tmp_path):
- """Sidecar-backed VRT and inline-overview-backed VRT report identical
- georef attrs and pixels at the base level.
-
- Acceptance criterion straight from the parent issue: the sidecar
- ``.ovr`` lookup must produce the same georef status and CRS attrs
- as an inline-overview source. The check runs on each backend so a
- drift introduced only on the dask path still surfaces.
- """
- side_sub = tmp_path / "sidecar"
- inline_sub = tmp_path / "inline"
- side_sub.mkdir()
- inline_sub.mkdir()
- side_vrt, side_dtype = _build_sidecar_vrt(side_sub)
- inline_vrt, inline_dtype = _build_inline_overview_vrt(inline_sub)
-
- assert side_dtype == inline_dtype, (
- f"sidecar dtype {side_dtype} != inline dtype {inline_dtype}; "
- f"the golden_corpus fixtures should share a base IFD"
- )
-
- side = open_geotiff(str(side_vrt), **backend.kwargs)
- inline = open_geotiff(str(inline_vrt), **backend.kwargs)
-
- label = (
- f"sidecar-vs-inline backend={backend.backend_id}"
- )
-
- # Shape parity is the precondition for the pixel comparison.
- assert side.shape == inline.shape, (
- f"{label}: shape differs side={side.shape} inline={inline.shape}"
- )
-
- # Pixel parity at the base level. Both fixtures share their level-0
- # bytes (the sidecar only adds an external pyramid), so the read-back
- # arrays should match byte-for-byte.
- _assert_pixels_equal(
- _materialise(inline), _materialise(side), label=label,
- )
-
- # Metadata parity: the read paths must surface identical georef
- # attrs across the two physical layouts.
- _assert_metadata_parity(
- inline, side, label=label, expected_dims=("y", "x"),
- )
-
-
-# ---------------------------------------------------------------------------
-# Windowed-coord shift parity: an eager windowed read and a chunked
-# windowed read of the same VRT must report the same shifted coords
-# AND the same shifted transform. Pixel equality alone is not enough --
-# we want to catch the regression where the dask graph computes correct
-# pixels but the assembled DataArray keeps the full-extent coords or
-# transform.
-# ---------------------------------------------------------------------------
-
-def test_windowed_vrt_shifts_coords_and_transform_consistently(tmp_path):
- """Eager and lazy windowed VRT reads agree on shape, coords, attrs,
- and values.
-
- Per the parent issue's acceptance criterion. The cell is split out
- from the parametrised matrix above so a coord/transform drift on
- the dask path produces a single, named failure rather than a
- matrix-wide flag.
- """
- vrt_path, _ = _build_two_tile_float32_vrt(tmp_path)
- # Window deliberately straddles the tile seam (col 16) and trims
- # the y-axis on both ends, so both axes get shifted.
- window = (3, 5, 13, 27)
-
- eager = open_geotiff(str(vrt_path), window=window)
- lazy = open_geotiff(str(vrt_path), window=window, chunks=(5, 11))
-
- # Shape parity (precondition).
- assert eager.shape == (10, 22)
- assert lazy.shape == (10, 22)
-
- # Coord shift: the eager read's y/x arrays should match the lazy
- # read's exactly (same shape, same dtype, same bytes).
- np.testing.assert_array_equal(eager["y"].values, lazy["y"].values)
- np.testing.assert_array_equal(eager["x"].values, lazy["x"].values)
- assert eager["y"].dtype == lazy["y"].dtype
- assert eager["x"].dtype == lazy["x"].dtype
-
- # The window cuts the leading 3 rows and the leading 5 columns of
- # the full-extent grid (which goes from y=15..0 and x=0..31), so
- # the windowed first y is 12.0 and the windowed first x is 5.0.
- # The check pins the absolute shift, not just the eager/lazy
- # equality, so a regression that drifts BOTH backends the same
- # way still surfaces.
- assert eager["y"].values[0] == 12.0
- assert eager["x"].values[0] == 5.0
-
- # Transform must shift consistently: the rasterio 6-tuple's c
- # (origin_x) and f (origin_y) entries should reflect the window
- # offset, while pixel sizes (a, e) stay constant.
- eager_t = eager.attrs.get("transform")
- lazy_t = lazy.attrs.get("transform")
- assert eager_t == lazy_t, (
- f"transform differs eager={eager_t!r} lazy={lazy_t!r}"
- )
- # Pixel size unchanged by the window.
- assert eager_t[0] == 1.0 and eager_t[4] == -1.0, (
- f"pixel size mismatch in windowed transform {eager_t!r}"
- )
-
- # Pixel parity.
- np.testing.assert_array_equal(eager.values, lazy.compute().values)
-
- # CRS attrs parity.
- assert eager.attrs.get("crs") == lazy.attrs.get("crs")
- assert eager.attrs.get("crs_wkt") == lazy.attrs.get("crs_wkt")
- assert eager.attrs.get("georef_status") == lazy.attrs.get(
- "georef_status"
- )
-
-
-# ---------------------------------------------------------------------------
-# Absolute-shift parity for the sidecar windowed cell. The parametrised
-# matrix only checks eager-vs-dask equality; pin the actual shifted
-# coords and transform here so a regression that drifts BOTH backends
-# the same way still surfaces. The bundled sidecar fixture has a known
-# pixel size of 0.001 and origin (-120.0, 45.0).
-# ---------------------------------------------------------------------------
-
-def test_sidecar_window_shifts_to_known_coords(tmp_path):
- """The sidecar VRT, read with ``window=(8, 8, 56, 56)``, should land
- on the same coords / transform an absolute calculation would predict.
-
- The bundled fixture is 64x64 at pixel size 0.001 with origin
- (-120.0, 45.0). Trimming rows 8..56 / cols 8..56 yields a 48x48
- window whose x-coord array starts at -120.0 + 8 * 0.001 + half-pixel
- centre offset, and whose transform's c/f entries shift by the same
- 8-pixel offsets.
- """
- vrt_path, _ = _build_sidecar_vrt(tmp_path)
- window = (8, 8, 56, 56)
-
- eager = open_geotiff(str(vrt_path), window=window)
-
- assert eager.shape == (48, 48)
- # Pixel size column (a, e) of the rasterio 6-tuple stays constant.
- t = eager.attrs.get("transform")
- assert t is not None, "windowed sidecar VRT dropped attrs['transform']"
- assert t[0] == pytest.approx(0.001)
- assert t[4] == pytest.approx(-0.001)
- # Origin shifts by 8 pixels: c += 8 * a, f += 8 * e.
- # Full-extent origin is c=-120.0, f=45.0.
- assert t[2] == pytest.approx(-120.0 + 8 * 0.001)
- assert t[5] == pytest.approx(45.0 + 8 * -0.001)
-
-
-# ---------------------------------------------------------------------------
-# Sanity check: the matrix harness itself flags a metadata regression.
-# ---------------------------------------------------------------------------
-
-def test_assert_metadata_parity_flags_transform_drift(tmp_path):
- """Locks the harness behaviour: a transform-only drift between two
- otherwise-identical DataArrays fails the parity helper.
-
- Without this, a regression that silently dropped the transform check
- inside ``_assert_metadata_parity`` would let the rest of the matrix
- pass with empty assertions.
- """
- vrt_path, _ = _build_two_tile_float32_vrt(tmp_path)
- da_ref = open_geotiff(str(vrt_path))
- da_bad = da_ref.copy()
- da_bad.attrs = dict(da_ref.attrs)
- # Mutate the transform's origin_x. The pixels and coords remain
- # untouched; only the attr drifts.
- old_t = da_bad.attrs["transform"]
- da_bad.attrs["transform"] = (
- old_t[0], old_t[1], old_t[2] + 1.0,
- old_t[3], old_t[4], old_t[5],
- )
- with pytest.raises(AssertionError, match="transform"):
- _assert_metadata_parity(
- da_ref, da_bad, label="harness-sanity",
- expected_dims=("y", "x"),
- )
diff --git a/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py b/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py
deleted file mode 100644
index 2be7946bf..000000000
--- a/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py
+++ /dev/null
@@ -1,325 +0,0 @@
-"""Issue #2265: chunked VRT ``missing_sources='raise'`` must raise at build.
-
-The public docstring on ``read_vrt`` says ``missing_sources='raise'`` (the
-public default since #1860) "fails immediately on an unreadable backing
-source so a partial mosaic never surfaces silently". Before #2265 the
-chunked path only honoured that contract at compute time: it ran a
-static ``os.path.exists`` sweep at build, recorded misses into
-``attrs['vrt_holes']``, and only the per-chunk delayed decode raised --
-which meant a windowed downstream slice past the bad tile could ship a
-partial mosaic silently. This module pins the "raise at build" behaviour
-and the related scoping invariants:
-
-* a missing source intersecting the requested window raises at build,
-* a missing source outside the requested window does not raise,
-* a missing source on a band the caller did not select does not raise,
-* ``XRSPATIAL_GEOTIFF_STRICT=1`` forces the raise regardless of kwarg,
-* ``missing_sources='warn'`` keeps the existing record-and-warn path.
-"""
-from __future__ import annotations
-
-import os
-import warnings
-
-import numpy as np
-import pytest
-import xarray as xr
-
-from xrspatial.geotiff import GeoTIFFFallbackWarning, read_vrt, to_geotiff
-
-
-def _write_present_source(tmp_path: str, name: str, fill: float) -> str:
- """Write a 4x4 float32 GeoTIFF source for use in a multi-source VRT."""
- src = os.path.join(tmp_path, name)
- arr = np.full((4, 4), fill, dtype=np.float32)
- da = xr.DataArray(
- arr, dims=("y", "x"),
- attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)},
- )
- to_geotiff(da, src)
- return src
-
-
-def _make_horizontal_partial_vrt(tmp_path: str) -> str:
- """2-source VRT: present source on the left, missing source on the right.
-
- Layout (rows x cols = 4 x 8):
- ``[ present | missing ]``. Used for the basic
- ``raise at build`` and window-scoping assertions.
- """
- src = _write_present_source(tmp_path, "src_2265_h_present.tif", 7.0)
- missing = os.path.join(tmp_path, "missing_2265_h.tif")
- vrt_path = os.path.join(tmp_path, "partial_2265_h.vrt")
- with open(vrt_path, "w") as f:
- f.write(
- f'\n'
- '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
- '\n'
- '\n'
- f'{src}\n'
- '1\n'
- '\n'
- '\n'
- '\n'
- '\n'
- f'{missing}\n'
- '1\n'
- '\n'
- '\n'
- '\n'
- '\n'
- '\n'
- )
- return vrt_path
-
-
-def _make_multiband_partial_vrt(tmp_path: str) -> str:
- """2-band VRT where band 1 has a missing source and band 2 is intact.
-
- Both bands cover the full 4x4 extent with one source each. A
- ``band=1`` (0-based, the second band) read should not raise because
- the per-chunk decode never touches band 1's missing source. Reading
- without a band restriction or with ``band=0`` should raise.
- """
- src_b1 = _write_present_source(tmp_path, "src_2265_mb_b1.tif", 11.0)
- src_b2 = _write_present_source(tmp_path, "src_2265_mb_b2.tif", 22.0)
- missing_b1 = os.path.join(tmp_path, "missing_2265_mb_b1.tif")
- vrt_path = os.path.join(tmp_path, "partial_2265_multiband.vrt")
- with open(vrt_path, "w") as f:
- f.write(
- f'\n'
- '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
- # Band 1: one present source + one missing source covering
- # the same extent. The missing source intersects every
- # chunk window so the build must raise when band 1 is in
- # scope.
- '\n'
- '\n'
- f'{src_b1}\n'
- '1\n'
- '\n'
- '\n'
- '\n'
- '\n'
- f'{missing_b1}\n'
- '1\n'
- '\n'
- '\n'
- '\n'
- '\n'
- # Band 2: a single present source. ``band=1`` (0-based) on
- # the chunked read should pick this band only and skip
- # band 1's missing source.
- '\n'
- '\n'
- f'{src_b2}\n'
- '1\n'
- '\n'
- '\n'
- '\n'
- '\n'
- '\n'
- )
- return vrt_path
-
-
-class TestRaiseAtBuild:
- """``missing_sources='raise'`` raises during construction, not compute."""
-
- def test_build_raises_immediately(self, tmp_path):
- vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError, match="missing_2265_h"):
- read_vrt(vrt_path, chunks=4, missing_sources="raise")
-
- def test_default_raises_at_build(self, tmp_path):
- """The public default is ``'raise'`` so dropping the kwarg
- must hit the same fast-fail path."""
- vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError):
- read_vrt(vrt_path, chunks=4)
-
- def test_error_message_mentions_opt_in(self, tmp_path):
- """The exception text should tell the caller how to opt into
- the lenient path. A regression that drops this guidance would
- leave callers debugging a bare ``FileNotFoundError`` without
- knowing the kwarg toggle exists."""
- vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError) as excinfo:
- read_vrt(vrt_path, chunks=4, missing_sources="raise")
- msg = str(excinfo.value)
- assert "missing_sources='warn'" in msg
- assert "partial mosaic" in msg
-
-
-class TestWindowScoping:
- """The raise honours the requested window."""
-
- def test_window_past_missing_does_not_raise(self, tmp_path):
- """A window that touches only the present source still builds
- and computes. Without this scoping the static raise would be
- overzealous compared to the eager path (which decodes only
- sources that intersect the window)."""
- vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
- result = read_vrt(
- vrt_path, chunks=4, window=(0, 0, 4, 4),
- missing_sources="raise",
- )
- computed = result.compute()
- np.testing.assert_array_equal(
- np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32),
- )
-
- def test_window_intersecting_missing_raises(self, tmp_path):
- """A window that overlaps the missing tile still raises at build."""
- vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError):
- read_vrt(
- vrt_path, chunks=4, window=(0, 4, 4, 8),
- missing_sources="raise",
- )
-
-
-class TestBandScoping:
- """The raise honours ``band=`` restriction."""
-
- def test_band_select_skips_other_bands_missing_source(self, tmp_path):
- """``band=1`` reads band 2 only; band 1's missing source is
- irrelevant to the graph, so the build must not raise."""
- vrt_path = _make_multiband_partial_vrt(str(tmp_path))
- result = read_vrt(
- vrt_path, chunks=4, band=1, missing_sources="raise",
- )
- computed = result.compute()
- np.testing.assert_array_equal(
- np.asarray(computed), np.full((4, 4), 22.0, dtype=np.float32),
- )
-
- def test_band_select_on_missing_band_raises(self, tmp_path):
- """``band=0`` selects the band with the missing source so the
- build must raise (mirror of the unselected-band test above)."""
- vrt_path = _make_multiband_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError):
- read_vrt(
- vrt_path, chunks=4, band=0, missing_sources="raise",
- )
-
- def test_no_band_restriction_raises(self, tmp_path):
- """Without a ``band=`` restriction, both bands' sources are in
- scope and the missing source on band 1 raises at build."""
- vrt_path = _make_multiband_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError):
- read_vrt(vrt_path, chunks=4, missing_sources="raise")
-
-
-class TestWarnPreserved:
- """``missing_sources='warn'`` keeps the record-and-warn behaviour."""
-
- def test_warn_records_holes_at_build(self, tmp_path):
- """The lenient path must not regress to a build-time raise."""
- vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
- result = read_vrt(vrt_path, chunks=4, missing_sources="warn")
- assert "vrt_holes" in result.attrs
- assert len(result.attrs["vrt_holes"]) == 1
- assert result.attrs["vrt_holes"][0]["source"].endswith(
- "missing_2265_h.tif"
- )
-
- def test_warn_compute_emits_per_task_warning(self, tmp_path):
- """The compute step still warns per task on the lenient path."""
- vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always")
- result = read_vrt(vrt_path, chunks=4, missing_sources="warn")
- computed = result.compute()
- messages = [str(w.message) for w in caught
- if isinstance(w.message, GeoTIFFFallbackWarning)]
- assert any("missing_2265_h" in msg for msg in messages)
- # Present side decodes to 7.0; missing side decodes to NaN.
- np.testing.assert_array_equal(
- np.asarray(computed)[:, :4],
- np.full((4, 4), 7.0, dtype=np.float32),
- )
- assert np.all(np.isnan(np.asarray(computed)[:, 4:]))
-
-
-def _make_multi_missing_vrt(tmp_path: str, n_missing: int) -> str:
- """VRT with ``n_missing`` missing sources tiling the destination.
-
- Each missing source covers a distinct 4x4 dst block laid out
- horizontally; the VRT's full extent is sized to hold all of them.
- Used to pin the multi-source preview behavior of the build-time
- raise message.
- """
- vrt_path = os.path.join(tmp_path, f"partial_2265_multi_{n_missing}.vrt")
- width = 4 * n_missing
- src_xml = []
- for i in range(n_missing):
- missing = os.path.join(tmp_path, f"missing_2265_multi_{i}.tif")
- src_xml.append(
- '\n'
- f'{missing}\n'
- '1\n'
- '\n'
- f'\n'
- '\n'
- )
- with open(vrt_path, "w") as f:
- f.write(
- f'\n'
- '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
- '\n'
- + ''.join(src_xml) +
- '\n'
- '\n'
- )
- return vrt_path
-
-
-class TestMultipleMissingSources:
- """The error message previews multiple holes and reports the total."""
-
- def test_two_missing_sources_listed_with_count(self, tmp_path):
- """All missing sources fit in the preview (n=2 <= preview cap)."""
- vrt_path = _make_multi_missing_vrt(str(tmp_path), n_missing=2)
- with pytest.raises(FileNotFoundError) as excinfo:
- read_vrt(vrt_path, chunks=4, missing_sources="raise")
- msg = str(excinfo.value)
- assert "missing_2265_multi_0" in msg
- assert "missing_2265_multi_1" in msg
- assert "2 missing source(s) total" in msg
- # Preview cap kicks in only above 3 holes; no "and N more" tail
- # should appear for n_missing=2.
- assert "more" not in msg.lower() or "and 0 more" not in msg
-
- def test_many_missing_sources_truncated_with_more_suffix(self, tmp_path):
- """Above the preview cap, the message says 'and N more'."""
- n = 5
- vrt_path = _make_multi_missing_vrt(str(tmp_path), n_missing=n)
- with pytest.raises(FileNotFoundError) as excinfo:
- read_vrt(vrt_path, chunks=4, missing_sources="raise")
- msg = str(excinfo.value)
- # First few names are listed; the rest collapse into "and N more".
- assert "missing_2265_multi_0" in msg
- # The last source should NOT be in the preview (it's past the cap).
- assert f"missing_2265_multi_{n - 1}" not in msg
- # Total count is reported regardless of truncation.
- assert f"{n} missing source(s) total" in msg
- # The truncation tail names how many more there are.
- assert "and 2 more" in msg
-
-
-class TestStrictMode:
- """``XRSPATIAL_GEOTIFF_STRICT=1`` forces the raise even with ``'warn'``."""
-
- def test_strict_overrides_warn_kwarg(self, tmp_path, monkeypatch):
- monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1")
- vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError):
- read_vrt(vrt_path, chunks=4, missing_sources="warn")
-
- def test_strict_off_warn_still_warns(self, tmp_path, monkeypatch):
- """Sanity: without strict mode, ``'warn'`` keeps warning."""
- monkeypatch.delenv("XRSPATIAL_GEOTIFF_STRICT", raising=False)
- vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
- result = read_vrt(vrt_path, chunks=4, missing_sources="warn")
- assert "vrt_holes" in result.attrs
diff --git a/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py b/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py
deleted file mode 100644
index eaa1e347b..000000000
--- a/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py
+++ /dev/null
@@ -1,251 +0,0 @@
-"""Chunked-VRT coverage for ``missing_sources`` (issue #1799).
-
-``test_vrt_missing_sources_policy_1799`` covers the eager (non-chunked)
-``read_vrt`` path. The chunked path (``read_vrt(chunks=N)``, dispatching
-through ``_read_vrt_chunked``) plumbs ``missing_sources`` separately:
-
-* Parse-time approximation: a static ``os.path.exists`` sweep over every
- source populates ``attrs['vrt_holes']`` on the returned DataArray
- before any decode work starts (docstring in ``_backends/vrt.py:344``).
-* Decode-time: each per-chunk task receives ``missing_sources`` and the
- internal reader applies the same warn/raise policy as the eager path.
-
-A regression dropping either the parse-time sweep or the per-chunk
-forward would silently change the contract:
-
-* ``vrt_holes`` would disappear from the lazy build, breaking callers
- that branch on ``"vrt_holes" in da.attrs`` to detect partial mosaics
- before scheduling a compute (the contract documented in #1734).
-* ``missing_sources='raise'`` could silently degrade to ``'warn'`` (or
- vice versa) on the chunked path while the eager path stays correct.
-
-This module pins both invariants. Tests use a 2-source mosaic where one
-source is missing on disk; the present source covers one chunk window
-and the missing source covers another, so the warn/raise policy is
-exercised against a non-trivial graph.
-"""
-from __future__ import annotations
-
-import os
-import warnings
-
-import numpy as np
-import pytest
-import xarray as xr
-
-from xrspatial.geotiff import GeoTIFFFallbackWarning, read_vrt, to_geotiff
-
-
-def _make_partial_vrt(tmp_path) -> tuple[str, str]:
- """Build a 2-source VRT with one present + one missing source.
-
- Returns ``(vrt_path, present_src_path)``. The VRT references the
- present source for the left half and a non-existent file for the
- right half, so chunked reads against the right half hit the
- missing-source decode path.
- """
- src = os.path.join(tmp_path, "src_present.tif")
- arr = np.full((4, 4), 7.0, dtype=np.float32)
- da = xr.DataArray(
- arr, dims=("y", "x"),
- attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)},
- )
- to_geotiff(da, src)
-
- missing = os.path.join(tmp_path, "missing.tif")
- vrt_path = os.path.join(tmp_path, "partial.vrt")
- with open(vrt_path, "w") as f:
- f.write(
- f'\n'
- '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
- '\n'
- '\n'
- f'{src}\n'
- '1\n'
- '\n'
- '\n'
- '\n'
- '\n'
- f'{missing}\n'
- '1\n'
- '\n'
- '\n'
- '\n'
- '\n'
- '\n'
- )
- return vrt_path, src
-
-
-class TestChunkedMissingSourcesWarn:
- """``read_vrt(chunks=N, missing_sources='warn')`` records holes at build.
-
- The eager path scans every source at decode time. The chunked path
- cannot afford that sweep up front (it would defeat the lazy graph),
- so it uses ``os.path.exists`` to populate ``vrt_holes`` at build
- time. The compute step still emits per-task warnings for any
- missing source that survives.
- """
-
- def test_vrt_holes_populated_at_build(self, tmp_path):
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- result = read_vrt(vrt_path, chunks=4, missing_sources="warn")
- assert "vrt_holes" in result.attrs, (
- "Chunked path must populate vrt_holes at build time so "
- "callers can detect partial mosaics without forcing a "
- "compute (issue #1734)."
- )
- holes = result.attrs["vrt_holes"]
- assert len(holes) == 1
- # Pin the full record schema (see ``_backends/vrt.py:608``) so a
- # regression in either path that drops or renames a key is
- # caught here.
- assert set(holes[0].keys()) == {"source", "band", "dst_rect", "error"}
- assert holes[0]["source"].endswith("missing.tif")
- assert holes[0]["band"] == 1
- assert holes[0]["dst_rect"] == (4, 0, 4, 4)
-
- def test_compute_emits_per_task_warning(self, tmp_path):
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always")
- result = read_vrt(vrt_path, chunks=4, missing_sources="warn")
- computed = result.compute()
- messages = [str(w.message) for w in caught
- if isinstance(w.message, GeoTIFFFallbackWarning)]
- assert any("missing.tif" in msg for msg in messages), (
- f"Expected GeoTIFFFallbackWarning naming the missing "
- f"source after compute, got messages: {messages!r}"
- )
- # Present-source chunk decodes its 7.0 fill; missing-source
- # chunk decodes to NaN under the lenient policy on float32.
- # Pin both halves so a regression in the lenient path that
- # wiped the present side or changed the missing-side fill would
- # surface.
- np.testing.assert_array_equal(
- np.asarray(computed)[:, :4], np.full((4, 4), 7.0, dtype=np.float32),
- )
- assert np.all(np.isnan(np.asarray(computed)[:, 4:]))
-
- def test_chunks_tuple_form(self, tmp_path):
- """Tuple ``chunks=(h, w)`` threads through identically."""
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- result = read_vrt(
- vrt_path, chunks=(2, 4), missing_sources="warn",
- )
- assert "vrt_holes" in result.attrs
- # 2 chunks vertically * 2 chunks horizontally = 4 tasks.
- # The missing source is in column 1 (cols 4-7); only the right
- # half should produce warning records, but vrt_holes is a
- # parse-time sweep so it records the source once regardless.
- assert len(result.attrs["vrt_holes"]) == 1
-
-
-class TestChunkedMissingSourcesRaise:
- """``read_vrt(chunks=N, missing_sources='raise')`` fails at build.
-
- The docstring on ``read_vrt`` promises that the default
- ``'raise'`` "fails immediately on an unreadable backing source so a
- partial mosaic never surfaces silently". Issue #2265 closes the
- chunked-path gap: the static ``os.path.exists`` sweep that already
- runs to populate ``vrt_holes`` now also raises up front when the
- policy is ``'raise'`` and the sweep finds any hole intersecting the
- requested window. Without this guard the build would succeed and
- only ``result.compute()`` on a hole-touching chunk would raise, so
- a downstream pipeline that windowed past the bad tile could ship a
- partial mosaic silently.
- """
-
- def test_build_raises_immediately(self, tmp_path):
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError, match="missing.tif"):
- read_vrt(vrt_path, chunks=4, missing_sources="raise")
-
- def test_build_raise_message_mentions_policy_kwarg(self, tmp_path):
- """The raise tells the caller how to opt into the lenient path.
-
- Lock in the kwarg-naming guidance in the error string so a
- future refactor that drops or renames the suggestion regresses
- the user-facing message rather than silently churning it.
- """
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError) as excinfo:
- read_vrt(vrt_path, chunks=4, missing_sources="raise")
- assert "missing_sources='warn'" in str(excinfo.value)
-
- def test_window_past_missing_succeeds_under_raise(self, tmp_path):
- """A window that does not touch a missing source still builds.
-
- The static sweep is scoped to the windowed extent. If the
- window covers only present sources, the chunked graph has
- nothing to raise about and ``compute()`` returns the present
- tile. This preserves the contract that ``missing_sources``
- only fires when the requested region actually depends on a
- missing source.
- """
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- # Window covers only the present source (cols 0-4).
- result = read_vrt(
- vrt_path, chunks=4, window=(0, 0, 4, 4),
- missing_sources="raise",
- )
- computed = result.compute()
- np.testing.assert_array_equal(
- np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32),
- )
-
- def test_band_selection_skips_other_bands_holes(self, tmp_path):
- """A ``band=`` restriction scopes the static raise to that band.
-
- Mirrors the eager path: only sources on the selected band get
- decoded, so a missing source on an unselected band should not
- block the build. The partial VRT in this module is single-band
- so the only way to exercise this is to confirm that the
- single-band default still raises (sanity gate) -- the
- cross-band gating is covered indirectly by the broader VRT
- test matrix.
- """
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- # Selecting band 0 (the only band) still touches the missing
- # source so the build raises. The negative case (a missing
- # source on a different band than the selected one) is hard to
- # build without a multi-band VRT helper; the band_num gate in
- # ``_read_vrt_chunked`` is exercised by the standalone test
- # ``test_chunked_band_selection_skips_other_bands_holes`` below.
- with pytest.raises(FileNotFoundError):
- read_vrt(
- vrt_path, chunks=4, band=0, missing_sources="raise",
- )
-
-
-class TestChunkedMissingSourcesDefault:
- """The default ``missing_sources`` on chunked reads is ``'raise'``.
-
- The public ``read_vrt`` default flipped to ``'raise'`` in #1843 /
- #1860 and the chunked path now honours it at build time (#2265).
- A regression flipping the chunked default to ``'warn'`` would
- silently produce partial mosaics for callers who don't pass the
- kwarg.
- """
-
- def test_chunked_default_raises_at_build(self, tmp_path):
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- with pytest.raises(FileNotFoundError, match="missing.tif"):
- read_vrt(vrt_path, chunks=4)
-
-
-class TestChunkedMissingSourcesValidation:
- """Invalid ``missing_sources`` policies are rejected at entry."""
-
- def test_invalid_policy_raises_at_build(self, tmp_path):
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- with pytest.raises(ValueError, match="missing_sources"):
- read_vrt(vrt_path, chunks=4, missing_sources="ignore")
-
- def test_invalid_policy_raises_without_chunks_too(self, tmp_path):
- """Sanity: the eager path also rejects the bad value. Pinning
- cross-mode parity means callers see the same error whether or
- not they pass ``chunks=``."""
- vrt_path, _ = _make_partial_vrt(str(tmp_path))
- with pytest.raises(ValueError, match="missing_sources"):
- read_vrt(vrt_path, missing_sources="ignore")
diff --git a/xrspatial/geotiff/tests/test_vrt_finalization_parity_2162.py b/xrspatial/geotiff/tests/test_vrt_finalization_parity_2162.py
deleted file mode 100644
index 294d836c5..000000000
--- a/xrspatial/geotiff/tests/test_vrt_finalization_parity_2162.py
+++ /dev/null
@@ -1,667 +0,0 @@
-"""Cross-backend parity for the VRT finalization pipeline (issue #2180).
-
-Wave 3 of #2162 routed the VRT eager and chunked paths through
-``_finalize_lazy_read_attrs`` from #2177. Before the migration the two
-sites built ``GeoTIFFMetadata`` from VRT internals by hand and called
-``metadata_to_attrs`` directly, bypassing the shared
-``_validate_read_geo_info`` / ``_populate_attrs_from_geo_info`` block
-the other backends share.
-
-The tests below pin parity for the attrs the helper now stamps:
-
-* VRT eager attrs match eager numpy attrs (``open_geotiff``) for
- single-source VRTs that mirror a plain TIFF.
-* VRT chunked attrs match dask numpy attrs (``read_geotiff_dask``) for
- the same single-source VRTs.
-* ``band_nodata='first'`` paths still produce the per-band attrs
- pinned by ``test_vrt_band_nodata_1598``.
-* ``missing_sources='warn'`` still surfaces ``attrs['vrt_holes']`` on
- the eager VRT path (the chunked path's parse-time hole scan is
- covered by ``test_open_geotiff_missing_sources_1810``).
-* ``attrs['georef_status']`` matches across VRT and non-VRT paths for
- the five canonical states (``full``, ``transform_only``,
- ``crs_only``, ``none``, ``rotated_dropped``).
-
-VRT-only attrs that the non-VRT path cannot produce (e.g.
-``vrt_holes``) and the windowed-transform shift are not part of the
-parity assertion -- they are pinned by the regression tests cited
-above. A few attrs the non-VRT path emits (``extra_tags``,
-``gdal_metadata``, resolution tags) are likewise dropped from the
-comparison because the VRT path intentionally omits them; the test
-filters those keys explicitly.
-"""
-from __future__ import annotations
-
-import warnings
-
-import numpy as np
-import pytest
-import xarray as xr
-
-from xrspatial.geotiff import open_geotiff, read_geotiff_dask, read_vrt, to_geotiff
-from xrspatial.geotiff._attrs import (GEOREF_STATUS_CRS_ONLY, GEOREF_STATUS_FULL,
- GEOREF_STATUS_NONE, GEOREF_STATUS_ROTATED_DROPPED,
- GEOREF_STATUS_TRANSFORM_ONLY)
-from xrspatial.geotiff._coords import _NO_GEOREF_KEY
-from xrspatial.geotiff._writer import write
-
-tifffile = pytest.importorskip("tifffile")
-
-
-# Attrs the VRT path is documented to omit when the non-VRT path emits
-# them. The parity comparisons drop these keys before checking equality
-# so the per-backend documented surface stays in scope.
-_NON_VRT_ONLY_KEYS = frozenset({
- 'extra_tags',
- 'image_description',
- 'extra_samples',
- 'gdal_metadata',
- 'gdal_metadata_xml',
- 'x_resolution',
- 'y_resolution',
- 'resolution_unit',
- 'colormap',
-})
-
-
-# Attrs that differ in textual representation between the GeoTIFF writer
-# and the literal VRT XML even when they encode the same logical value.
-# ``crs_wkt`` carries pyproj's expanded WKT in the TIFF path but the
-# verbatim VRT XML body in the VRT path; ``transform`` shifts by a
-# half-pixel between the two writers' AREA_OR_POINT conventions. The
-# parity test compares them separately via EPSG / origin checks rather
-# than insisting on byte-identical strings.
-_REPRESENTATION_KEYS = frozenset({'crs_wkt', 'transform'})
-
-
-def _shared_canonical_attrs(attrs: dict) -> dict:
- """Return the helper-emitted attrs that should match across writers.
-
- Drops:
- * The non-VRT TIFF-tag attrs the VRT path intentionally omits.
- * The representation-sensitive attrs (``crs_wkt``, ``transform``)
- that differ in literal form but encode the same logical value.
- ``crs`` (EPSG integer) carries the same information for the WKT
- comparison; the transform half-pixel shift is exercised by the
- regression tests for the underlying readers.
- """
- return {
- k: v for k, v in attrs.items()
- if k not in _NON_VRT_ONLY_KEYS and k not in _REPRESENTATION_KEYS
- }
-
-
-def _strip_non_vrt_keys(attrs: dict) -> dict:
- return {k: v for k, v in attrs.items() if k not in _NON_VRT_ONLY_KEYS}
-
-
-def _write_single_source_vrt(tiff_path, vrt_path, *, width, height,
- dtype='Float32', nodata=None,
- geo_transform='0.0, 1.0, 0.0, 0.0, 0.0, -1.0',
- srs=None):
- """Write a one-band VRT pointing at ``tiff_path``.
-
- Mirrors the writer in ``test_vrt_band_nodata_1598`` but parameterises
- the geo bits so the same helper can produce ``full`` /
- ``transform_only`` / ``crs_only`` / ``none`` / ``rotated_dropped``
- VRTs.
- """
- nodata_xml = (
- f" {nodata}\n" if nodata is not None
- else ''
- )
- srs_xml = (
- f' {srs}\n' if srs is not None
- else ''
- )
- gt_xml = (
- f' {geo_transform}\n'
- if geo_transform is not None
- else ''
- )
- vrt_xml = (
- f'\n'
- f'{gt_xml}'
- f'{srs_xml}'
- f' \n'
- f'{nodata_xml}'
- f' \n'
- f' {tiff_path}\n'
- f' 1\n'
- f' \n'
- f' \n'
- f' \n'
- f' \n'
- f'\n'
- )
- with open(vrt_path, 'w') as f:
- f.write(vrt_xml)
-
-
-# ---------------------------------------------------------------------------
-# Fixture builders for the five georef states.
-# ---------------------------------------------------------------------------
-#
-# Each builder writes a backing TIFF and a single-source VRT that wraps
-# it with the same transform / CRS, then returns both paths. The VRT
-# path's ``georef_status`` should match the TIFF path's because the VRT
-# shares the same geometry.
-
-_WGS84_WKT = (
- 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,'
- 'AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,'
- 'AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,'
- 'AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]'
-)
-
-
-def _make_full_pair(tmp_path, name):
- """Full georef: float coords + CRS."""
- tiff = str(tmp_path / f'{name}_tiff.tif')
- vrt = str(tmp_path / f'{name}.vrt')
- da = xr.DataArray(
- np.zeros((4, 4), dtype=np.float32),
- coords={
- 'y': np.array([200.0, 199.0, 198.0, 197.0]),
- 'x': np.array([100.0, 101.0, 102.0, 103.0]),
- },
- dims=('y', 'x'),
- attrs={'crs': 4326},
- )
- to_geotiff(da, tiff)
- _write_single_source_vrt(
- tiff, vrt, width=4, height=4,
- geo_transform='100.0, 1.0, 0.0, 200.0, 0.0, -1.0',
- srs=_WGS84_WKT,
- )
- return tiff, vrt
-
-
-def _make_transform_only_pair(tmp_path, name):
- """Float coords, no CRS."""
- tiff = str(tmp_path / f'{name}_tiff.tif')
- vrt = str(tmp_path / f'{name}.vrt')
- da = xr.DataArray(
- np.zeros((4, 4), dtype=np.float32),
- coords={
- 'y': np.array([200.0, 199.0, 198.0, 197.0]),
- 'x': np.array([100.0, 101.0, 102.0, 103.0]),
- },
- dims=('y', 'x'),
- )
- to_geotiff(da, tiff)
- _write_single_source_vrt(
- tiff, vrt, width=4, height=4,
- geo_transform='100.0, 1.0, 0.0, 200.0, 0.0, -1.0',
- srs=None,
- )
- return tiff, vrt
-
-
-def _make_crs_only_pair(tmp_path, name):
- """No-georef marker + CRS."""
- tiff = str(tmp_path / f'{name}_tiff.tif')
- vrt = str(tmp_path / f'{name}.vrt')
- da = xr.DataArray(
- np.zeros((4, 4), dtype=np.float32),
- coords={
- 'y': np.arange(4, dtype=np.int64),
- 'x': np.arange(4, dtype=np.int64),
- },
- dims=('y', 'x'),
- attrs={_NO_GEOREF_KEY: True, 'crs': 4326},
- )
- to_geotiff(da, tiff)
- _write_single_source_vrt(
- tiff, vrt, width=4, height=4,
- geo_transform=None,
- srs=_WGS84_WKT,
- )
- return tiff, vrt
-
-
-def _make_none_pair(tmp_path, name):
- """No CRS, no transform."""
- tiff = str(tmp_path / f'{name}_tiff.tif')
- vrt = str(tmp_path / f'{name}.vrt')
- arr = np.zeros((4, 4), dtype=np.float32)
- tifffile.imwrite(
- tiff, arr, photometric='minisblack', planarconfig='contig',
- metadata=None,
- )
- _write_single_source_vrt(
- tiff, vrt, width=4, height=4,
- geo_transform=None,
- srs=None,
- )
- return tiff, vrt
-
-
-def _make_rotated_pair(tmp_path, name):
- """Rotated VRT with ``allow_rotated=True``: lands at
- ``rotated_dropped``."""
- tiff = str(tmp_path / f'{name}_tiff.tif')
- vrt = str(tmp_path / f'{name}.vrt')
- arr = np.arange(16, dtype=np.uint16).reshape(4, 4)
- write(arr, tiff, compression='none', tiled=False)
- _write_single_source_vrt(
- tiff, vrt, width=4, height=4, dtype='UInt16',
- geo_transform='0.0, 1.0, 0.5, 0.0, 0.5, -1.0',
- srs=None,
- )
- return tiff, vrt
-
-
-# ---------------------------------------------------------------------------
-# Parity tests: VRT eager attrs vs eager numpy attrs.
-# ---------------------------------------------------------------------------
-
-
-def test_vrt_eager_full_matches_open_geotiff(tmp_path):
- """A single-source VRT wrapping a ``full`` TIFF emits the same
- canonical helper-stamped attrs as the underlying TIFF read via
- ``open_geotiff``.
-
- The helper-emitted attrs (``crs`` / ``georef_status`` / contract
- version / nodata lifecycle) must match. ``crs_wkt`` and
- ``transform`` differ in textual representation between the two
- writers and are compared separately via EPSG / origin checks
- below; pinning byte-identical strings would test the writer, not
- the helper migration.
- """
- tiff, vrt = _make_full_pair(tmp_path, 'full_2180')
- tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs))
- vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs))
- assert tiff_attrs == vrt_attrs, (
- f"TIFF/VRT attrs diverged:\n"
- f" tiff only: {set(tiff_attrs) - set(vrt_attrs)}\n"
- f" vrt only: {set(vrt_attrs) - set(tiff_attrs)}\n"
- f" shared keys with different values: "
- f"{[k for k in set(tiff_attrs) & set(vrt_attrs) if tiff_attrs[k] != vrt_attrs[k]]}"
- )
- # Logical CRS equality across the two writers (different WKT text,
- # same EPSG code).
- full_tiff_attrs = dict(open_geotiff(tiff).attrs)
- full_vrt_attrs = dict(read_vrt(vrt).attrs)
- assert full_tiff_attrs['crs'] == full_vrt_attrs['crs'] == 4326
- # Both paths emit a 6-tuple transform with the same length.
- assert len(full_tiff_attrs['transform']) == 6
- assert len(full_vrt_attrs['transform']) == 6
-
-
-def test_vrt_eager_transform_only_matches_open_geotiff(tmp_path):
- tiff, vrt = _make_transform_only_pair(tmp_path, 'tonly_2180')
- tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs))
- vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs))
- assert tiff_attrs == vrt_attrs
- assert tiff_attrs['georef_status'] == GEOREF_STATUS_TRANSFORM_ONLY
-
-
-def test_vrt_eager_crs_only_matches_open_geotiff(tmp_path):
- tiff, vrt = _make_crs_only_pair(tmp_path, 'crsonly_2180')
- tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs))
- vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs))
- assert tiff_attrs == vrt_attrs
- assert tiff_attrs['georef_status'] == GEOREF_STATUS_CRS_ONLY
-
-
-def test_vrt_eager_none_matches_open_geotiff(tmp_path):
- tiff, vrt = _make_none_pair(tmp_path, 'none_2180')
- tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs))
- vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs))
- assert tiff_attrs == vrt_attrs
- assert tiff_attrs['georef_status'] == GEOREF_STATUS_NONE
-
-
-def test_vrt_eager_rotated_dropped_matches_open_geotiff(tmp_path):
- """The rotated branch is the VRT-specific path: a non-zero skew on
- the GDAL geotransform lands in ``rotated_dropped`` and the helper
- drops ``crs`` / ``transform`` / ``crs_wkt`` while emitting
- ``rotated_affine`` plus the no-georef marker. The non-VRT side does
- not have a way to write a rotated TIFF cleanly through ``to_geotiff``
- (axis-aligned only); the assertions here pin the attrs surface
- against the canonical ``georef_status`` values rather than a
- non-VRT TIFF parity check.
- """
- _, vrt = _make_rotated_pair(tmp_path, 'rot_2180')
- attrs = dict(read_vrt(vrt, allow_rotated=True).attrs)
- assert attrs['georef_status'] == GEOREF_STATUS_ROTATED_DROPPED
- assert attrs.get(_NO_GEOREF_KEY) is True
- assert 'rotated_affine' in attrs
- assert attrs.get('crs') is None
- assert attrs.get('crs_wkt') is None
- assert 'transform' not in attrs
-
-
-# ---------------------------------------------------------------------------
-# Parity tests: VRT chunked attrs vs dask numpy attrs.
-# ---------------------------------------------------------------------------
-
-
-def test_vrt_chunked_full_matches_dask(tmp_path):
- tiff, vrt = _make_full_pair(tmp_path, 'full_chunked_2180')
- tiff_attrs = _shared_canonical_attrs(
- dict(read_geotiff_dask(tiff, chunks=2).attrs)
- )
- vrt_attrs = _shared_canonical_attrs(
- dict(read_vrt(vrt, chunks=2).attrs)
- )
- assert tiff_attrs == vrt_attrs
-
-
-def test_vrt_chunked_transform_only_matches_dask(tmp_path):
- tiff, vrt = _make_transform_only_pair(tmp_path, 'tonly_chunked_2180')
- tiff_attrs = _shared_canonical_attrs(
- dict(read_geotiff_dask(tiff, chunks=2).attrs)
- )
- vrt_attrs = _shared_canonical_attrs(
- dict(read_vrt(vrt, chunks=2).attrs)
- )
- assert tiff_attrs == vrt_attrs
-
-
-def test_vrt_chunked_crs_only_matches_dask(tmp_path):
- tiff, vrt = _make_crs_only_pair(tmp_path, 'crsonly_chunked_2180')
- tiff_attrs = _shared_canonical_attrs(
- dict(read_geotiff_dask(tiff, chunks=2).attrs)
- )
- vrt_attrs = _shared_canonical_attrs(
- dict(read_vrt(vrt, chunks=2).attrs)
- )
- assert tiff_attrs == vrt_attrs
-
-
-def test_vrt_chunked_none_matches_dask(tmp_path):
- tiff, vrt = _make_none_pair(tmp_path, 'none_chunked_2180')
- tiff_attrs = _shared_canonical_attrs(
- dict(read_geotiff_dask(tiff, chunks=2).attrs)
- )
- vrt_attrs = _shared_canonical_attrs(
- dict(read_vrt(vrt, chunks=2).attrs)
- )
- assert tiff_attrs == vrt_attrs
-
-
-def test_vrt_chunked_rotated_dropped(tmp_path):
- _, vrt = _make_rotated_pair(tmp_path, 'rot_chunked_2180')
- attrs = dict(read_vrt(vrt, allow_rotated=True, chunks=2).attrs)
- assert attrs['georef_status'] == GEOREF_STATUS_ROTATED_DROPPED
- assert attrs.get(_NO_GEOREF_KEY) is True
- assert 'rotated_affine' in attrs
-
-
-# ---------------------------------------------------------------------------
-# band_nodata paths: the ``'first'`` opt-out keeps the legacy
-# flatten-to-band-0 semantics. Pin per-band attrs on a mixed VRT.
-# ---------------------------------------------------------------------------
-
-
-def _write_two_band_per_band_nodata_vrt(tmp_path):
- band0 = np.array([[1, 2], [3, 65535]], dtype=np.uint16)
- band1 = np.array([[7, 8], [9, 65000]], dtype=np.uint16)
- p0 = str(tmp_path / 'vrt_band0_2180.tif')
- p1 = str(tmp_path / 'vrt_band1_2180.tif')
- write(band0, p0, nodata=65535, compression='none', tiled=False)
- write(band1, p1, nodata=65000, compression='none', tiled=False)
-
- vrt_path = str(tmp_path / 'two_band_per_band_nodata_2180.vrt')
- vrt_xml = f"""
- 0.0, 1.0, 0.0, 0.0, 0.0, -1.0
-
- 65535
-
- {p0}
- 1
-
-
-
-
-
- 65000
-
- {p1}
- 1
-
-
-
-
-"""
- with open(vrt_path, 'w') as f:
- f.write(vrt_xml)
- return vrt_path
-
-
-def test_band_nodata_first_band_attrs(tmp_path):
- """``band=1`` with ``band_nodata='first'`` surfaces band 1's
- sentinel on attrs and masks against it. Pins the per-band selection
- survives the migration."""
- vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
- r = read_vrt(vrt_path, band=1, band_nodata='first')
- assert r.attrs['nodata'] == 65000.0
- assert r.attrs['masked_nodata'] is True
- assert np.isnan(r.values[1, 1])
- assert r.attrs.get('nodata_pixels_present') is True
-
-
-def test_band_nodata_chunked_first_band_attrs(tmp_path):
- """The chunked path threads the same per-band sentinel onto attrs."""
- vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
- r = read_vrt(vrt_path, band=1, band_nodata='first', chunks=2)
- assert r.attrs['nodata'] == 65000.0
- assert r.attrs['masked_nodata'] is True
- # Chunked path leaves ``nodata_pixels_present`` unset by contract.
- assert 'nodata_pixels_present' not in r.attrs
-
-
-def _make_no_sentinel_vrt(tmp_path, name):
- """A single-band float VRT with no ````. Used to pin the
- ``dtype=`` + no-sentinel branch of ``_finalize_lazy_read_attrs``
- (``caller_dtype`` set, ``nodata`` is None -> attr stays absent)."""
- tiff = str(tmp_path / f'{name}_tiff.tif')
- vrt = str(tmp_path / f'{name}.vrt')
- arr = np.arange(16, dtype=np.float32).reshape(4, 4)
- write(arr, tiff, compression='none', tiled=False)
- _write_single_source_vrt(
- tiff, vrt, width=4, height=4,
- geo_transform='0.0, 1.0, 0.0, 0.0, 0.0, -1.0',
- nodata=None,
- )
- return vrt
-
-
-def test_dtype_cast_no_sentinel_omits_attr_eager(tmp_path):
- """Eager VRT with ``dtype=`` and no declared sentinel: the helper
- receives ``caller_dtype=np.float64`` but ``nodata is None``, so
- ``nodata_dtype_cast`` stays absent. Pins the symmetric branch the
- dask parity test covers for non-VRT."""
- vrt = _make_no_sentinel_vrt(tmp_path, 'no_sentinel_eager_2180')
- r = read_vrt(vrt, dtype=np.float64)
- assert r.dtype == np.float64
- assert 'nodata' not in r.attrs
- assert 'masked_nodata' not in r.attrs
- assert 'nodata_dtype_cast' not in r.attrs
-
-
-def test_dtype_cast_no_sentinel_omits_attr_chunked(tmp_path):
- """Chunked VRT with ``dtype=`` and no declared sentinel: same
- ``nodata_dtype_cast`` pop as the eager branch."""
- vrt = _make_no_sentinel_vrt(tmp_path, 'no_sentinel_chunked_2180')
- r = read_vrt(vrt, dtype=np.float64, chunks=2)
- assert r.dtype == np.float64
- assert 'nodata' not in r.attrs
- assert 'masked_nodata' not in r.attrs
- assert 'nodata_dtype_cast' not in r.attrs
-
-
-# ---------------------------------------------------------------------------
-# missing_sources paths: ``warn`` surfaces ``vrt_holes`` on the eager
-# path; the chunked parse-time scan also surfaces it.
-# ---------------------------------------------------------------------------
-
-
-def test_missing_sources_eager_surfaces_vrt_holes(tmp_path):
- """The eager VRT path keeps populating ``attrs['vrt_holes']`` after
- the migration, even though the field rides outside the synthesised
- ``GeoInfo`` and through ``attrs_in`` on the helper."""
- tiff_path = str(tmp_path / 'present_2180.tif')
- arr = np.arange(16, dtype=np.float32).reshape(4, 4)
- write(arr, tiff_path, compression='none', tiled=False)
-
- missing_path = str(tmp_path / 'missing_2180.tif') # never created
- vrt_path = str(tmp_path / 'mosaic_2180.vrt')
- vrt_xml = f"""
- 0.0, 1.0, 0.0, 0.0, 0.0, -1.0
-
-
- {tiff_path}
- 1
-
-
-
-
- {missing_path}
- 1
-
-
-
-
-"""
- with open(vrt_path, 'w') as f:
- f.write(vrt_xml)
- with warnings.catch_warnings():
- warnings.simplefilter('ignore')
- r = read_vrt(vrt_path, missing_sources='warn')
- assert 'vrt_holes' in r.attrs
- holes = r.attrs['vrt_holes']
- assert isinstance(holes, list) and len(holes) >= 1
- # Each hole entry has the documented shape.
- for hole in holes:
- assert 'source' in hole
- assert 'band' in hole
- assert 'dst_rect' in hole
- assert 'error' in hole
-
-
-def test_missing_sources_chunked_surfaces_vrt_holes(tmp_path):
- """Chunked path's parse-time existence sweep still populates
- ``attrs['vrt_holes']`` after the migration."""
- tiff_path = str(tmp_path / 'present_chunked_2180.tif')
- arr = np.arange(16, dtype=np.float32).reshape(4, 4)
- write(arr, tiff_path, compression='none', tiled=False)
-
- missing_path = str(tmp_path / 'missing_chunked_2180.tif')
- vrt_path = str(tmp_path / 'mosaic_chunked_2180.vrt')
- vrt_xml = f"""
- 0.0, 1.0, 0.0, 0.0, 0.0, -1.0
-
-
- {tiff_path}
- 1
-
-
-
-
- {missing_path}
- 1
-
-
-
-
-"""
- with open(vrt_path, 'w') as f:
- f.write(vrt_xml)
- r = read_vrt(vrt_path, missing_sources='warn', chunks=2)
- assert 'vrt_holes' in r.attrs
- holes = r.attrs['vrt_holes']
- assert isinstance(holes, list) and len(holes) >= 1
-
-
-# ---------------------------------------------------------------------------
-# georef_status parity across the five states between VRT eager,
-# VRT chunked, non-VRT eager, and non-VRT chunked.
-# ---------------------------------------------------------------------------
-
-
-_STATUS_PAIRS = [
- pytest.param(_make_full_pair, GEOREF_STATUS_FULL, False, id="full"),
- pytest.param(
- _make_transform_only_pair, GEOREF_STATUS_TRANSFORM_ONLY,
- False, id="transform_only",
- ),
- pytest.param(
- _make_crs_only_pair, GEOREF_STATUS_CRS_ONLY,
- False, id="crs_only",
- ),
- pytest.param(_make_none_pair, GEOREF_STATUS_NONE, False, id="none"),
- pytest.param(
- _make_rotated_pair, GEOREF_STATUS_ROTATED_DROPPED, True,
- id="rotated_dropped",
- ),
-]
-
-
-@pytest.mark.parametrize("pair_factory,expected_status,allow_rotated",
- _STATUS_PAIRS)
-def test_georef_status_eager_parity(tmp_path, pair_factory, expected_status,
- allow_rotated):
- """VRT eager and (where applicable) non-VRT eager agree on
- ``georef_status``. The rotated VRT case has no non-VRT counterpart
- through ``to_geotiff``, so the test pins the VRT value alone."""
- tiff, vrt = pair_factory(tmp_path, f'georef_eager_{expected_status}')
- kwargs = {'allow_rotated': True} if allow_rotated else {}
- vrt_status = read_vrt(vrt, **kwargs).attrs.get('georef_status')
- assert vrt_status == expected_status
- if not allow_rotated:
- tiff_status = open_geotiff(tiff, **kwargs).attrs.get('georef_status')
- assert tiff_status == expected_status
- assert vrt_status == tiff_status
-
-
-@pytest.mark.parametrize("pair_factory,expected_status,allow_rotated",
- _STATUS_PAIRS)
-def test_georef_status_chunked_parity(tmp_path, pair_factory, expected_status,
- allow_rotated):
- """VRT chunked and non-VRT chunked agree on ``georef_status``."""
- tiff, vrt = pair_factory(tmp_path, f'georef_chunked_{expected_status}')
- kwargs = {'allow_rotated': True} if allow_rotated else {}
- vrt_status = read_vrt(vrt, chunks=2, **kwargs).attrs.get('georef_status')
- assert vrt_status == expected_status
- if not allow_rotated:
- tiff_status = read_geotiff_dask(
- tiff, chunks=2, **kwargs
- ).attrs.get('georef_status')
- assert tiff_status == expected_status
- assert vrt_status == tiff_status
-
-
-# ---------------------------------------------------------------------------
-# Eager/chunked VRT internal parity: the same VRT read eagerly and
-# chunked should agree on the canonical attrs (modulo the documented
-# absence of ``nodata_pixels_present`` on lazy reads).
-# ---------------------------------------------------------------------------
-
-
-_VRT_FACTORIES = [
- pytest.param(_make_full_pair, False, id="full"),
- pytest.param(_make_transform_only_pair, False, id="transform_only"),
- pytest.param(_make_crs_only_pair, False, id="crs_only"),
- pytest.param(_make_none_pair, False, id="none"),
- pytest.param(_make_rotated_pair, True, id="rotated_dropped"),
-]
-
-
-@pytest.mark.parametrize("pair_factory,allow_rotated", _VRT_FACTORIES)
-def test_vrt_eager_chunked_internal_parity(tmp_path, pair_factory,
- allow_rotated):
- """Eager and chunked VRT reads of the same fixture agree on the
- shared canonical attrs (``crs`` / ``crs_wkt`` / ``transform`` /
- ``georef_status`` / contract version). The lazy contract from
- #2135 leaves ``nodata_pixels_present`` unset on chunked output, so
- the comparison drops that key."""
- _, vrt = pair_factory(tmp_path, 'internal_parity_2180')
- kwargs = {'allow_rotated': True} if allow_rotated else {}
- eager_attrs = dict(read_vrt(vrt, **kwargs).attrs)
- chunked_attrs = dict(read_vrt(vrt, chunks=2, **kwargs).attrs)
- eager_attrs.pop('nodata_pixels_present', None)
- chunked_attrs.pop('nodata_pixels_present', None)
- assert eager_attrs == chunked_attrs
diff --git a/xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py b/xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py
deleted file mode 100644
index a43f94a45..000000000
--- a/xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""Regression test for #1843: the internal ``read_vrt`` in ``_vrt.py``
-defaults to ``missing_sources='raise'`` so an unreadable source halts
-the call instead of leaving a silent zero-fill hole on integer rasters.
-
-Callers wanting the historical lenient behaviour pass
-``missing_sources='warn'`` explicitly. The strict-mode env var
-``XRSPATIAL_GEOTIFF_STRICT=1`` continues to force-raise across the
-whole module (orthogonal axis, not affected by this change).
-"""
-from __future__ import annotations
-
-import pytest
-
-from xrspatial.geotiff import GeoTIFFFallbackWarning
-from xrspatial.geotiff._vrt import read_vrt as _read_vrt_internal
-
-
-def _write_missing_source_vrt(path):
- path.write_text(
- '\n'
- ' \n'
- ' \n'
- ' missing_1843.tif'
- '\n'
- ' 1\n'
- ' \n'
- ' \n'
- ' \n'
- ' \n'
- '\n'
- )
-
-
-def test_read_vrt_default_raises_on_unreadable_source(tmp_path):
- """Without an explicit ``missing_sources`` kwarg, an unreadable
- backing source must raise rather than silently zero-fill.
-
- This is the behaviour change from #1843. Before this commit the
- default was ``'warn'`` and a missing ``Byte`` tile produced a hole
- of zero pixels that was indistinguishable from real data unless
- the caller checked ``attrs['vrt_holes']``.
- """
- vrt = tmp_path / "tmp_1843_default_raise.vrt"
- _write_missing_source_vrt(vrt)
-
- with pytest.raises((OSError, ValueError)):
- _read_vrt_internal(str(vrt))
-
-
-def test_read_vrt_explicit_warn_preserves_lenient_behaviour(tmp_path):
- """``missing_sources='warn'`` is still the escape hatch for callers
- that want partial mosaics with ``vrt.holes`` populated.
-
- Pinning the lenient path here keeps the historical contract
- available to callers who opt in. The warning and the hole record
- must both still surface.
- """
- vrt = tmp_path / "tmp_1843_explicit_warn.vrt"
- _write_missing_source_vrt(vrt)
-
- with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
- arr, parsed = _read_vrt_internal(str(vrt), missing_sources='warn')
-
- assert arr.shape == (2, 2)
- assert len(parsed.holes) == 1
- assert parsed.holes[0]['source'].endswith('missing_1843.tif')
-
-
-def test_read_vrt_strict_env_still_raises_under_warn(monkeypatch, tmp_path):
- """``XRSPATIAL_GEOTIFF_STRICT=1`` continues to force-raise even
- when the caller explicitly asks for the lenient ``'warn'`` policy.
-
- The strict env var is a module-wide override (see #1662); it must
- still win over per-call ``missing_sources='warn'`` so CI runs with
- strict mode catch partial mosaics regardless of caller settings.
- """
- vrt = tmp_path / "tmp_1843_strict_env.vrt"
- _write_missing_source_vrt(vrt)
-
- monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1")
-
- with pytest.raises((OSError, ValueError)):
- _read_vrt_internal(str(vrt), missing_sources='warn')
diff --git a/xrspatial/geotiff/tests/vrt/test_missing_sources.py b/xrspatial/geotiff/tests/vrt/test_missing_sources.py
index 0e41826ec..a0399e158 100644
--- a/xrspatial/geotiff/tests/vrt/test_missing_sources.py
+++ b/xrspatial/geotiff/tests/vrt/test_missing_sources.py
@@ -16,14 +16,22 @@
* Any other value raises ``ValueError`` naming the bad kwarg and
echoing the bad value via ``repr()``.
-The companion file ``test_vrt_missing_sources_default_raise_1843.py``
-stays in place for now: it exercises the internal
-``xrspatial.geotiff._vrt.read_vrt`` entry point and the
-``XRSPATIAL_GEOTIFF_STRICT=1`` env-var override, neither of which is in
-this module's surface.
+Also folds the VRT-tail missing-sources residue (cluster 13, #2437):
+
+* Internal ``_vrt.read_vrt`` entry point default-raise + explicit-warn
+ + ``XRSPATIAL_GEOTIFF_STRICT=1`` override (was
+ ``test_vrt_missing_sources_default_raise_1843.py``).
+* Public ``read_vrt`` / ``open_geotiff('.vrt')`` default-raise +
+ explicit-warn (was ``test_read_vrt_default_missing_sources_1860.py``).
+* Chunked-path missing-source policy: ``vrt_holes`` at build,
+ raise-at-build, per-task compute warnings, window / band scoping,
+ multi-source error preview (was
+ ``test_vrt_chunked_missing_sources_1799.py`` and
+ ``test_vrt_chunked_missing_raise_at_build_2265.py``).
"""
from __future__ import annotations
+import os
import warnings
import numpy as np
@@ -36,6 +44,7 @@
read_vrt,
to_geotiff,
)
+from xrspatial.geotiff._vrt import read_vrt as _internal_read_vrt
PRESENT_FILL = 7.0
@@ -315,3 +324,571 @@ def test_eager_byte_invalid_policy(self, tmp_path):
vrt = _write_byte_missing_vrt(tmp_path)
with pytest.raises(ValueError, match="missing_sources"):
read_vrt(vrt, missing_sources="ignore")
+
+
+# ===========================================================================
+# Internal ``_vrt.read_vrt`` entry point (was
+# test_vrt_missing_sources_default_raise_1843.py).
+#
+# The public matrix above exercises the package-level ``read_vrt`` /
+# ``open_geotiff`` surface. These cases pin the internal
+# ``xrspatial.geotiff._vrt.read_vrt`` entry point directly, including the
+# ``XRSPATIAL_GEOTIFF_STRICT=1`` module-wide override that wins over a
+# per-call ``missing_sources='warn'``.
+# ===========================================================================
+
+
+def _write_internal_missing_source_vrt(path) -> None:
+    """All-missing 2x2 Byte VRT for the internal-entry-point cases.
+
+    The document is written with ``path.write_text`` and names a single
+    source, ``missing_1843.tif``.
+
+    NOTE(review): the literals below carry no XML markup in this copy of
+    the patch -- confirm they match the repository before applying.
+    """
+    path.write_text(
+        '\n'
+        ' \n'
+        ' \n'
+        ' missing_1843.tif'
+        '\n'
+        ' 1\n'
+        ' \n'
+        ' \n'
+        ' \n'
+        ' \n'
+        '\n'
+    )
+
+
+class TestInternalEntryPointMissingSources:
+    """Default and opt-in policies of ``xrspatial.geotiff._vrt.read_vrt``."""
+
+    def test_internal_default_raises_on_unreadable_source(self, tmp_path):
+        """Omitting ``missing_sources`` must raise on an unreadable source.
+
+        Under the old ``'warn'`` default a missing ``Byte`` tile became a
+        hole of zero pixels that only ``attrs['vrt_holes']`` could tell
+        apart from real data.
+        """
+        vrt_file = tmp_path / "tmp_1843_default_raise.vrt"
+        _write_internal_missing_source_vrt(vrt_file)
+        vrt_str = str(vrt_file)
+        with pytest.raises((OSError, ValueError)):
+            _internal_read_vrt(vrt_str)
+
+    def test_internal_explicit_warn_preserves_lenient_behaviour(self, tmp_path):
+        """An explicit ``missing_sources='warn'`` still yields a partial
+        mosaic and records the hole on ``parsed.holes``."""
+        vrt_file = tmp_path / "tmp_1843_explicit_warn.vrt"
+        _write_internal_missing_source_vrt(vrt_file)
+        with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
+            pixels, parsed_vrt = _internal_read_vrt(
+                str(vrt_file), missing_sources='warn',
+            )
+        assert pixels.shape == (2, 2)
+        recorded = parsed_vrt.holes
+        assert len(recorded) == 1
+        assert recorded[0]['source'].endswith('missing_1843.tif')
+
+    def test_internal_strict_env_still_raises_under_warn(
+        self, monkeypatch, tmp_path,
+    ):
+        """``XRSPATIAL_GEOTIFF_STRICT=1`` beats a per-call ``'warn'``.
+
+        The env var is a module-wide override, so strict CI runs keep
+        catching partial mosaics whatever policy the caller asked for.
+        """
+        vrt_file = tmp_path / "tmp_1843_strict_env.vrt"
+        _write_internal_missing_source_vrt(vrt_file)
+        monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1")
+        vrt_str = str(vrt_file)
+        with pytest.raises((OSError, ValueError)):
+            _internal_read_vrt(vrt_str, missing_sources='warn')
+
+
+# ===========================================================================
+# Public default ``missing_sources='raise'`` on read_vrt + open_geotiff
+# (was test_read_vrt_default_missing_sources_1860.py).
+#
+# Pins that the public wrapper's default matches the internal
+# ``_vrt.read_vrt`` default rather than silently overriding it with the
+# old lenient ``'warn'`` behaviour.
+# ===========================================================================
+
+
+def _write_public_missing_source_vrt(path) -> None:
+    """Write the VRT used by the public default-policy cases to ``path``.
+
+    The document is written with ``path.write_text`` and names a single
+    source, ``missing_1860.tif``.
+
+    NOTE(review): the literals below carry no XML markup in this copy of
+    the patch -- confirm they match the repository before applying.
+    """
+    path.write_text(
+        '\n'
+        ' \n'
+        ' \n'
+        ' missing_1860.tif'
+        '\n'
+        ' 1\n'
+        ' \n'
+        ' \n'
+        ' \n'
+        ' \n'
+        '\n'
+    )
+
+
+class TestPublicDefaultMissingSources:
+    """``read_vrt`` and ``open_geotiff('.vrt')`` default to ``'raise'``."""
+
+    def test_public_read_vrt_default_raises(self, tmp_path):
+        """``read_vrt`` without ``missing_sources`` raises.
+
+        The public default follows the internal ``_vrt.read_vrt`` default
+        of ``'raise'``: an unreadable source stops the call rather than
+        producing a partial mosaic tagged with ``attrs['vrt_holes']``.
+        """
+        vrt_file = tmp_path / "tmp_1860_public_default_raise.vrt"
+        _write_public_missing_source_vrt(vrt_file)
+        vrt_str = str(vrt_file)
+        with pytest.raises((OSError, ValueError)):
+            read_vrt(vrt_str)
+
+    def test_open_geotiff_vrt_default_raises(self, tmp_path):
+        """``open_geotiff(vrt_path)`` without ``missing_sources`` raises.
+
+        ``open_geotiff`` only forwards ``missing_sources`` when the caller
+        supplied it; otherwise the public ``read_vrt`` default is used.
+        """
+        vrt_file = tmp_path / "tmp_1860_open_geotiff_default_raise.vrt"
+        _write_public_missing_source_vrt(vrt_file)
+        vrt_str = str(vrt_file)
+        with pytest.raises((OSError, ValueError)):
+            open_geotiff(vrt_str)
+
+    def test_public_read_vrt_explicit_warn_preserves_lenient_behaviour(
+        self, tmp_path,
+    ):
+        """``read_vrt(..., missing_sources='warn')`` keeps returning a
+        partial mosaic with the hole recorded in the attrs."""
+        vrt_file = tmp_path / "tmp_1860_public_explicit_warn.vrt"
+        _write_public_missing_source_vrt(vrt_file)
+        with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
+            mosaic = read_vrt(str(vrt_file), missing_sources='warn')
+        assert 'vrt_holes' in mosaic.attrs
+        first_hole = mosaic.attrs['vrt_holes'][0]
+        assert first_hole['source'].endswith('missing_1860.tif')
+
+    def test_open_geotiff_vrt_explicit_warn_preserves_lenient_behaviour(
+        self, tmp_path,
+    ):
+        """``open_geotiff(vrt_path, missing_sources='warn')`` keeps
+        returning a partial mosaic with the hole recorded in the attrs."""
+        vrt_file = tmp_path / "tmp_1860_open_geotiff_explicit_warn.vrt"
+        _write_public_missing_source_vrt(vrt_file)
+        with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
+            mosaic = open_geotiff(str(vrt_file), missing_sources='warn')
+        assert 'vrt_holes' in mosaic.attrs
+        first_hole = mosaic.attrs['vrt_holes'][0]
+        assert first_hole['source'].endswith('missing_1860.tif')
+
+
+# ===========================================================================
+# Chunked-path missing-source policy (was
+# test_vrt_chunked_missing_sources_1799.py).
+#
+# The eager path scans every source at decode time. The chunked path
+# uses a parse-time ``os.path.exists`` sweep to populate ``vrt_holes`` at
+# build, and threads ``missing_sources`` through to the per-chunk decode.
+# ===========================================================================
+
+
+def _chunked_make_partial_vrt(tmp_path) -> tuple[str, str]:
+    """2-source VRT: present source on the left, missing on the right.
+
+    The present source is a 4x4 float32 GeoTIFF filled with 7.0, written
+    to ``src_present.tif`` under ``tmp_path``. ``missing.tif`` is only
+    referenced from the VRT and never created. Paths are built with
+    ``os.path.join``.
+
+    Returns ``(vrt_path, present_src_path)``.
+
+    NOTE(review): the literals below carry no XML markup in this copy of
+    the patch -- confirm they match the repository before applying.
+    """
+    src = os.path.join(tmp_path, "src_present.tif")
+    arr = np.full((4, 4), 7.0, dtype=np.float32)
+    da = xr.DataArray(
+        arr, dims=("y", "x"),
+        attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)},
+    )
+    to_geotiff(da, src)
+
+    # Deliberately not written to disk: this is the hole under test.
+    missing = os.path.join(tmp_path, "missing.tif")
+    vrt_path = os.path.join(tmp_path, "partial.vrt")
+    with open(vrt_path, "w") as f:
+        f.write(
+            '\n'
+            '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
+            '\n'
+            '\n'
+            f'{src}\n'
+            '1\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+            f'{missing}\n'
+            '1\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+        )
+    return vrt_path, src
+
+
+class TestChunkedMissingSourcesWarn:
+    """Chunked ``missing_sources='warn'`` reads record holes at build."""
+
+    def test_vrt_holes_populated_at_build(self, tmp_path):
+        """``attrs['vrt_holes']`` describes the missing source before any
+        compute is triggered."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        lazy = read_vrt(vrt_path, chunks=4, missing_sources="warn")
+        assert "vrt_holes" in lazy.attrs, (
+            "Chunked path must populate vrt_holes at build time so "
+            "callers can detect partial mosaics without forcing a compute."
+        )
+        holes = lazy.attrs["vrt_holes"]
+        assert len(holes) == 1
+        hole = holes[0]
+        assert set(hole.keys()) == {"source", "band", "dst_rect", "error"}
+        assert hole["source"].endswith("missing.tif")
+        assert hole["band"] == 1
+        assert hole["dst_rect"] == (4, 0, 4, 4)
+
+    def test_compute_emits_per_task_warning(self, tmp_path):
+        """Computing the graph warns about the missing source; the present
+        half keeps its fill value and the hole columns come back as NaN."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            computed = read_vrt(
+                vrt_path, chunks=4, missing_sources="warn",
+            ).compute()
+        messages = []
+        for record in caught:
+            if isinstance(record.message, GeoTIFFFallbackWarning):
+                messages.append(str(record.message))
+        assert any("missing.tif" in msg for msg in messages), (
+            f"Expected GeoTIFFFallbackWarning naming the missing source "
+            f"after compute, got messages: {messages!r}"
+        )
+        pixels = np.asarray(computed)
+        np.testing.assert_array_equal(
+            pixels[:, :4], np.full((4, 4), 7.0, dtype=np.float32),
+        )
+        assert np.all(np.isnan(pixels[:, 4:]))
+
+    def test_chunks_tuple_form(self, tmp_path):
+        """A ``chunks=(h, w)`` tuple records the hole just like an int."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        attrs = read_vrt(
+            vrt_path, chunks=(2, 4), missing_sources="warn",
+        ).attrs
+        assert "vrt_holes" in attrs
+        assert len(attrs["vrt_holes"]) == 1
+
+
+class TestChunkedMissingSourcesRaiseSmoke:
+    """Smoke checks: chunked ``missing_sources='raise'`` fails at build.
+
+    The full raise-at-build matrix (window / band scoping, multi-source
+    preview, strict env) is in the 2265 section below; these are the 1799
+    smoke assertions, kept next to the warn cases they were paired with.
+    """
+
+    def test_build_raises_immediately(self, tmp_path):
+        """Constructing the lazy array raises and names the source."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        with pytest.raises(FileNotFoundError, match="missing.tif"):
+            read_vrt(vrt_path, chunks=4, missing_sources="raise")
+
+    def test_build_raise_message_mentions_policy_kwarg(self, tmp_path):
+        """The error text points at ``missing_sources='warn'``."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        with pytest.raises(FileNotFoundError) as excinfo:
+            read_vrt(vrt_path, chunks=4, missing_sources="raise")
+        text = str(excinfo.value)
+        assert "missing_sources='warn'" in text
+
+    def test_window_past_missing_succeeds_under_raise(self, tmp_path):
+        """A window covering only the present source builds and computes."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        lazy = read_vrt(
+            vrt_path, chunks=4, window=(0, 0, 4, 4),
+            missing_sources="raise",
+        )
+        pixels = np.asarray(lazy.compute())
+        expected = np.full((4, 4), 7.0, dtype=np.float32)
+        np.testing.assert_array_equal(pixels, expected)
+
+    def test_band_selection_single_band_still_raises(self, tmp_path):
+        """``band=0`` (the only band) still covers the missing source, so
+        the build raises. Cross-band gating is covered by the multiband
+        cases in the 2265 section below."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        with pytest.raises(FileNotFoundError):
+            read_vrt(vrt_path, chunks=4, band=0, missing_sources="raise")
+
+
+class TestChunkedMissingSourcesDefault:
+    """Chunked reads use ``missing_sources='raise'`` when none is given."""
+
+    def test_chunked_default_raises_at_build(self, tmp_path):
+        """Omitting the kwarg raises at build and names the source."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        with pytest.raises(FileNotFoundError, match="missing.tif"):
+            read_vrt(vrt_path, chunks=4)
+
+
+class TestChunkedMissingSourcesValidation:
+    """Unknown ``missing_sources`` values are rejected with ValueError."""
+
+    def test_invalid_policy_raises_at_build(self, tmp_path):
+        """The chunked path rejects ``missing_sources="ignore"``."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        with pytest.raises(ValueError, match="missing_sources"):
+            read_vrt(vrt_path, chunks=4, missing_sources="ignore")
+
+    def test_invalid_policy_raises_without_chunks_too(self, tmp_path):
+        """The eager path rejects the same bad value, so the error does
+        not depend on whether ``chunks=`` was passed."""
+        vrt_path = _chunked_make_partial_vrt(str(tmp_path))[0]
+        with pytest.raises(ValueError, match="missing_sources"):
+            read_vrt(vrt_path, missing_sources="ignore")
+
+
+# ===========================================================================
+# Chunked raise-at-build matrix (was
+# test_vrt_chunked_missing_raise_at_build_2265.py).
+#
+# The chunked path now honours ``missing_sources='raise'`` at build time:
+# the static ``os.path.exists`` sweep raises up front when a hole
+# intersects the requested window / selected band, instead of only the
+# per-chunk delayed decode raising at compute.
+# ===========================================================================
+
+
+def _raise_write_present_source(tmp_path: str, name: str, fill: float) -> str:
+    """Create a constant 4x4 float32 GeoTIFF and return its path.
+
+    The file is written to ``os.path.join(tmp_path, name)`` with every
+    pixel set to ``fill``; it serves as a readable source in the
+    multi-source VRT fixtures.
+    """
+    target = os.path.join(tmp_path, name)
+    raster = xr.DataArray(
+        np.full((4, 4), fill, dtype=np.float32),
+        dims=("y", "x"),
+        attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)},
+    )
+    to_geotiff(raster, target)
+    return target
+
+
+def _raise_make_horizontal_partial_vrt(tmp_path: str) -> str:
+    """2-source VRT: ``[ present | missing ]`` laid out 4x8.
+
+    The present source is written by ``_raise_write_present_source`` with
+    fill 7.0. ``missing_2265_h.tif`` is only referenced from the VRT and
+    never created. Returns the path of the VRT file.
+
+    NOTE(review): the literals below carry no XML markup in this copy of
+    the patch -- confirm they match the repository before applying.
+    """
+    src = _raise_write_present_source(tmp_path, "src_2265_h_present.tif", 7.0)
+    # Deliberately not written to disk: this is the hole under test.
+    missing = os.path.join(tmp_path, "missing_2265_h.tif")
+    vrt_path = os.path.join(tmp_path, "partial_2265_h.vrt")
+    with open(vrt_path, "w") as f:
+        f.write(
+            '\n'
+            '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
+            '\n'
+            '\n'
+            f'{src}\n'
+            '1\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+            f'{missing}\n'
+            '1\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+        )
+    return vrt_path
+
+
+def _raise_make_multiband_partial_vrt(tmp_path: str) -> str:
+    """2-band VRT where band 1 has a missing source and band 2 is intact.
+
+    Band 1 references a present source (fill 11.0) plus
+    ``missing_2265_mb_b1.tif``, which is never created. Band 2 references
+    a present source with fill 22.0. Returns the path of the VRT file.
+
+    NOTE(review): the literals below carry no XML markup in this copy of
+    the patch -- confirm they match the repository before applying.
+    """
+    src_b1 = _raise_write_present_source(tmp_path, "src_2265_mb_b1.tif", 11.0)
+    src_b2 = _raise_write_present_source(tmp_path, "src_2265_mb_b2.tif", 22.0)
+    # Deliberately not written to disk: this is the hole under test.
+    missing_b1 = os.path.join(tmp_path, "missing_2265_mb_b1.tif")
+    vrt_path = os.path.join(tmp_path, "partial_2265_multiband.vrt")
+    with open(vrt_path, "w") as f:
+        f.write(
+            '\n'
+            '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
+            '\n'
+            '\n'
+            f'{src_b1}\n'
+            '1\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+            f'{missing_b1}\n'
+            '1\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+            f'{src_b2}\n'
+            '1\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+            '\n'
+        )
+    return vrt_path
+
+
+def _raise_make_multi_missing_vrt(tmp_path: str, n_missing: int) -> str:
+    """VRT with ``n_missing`` missing sources tiling the destination.
+
+    Source ``i`` is named ``missing_2265_multi_{i}.tif`` under
+    ``tmp_path``; none of them is created. Returns the path of the VRT
+    file, ``partial_2265_multi_{n_missing}.vrt``.
+
+    NOTE(review): the literals below carry no XML markup in this copy of
+    the patch. ``width`` and the two placeholder-less f-strings were
+    presumably interpolated into that markup -- confirm against the
+    repository before applying.
+    """
+    vrt_path = os.path.join(tmp_path, f"partial_2265_multi_{n_missing}.vrt")
+    width = 4 * n_missing
+    # One XML fragment per missing source, concatenated into the band.
+    src_xml = []
+    for i in range(n_missing):
+        missing = os.path.join(tmp_path, f"missing_2265_multi_{i}.tif")
+        src_xml.append(
+            '\n'
+            f'{missing}\n'
+            '1\n'
+            '\n'
+            f'\n'
+            '\n'
+        )
+    with open(vrt_path, "w") as f:
+        f.write(
+            f'\n'
+            '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
+            '\n'
+            + ''.join(src_xml) +
+            '\n'
+            '\n'
+        )
+    return vrt_path
+
+
+class TestRaiseAtBuild:
+    """``missing_sources='raise'`` fails while building, not at compute."""
+
+    def test_build_raises_immediately(self, tmp_path):
+        """An explicit ``'raise'`` fails at build and names the source."""
+        partial_vrt = _raise_make_horizontal_partial_vrt(str(tmp_path))
+        with pytest.raises(FileNotFoundError, match="missing_2265_h"):
+            read_vrt(partial_vrt, chunks=4, missing_sources="raise")
+
+    def test_default_raises_at_build(self, tmp_path):
+        """``'raise'`` is the public default, so leaving the kwarg out
+        takes the same fast-fail path."""
+        partial_vrt = _raise_make_horizontal_partial_vrt(str(tmp_path))
+        with pytest.raises(FileNotFoundError):
+            read_vrt(partial_vrt, chunks=4)
+
+    def test_error_message_mentions_opt_in(self, tmp_path):
+        """The error text explains how to opt into the lenient path."""
+        partial_vrt = _raise_make_horizontal_partial_vrt(str(tmp_path))
+        with pytest.raises(FileNotFoundError) as excinfo:
+            read_vrt(partial_vrt, chunks=4, missing_sources="raise")
+        text = str(excinfo.value)
+        assert "missing_sources='warn'" in text
+        assert "partial mosaic" in text
+
+
+class TestRaiseAtBuildWindowScoping:
+    """Only holes inside the requested window trigger the raise."""
+
+    def test_window_past_missing_does_not_raise(self, tmp_path):
+        """A window over the present source alone builds and computes."""
+        partial_vrt = _raise_make_horizontal_partial_vrt(str(tmp_path))
+        lazy = read_vrt(
+            partial_vrt, chunks=4, window=(0, 0, 4, 4),
+            missing_sources="raise",
+        )
+        pixels = np.asarray(lazy.compute())
+        expected = np.full((4, 4), 7.0, dtype=np.float32)
+        np.testing.assert_array_equal(pixels, expected)
+
+    def test_window_intersecting_missing_raises(self, tmp_path):
+        """A window over the missing source raises at build."""
+        partial_vrt = _raise_make_horizontal_partial_vrt(str(tmp_path))
+        with pytest.raises(FileNotFoundError):
+            read_vrt(
+                partial_vrt, chunks=4, window=(0, 4, 4, 8),
+                missing_sources="raise",
+            )
+
+
+class TestRaiseAtBuildBandScoping:
+    """Only holes in the selected band trigger the raise."""
+
+    def test_band_select_skips_other_bands_missing_source(self, tmp_path):
+        """``band=1`` reads band 2 only, so band 1's missing source is not
+        part of the graph and the build must succeed."""
+        multiband_vrt = _raise_make_multiband_partial_vrt(str(tmp_path))
+        lazy = read_vrt(
+            multiband_vrt, chunks=4, band=1, missing_sources="raise",
+        )
+        pixels = np.asarray(lazy.compute())
+        expected = np.full((4, 4), 22.0, dtype=np.float32)
+        np.testing.assert_array_equal(pixels, expected)
+
+    def test_band_select_on_missing_band_raises(self, tmp_path):
+        """``band=0`` selects the band with the hole, so the build raises."""
+        multiband_vrt = _raise_make_multiband_partial_vrt(str(tmp_path))
+        with pytest.raises(FileNotFoundError):
+            read_vrt(multiband_vrt, chunks=4, band=0, missing_sources="raise")
+
+    def test_no_band_restriction_raises(self, tmp_path):
+        """With no ``band=`` every band is read, so the build raises."""
+        multiband_vrt = _raise_make_multiband_partial_vrt(str(tmp_path))
+        with pytest.raises(FileNotFoundError):
+            read_vrt(multiband_vrt, chunks=4, missing_sources="raise")
+
+
+class TestRaiseAtBuildWarnPreserved:
+    """``missing_sources='warn'`` still records holes and warns."""
+
+    def test_warn_records_holes_at_build(self, tmp_path):
+        """The hole is present on ``attrs['vrt_holes']`` before compute."""
+        partial_vrt = _raise_make_horizontal_partial_vrt(str(tmp_path))
+        attrs = read_vrt(
+            partial_vrt, chunks=4, missing_sources="warn",
+        ).attrs
+        assert "vrt_holes" in attrs
+        assert len(attrs["vrt_holes"]) == 1
+        hole_source = attrs["vrt_holes"][0]["source"]
+        assert hole_source.endswith("missing_2265_h.tif")
+
+    def test_warn_compute_emits_per_task_warning(self, tmp_path):
+        """Computing warns about the missing source; the present half
+        keeps its fill value and the hole columns come back as NaN."""
+        partial_vrt = _raise_make_horizontal_partial_vrt(str(tmp_path))
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            computed = read_vrt(
+                partial_vrt, chunks=4, missing_sources="warn",
+            ).compute()
+        messages = []
+        for record in caught:
+            if isinstance(record.message, GeoTIFFFallbackWarning):
+                messages.append(str(record.message))
+        assert any("missing_2265_h" in msg for msg in messages)
+        pixels = np.asarray(computed)
+        np.testing.assert_array_equal(
+            pixels[:, :4],
+            np.full((4, 4), 7.0, dtype=np.float32),
+        )
+        assert np.all(np.isnan(pixels[:, 4:]))
+
+
+class TestRaiseAtBuildMultipleMissingSources:
+    """The error message previews multiple holes and reports the total."""
+
+    def test_two_missing_sources_listed_with_count(self, tmp_path):
+        """All missing sources fit in the preview (n=2 <= preview cap)."""
+        vrt_path = _raise_make_multi_missing_vrt(str(tmp_path), n_missing=2)
+        with pytest.raises(FileNotFoundError) as excinfo:
+            read_vrt(vrt_path, chunks=4, missing_sources="raise")
+        msg = str(excinfo.value)
+        assert "missing_2265_multi_0" in msg
+        assert "missing_2265_multi_1" in msg
+        assert "2 missing source(s) total" in msg
+        # Nothing was truncated, so no empty "and 0 more" suffix may appear.
+        # (The former ``"more" not in msg.lower() or ...`` guard reduced to
+        # exactly this check.)
+        assert "and 0 more" not in msg
+
+    def test_many_missing_sources_truncated_with_more_suffix(self, tmp_path):
+        """Above the preview cap, the message says 'and N more'."""
+        n = 5
+        vrt_path = _raise_make_multi_missing_vrt(str(tmp_path), n_missing=n)
+        with pytest.raises(FileNotFoundError) as excinfo:
+            read_vrt(vrt_path, chunks=4, missing_sources="raise")
+        msg = str(excinfo.value)
+        # The first source is previewed and the last is cut off; the total
+        # and the number of omitted sources are both reported.
+        assert "missing_2265_multi_0" in msg
+        assert f"missing_2265_multi_{n - 1}" not in msg
+        assert f"{n} missing source(s) total" in msg
+        assert "and 2 more" in msg
+
+
+class TestRaiseAtBuildStrictMode:
+    """``XRSPATIAL_GEOTIFF_STRICT=1`` raises even when ``'warn'`` is set."""
+
+    def test_strict_overrides_warn_kwarg(self, tmp_path, monkeypatch):
+        """With strict mode on, a chunked ``'warn'`` read still raises."""
+        monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1")
+        partial_vrt = _raise_make_horizontal_partial_vrt(str(tmp_path))
+        with pytest.raises(FileNotFoundError):
+            read_vrt(partial_vrt, chunks=4, missing_sources="warn")
+
+    def test_strict_off_warn_still_warns(self, tmp_path, monkeypatch):
+        """With the env var unset, ``'warn'`` records the hole instead."""
+        monkeypatch.delenv("XRSPATIAL_GEOTIFF_STRICT", raising=False)
+        partial_vrt = _raise_make_horizontal_partial_vrt(str(tmp_path))
+        attrs = read_vrt(
+            partial_vrt, chunks=4, missing_sources="warn",
+        ).attrs
+        assert "vrt_holes" in attrs
diff --git a/xrspatial/geotiff/tests/vrt/test_parity.py b/xrspatial/geotiff/tests/vrt/test_parity.py
new file mode 100644
index 000000000..6f57e544d
--- /dev/null
+++ b/xrspatial/geotiff/tests/vrt/test_parity.py
@@ -0,0 +1,1156 @@
+"""Cross-backend parity and backend-coverage for the VRT read path.
+
+Consolidates the VRT-tail parity / coverage residue (cluster 13, #2437):
+
+* Backend parity for VRT reads with sidecar / overview interactions:
+ eager-vs-dask pixel + metadata (coords, transform, CRS,
+ ``georef_status``) parity, sidecar-vs-inline-overview attrs, and the
+ windowed coord / transform shift (was
+ ``test_vrt_backend_parity_2321.py``).
+* Cross-backend parity for the VRT finalization pipeline: VRT eager vs
+ ``open_geotiff`` and VRT chunked vs ``read_geotiff_dask`` for the five
+ canonical georef states, ``band_nodata='first'`` per-band attrs,
+ ``dtype=`` no-sentinel branch, ``missing_sources='warn'`` vrt_holes,
+ and eager/chunked internal parity (was
+ ``test_vrt_finalization_parity_2162.py``).
+* Backend / parameter coverage for ``read_vrt``: the GPU and dask+GPU
+ decode paths, ``dtype=`` / ``name=`` kwargs, and the file-like +
+ backend-kwarg rejection on ``open_geotiff`` (was
+ ``test_vrt_backend_coverage_2026_05_11.py``).
+
+The parity helpers (``_materialise`` / ``_assert_pixels_equal`` /
+``_assert_metadata_parity``) mirror ``parity/test_backend_matrix.py`` so
+cross-test parity reads the same way; this file keeps them VRT-local
+rather than re-homing the shared harness.
+"""
+from __future__ import annotations
+
+import importlib.util
+import io
+import os
+import pathlib
+import shutil
+import warnings
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from xrspatial.geotiff import (
+ open_geotiff,
+ read_geotiff_dask,
+ read_vrt,
+ to_geotiff,
+)
+from xrspatial.geotiff._attrs import (
+ GEOREF_STATUS_CRS_ONLY,
+ GEOREF_STATUS_FULL,
+ GEOREF_STATUS_NONE,
+ GEOREF_STATUS_ROTATED_DROPPED,
+ GEOREF_STATUS_TRANSFORM_ONLY,
+)
+from xrspatial.geotiff._coords import _NO_GEOREF_KEY
+from xrspatial.geotiff._vrt import write_vrt as _write_vrt_internal
+from xrspatial.geotiff._writer import write
+
+tifffile = pytest.importorskip("tifffile")
+
+
+# ===========================================================================
+# GPU gating (matches the rest of the geotiff test suite's predicate).
+# ===========================================================================
+
+
+def _gpu_available() -> bool:
+    """Report whether cupy is importable and sees a usable CUDA runtime.
+
+    Returns ``False`` when cupy is not installed, and also when importing
+    it or querying CUDA raises; this predicate must never break test
+    collection.
+    """
+    if importlib.util.find_spec("cupy") is not None:
+        try:
+            import cupy
+            return bool(cupy.cuda.is_available())
+        except Exception:
+            return False
+    return False
+
+
+# Evaluated once at import so every GPU-gated test shares one answer.
+_HAS_GPU = _gpu_available()
+# Marker that skips a test unless cupy and a CUDA device are usable.
+_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
+
+
+# ===========================================================================
+# Backend parity with sidecar / overview interactions
+# (was test_vrt_backend_parity_2321.py).
+# ===========================================================================
+#
+# Asserts eager / dask parity on the surface most likely to drift:
+# metadata (transform, crs, crs_wkt, georef_status), windowed coords,
+# and sidecar (.tif.ovr) interactions. Acceptance: the VRT path cannot
+# pass by returning correct pixel values with wrong georeferencing attrs.
+
+# ``golden_corpus/fixtures`` lives two directory levels above this file.
+_GOLDEN = pathlib.Path(__file__).resolve().parent.parent.joinpath(
+    "golden_corpus", "fixtures",
+)
+# Sidecar pair: the base TIFF plus its external ``.ovr`` overview file.
+_SIDECAR_TIF = _GOLDEN.joinpath("overview_external_ovr_uint16.tif")
+_SIDECAR_OVR = _GOLDEN.joinpath("overview_external_ovr_uint16.tif.ovr")
+# TIFF used by the inline-overview (no sidecar) fixture.
+_INLINE_OVR_TIF = _GOLDEN.joinpath("overview_internal_uint16.tif")
+
+
+def _sidecar_fixture_or_skip() -> Path:
+    """Return the bundled sidecar TIFF, skipping the test when either the
+    TIFF or its ``.ovr`` partner is missing from ``golden_corpus``."""
+    pair_present = _SIDECAR_TIF.exists() and _SIDECAR_OVR.exists()
+    if not pair_present:
+        pytest.skip("sidecar overview fixture not present in golden_corpus")
+    return _SIDECAR_TIF
+
+
+def _inline_overview_fixture_or_skip() -> Path:
+    """Return the inline-overview TIFF, skipping the test when it is
+    missing from ``golden_corpus``."""
+    if _INLINE_OVR_TIF.exists():
+        return _INLINE_OVR_TIF
+    pytest.skip("inline overview fixture not present in golden_corpus")
+    return _INLINE_OVR_TIF
+
+
+def _materialise(da: xr.DataArray) -> np.ndarray:
+    """Return ``da.data`` as a NumPy array.
+
+    ``.compute()`` is called when the payload has one, then ``.get()``
+    when the result has one, before the final ``np.asarray``.
+    """
+    payload = da.data
+    # Order matters: evaluate a lazy payload first, then fetch the result.
+    for method_name in ("compute", "get"):
+        if hasattr(payload, method_name):
+            payload = getattr(payload, method_name)()
+    return np.asarray(payload)
+
+
+def _coord_view(da: xr.DataArray, name: str) -> np.ndarray:
+    """Return the values of coordinate ``name`` on ``da`` as an ndarray."""
+    coordinate = da.coords[name]
+    return np.asarray(coordinate.values)
+
+
+def _assert_pixels_equal(ref: np.ndarray, actual: np.ndarray,
+                         *, label: str) -> None:
+    """Assert two pixel arrays match (mirrors test_backend_matrix.py).
+
+    dtype and shape must be identical. Float arrays are then compared
+    with NaN treated as equal to NaN; every other dtype is compared on
+    its raw bytes. ``label`` prefixes each failure message.
+    """
+    assert ref.dtype == actual.dtype, (
+        f"{label}: dtype differs ref={ref.dtype} actual={actual.dtype}"
+    )
+    assert ref.shape == actual.shape, (
+        f"{label}: shape differs ref={ref.shape} actual={actual.shape}"
+    )
+    if ref.dtype.kind != "f":
+        assert ref.tobytes() == actual.tobytes(), (
+            f"{label}: integer pixel bytes differ"
+        )
+        return
+    assert np.array_equal(ref, actual, equal_nan=True), (
+        f"{label}: float pixels differ (NaN-aware)"
+    )
+
+
+def _assert_metadata_parity(
+    ref: xr.DataArray,
+    actual: xr.DataArray,
+    *,
+    label: str,
+    expected_dims: tuple[str, ...],
+) -> None:
+    """Assert two reads agree on dims, coords and georeferencing attrs.
+
+    Checked in order: the ``dims`` of both arrays against
+    ``expected_dims``; the dtype, shape and raw bytes of every
+    ``expected_dims`` coordinate that ``ref`` carries; then the
+    ``transform``, ``crs``, ``crs_wkt`` and ``georef_status`` attrs.
+    ``label`` prefixes every failure message.
+    """
+    assert actual.dims == expected_dims, (
+        f"{label}: dims {actual.dims!r} != expected {expected_dims!r}"
+    )
+    assert ref.dims == expected_dims, (
+        f"{label}: ref dims {ref.dims!r} != expected {expected_dims!r}"
+    )
+
+    for axis in expected_dims:
+        # Axes the reference has no coordinate for are not compared.
+        if axis in ref.coords:
+            want = _coord_view(ref, axis)
+            got = _coord_view(actual, axis)
+            assert want.dtype == got.dtype, (
+                f"{label}: coord {axis!r} dtype "
+                f"ref={want.dtype} actual={got.dtype}"
+            )
+            assert want.shape == got.shape, (
+                f"{label}: coord {axis!r} shape "
+                f"ref={want.shape} actual={got.shape}"
+            )
+            assert want.tobytes() == got.tobytes(), (
+                f"{label}: coord {axis!r} bytes differ "
+                f"(ref[:3]={want[:3].tolist()!r}, "
+                f"actual[:3]={got[:3].tolist()!r})"
+            )
+
+    want_transform = ref.attrs.get("transform")
+    got_transform = actual.attrs.get("transform")
+    assert want_transform == got_transform, (
+        f"{label}: transform tuple differs "
+        f"ref={want_transform!r} actual={got_transform!r}"
+    )
+
+    want_crs = ref.attrs.get("crs")
+    got_crs = actual.attrs.get("crs")
+    assert want_crs == got_crs, (
+        f"{label}: attrs['crs'] differs "
+        f"ref={want_crs!r} actual={got_crs!r}"
+    )
+
+    assert ref.attrs.get("crs_wkt") == actual.attrs.get("crs_wkt"), (
+        f"{label}: crs_wkt differs"
+    )
+
+    want_status = ref.attrs.get("georef_status")
+    got_status = actual.attrs.get("georef_status")
+    assert want_status == got_status, (
+        f"{label}: georef_status differs "
+        f"ref={want_status!r} "
+        f"actual={got_status!r}"
+    )
+
+
+def _build_two_tile_float32_vrt(tmp_path: Path) -> tuple[Path, np.dtype]:
+    """Build a 16x32 mosaic VRT from two side-by-side 16x16 float32 tiles.
+
+    Tile ``c`` is filled with ``(c + 1) * 1000`` and gets distinct values
+    in its top-left and bottom-right pixels, so a swapped or flipped tile
+    shows up in the pixels. Returns ``(vrt_path, float32 dtype)``.
+    """
+    tile_h = tile_w = 16
+    y_coords = np.arange(tile_h - 1, -1, -1, dtype=np.float64)
+    tile_files: list[str] = []
+    for c in range(2):
+        pixels = np.full(
+            (tile_h, tile_w), float(c + 1) * 1000.0, dtype=np.float32
+        )
+        pixels[0, 0] = -7.0 + c
+        pixels[tile_h - 1, tile_w - 1] = 9000.0 + c
+        # Each tile starts where the previous one ends along x.
+        origin_x = float(c * tile_w)
+        x_coords = np.arange(origin_x, origin_x + tile_w, dtype=np.float64)
+        tile = xr.DataArray(
+            pixels,
+            dims=["y", "x"],
+            coords={"y": y_coords, "x": x_coords},
+            attrs={"crs": 4326},
+        )
+        tile_file = str(tmp_path / f"tile_2321_{c}.tif")
+        to_geotiff(tile, tile_file, compression="none", tiled=False)
+        tile_files.append(tile_file)
+    vrt_path = tmp_path / "two_tile_2321_.vrt"
+    _write_vrt_internal(str(vrt_path), tile_files, relative=False)
+    return vrt_path, np.dtype("float32")
+
+
+def _build_sidecar_vrt(tmp_path: Path) -> tuple[Path, np.dtype]:
+ """VRT over a copy of the bundled sidecar TIFF + its ``.ovr`` partner."""
+ src = _sidecar_fixture_or_skip()
+ base = tmp_path / "sidecar_2321_.tif"
+ shutil.copy(src, base)
+ shutil.copy(str(src) + ".ovr", str(base) + ".ovr")
+ vrt_path = tmp_path / "sidecar_2321_.vrt"
+ _write_vrt_internal(str(vrt_path), [str(base)], relative=False)
+ return vrt_path, np.dtype("uint16")
+
+
+def _build_inline_overview_vrt(tmp_path: Path) -> tuple[Path, np.dtype]:
+ """VRT over a copy of the inline-overview fixture (no sidecar)."""
+ src = _inline_overview_fixture_or_skip()
+ base = tmp_path / "inline_2321_.tif"
+ shutil.copy(src, base)
+ vrt_path = tmp_path / "inline_2321_.vrt"
+ _write_vrt_internal(str(vrt_path), [str(base)], relative=False)
+ return vrt_path, np.dtype("uint16")
+
+
+@dataclass(frozen=True)
+class _BackendSpec:
+ backend_id: str
+ kwargs: dict[str, Any]
+
+
+_BACKENDS: tuple[_BackendSpec, ...] = (
+ _BackendSpec(backend_id="eager", kwargs={}),
+ _BackendSpec(backend_id="dask", kwargs={"chunks": (16, 16)}),
+)
+
+
+def _backend_params() -> list:
+ return [pytest.param(b, id=b.backend_id) for b in _BACKENDS]
+
+
+@dataclass(frozen=True)
+class _FixtureSpec:
+ fix_id: str
+ builder: Callable[[Path], tuple[Path, np.dtype]]
+ expected_dims: tuple[str, ...]
+ window: tuple[int, int, int, int] | None
+
+
+_FIXTURES: tuple[_FixtureSpec, ...] = (
+ _FixtureSpec(
+ fix_id="two-tile-float32-full",
+ builder=_build_two_tile_float32_vrt,
+ expected_dims=("y", "x"),
+ window=None,
+ ),
+ _FixtureSpec(
+ fix_id="two-tile-float32-window-spans-seam",
+ builder=_build_two_tile_float32_vrt,
+ expected_dims=("y", "x"),
+ window=(4, 8, 12, 24),
+ ),
+ _FixtureSpec(
+ fix_id="sidecar-uint16-full",
+ builder=_build_sidecar_vrt,
+ expected_dims=("y", "x"),
+ window=None,
+ ),
+ _FixtureSpec(
+ fix_id="sidecar-uint16-window",
+ builder=_build_sidecar_vrt,
+ expected_dims=("y", "x"),
+ window=(8, 8, 56, 56),
+ ),
+)
+
+
+def _fixture_params() -> list:
+ return [pytest.param(f, id=f.fix_id) for f in _FIXTURES]
+
+
+@pytest.fixture(scope="session")
+def _vrt_parity_dir(tmp_path_factory):
+ return tmp_path_factory.mktemp("vrt_parity_2321_")
+
+
+@pytest.fixture(scope="session")
+def _vrt_parity_cache() -> dict[str, tuple[Path, np.dtype]]:
+ """Session-scoped (path, dtype) cache shared across every cell.
+
+ A function-scoped cache would rebuild the same VRT per cell; on
+ Windows that surfaces as PermissionError when ``to_geotiff`` renames
+ over a file another cell still holds mapped.
+ """
+ return {}
+
+
+@pytest.fixture
+def vrt_fixture(_vrt_parity_dir, _vrt_parity_cache):
+    """Return a resolver mapping a :class:`_FixtureSpec` to (vrt_path, dtype).
+
+    Each builder runs at most once per cache; later calls reuse its result.
+    """
+
+    def _resolve(spec: _FixtureSpec) -> tuple[Path, np.dtype]:
+        name = spec.builder.__name__
+        if name not in _vrt_parity_cache:
+            workdir = _vrt_parity_dir / name
+            workdir.mkdir(exist_ok=True)
+            _vrt_parity_cache[name] = spec.builder(workdir)
+        return _vrt_parity_cache[name]
+
+    return _resolve
+
+
+@pytest.mark.parametrize("spec", _fixture_params())
+@pytest.mark.parametrize("backend", _backend_params())
+def test_vrt_backend_parity(spec, backend, vrt_fixture):
+ """One cell per (fixture, backend). Asserts pixels + metadata parity."""
+ vrt_path, expected_dtype = vrt_fixture(spec)
+
+ open_kwargs: dict[str, Any] = {}
+ if spec.window is not None:
+ open_kwargs["window"] = spec.window
+
+ ref = open_geotiff(str(vrt_path), **open_kwargs)
+ actual = open_geotiff(str(vrt_path), **open_kwargs, **backend.kwargs)
+
+ label = (
+ f"fixture={spec.fix_id} backend={backend.backend_id} "
+ f"window={spec.window!r}"
+ )
+
+ ref_arr = _materialise(ref)
+ actual_arr = _materialise(actual)
+
+ assert ref_arr.dtype == expected_dtype, (
+ f"{label}: reference dtype {ref_arr.dtype} != "
+ f"expected {expected_dtype}"
+ )
+ assert actual_arr.dtype == expected_dtype, (
+ f"{label}: actual dtype {actual_arr.dtype} != "
+ f"expected {expected_dtype}"
+ )
+
+ _assert_pixels_equal(ref_arr, actual_arr, label=label)
+ _assert_metadata_parity(
+ ref, actual, label=label, expected_dims=spec.expected_dims,
+ )
+
+
+@pytest.mark.parametrize("backend", _backend_params())
+def test_sidecar_vrt_attrs_match_inline(backend, tmp_path):
+ """Sidecar-backed and inline-overview-backed VRTs report identical
+ georef attrs and pixels at the base level."""
+ side_sub = tmp_path / "sidecar"
+ inline_sub = tmp_path / "inline"
+ side_sub.mkdir()
+ inline_sub.mkdir()
+ side_vrt, side_dtype = _build_sidecar_vrt(side_sub)
+ inline_vrt, inline_dtype = _build_inline_overview_vrt(inline_sub)
+
+ assert side_dtype == inline_dtype, (
+ f"sidecar dtype {side_dtype} != inline dtype {inline_dtype}; "
+ f"the golden_corpus fixtures should share a base IFD"
+ )
+
+ side = open_geotiff(str(side_vrt), **backend.kwargs)
+ inline = open_geotiff(str(inline_vrt), **backend.kwargs)
+
+ label = f"sidecar-vs-inline backend={backend.backend_id}"
+
+ assert side.shape == inline.shape, (
+ f"{label}: shape differs side={side.shape} inline={inline.shape}"
+ )
+
+ _assert_pixels_equal(
+ _materialise(inline), _materialise(side), label=label,
+ )
+ _assert_metadata_parity(
+ inline, side, label=label, expected_dims=("y", "x"),
+ )
+
+
+def test_windowed_vrt_shifts_coords_and_transform_consistently(tmp_path):
+ """Eager and lazy windowed VRT reads agree on shape, coords, attrs,
+ and values."""
+ vrt_path, _ = _build_two_tile_float32_vrt(tmp_path)
+ window = (3, 5, 13, 27)
+
+ eager = open_geotiff(str(vrt_path), window=window)
+ lazy = open_geotiff(str(vrt_path), window=window, chunks=(5, 11))
+
+ assert eager.shape == (10, 22)
+ assert lazy.shape == (10, 22)
+
+ np.testing.assert_array_equal(eager["y"].values, lazy["y"].values)
+ np.testing.assert_array_equal(eager["x"].values, lazy["x"].values)
+ assert eager["y"].dtype == lazy["y"].dtype
+ assert eager["x"].dtype == lazy["x"].dtype
+
+ assert eager["y"].values[0] == 12.0
+ assert eager["x"].values[0] == 5.0
+
+ eager_t = eager.attrs.get("transform")
+ lazy_t = lazy.attrs.get("transform")
+ assert eager_t == lazy_t, (
+ f"transform differs eager={eager_t!r} lazy={lazy_t!r}"
+ )
+ assert eager_t[0] == 1.0 and eager_t[4] == -1.0, (
+ f"pixel size mismatch in windowed transform {eager_t!r}"
+ )
+
+ np.testing.assert_array_equal(eager.values, lazy.compute().values)
+
+ assert eager.attrs.get("crs") == lazy.attrs.get("crs")
+ assert eager.attrs.get("crs_wkt") == lazy.attrs.get("crs_wkt")
+ assert eager.attrs.get("georef_status") == lazy.attrs.get(
+ "georef_status"
+ )
+
+
+def test_sidecar_window_shifts_to_known_coords(tmp_path):
+ """The sidecar VRT read with ``window=(8, 8, 56, 56)`` lands on the
+ coords / transform an absolute calculation predicts."""
+ vrt_path, _ = _build_sidecar_vrt(tmp_path)
+ window = (8, 8, 56, 56)
+
+ eager = open_geotiff(str(vrt_path), window=window)
+
+ assert eager.shape == (48, 48)
+ t = eager.attrs.get("transform")
+ assert t is not None, "windowed sidecar VRT dropped attrs['transform']"
+ assert t[0] == pytest.approx(0.001)
+ assert t[4] == pytest.approx(-0.001)
+ assert t[2] == pytest.approx(-120.0 + 8 * 0.001)
+ assert t[5] == pytest.approx(45.0 + 8 * -0.001)
+
+
+def test_assert_metadata_parity_flags_transform_drift(tmp_path):
+ """A transform-only drift between two otherwise-identical DataArrays
+ fails the parity helper (locks the harness behaviour)."""
+ vrt_path, _ = _build_two_tile_float32_vrt(tmp_path)
+ da_ref = open_geotiff(str(vrt_path))
+ da_bad = da_ref.copy()
+ da_bad.attrs = dict(da_ref.attrs)
+ old_t = da_bad.attrs["transform"]
+ da_bad.attrs["transform"] = (
+ old_t[0], old_t[1], old_t[2] + 1.0,
+ old_t[3], old_t[4], old_t[5],
+ )
+ with pytest.raises(AssertionError, match="transform"):
+ _assert_metadata_parity(
+ da_ref, da_bad, label="harness-sanity",
+ expected_dims=("y", "x"),
+ )
+
+
+# ===========================================================================
+# VRT finalization-pipeline parity
+# (was test_vrt_finalization_parity_2162.py).
+# ===========================================================================
+#
+# The VRT eager and chunked paths route through the shared
+# ``_finalize_lazy_read_attrs`` helper. These tests pin parity for the
+# attrs the helper stamps against the non-VRT eager / dask readers.
+
+_NON_VRT_ONLY_KEYS = frozenset({
+ 'extra_tags',
+ 'image_description',
+ 'extra_samples',
+ 'gdal_metadata',
+ 'gdal_metadata_xml',
+ 'x_resolution',
+ 'y_resolution',
+ 'resolution_unit',
+ 'colormap',
+})
+
+_REPRESENTATION_KEYS = frozenset({'crs_wkt', 'transform'})
+
+
+def _shared_canonical_attrs(attrs: dict) -> dict:
+    """Strip the non-VRT-only and representation keys from ``attrs``."""
+    excluded = _NON_VRT_ONLY_KEYS | _REPRESENTATION_KEYS
+    return {
+        key: value for key, value in attrs.items() if key not in excluded
+    }
+
+
+def _write_single_source_vrt(tiff_path, vrt_path, *, width, height,
+                             dtype='Float32', nodata=None,
+                             geo_transform='0.0, 1.0, 0.0, 0.0, 0.0, -1.0',
+                             srs=None):
+    """Write a one-band VRT whose single SimpleSource is ``tiff_path``.
+
+    The source and destination rects both span the full ``width`` x
+    ``height`` raster. ``nodata``, ``srs`` and ``geo_transform`` are
+    interpolated verbatim; passing ``None`` omits that element.
+    """
+    nodata_xml = (
+        '' if nodata is None else f'    <NoDataValue>{nodata}</NoDataValue>\n'
+    )
+    srs_xml = '' if srs is None else f'  <SRS>{srs}</SRS>\n'
+    gt_xml = (
+        '' if geo_transform is None
+        else f'  <GeoTransform>{geo_transform}</GeoTransform>\n'
+    )
+    vrt_xml = (
+        f'<VRTDataset rasterXSize="{width}" rasterYSize="{height}">\n'
+        f'{gt_xml}'
+        f'{srs_xml}'
+        f'  <VRTRasterBand dataType="{dtype}" band="1">\n'
+        f'{nodata_xml}'
+        f'    <SimpleSource>\n'
+        f'      <SourceFilename relativeToVRT="0">{tiff_path}</SourceFilename>\n'
+        f'      <SourceBand>1</SourceBand>\n'
+        f'      <SrcRect xOff="0" yOff="0" xSize="{width}" ySize="{height}"/>\n'
+        f'      <DstRect xOff="0" yOff="0" xSize="{width}" ySize="{height}"/>\n'
+        f'    </SimpleSource>\n'
+        f'  </VRTRasterBand>\n'
+        f'</VRTDataset>\n'
+    )
+    with open(vrt_path, 'w', encoding='utf-8') as f:
+        f.write(vrt_xml)
+
+
+_WGS84_WKT = (
+ 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,'
+ 'AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,'
+ 'AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,'
+ 'AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]'
+)
+
+
+def _make_full_pair(tmp_path, name):
+ """Full georef: float coords + CRS."""
+ tiff = str(tmp_path / f'{name}_tiff.tif')
+ vrt = str(tmp_path / f'{name}.vrt')
+ da = xr.DataArray(
+ np.zeros((4, 4), dtype=np.float32),
+ coords={
+ 'y': np.array([200.0, 199.0, 198.0, 197.0]),
+ 'x': np.array([100.0, 101.0, 102.0, 103.0]),
+ },
+ dims=('y', 'x'),
+ attrs={'crs': 4326},
+ )
+ to_geotiff(da, tiff)
+ _write_single_source_vrt(
+ tiff, vrt, width=4, height=4,
+ geo_transform='100.0, 1.0, 0.0, 200.0, 0.0, -1.0',
+ srs=_WGS84_WKT,
+ )
+ return tiff, vrt
+
+
+def _make_transform_only_pair(tmp_path, name):
+ """Float coords, no CRS."""
+ tiff = str(tmp_path / f'{name}_tiff.tif')
+ vrt = str(tmp_path / f'{name}.vrt')
+ da = xr.DataArray(
+ np.zeros((4, 4), dtype=np.float32),
+ coords={
+ 'y': np.array([200.0, 199.0, 198.0, 197.0]),
+ 'x': np.array([100.0, 101.0, 102.0, 103.0]),
+ },
+ dims=('y', 'x'),
+ )
+ to_geotiff(da, tiff)
+ _write_single_source_vrt(
+ tiff, vrt, width=4, height=4,
+ geo_transform='100.0, 1.0, 0.0, 200.0, 0.0, -1.0',
+ srs=None,
+ )
+ return tiff, vrt
+
+
+def _make_crs_only_pair(tmp_path, name):
+ """No-georef marker + CRS."""
+ tiff = str(tmp_path / f'{name}_tiff.tif')
+ vrt = str(tmp_path / f'{name}.vrt')
+ da = xr.DataArray(
+ np.zeros((4, 4), dtype=np.float32),
+ coords={
+ 'y': np.arange(4, dtype=np.int64),
+ 'x': np.arange(4, dtype=np.int64),
+ },
+ dims=('y', 'x'),
+ attrs={_NO_GEOREF_KEY: True, 'crs': 4326},
+ )
+ to_geotiff(da, tiff)
+ _write_single_source_vrt(
+ tiff, vrt, width=4, height=4,
+ geo_transform=None,
+ srs=_WGS84_WKT,
+ )
+ return tiff, vrt
+
+
+def _make_none_pair(tmp_path, name):
+ """No CRS, no transform."""
+ tiff = str(tmp_path / f'{name}_tiff.tif')
+ vrt = str(tmp_path / f'{name}.vrt')
+ arr = np.zeros((4, 4), dtype=np.float32)
+ tifffile.imwrite(
+ tiff, arr, photometric='minisblack', planarconfig='contig',
+ metadata=None,
+ )
+ _write_single_source_vrt(
+ tiff, vrt, width=4, height=4,
+ geo_transform=None,
+ srs=None,
+ )
+ return tiff, vrt
+
+
+def _make_rotated_pair(tmp_path, name):
+ """Rotated VRT with ``allow_rotated=True``: lands at ``rotated_dropped``."""
+ tiff = str(tmp_path / f'{name}_tiff.tif')
+ vrt = str(tmp_path / f'{name}.vrt')
+ arr = np.arange(16, dtype=np.uint16).reshape(4, 4)
+ write(arr, tiff, compression='none', tiled=False)
+ _write_single_source_vrt(
+ tiff, vrt, width=4, height=4, dtype='UInt16',
+ geo_transform='0.0, 1.0, 0.5, 0.0, 0.5, -1.0',
+ srs=None,
+ )
+ return tiff, vrt
+
+
+def test_vrt_eager_full_matches_open_geotiff(tmp_path):
+    """A single-source VRT wrapping a ``full`` TIFF emits the same
+    canonical helper-stamped attrs as the underlying TIFF read."""
+    tiff, vrt = _make_full_pair(tmp_path, 'full_2180')
+    full_tiff_attrs = dict(open_geotiff(tiff).attrs)
+    full_vrt_attrs = dict(read_vrt(vrt).attrs)
+    tiff_attrs = _shared_canonical_attrs(full_tiff_attrs)
+    vrt_attrs = _shared_canonical_attrs(full_vrt_attrs)
+    assert tiff_attrs == vrt_attrs, (
+        f"TIFF/VRT attrs diverged:\n"
+        f"  tiff only: {set(tiff_attrs) - set(vrt_attrs)}\n"
+        f"  vrt only:  {set(vrt_attrs) - set(tiff_attrs)}\n"
+        f"  shared keys with different values: "
+        f"{[k for k in set(tiff_attrs) & set(vrt_attrs) if tiff_attrs[k] != vrt_attrs[k]]}"
+    )
+    assert full_tiff_attrs['crs'] == full_vrt_attrs['crs'] == 4326
+    assert len(full_tiff_attrs['transform']) == 6
+    assert len(full_vrt_attrs['transform']) == 6
+
+
+def test_vrt_eager_transform_only_matches_open_geotiff(tmp_path):
+ tiff, vrt = _make_transform_only_pair(tmp_path, 'tonly_2180')
+ tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs))
+ vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs))
+ assert tiff_attrs == vrt_attrs
+ assert tiff_attrs['georef_status'] == GEOREF_STATUS_TRANSFORM_ONLY
+
+
+def test_vrt_eager_crs_only_matches_open_geotiff(tmp_path):
+ tiff, vrt = _make_crs_only_pair(tmp_path, 'crsonly_2180')
+ tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs))
+ vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs))
+ assert tiff_attrs == vrt_attrs
+ assert tiff_attrs['georef_status'] == GEOREF_STATUS_CRS_ONLY
+
+
+def test_vrt_eager_none_matches_open_geotiff(tmp_path):
+ tiff, vrt = _make_none_pair(tmp_path, 'none_2180')
+ tiff_attrs = _shared_canonical_attrs(dict(open_geotiff(tiff).attrs))
+ vrt_attrs = _shared_canonical_attrs(dict(read_vrt(vrt).attrs))
+ assert tiff_attrs == vrt_attrs
+ assert tiff_attrs['georef_status'] == GEOREF_STATUS_NONE
+
+
+def test_vrt_eager_rotated_dropped_matches_open_geotiff(tmp_path):
+ """The rotated branch is the VRT-specific path: a non-zero skew lands
+ in ``rotated_dropped`` and the helper drops crs / transform / crs_wkt
+ while emitting ``rotated_affine`` plus the no-georef marker."""
+ _, vrt = _make_rotated_pair(tmp_path, 'rot_2180')
+ attrs = dict(read_vrt(vrt, allow_rotated=True).attrs)
+ assert attrs['georef_status'] == GEOREF_STATUS_ROTATED_DROPPED
+ assert attrs.get(_NO_GEOREF_KEY) is True
+ assert 'rotated_affine' in attrs
+ assert attrs.get('crs') is None
+ assert attrs.get('crs_wkt') is None
+ assert 'transform' not in attrs
+
+
+def test_vrt_chunked_full_matches_dask(tmp_path):
+ tiff, vrt = _make_full_pair(tmp_path, 'full_chunked_2180')
+ tiff_attrs = _shared_canonical_attrs(
+ dict(read_geotiff_dask(tiff, chunks=2).attrs)
+ )
+ vrt_attrs = _shared_canonical_attrs(
+ dict(read_vrt(vrt, chunks=2).attrs)
+ )
+ assert tiff_attrs == vrt_attrs
+
+
+def test_vrt_chunked_transform_only_matches_dask(tmp_path):
+ tiff, vrt = _make_transform_only_pair(tmp_path, 'tonly_chunked_2180')
+ tiff_attrs = _shared_canonical_attrs(
+ dict(read_geotiff_dask(tiff, chunks=2).attrs)
+ )
+ vrt_attrs = _shared_canonical_attrs(
+ dict(read_vrt(vrt, chunks=2).attrs)
+ )
+ assert tiff_attrs == vrt_attrs
+
+
+def test_vrt_chunked_crs_only_matches_dask(tmp_path):
+ tiff, vrt = _make_crs_only_pair(tmp_path, 'crsonly_chunked_2180')
+ tiff_attrs = _shared_canonical_attrs(
+ dict(read_geotiff_dask(tiff, chunks=2).attrs)
+ )
+ vrt_attrs = _shared_canonical_attrs(
+ dict(read_vrt(vrt, chunks=2).attrs)
+ )
+ assert tiff_attrs == vrt_attrs
+
+
+def test_vrt_chunked_none_matches_dask(tmp_path):
+ tiff, vrt = _make_none_pair(tmp_path, 'none_chunked_2180')
+ tiff_attrs = _shared_canonical_attrs(
+ dict(read_geotiff_dask(tiff, chunks=2).attrs)
+ )
+ vrt_attrs = _shared_canonical_attrs(
+ dict(read_vrt(vrt, chunks=2).attrs)
+ )
+ assert tiff_attrs == vrt_attrs
+
+
+def test_vrt_chunked_rotated_dropped(tmp_path):
+ _, vrt = _make_rotated_pair(tmp_path, 'rot_chunked_2180')
+ attrs = dict(read_vrt(vrt, allow_rotated=True, chunks=2).attrs)
+ assert attrs['georef_status'] == GEOREF_STATUS_ROTATED_DROPPED
+ assert attrs.get(_NO_GEOREF_KEY) is True
+ assert 'rotated_affine' in attrs
+
+
+def _write_two_band_per_band_nodata_vrt(tmp_path):
+    """Write a 2x2 two-band uint16 VRT with a distinct nodata per band."""
+    band0 = np.array([[1, 2], [3, 65535]], dtype=np.uint16)
+    band1 = np.array([[7, 8], [9, 65000]], dtype=np.uint16)
+    p0 = str(tmp_path / 'vrt_band0_2180.tif')
+    p1 = str(tmp_path / 'vrt_band1_2180.tif')
+    write(band0, p0, nodata=65535, compression='none', tiled=False)
+    write(band1, p1, nodata=65000, compression='none', tiled=False)
+    vrt_path = str(tmp_path / 'two_band_per_band_nodata_2180.vrt')
+    vrt_xml = f"""<VRTDataset rasterXSize="2" rasterYSize="2">
+  <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>
+  <VRTRasterBand dataType="UInt16" band="1">
+    <NoDataValue>65535</NoDataValue>
+    <SimpleSource>
+      <SourceFilename relativeToVRT="0">{p0}</SourceFilename>
+      <SourceBand>1</SourceBand>
+      <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>
+      <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>
+    </SimpleSource>
+  </VRTRasterBand>
+  <VRTRasterBand dataType="UInt16" band="2">
+    <NoDataValue>65000</NoDataValue>
+    <SimpleSource>
+      <SourceFilename relativeToVRT="0">{p1}</SourceFilename>
+      <SourceBand>1</SourceBand>
+      <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>
+      <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>
+    </SimpleSource>
+  </VRTRasterBand>
+</VRTDataset>"""
+    with open(vrt_path, 'w', encoding='utf-8') as f:
+        f.write(vrt_xml)
+    return vrt_path
+
+
+def test_band_nodata_first_band_attrs(tmp_path):
+ """``band=1`` with ``band_nodata='first'`` surfaces band 1's sentinel
+ on attrs and masks against it."""
+ vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
+ r = read_vrt(vrt_path, band=1, band_nodata='first')
+ assert r.attrs['nodata'] == 65000.0
+ assert r.attrs['masked_nodata'] is True
+ assert np.isnan(r.values[1, 1])
+ assert r.attrs.get('nodata_pixels_present') is True
+
+
+def test_band_nodata_chunked_first_band_attrs(tmp_path):
+ """The chunked path threads the same per-band sentinel onto attrs."""
+ vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
+ r = read_vrt(vrt_path, band=1, band_nodata='first', chunks=2)
+ assert r.attrs['nodata'] == 65000.0
+ assert r.attrs['masked_nodata'] is True
+ assert 'nodata_pixels_present' not in r.attrs
+
+
+def _make_no_sentinel_vrt(tmp_path, name):
+    """A single-band float VRT with no ``<NoDataValue>``."""
+    tiff = str(tmp_path / f'{name}_tiff.tif')
+    vrt = str(tmp_path / f'{name}.vrt')
+    arr = np.arange(16, dtype=np.float32).reshape(4, 4)
+    write(arr, tiff, compression='none', tiled=False)
+    _write_single_source_vrt(
+        tiff, vrt, width=4, height=4,
+        geo_transform='0.0, 1.0, 0.0, 0.0, 0.0, -1.0',
+        nodata=None,
+    )
+    return vrt
+
+
+def test_dtype_cast_no_sentinel_omits_attr_eager(tmp_path):
+ """Eager VRT with ``dtype=`` and no declared sentinel:
+ ``nodata_dtype_cast`` stays absent."""
+ vrt = _make_no_sentinel_vrt(tmp_path, 'no_sentinel_eager_2180')
+ r = read_vrt(vrt, dtype=np.float64)
+ assert r.dtype == np.float64
+ assert 'nodata' not in r.attrs
+ assert 'masked_nodata' not in r.attrs
+ assert 'nodata_dtype_cast' not in r.attrs
+
+
+def test_dtype_cast_no_sentinel_omits_attr_chunked(tmp_path):
+ """Chunked VRT with ``dtype=`` and no declared sentinel: same
+ ``nodata_dtype_cast`` pop as the eager branch."""
+ vrt = _make_no_sentinel_vrt(tmp_path, 'no_sentinel_chunked_2180')
+ r = read_vrt(vrt, dtype=np.float64, chunks=2)
+ assert r.dtype == np.float64
+ assert 'nodata' not in r.attrs
+ assert 'masked_nodata' not in r.attrs
+ assert 'nodata_dtype_cast' not in r.attrs
+
+
+def test_missing_sources_eager_surfaces_vrt_holes(tmp_path):
+    """The eager VRT path keeps populating ``attrs['vrt_holes']`` after
+    the finalization migration."""
+    tiff_path = str(tmp_path / 'present_2180.tif')
+    arr = np.arange(16, dtype=np.float32).reshape(4, 4)
+    write(arr, tiff_path, compression='none', tiled=False)
+    # 8x4 mosaic: present tile on the left half, missing tile on the right.
+    missing_path = str(tmp_path / 'missing_2180.tif')  # never created
+    vrt_path = str(tmp_path / 'mosaic_2180.vrt')
+    vrt_xml = f"""<VRTDataset rasterXSize="8" rasterYSize="4">
+  <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>
+  <VRTRasterBand dataType="Float32" band="1">
+    <SimpleSource>
+      <SourceFilename relativeToVRT="0">{tiff_path}</SourceFilename>
+      <SourceBand>1</SourceBand>
+      <SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>
+      <DstRect xOff="0" yOff="0" xSize="4" ySize="4"/>
+    </SimpleSource>
+    <SimpleSource>
+      <SourceFilename relativeToVRT="0">{missing_path}</SourceFilename>
+      <SourceBand>1</SourceBand>
+      <SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>
+      <DstRect xOff="4" yOff="0" xSize="4" ySize="4"/>
+    </SimpleSource>
+  </VRTRasterBand>
+</VRTDataset>"""
+    with open(vrt_path, 'w', encoding='utf-8') as f:
+        f.write(vrt_xml)
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        r = read_vrt(vrt_path, missing_sources='warn')
+    assert 'vrt_holes' in r.attrs
+    holes = r.attrs['vrt_holes']
+    assert isinstance(holes, list) and len(holes) >= 1
+    for hole in holes:
+        assert 'source' in hole
+        assert 'band' in hole
+        assert 'dst_rect' in hole
+        assert 'error' in hole
+
+
+def test_missing_sources_chunked_surfaces_vrt_holes(tmp_path):
+    """Chunked path's parse-time existence sweep still populates
+    ``attrs['vrt_holes']`` after the migration."""
+    tiff_path = str(tmp_path / 'present_chunked_2180.tif')
+    arr = np.arange(16, dtype=np.float32).reshape(4, 4)
+    write(arr, tiff_path, compression='none', tiled=False)
+    # 8x4 mosaic: present tile on the left half, missing tile on the right.
+    missing_path = str(tmp_path / 'missing_chunked_2180.tif')
+    vrt_path = str(tmp_path / 'mosaic_chunked_2180.vrt')
+    vrt_xml = f"""<VRTDataset rasterXSize="8" rasterYSize="4">
+  <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>
+  <VRTRasterBand dataType="Float32" band="1">
+    <SimpleSource>
+      <SourceFilename relativeToVRT="0">{tiff_path}</SourceFilename>
+      <SourceBand>1</SourceBand>
+      <SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>
+      <DstRect xOff="0" yOff="0" xSize="4" ySize="4"/>
+    </SimpleSource>
+    <SimpleSource>
+      <SourceFilename relativeToVRT="0">{missing_path}</SourceFilename>
+      <SourceBand>1</SourceBand>
+      <SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>
+      <DstRect xOff="4" yOff="0" xSize="4" ySize="4"/>
+    </SimpleSource>
+  </VRTRasterBand>
+</VRTDataset>"""
+    with open(vrt_path, 'w', encoding='utf-8') as f:
+        f.write(vrt_xml)
+    r = read_vrt(vrt_path, missing_sources='warn', chunks=2)
+    assert 'vrt_holes' in r.attrs
+    holes = r.attrs['vrt_holes']
+    assert isinstance(holes, list) and len(holes) >= 1
+
+
+_STATUS_PAIRS = [
+ pytest.param(_make_full_pair, GEOREF_STATUS_FULL, False, id="full"),
+ pytest.param(
+ _make_transform_only_pair, GEOREF_STATUS_TRANSFORM_ONLY,
+ False, id="transform_only",
+ ),
+ pytest.param(
+ _make_crs_only_pair, GEOREF_STATUS_CRS_ONLY,
+ False, id="crs_only",
+ ),
+ pytest.param(_make_none_pair, GEOREF_STATUS_NONE, False, id="none"),
+ pytest.param(
+ _make_rotated_pair, GEOREF_STATUS_ROTATED_DROPPED, True,
+ id="rotated_dropped",
+ ),
+]
+
+
+@pytest.mark.parametrize("pair_factory,expected_status,allow_rotated",
+ _STATUS_PAIRS)
+def test_georef_status_eager_parity(tmp_path, pair_factory, expected_status,
+ allow_rotated):
+ """VRT eager and (where applicable) non-VRT eager agree on
+ ``georef_status``."""
+ tiff, vrt = pair_factory(tmp_path, f'georef_eager_{expected_status}')
+ kwargs = {'allow_rotated': True} if allow_rotated else {}
+ vrt_status = read_vrt(vrt, **kwargs).attrs.get('georef_status')
+ assert vrt_status == expected_status
+ if not allow_rotated:
+ tiff_status = open_geotiff(tiff, **kwargs).attrs.get('georef_status')
+ assert tiff_status == expected_status
+ assert vrt_status == tiff_status
+
+
+@pytest.mark.parametrize("pair_factory,expected_status,allow_rotated",
+ _STATUS_PAIRS)
+def test_georef_status_chunked_parity(tmp_path, pair_factory, expected_status,
+ allow_rotated):
+ """VRT chunked and non-VRT chunked agree on ``georef_status``."""
+ tiff, vrt = pair_factory(tmp_path, f'georef_chunked_{expected_status}')
+ kwargs = {'allow_rotated': True} if allow_rotated else {}
+ vrt_status = read_vrt(vrt, chunks=2, **kwargs).attrs.get('georef_status')
+ assert vrt_status == expected_status
+ if not allow_rotated:
+ tiff_status = read_geotiff_dask(
+ tiff, chunks=2, **kwargs
+ ).attrs.get('georef_status')
+ assert tiff_status == expected_status
+ assert vrt_status == tiff_status
+
+
+_VRT_FACTORIES = [
+ pytest.param(_make_full_pair, False, id="full"),
+ pytest.param(_make_transform_only_pair, False, id="transform_only"),
+ pytest.param(_make_crs_only_pair, False, id="crs_only"),
+ pytest.param(_make_none_pair, False, id="none"),
+ pytest.param(_make_rotated_pair, True, id="rotated_dropped"),
+]
+
+
+@pytest.mark.parametrize("pair_factory,allow_rotated", _VRT_FACTORIES)
+def test_vrt_eager_chunked_internal_parity(tmp_path, pair_factory,
+ allow_rotated):
+ """Eager and chunked VRT reads of the same fixture agree on the shared
+ canonical attrs (modulo the lazy ``nodata_pixels_present`` carve-out)."""
+ _, vrt = pair_factory(tmp_path, 'internal_parity_2180')
+ kwargs = {'allow_rotated': True} if allow_rotated else {}
+ eager_attrs = dict(read_vrt(vrt, **kwargs).attrs)
+ chunked_attrs = dict(read_vrt(vrt, chunks=2, **kwargs).attrs)
+ eager_attrs.pop('nodata_pixels_present', None)
+ chunked_attrs.pop('nodata_pixels_present', None)
+ assert eager_attrs == chunked_attrs
+
+
+# ===========================================================================
+# read_vrt backend / parameter coverage
+# (was test_vrt_backend_coverage_2026_05_11.py).
+# ===========================================================================
+#
+# Covers the GPU and dask+GPU decode paths the read_vrt body handles, the
+# ``dtype=`` / ``name=`` kwargs, and the open_geotiff file-like +
+# backend-kwarg rejection.
+
+
+@pytest.fixture
+def single_tile_vrt(tmp_path):
+ """A trivial single-tile float32 VRT plus its source array."""
+ arr = np.arange(16, dtype=np.float32).reshape(4, 4)
+ tile_path = str(tmp_path / 'tile.tif')
+ to_geotiff(arr, tile_path)
+ vrt_path = str(tmp_path / 'mosaic.vrt')
+ _write_vrt_internal(vrt_path, [tile_path])
+ return vrt_path, arr
+
+
+@_gpu_only
+class TestReadVrtGpuBackend:
+ """``read_vrt(gpu=True)`` returns a CuPy-backed DataArray."""
+
+ def test_read_vrt_gpu_returns_cupy(self, single_tile_vrt):
+ import cupy
+
+ vrt_path, arr = single_tile_vrt
+ da = read_vrt(vrt_path, gpu=True)
+ assert isinstance(da.data, cupy.ndarray), (
+ f"expected cupy.ndarray, got {type(da.data).__name__}"
+ )
+ np.testing.assert_array_equal(da.data.get(), arr)
+
+ def test_read_vrt_gpu_chunks_returns_dask_cupy(self, single_tile_vrt):
+ """``read_vrt(gpu=True, chunks=N)`` is the dask+cupy VRT entry
+ point; the trailing ``result.chunk(...)`` block wraps the cupy
+ backing without falling back to numpy."""
+ import cupy
+ import dask.array as da_mod
+
+ vrt_path, arr = single_tile_vrt
+ result = read_vrt(vrt_path, gpu=True, chunks=2)
+
+ assert isinstance(result.data, da_mod.Array), (
+ f"expected dask Array, got {type(result.data).__name__}"
+ )
+ assert isinstance(result.data._meta, cupy.ndarray), (
+ f"expected cupy._meta, got "
+ f"{type(result.data._meta).__module__}."
+ f"{type(result.data._meta).__name__}"
+ )
+ assert result.data.chunks == ((2, 2), (2, 2))
+
+ computed = result.compute()
+ assert isinstance(computed.data, cupy.ndarray)
+ np.testing.assert_array_equal(computed.data.get(), arr)
+
+ def test_open_geotiff_vrt_gpu_routes_through(self, single_tile_vrt):
+ """``open_geotiff('.vrt', gpu=True)`` dispatches to ``read_vrt``
+ and surfaces the cupy data unchanged."""
+ import cupy
+
+ vrt_path, arr = single_tile_vrt
+ da = open_geotiff(vrt_path, gpu=True)
+ assert isinstance(da.data, cupy.ndarray)
+ np.testing.assert_array_equal(da.data.get(), arr)
+
+ def test_open_geotiff_vrt_gpu_chunks(self, single_tile_vrt):
+ """``open_geotiff('.vrt', gpu=True, chunks=N)`` is the combined
+ dask+cupy entry point."""
+ import cupy
+ import dask.array as da_mod
+
+ vrt_path, arr = single_tile_vrt
+ result = open_geotiff(vrt_path, gpu=True, chunks=2)
+
+ assert isinstance(result.data, da_mod.Array)
+ assert isinstance(result.data._meta, cupy.ndarray)
+ assert result.data.chunks == ((2, 2), (2, 2))
+
+ computed = result.compute()
+ np.testing.assert_array_equal(computed.data.get(), arr)
+
+
+class TestReadVrtDtypeKwarg:
+ """``read_vrt(dtype=...)`` casts after decode and validates the cast."""
+
+ def test_safe_widening_cast(self, single_tile_vrt):
+ """float32 -> float64 is permitted; values survive bit-for-bit."""
+ vrt_path, arr = single_tile_vrt
+ da = read_vrt(vrt_path, dtype='float64')
+ assert da.dtype == np.float64
+ np.testing.assert_array_equal(da.values, arr.astype(np.float64))
+
+ def test_float_to_int_rejected(self, single_tile_vrt):
+ """Float-to-int is lossy and refused with a descriptive error."""
+ vrt_path, _ = single_tile_vrt
+ with pytest.raises(ValueError, match="Cannot cast float"):
+ read_vrt(vrt_path, dtype='int32')
+
+
+class TestReadVrtNameKwarg:
+ """``read_vrt(name='custom')`` overrides the file-stem derivation."""
+
+ def test_explicit_name_used(self, single_tile_vrt):
+ vrt_path, _ = single_tile_vrt
+ da = read_vrt(vrt_path, name='custom_name')
+ assert da.name == 'custom_name'
+
+ def test_default_name_from_stem(self, single_tile_vrt):
+ vrt_path, _ = single_tile_vrt
+ da = read_vrt(vrt_path)
+ assert da.name == os.path.splitext(os.path.basename(vrt_path))[0]
+
+
+class TestOpenGeotiffFileLikeKwargRejection:
+ """File-like sources reject ``gpu=True`` and ``chunks=N`` up front."""
+
+ @staticmethod
+ def _buf_with_tiff(tmp_path):
+ arr = np.zeros((4, 4), dtype=np.float32)
+ path = str(tmp_path / 'src.tif')
+ to_geotiff(arr, path)
+ with open(path, 'rb') as fh:
+ return io.BytesIO(fh.read())
+
+ def test_gpu_with_file_like_raises(self, tmp_path):
+ buf = self._buf_with_tiff(tmp_path)
+ with pytest.raises(ValueError, match="gpu=True is not supported"):
+ open_geotiff(buf, gpu=True)
+
+ def test_chunks_with_file_like_raises(self, tmp_path):
+ buf = self._buf_with_tiff(tmp_path)
+ with pytest.raises(ValueError, match="chunks=.*file-like"):
+ open_geotiff(buf, chunks=64)
+
+ def test_chunks_with_pathlib_path_still_works(self, tmp_path):
+ """pathlib.Path is not file-like and must keep working through the
+ dask path."""
+ arr = np.arange(16, dtype=np.float32).reshape(4, 4)
+ path = tmp_path / 'sample.tif'
+ to_geotiff(arr, str(path))
+
+ import dask.array as da_mod
+ result = open_geotiff(path, chunks=2)
+ assert isinstance(result.data, da_mod.Array)
+ np.testing.assert_array_equal(np.asarray(result.data), arr)
diff --git a/xrspatial/geotiff/tests/vrt/test_validation.py b/xrspatial/geotiff/tests/vrt/test_validation.py
index b81c6d0a9..f1852fb61 100644
--- a/xrspatial/geotiff/tests/vrt/test_validation.py
+++ b/xrspatial/geotiff/tests/vrt/test_validation.py
@@ -1570,3 +1570,300 @@ def test_empty_entries_ignored(self, tmp_path, monkeypatch):
monkeypatch.setenv('XRSPATIAL_VRT_ALLOWED_ROOTS', value)
arr, _ = _internal_read_vrt(vrt_path)
assert arr.shape == (4, 4)
+
+
+# ===========================================================================
+# VRT-tail validation folds (cluster 13, #2437)
+# ===========================================================================
+#
+# Three originally-standalone validation files folded here:
+#
+# * SrcRect negative-size / negative-offset rejection (was
+# ``test_geotiff_vrt_srcrect_validation_1784.py``).
+# * ``open_geotiff('.vrt')`` rejecting kwargs it silently dropped:
+# ``overview_level`` and ``on_gpu_failure`` (was
+# ``test_open_geotiff_vrt_kwarg_drop_1685.py``).
+# * ``to_geotiff(..., '.vrt')`` rejecting ``tiled=False`` and validating
+# ``tile_size`` up front instead of crashing in the writer (was
+# ``test_to_geotiff_vrt_tiled_validation_1862.py``).
+
+
+# ---------------------------------------------------------------------------
+# SrcRect negative-size / negative-offset rejection (#1784)
+# ---------------------------------------------------------------------------
+#
+# A malformed ``<SrcRect>`` (negative size or negative offset) must surface
+# as a ``ValueError`` naming the offending field, in both lenient and
+# strict modes -- never get swallowed by the missing-source fallback.
+
+
+def _srcrect_write_source(td: str, name: str = 'src.tif') -> str:
+    """Create a zero-filled 10x10 uint8 GeoTIFF called ``name`` in ``td``.
+
+    The file is written uncompressed; the returned value is its path.
+    """
+    blank = np.zeros((10, 10), dtype=np.uint8)
+    target = os.path.join(td, name)
+    to_geotiff(blank, target, compression='none')
+    return target
+
+
+def _srcrect_write_vrt(td: str, *,
+                       src_x_off: int = 0, src_y_off: int = 0,
+                       src_x_size: int = 10, src_y_size: int = 10,
+                       src_filename: str = 'src.tif',
+                       raster_x: int = 100, raster_y: int = 100) -> str:
+    """Write a VRT with a single SimpleSource using the given SrcRect.
+
+    ``src_x_off`` / ``src_y_off`` / ``src_x_size`` / ``src_y_size`` are
+    written verbatim into the ``<SrcRect>`` attributes (negative values
+    included, so tests can build malformed rectangles).  ``src_filename``
+    goes into ``<SourceFilename>`` and ``raster_x`` / ``raster_y`` become
+    the dataset's ``rasterXSize`` / ``rasterYSize``.
+
+    Returns the path of the written ``mosaic.vrt`` inside ``td``.
+    """
+    vrt_path = os.path.join(td, 'mosaic.vrt')
+    # NOTE(review): the element markup was missing from these literals and
+    # has been restored from the GDAL VRT schema.  The band dataType, the
+    # relativeToVRT flag and the 10x10 DstRect are assumed to mirror the
+    # source written by ``_srcrect_write_source`` -- confirm.
+    vrt_xml = (
+        f'<VRTDataset rasterXSize="{raster_x}" rasterYSize="{raster_y}">\n'
+        '  <VRTRasterBand dataType="Byte" band="1">\n'
+        '    <SimpleSource>\n'
+        f'      <SourceFilename relativeToVRT="1">{src_filename}'
+        '</SourceFilename>\n'
+        '      <SourceBand>1</SourceBand>\n'
+        f'      <SrcRect xOff="{src_x_off}" yOff="{src_y_off}" '
+        f'xSize="{src_x_size}" ySize="{src_y_size}"/>\n'
+        '      <DstRect xOff="0" yOff="0" xSize="10" ySize="10"/>\n'
+        '    </SimpleSource>\n'
+        '  </VRTRasterBand>\n'
+        '</VRTDataset>\n'
+    )
+    # XML defaults to UTF-8, so pin the encoding instead of using the locale.
+    with open(vrt_path, 'w', encoding='utf-8') as f:
+        f.write(vrt_xml)
+    return vrt_path
+
+
+class TestSrcRectRejection:
+ """Malformed ``<SrcRect>`` geometry rejected before the lenient
+ missing-source fallback can swallow it."""
+
+ def test_negative_x_size_rejected(self, tmp_path):
+ td = str(tmp_path)
+ _srcrect_write_source(td)
+ vrt_path = _srcrect_write_vrt(td, src_x_size=-50)
+ with pytest.raises(ValueError, match=r"SrcRect.*negative size"):
+ _internal_read_vrt(vrt_path)
+
+ def test_negative_y_size_rejected(self, tmp_path):
+ td = str(tmp_path)
+ _srcrect_write_source(td)
+ vrt_path = _srcrect_write_vrt(td, src_y_size=-50)
+ with pytest.raises(ValueError, match=r"SrcRect.*negative size"):
+ _internal_read_vrt(vrt_path)
+
+ def test_negative_x_off_rejected(self, tmp_path):
+ td = str(tmp_path)
+ _srcrect_write_source(td)
+ vrt_path = _srcrect_write_vrt(td, src_x_off=-10)
+ with pytest.raises(ValueError, match=r"SrcRect.*negative offset"):
+ _internal_read_vrt(vrt_path)
+
+ def test_negative_y_off_rejected(self, tmp_path):
+ td = str(tmp_path)
+ _srcrect_write_source(td)
+ vrt_path = _srcrect_write_vrt(td, src_y_off=-10)
+ with pytest.raises(ValueError, match=r"SrcRect.*negative offset"):
+ _internal_read_vrt(vrt_path)
+
+ def test_message_names_bad_values(self, tmp_path):
+ """The error message names the malformed field and its value so
+ the caller can find the offending ``<SrcRect>`` in the VRT."""
+ td = str(tmp_path)
+ _srcrect_write_source(td)
+ vrt_path = _srcrect_write_vrt(td, src_x_size=-7, src_y_size=-3)
+ with pytest.raises(ValueError) as excinfo:
+ _internal_read_vrt(vrt_path)
+ msg = str(excinfo.value)
+ assert "SrcRect" in msg
+ assert "-7" in msg
+ assert "-3" in msg
+
+ def test_missing_source_still_takes_lenient_warning_path(self, tmp_path):
+ """A *valid* SrcRect with a missing source file still hits the
+ lenient warning path -- the SrcRect check must not swallow the
+ missing-file case. ``missing_sources='warn'`` opts into the
+ lenient branch since the default is now ``'raise'``."""
+ td = str(tmp_path)
+ # No source file written; SrcRect itself is well-formed.
+ vrt_path = _srcrect_write_vrt(td, src_filename='does_not_exist.tif')
+ with warnings.catch_warnings(record=True) as caught:
+ warnings.simplefilter('always')
+ arr, _ = _internal_read_vrt(vrt_path, missing_sources='warn')
+ fallback = [w for w in caught
+ if issubclass(w.category, GeoTIFFFallbackWarning)]
+ assert fallback, (
+ "expected a GeoTIFFFallbackWarning for the missing source"
+ )
+ assert arr.shape == (100, 100)
+
+ def test_valid_srcrect_reads_normally(self, tmp_path):
+ """A well-formed SrcRect with a real source succeeds -- no false
+ positives on valid VRTs."""
+ td = str(tmp_path)
+ _srcrect_write_source(td)
+ vrt_path = _srcrect_write_vrt(td, raster_x=10, raster_y=10)
+ arr, _ = _internal_read_vrt(vrt_path)
+ assert arr.shape == (10, 10)
+ assert np.all(arr == 0)
+
+ def test_negative_srcrect_raises_under_strict_mode(
+ self, tmp_path, monkeypatch,
+ ):
+ """The check runs before the lenient try/except, so strict mode
+ and lenient mode both raise."""
+ monkeypatch.setenv('XRSPATIAL_GEOTIFF_STRICT', '1')
+ td = str(tmp_path)
+ _srcrect_write_source(td)
+ vrt_path = _srcrect_write_vrt(td, src_x_size=-50)
+ with pytest.raises(ValueError, match=r"SrcRect.*negative size"):
+ _internal_read_vrt(vrt_path)
+
+
+# ---------------------------------------------------------------------------
+# open_geotiff('.vrt') kwarg-drop rejection (#1685)
+# ---------------------------------------------------------------------------
+#
+# ``open_geotiff`` documents ``overview_level`` and ``on_gpu_failure`` but
+# the VRT dispatch branch routes to ``read_vrt`` whose signature accepts
+# neither, so the kwargs were silently dropped. The fix refuses the
+# unsupported combinations up front.
+
+
+@pytest.fixture
+def _kwarg_drop_small_vrt(tmp_path):
+    """Two-tile uint16 VRT for the kwarg-drop rejection cases.
+
+    ``tile_a.tif`` holds 0..15 at x centres 0.5..3.5 and ``tile_b.tif``
+    holds 16..31 at x centres 4.5..7.5; both use y centres 0.5..3.5 and
+    ``crs=4326``.  Returns the path of ``mosaic.vrt`` as a string.
+    """
+    tile_layout = (
+        ("tile_a.tif", 0, [0.5, 1.5, 2.5, 3.5]),
+        ("tile_b.tif", 16, [4.5, 5.5, 6.5, 7.5]),
+    )
+    written = []
+    for filename, first_value, x_centres in tile_layout:
+        pixels = np.arange(
+            first_value, first_value + 16, dtype=np.uint16,
+        ).reshape(4, 4)
+        tile = xr.DataArray(
+            pixels,
+            dims=["y", "x"],
+            coords={
+                "y": np.array([0.5, 1.5, 2.5, 3.5]),
+                "x": np.array(x_centres),
+            },
+            attrs={"crs": 4326},
+        )
+        tile_path = str(tmp_path / filename)
+        to_geotiff(tile, tile_path)
+        written.append(tile_path)
+
+    from xrspatial.geotiff import write_vrt
+    mosaic = str(tmp_path / "mosaic.vrt")
+    write_vrt(mosaic, written)
+    return mosaic
+
+
+class TestOpenGeotiffVrtKwargRejection:
+ """``open_geotiff('.vrt')`` rejects kwargs it used to silently drop."""
+
+ def test_rejects_overview_level(self, _kwarg_drop_small_vrt):
+ """``overview_level`` plus ``.vrt`` raises, not a silent drop."""
+ with pytest.raises(
+ ValueError, match="overview_level is not supported",
+ ):
+ open_geotiff(_kwarg_drop_small_vrt, overview_level=1)
+
+ def test_accepts_overview_level_zero(self, _kwarg_drop_small_vrt):
+ """``overview_level=0`` is full resolution (the default), so it is
+ equivalent to omitting the kwarg and must not raise."""
+ da = open_geotiff(_kwarg_drop_small_vrt, overview_level=0)
+ assert da.shape == (4, 8)
+
+ def test_rejects_on_gpu_failure_with_gpu_true(self, _kwarg_drop_small_vrt):
+ """``on_gpu_failure='strict'`` plus ``.vrt`` (gpu=True) is refused.
+
+ The check fires before any GPU code runs; no CUDA needed."""
+ with pytest.raises(
+ ValueError, match="on_gpu_failure is not supported",
+ ):
+ open_geotiff(
+ _kwarg_drop_small_vrt, gpu=True, on_gpu_failure="strict",
+ )
+
+ def test_without_unsupported_kwargs_still_works(self, _kwarg_drop_small_vrt):
+ """The previously-accepted kwargs still flow through to
+ ``read_vrt``."""
+ da = open_geotiff(_kwarg_drop_small_vrt)
+ assert da.shape == (4, 8)
+
+ def test_with_window_still_works(self, _kwarg_drop_small_vrt):
+ """``window`` was already forwarded; the fix must not break it."""
+ da = open_geotiff(_kwarg_drop_small_vrt, window=(0, 1, 4, 5))
+ assert da.shape == (4, 4)
+
+ def test_non_vrt_still_accepts_overview_level(self, tmp_path):
+ """The fix is VRT-specific; ``.tif`` sources keep accepting
+ ``overview_level``."""
+ arr = np.arange(64, dtype=np.uint16).reshape(8, 8)
+ da = xr.DataArray(
+ arr, dims=["y", "x"],
+ coords={
+ "y": np.arange(8, dtype=np.float64),
+ "x": np.arange(8, dtype=np.float64),
+ },
+ attrs={"crs": 4326},
+ )
+ tif_path = tmp_path / "with_ovr.tif"
+ to_geotiff(
+ da, str(tif_path), cog=True, tile_size=16, overview_levels=[2],
+ )
+ # Smoke check only: both levels must open without raising; the
+ # returned arrays are not inspected.
+ open_geotiff(str(tif_path), overview_level=0)
+ open_geotiff(str(tif_path), overview_level=1)
+
+
+# ---------------------------------------------------------------------------
+# to_geotiff('.vrt') tiled / tile_size validation (#1862)
+# ---------------------------------------------------------------------------
+#
+# ``to_geotiff(..., '.vrt', tiled=False)`` used to warn then crash with
+# ``ZeroDivisionError`` inside the always-tiling VRT writer. The fix
+# refuses ``tiled=False`` for ``.vrt`` and validates ``tile_size``
+# unconditionally so callers get a clear ``ValueError`` up front.
+
+
+def _tiled_validation_make_da(shape=(64, 64)):
+    """Return a float32 ``('y', 'x')`` DataArray of ``shape`` counting
+    up from 0 in row-major order."""
+    cell_count = np.prod(shape)
+    ramp = np.arange(cell_count, dtype=np.float32)
+    return xr.DataArray(ramp.reshape(shape), dims=['y', 'x'])
+
+
+class TestVrtTiledValidation:
+ """VRT writer rejects ``tiled=False`` and bad ``tile_size`` up front."""
+
+ def test_rejects_tiled_false(self, tmp_path):
+ """``tiled=False`` is not a valid request for VRT output."""
+ da = _tiled_validation_make_da()
+ out = os.path.join(str(tmp_path), 'vrt_tiled_false.vrt')
+ with pytest.raises(ValueError, match='tiled=False is not compatible'):
+ to_geotiff(da, out, tiled=False)
+
+ def test_tiled_false_zero_tile_size_raises_value_error(self, tmp_path):
+ """``tiled=False`` plus ``tile_size=0`` raises ``ValueError``, not
+ the previous ``ZeroDivisionError`` from inside the writer."""
+ da = _tiled_validation_make_da()
+ out = os.path.join(str(tmp_path), 'vrt_tiled_false_zero.vrt')
+ with pytest.raises(ValueError) as exc:
+ to_geotiff(da, out, tiled=False, tile_size=0)
+ # Already implied by ``pytest.raises(ValueError)`` (ZeroDivisionError
+ # is an ArithmeticError, not a ValueError); it spells out the
+ # regression named in the docstring.
+ assert not isinstance(exc.value, ZeroDivisionError)
+
+ def test_zero_tile_size_default_tiled_raises_value_error(self, tmp_path):
+ """With the default ``tiled=True``, ``tile_size=0`` surfaces from
+ the shared ``_validate_tile_size`` check, not a deep
+ ``ZeroDivisionError``."""
+ da = _tiled_validation_make_da()
+ out = os.path.join(str(tmp_path), 'vrt_default_tiled_zero.vrt')
+ with pytest.raises(ValueError, match='tile_size'):
+ to_geotiff(da, out, tile_size=0)
+
+ def test_default_args_still_succeeds(self, tmp_path):
+ """The default-args VRT write path is unaffected by the fix."""
+ da = _tiled_validation_make_da()
+ out = os.path.join(str(tmp_path), 'vrt_default.vrt')
+ to_geotiff(da, out)
+ assert os.path.exists(out)
diff --git a/xrspatial/geotiff/tests/vrt/test_window.py b/xrspatial/geotiff/tests/vrt/test_window.py
index d5975d86e..78939bc5f 100644
--- a/xrspatial/geotiff/tests/vrt/test_window.py
+++ b/xrspatial/geotiff/tests/vrt/test_window.py
@@ -29,6 +29,7 @@
import pytest
import tempfile
import uuid
+import warnings
import xarray as xr
from pathlib import Path
from unittest import mock
@@ -1148,3 +1149,132 @@ def _write_and_collect(vrt_path: str) -> dict[str, bytes]:
assert set(tiles1) == set(tiles2), f'Tile file set differs between runs: {set(tiles1) ^ set(tiles2)}'
for name, blob1 in tiles1.items():
assert blob1 == tiles2[name], f'Tile {name} differs between runs (race condition?)'
+
+
+# ---------------------------------------------------------------------------
+# VRT-tail window / chunking folds (cluster 13, #2437)
+# ---------------------------------------------------------------------------
+#
+# Two originally-standalone files folded here, both exercising the
+# windowed / chunked read paths this module already covers:
+#
+# * read_vrt(chunks=...) lazy-window construction (#1798): chunk layout
+# matches eager values, build does not decode sources, and an
+# excessive task count is rejected.
+# * read_geotiff_dask('.vrt') kwarg forwarding (#1795): the direct dask
+# entry point forwards window / band / max_pixels through to read_vrt.
+
+
+def _vrttail_write_single_band_vrt(vrt_path, source_name):
+    """One-band 6x4 Float32 VRT wrapping ``source_name`` (relative).
+
+    ``vrt_path`` must offer ``write_text`` (callers pass a
+    ``pathlib.Path``).  The single SimpleSource maps the whole 6x4 extent
+    of source band 1 onto the VRT band.
+    """
+    # Element markup restored from the GDAL VRT schema: 6 columns by
+    # 4 rows, matching the docstring's "6x4".
+    vrt_path.write_text(
+        '<VRTDataset rasterXSize="6" rasterYSize="4">\n'
+        '  <VRTRasterBand dataType="Float32" band="1">\n'
+        '    <SimpleSource>\n'
+        f'      <SourceFilename relativeToVRT="1">{source_name}'
+        '</SourceFilename>\n'
+        '      <SourceBand>1</SourceBand>\n'
+        '      <SrcRect xOff="0" yOff="0" xSize="6" ySize="4"/>\n'
+        '      <DstRect xOff="0" yOff="0" xSize="6" ySize="4"/>\n'
+        '    </SimpleSource>\n'
+        '  </VRTRasterBand>\n'
+        '</VRTDataset>\n'
+    )
+
+
+def _vrttail_write_multi_band_vrt(vrt_path, source_name, *, bands):
+    """``bands``-band 6x4 Float32 VRT wrapping ``source_name`` (relative).
+
+    VRT band ``i + 1`` reads source band ``i + 1`` through one full-extent
+    SimpleSource.  ``vrt_path`` must offer ``write_text`` (callers pass a
+    ``pathlib.Path``).
+    """
+    # Element markup restored from the GDAL VRT schema: 6 columns by
+    # 4 rows, matching the docstring's "6x4".
+    band_xml = []
+    for i in range(bands):
+        band_xml.append(
+            f'  <VRTRasterBand dataType="Float32" band="{i + 1}">\n'
+            '    <SimpleSource>\n'
+            f'      <SourceFilename relativeToVRT="1">{source_name}'
+            '</SourceFilename>\n'
+            f'      <SourceBand>{i + 1}</SourceBand>\n'
+            '      <SrcRect xOff="0" yOff="0" xSize="6" ySize="4"/>\n'
+            '      <DstRect xOff="0" yOff="0" xSize="6" ySize="4"/>\n'
+            '    </SimpleSource>\n'
+            '  </VRTRasterBand>\n'
+        )
+    vrt_path.write_text(
+        '<VRTDataset rasterXSize="6" rasterYSize="4">\n'
+        + ''.join(band_xml)
+        + '</VRTDataset>\n'
+    )
+
+
+class TestVrtTailLazyChunks:
+ """read_vrt(chunks=...) builds lazy window tasks (#1798)."""
+
+ def test_chunks_matches_eager_values(self, tmp_path):
+ """``chunks=2`` splits the 4x6 array into 2x2 chunks whose computed
+ values equal the eager read."""
+ arr = np.arange(24, dtype=np.float32).reshape(4, 6)
+ src = tmp_path / "tmp_1798_source.tif"
+ to_geotiff(arr, str(src), compression='none')
+ vrt = tmp_path / "tmp_1798_source.vrt"
+ _vrttail_write_single_band_vrt(vrt, os.path.basename(src))
+
+ eager = read_vrt(str(vrt))
+ lazy = read_vrt(str(vrt), chunks=2)
+
+ assert lazy.data.chunks == ((2, 2), (2, 2, 2))
+ np.testing.assert_array_equal(lazy.compute().values, eager.values)
+
+ def test_chunks_does_not_read_sources_during_construction(self, tmp_path):
+ """The chunked path runs a cheap ``os.path.exists`` sweep at build
+ but must not open or decode any source file.
+
+ Pairing the missing source with ``missing_sources='warn'`` lets
+ the build succeed; the assertion is that no decode-time warnings
+ (which would only fire if a source were actually read) leak out
+ during construction.
+ """
+ vrt = tmp_path / "tmp_1798_missing_source.vrt"
+ _vrttail_write_single_band_vrt(vrt, "missing.tif")
+
+ # NOTE(review): no ``simplefilter('always')`` is installed here, so
+ # the ambient warning filters decide what ``caught`` records --
+ # confirm that is intended for this negative assertion.
+ with warnings.catch_warnings(record=True) as caught:
+ lazy = read_vrt(str(vrt), chunks=2, missing_sources="warn")
+
+ assert caught == []
+ assert hasattr(lazy.data, 'compute')
+
+ def test_chunks_rejects_excessive_task_count(self, tmp_path):
+ """``chunks=1`` with ``max_pixels`` raised to 20e9 must raise
+ ``ValueError`` matching 'task cap'."""
+ vrt = tmp_path / "tmp_1798_huge_extent.vrt"
+ # NOTE(review): the literals below carry no element markup as shown
+ # (they look tag-stripped); the intended raster size cannot be
+ # recovered from here -- confirm against the repository copy.
+ vrt.write_text(
+ '\n'
+ ' \n'
+ '\n'
+ )
+ with pytest.raises(ValueError, match="task cap"):
+ read_vrt(str(vrt), chunks=1, max_pixels=20_000_000_000)
+
+
+class TestVrtTailDirectDaskKwargs:
+ """read_geotiff_dask('.vrt') forwards VRT kwargs (#1795)."""
+
+ def test_forwards_window_and_band(self, tmp_path):
+ """``window=(1, 2, 4, 6)`` with ``band=1`` returns
+ ``arr[1:4, 2:6, 1]`` of the two-band source."""
+ from xrspatial.geotiff import read_geotiff_dask
+
+ arr = np.arange(4 * 6 * 2, dtype=np.float32).reshape(4, 6, 2)
+ src = tmp_path / "tmp_1797_source.tif"
+ to_geotiff(arr, str(src), compression='none')
+ vrt = tmp_path / "tmp_1797_source.vrt"
+ _vrttail_write_multi_band_vrt(vrt, os.path.basename(src), bands=2)
+
+ got = read_geotiff_dask(
+ str(vrt), chunks=2, window=(1, 2, 4, 6), band=1,
+ )
+ assert got.shape == (3, 4)
+ np.testing.assert_array_equal(got.values, arr[1:4, 2:6, 1])
+
+ def test_forwards_max_pixels(self, tmp_path):
+ """``max_pixels=10`` on a VRT over the 24-pixel source raises
+ ``ValueError`` matching 'exceed'."""
+ from xrspatial.geotiff import read_geotiff_dask
+
+ arr = np.arange(24, dtype=np.float32).reshape(4, 6)
+ src = tmp_path / "tmp_1797_source_cap.tif"
+ to_geotiff(arr, str(src), compression='none')
+ vrt = tmp_path / "tmp_1797_source_cap.vrt"
+ _vrttail_write_single_band_vrt(vrt, os.path.basename(src))
+
+ with pytest.raises(ValueError, match="exceed"):
+ read_geotiff_dask(str(vrt), chunks=2, max_pixels=10)