Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 98 additions & 1 deletion xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,85 @@ def _read_geo_info(source, *, overview_level: int | None = None,
close_sidecar(sidecar)


def _bbox_to_window(source, bbox, *, overview_level=None,
allow_rotated=False, allow_invalid_nodata=False):
"""Resolve a geographic ``bbox`` to a pixel ``window`` for the source.

``bbox`` is ``(x_min, y_min, x_max, y_max)`` in the source's CRS.
The returned tuple is ``(row_start, col_start, row_stop, col_stop)``
clamped to the file's extent and ready to forward as the existing
``window=`` kwarg through the backend dispatch.

Uses ``_read_geo_info`` which already supports local files,
BytesIO, HTTP, and fsspec URIs via header-only reads, so this is
an O(1)-memory metadata pass rather than a full decode.

Raises ``ValueError`` if ``bbox`` is malformed, the source is not
georeferenced, or the transform is rotated. Rotated-affine files
are rejected because ``_extent_to_window`` assumes an
axis-aligned grid; the caller can pass ``allow_rotated=True`` to
drop the rotation upstream and then re-call with ``bbox=``.
"""
if (not isinstance(bbox, (tuple, list)) or len(bbox) != 4):
raise ValueError(
"open_geotiff: bbox must be a 4-tuple "
"(x_min, y_min, x_max, y_max), "
f"got {bbox!r}.")
x_min, y_min, x_max, y_max = bbox
# ``NaN >= NaN`` is False, so NaN coordinates would slip past the
# ordering check below and only surface later as an unhelpful
# integer-cast error inside ``_extent_to_window``. Reject upfront.
if not all(np.isfinite(v) for v in (x_min, y_min, x_max, y_max)):
raise ValueError(
f"open_geotiff: bbox must contain finite coordinates, "
f"got bbox={bbox!r}.")
if x_min >= x_max or y_min >= y_max:
raise ValueError(
f"open_geotiff: bbox has non-positive size "
f"(x_min={x_min}, y_min={y_min}, x_max={x_max}, "
f"y_max={y_max}). Expected x_min < x_max and y_min < y_max.")

geo_info, height, width, _dtype, _nbands = _read_geo_info(
source, overview_level=overview_level,
allow_rotated=allow_rotated,
allow_invalid_nodata=allow_invalid_nodata)

# ``allow_rotated=True`` clears a rotated affine and stashes the
# original 6-tuple on ``transform.rotated_affine`` while setting
# ``has_georef=False`` (the dropped-rotation marker). Check
# ``rotated_affine`` first so this case gets the more specific
# message that names the recovery path; the plain-no-georef
# error then covers everything else.
if (geo_info.transform is not None
and geo_info.transform.rotated_affine is not None):
raise ValueError(
"open_geotiff: bbox= requires an axis-aligned transform, "
"but this file has a rotated affine. The rotation cannot "
"be expressed as a 4-tuple bbox in the file's CRS; pass "
"window= for pixel-space windowing instead.")
if not geo_info.has_georef:
raise ValueError(
"open_geotiff: bbox= requires a georeferenced source, "
"but this file has no GeoTIFF tags. Pass window= instead "
"for pixel-space windowing.")

from ._attrs import _extent_to_window
pixel_window = _extent_to_window(
geo_info.transform, height, width,
y_min, y_max, x_min, x_max)
row_start, col_start, row_stop, col_stop = pixel_window
if row_start >= row_stop or col_start >= col_stop:
raise ValueError(
f"open_geotiff: bbox={bbox!r} does not overlap the file's "
f"extent (height={height}, width={width}). Resolved pixel "
f"window={pixel_window} has non-positive size.")
return pixel_window


def open_geotiff(source: str | BinaryIO, *,
dtype: str | np.dtype | None = None,
window: tuple | None = None,
bbox: tuple | None = None,
overview_level: int | None = None,
band: int | None = None,
name: str | None = None,
Expand Down Expand Up @@ -466,7 +542,14 @@ def open_geotiff(source: str | BinaryIO, *,
ValueError to prevent accidental data loss.
window : tuple or None
[stable] ``(row_start, col_start, row_stop, col_stop)`` for
windowed reading.
windowed reading. Mutually exclusive with ``bbox=``.
bbox : tuple or None
[stable] ``(x_min, y_min, x_max, y_max)`` in the file's CRS.
Resolved to a pixel ``window=`` via a header-only metadata read
and clamped to the file's extent. Requires the source to be
georeferenced with an axis-aligned transform; rotated affines
require ``allow_rotated=True`` to clear the rotation first.
Mutually exclusive with ``window=``.
overview_level : int or None
[advanced] Overview level (0 = full resolution). Must be a
non-negative int or ``None``; passing ``bool`` or any other
Expand Down Expand Up @@ -722,6 +805,20 @@ def open_geotiff(source: str | BinaryIO, *,
max_cloud_bytes=max_cloud_bytes,
)

# Resolve ``bbox=`` to a pixel ``window=`` via a header-only
# metadata read. Done at the dispatcher so every backend
# (eager / dask / GPU / VRT) sees a uniform pixel window without
# learning a new kwarg.
if bbox is not None:
if window is not None:
raise ValueError(
"open_geotiff: window= and bbox= are mutually exclusive; "
"pass one or the other.")
window = _bbox_to_window(
source, bbox, overview_level=overview_level,
allow_rotated=allow_rotated,
allow_invalid_nodata=allow_invalid_nodata)

missing_sources_passed = (
missing_sources is not _MISSING_SOURCES_SENTINEL)
_is_vrt_source = (
Expand Down
11 changes: 7 additions & 4 deletions xrspatial/geotiff/_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,20 @@ def _suggest_chunk_side(max_pixels, samples):
def _recovery_hint(max_pixels, samples):
"""Build the multi-line recovery hint appended to PixelSafetyLimitError.

Always suggests ``window=`` for reading a sub-region. Suggests
``chunks=`` when dask is installed; otherwise recommends installing
dask so chunked reads become available. ``importlib.util.find_spec``
avoids importing dask just to format an error.
Always suggests ``window=`` and ``bbox=`` for reading a sub-region.
Suggests ``chunks=`` when dask is installed; otherwise recommends
installing dask so chunked reads become available.
``importlib.util.find_spec`` avoids importing dask just to format
an error.
"""
chunk = _suggest_chunk_side(max_pixels, samples)
lines = [
"To read this file, try one of:",
" * Pass a larger max_pixels= if the allocation is acceptable.",
f" * Read a sub-region with window=(r0, c0, r1, c1), "
f"e.g. window=(0, 0, {chunk}, {chunk}).",
" * Read a geographic sub-region with "
"bbox=(x_min, y_min, x_max, y_max) in the file's CRS.",
]
if importlib.util.find_spec('dask') is not None:
lines.append(
Expand Down
220 changes: 220 additions & 0 deletions xrspatial/geotiff/tests/read/test_bbox_2555.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
"""Tests for the ``bbox=`` parameter on ``open_geotiff`` (#2555).

``bbox=(x_min, y_min, x_max, y_max)`` is resolved to a pixel
``window=`` via a header-only metadata read and forwarded to the
existing backend dispatch. The tests cover the round-trip against
``window=``, mutual exclusion, malformed inputs, clamping to file
extent, georef requirement, and rotated-affine rejection.
"""
from __future__ import annotations

import os

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff


@pytest.fixture
def georef_raster_2555(tmp_path):
"""Build a 100x100 EPSG:4326 raster spanning -122.5..-122.0 lon,
37.3..37.8 lat (north-up). Returns the file path."""
data = np.arange(100 * 100, dtype=np.float32).reshape(100, 100)
xs = np.linspace(-122.5, -122.0, 100)
ys = np.linspace(37.8, 37.3, 100) # decreasing -> north-up
arr = xr.DataArray(
data,
dims=('y', 'x'),
coords={'y': ys, 'x': xs},
attrs={'crs': 'EPSG:4326'},
)
path = os.path.join(tmp_path, 'src_2555.tif')
to_geotiff(arr, path)
return path


class TestBboxResolution:
def test_full_bbox_matches_full_read(self, georef_raster_2555):
"""A bbox covering the file's extent matches a no-window read."""
full = open_geotiff(georef_raster_2555)
# Pad the bbox slightly outside the extent; _extent_to_window
# clamps to file bounds so the result must still be the full
# raster.
bb = (-122.6, 37.2, -121.9, 37.9)
sub = open_geotiff(georef_raster_2555, bbox=bb)
assert sub.shape == full.shape
np.testing.assert_array_equal(sub.values, full.values)

def test_bbox_matches_equivalent_pixel_window(self, georef_raster_2555):
"""bbox= and the pixel window it resolves to must produce the
same array. Resolved via the internal helper so the test
doesn't have to redo the affine math. Asserts both shape and
pixel values so a row/col swap in the resolver would surface."""
from xrspatial.geotiff import _bbox_to_window
bb = (-122.4, 37.4, -122.1, 37.7)
pixel_window = _bbox_to_window(georef_raster_2555, bb)
a = open_geotiff(georef_raster_2555, bbox=bb)
b = open_geotiff(georef_raster_2555, window=pixel_window)
assert a.shape == b.shape
np.testing.assert_array_equal(a.values, b.values)
# Pixel-value parity against the same slice of the full read.
full = open_geotiff(georef_raster_2555)
r0, c0, r1, c1 = pixel_window
np.testing.assert_array_equal(a.values, full.values[r0:r1, c0:c1])

def test_bbox_clamps_to_file_extent(self, georef_raster_2555):
"""An out-of-extent bbox clamps rather than erroring, as long
as it overlaps the file at all."""
full = open_geotiff(georef_raster_2555)
# bbox extends west / south past the file; only the eastern /
# northern portion intersects.
bb = (-123.0, 36.5, -122.25, 37.65)
sub = open_geotiff(georef_raster_2555, bbox=bb)
# Result must be a strict sub-region (smaller than full) but
# non-empty.
assert 0 < sub.shape[0] < full.shape[0]
assert 0 < sub.shape[1] < full.shape[1]


class TestBboxValidation:
def test_bbox_and_window_are_mutually_exclusive(self, georef_raster_2555):
with pytest.raises(ValueError, match="mutually exclusive"):
open_geotiff(
georef_raster_2555,
window=(0, 0, 10, 10),
bbox=(-122.4, 37.4, -122.1, 37.7),
)

def test_bbox_wrong_arity_rejected(self, georef_raster_2555):
with pytest.raises(ValueError, match="4-tuple"):
open_geotiff(georef_raster_2555, bbox=(-122.4, 37.4, -122.1))

def test_bbox_not_a_sequence_rejected(self, georef_raster_2555):
with pytest.raises(ValueError, match="4-tuple"):
open_geotiff(georef_raster_2555, bbox="not-a-bbox")

def test_bbox_inverted_x_rejected(self, georef_raster_2555):
with pytest.raises(ValueError, match="non-positive size"):
open_geotiff(
georef_raster_2555,
bbox=(-122.1, 37.4, -122.4, 37.7), # x_min > x_max
)

def test_bbox_inverted_y_rejected(self, georef_raster_2555):
with pytest.raises(ValueError, match="non-positive size"):
open_geotiff(
georef_raster_2555,
bbox=(-122.4, 37.7, -122.1, 37.4), # y_min > y_max
)

def test_bbox_outside_extent_rejected(self, georef_raster_2555):
"""A bbox that does not overlap the file's extent at all
raises rather than returning an empty array."""
with pytest.raises(ValueError, match="does not overlap"):
open_geotiff(
georef_raster_2555,
bbox=(10.0, 10.0, 20.0, 20.0), # eastern hemisphere
)

def test_bbox_requires_georef(self, tmp_path):
"""A file with no GeoTIFF tags is rejected with a clear error
rather than silently producing nonsense pixel windows."""
from xrspatial.geotiff.tests.conftest import make_minimal_tiff
data = make_minimal_tiff(10, 10, np.dtype('float32'))
path = str(tmp_path / 'plain_2555.tif')
with open(path, 'wb') as f:
f.write(data)
with pytest.raises(ValueError, match="has no GeoTIFF tags"):
open_geotiff(path, bbox=(-1.0, -1.0, 1.0, 1.0))

def test_bbox_rejects_nan(self, georef_raster_2555):
"""NaN coordinates pass the ordering check (NaN >= NaN is
False), so reject them upfront with a clear error rather than
surfacing the downstream integer-cast failure."""
import math
for bad in (
(math.nan, 37.4, -122.1, 37.7),
(-122.4, math.nan, -122.1, 37.7),
(-122.4, 37.4, math.nan, 37.7),
(-122.4, 37.4, -122.1, math.nan),
):
with pytest.raises(ValueError, match="finite coordinates"):
open_geotiff(georef_raster_2555, bbox=bad)

def test_bbox_rejects_inf(self, georef_raster_2555):
"""Infinite coordinates are equally meaningless."""
with pytest.raises(ValueError, match="finite coordinates"):
open_geotiff(
georef_raster_2555,
bbox=(float('-inf'), 37.4, -122.1, 37.7),
)

def test_bbox_rejects_rotated_affine(self, tmp_path):
"""A file with a rotated affine is rejected with guidance to
pass allow_rotated=True (which clears the rotation) and re-call."""
from xrspatial.geotiff.tests.read.test_crs import _write_rotated_tiff
arr = np.arange(20, dtype='<u2').reshape(4, 5)
path = str(tmp_path / 'rotated_2555.tif')
_write_rotated_tiff(path, arr)
# ``allow_rotated=True`` is required to even open the file;
# without it the file is rejected upstream. With it, the
# transform's rotated_affine is set and bbox= still refuses.
with pytest.raises(ValueError, match="rotated affine"):
open_geotiff(
path,
bbox=(0.0, 0.0, 10.0, 10.0),
allow_rotated=True,
)


class TestBboxOverviewLevel:
def test_bbox_with_overview_level_uses_overview_transform(self, tmp_path):
"""``overview_level=`` selects a downsampled IFD whose transform
has different pixel sizes. ``bbox=`` must resolve against the
overview's transform, not the base IFD's, so the resulting pixel
window matches the overview's dimensions."""
# Build a 256x256 COG with 2x and 4x reductions. Coarser pixels
# at deeper overview levels give the bbox a smaller pixel
# window for the same geographic extent.
base = np.arange(256 * 256, dtype=np.float32).reshape(256, 256)
xs = np.linspace(-122.5, -122.0, 256)
ys = np.linspace(37.8, 37.3, 256)
arr = xr.DataArray(
base, dims=('y', 'x'),
coords={'y': ys, 'x': xs},
attrs={'crs': 'EPSG:4326'},
)
path = str(tmp_path / 'overview_2555.tif')
to_geotiff(arr, path, cog=True, overview_levels=[2, 4])

bb = (-122.4, 37.5, -122.1, 37.7)
# The overview-level read should succeed and return a sub-region
# bounded by the overview's smaller pixel grid, not the base
# IFD's 256x256 grid.
sub_base = open_geotiff(path, bbox=bb, overview_level=0)
sub_ov = open_geotiff(path, bbox=bb, overview_level=1)
sub_ov2 = open_geotiff(path, bbox=bb, overview_level=2)
# Each successive overview level halves the resolution, so the
# bbox slice should shrink monotonically with level.
assert sub_base.shape[0] >= sub_ov.shape[0] >= sub_ov2.shape[0]
assert sub_base.shape[1] >= sub_ov.shape[1] >= sub_ov2.shape[1]
# All slices must be non-empty.
assert sub_ov2.shape[0] > 0 and sub_ov2.shape[1] > 0


class TestSafetyHintMentionsBbox:
def test_hint_includes_bbox_line(self):
"""The PixelSafetyLimitError recovery hint must surface the
new bbox= option alongside window= and chunks=."""
from xrspatial.geotiff._layout import (MAX_PIXELS_DEFAULT, PixelSafetyLimitError,
_check_dimensions)
with pytest.raises(PixelSafetyLimitError) as exc:
_check_dimensions(50_000, 50_000, 1, MAX_PIXELS_DEFAULT)
msg = str(exc.value)
assert "bbox=(x_min, y_min, x_max, y_max)" in msg
assert "in the file's CRS" in msg
# Previous options must still be present.
assert "window=(r0, c0, r1, c1)" in msg
assert "chunks=" in msg
4 changes: 4 additions & 0 deletions xrspatial/geotiff/tests/unit/test_signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,10 @@ def test_write_geotiff_gpu_streaming_buffer_bytes_runtime_noop(tmp_path):
_CANONICAL_ORDER = (
"dtype",
"window",
# Geographic-space window. Sits immediately after ``window`` so the
# two sub-region kwargs stay grouped; mutual exclusion is enforced
# in :func:`open_geotiff`.
"bbox",
"overview_level",
"band",
"name",
Expand Down
Loading