Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion xrspatial/geotiff/_cog_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@ def _parse_cog_http_meta(
if source_path is not None:
from ._sidecar import discover_remote_sidecar
ifds, sidecar, sidecar_ifd_ids = discover_remote_sidecar(
source_path, ifds, max_cloud_bytes=max_cloud_bytes,
source_path, ifds, overview_level=overview_level,
max_cloud_bytes=max_cloud_bytes,
)

ifd = select_overview_ifd(ifds, overview_level)
Expand Down
58 changes: 38 additions & 20 deletions xrspatial/geotiff/_sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ def discover_remote_sidecar(
source,
base_ifds: list[IFD],
*,
overview_level: int | None = None,
max_cloud_bytes: int | None = None,
) -> tuple[list[IFD], 'SidecarOverviews | None', set[int]]:
"""Probe for a sibling ``.ovr`` and return a merged IFD list.
Expand All @@ -313,27 +314,40 @@ def discover_remote_sidecar(
selected IFD's ``id()`` is in this set; non-sidecar IFDs fall
through to the original source.

Discovery is silent: probes that fail (404, missing fsspec,
timeout, etc.) return ``([...base_ifds], None, set())``. A
successful probe whose download or parse subsequently fails -- for
example a hostile server that returns 200 to every URL but the
bytes are not a valid TIFF, or a transient network error during
download -- also falls back to base-only behaviour with the
exception swallowed, so a misbehaving server cannot poison an
otherwise valid base read.
``overview_level`` is the caller's requested level. It is threaded
into :func:`handle_sidecar_parse_failure` so a download/parse
failure follows the same policy the eager local and fsspec readers
use: an explicit level the base file alone cannot serve raises with
the real parse error chained, anything else warns and falls back.

Discovery is silent only for the probe itself: probes that fail
(404, missing fsspec, timeout, etc.) return
``([...base_ifds], None, set())``. A successful probe whose download
or parse subsequently fails -- for example a hostile server that
returns 200 to every URL but the bytes are not a valid TIFF, or a
transient network error during download -- is routed through
:func:`handle_sidecar_parse_failure` so it warns (and falls back to
base-only) instead of being swallowed, matching the eager
local/fsspec readers. A misbehaving server still cannot poison an
otherwise valid base read unless the caller pinned an
``overview_level`` only the sidecar could serve.

Raises
------
CloudSizeLimitError
Re-raised verbatim when the sidecar download breaches
``max_cloud_bytes`` (the one exception that survives the
otherwise silent fall-back). The budget is a caller-set
otherwise warn-and-fall-back path). The budget is a caller-set
contract; surfacing the breach lets the caller distinguish "no
sidecar" from "sidecar too big" rather than silently dropping
the level and reading a stale base-only pyramid. Set
``max_cloud_bytes=None`` to disable the budget. Other failures
are converted to a base-only return so callers debugging a
surprise sidecar miss should reach for this carve-out first.
``max_cloud_bytes=None`` to disable the budget.
ValueError
Raised by :func:`handle_sidecar_parse_failure` (chained from the
underlying parse error) when ``overview_level`` is explicitly
requested and exceeds the base IFD count, so a level that
depends on the sidecar surfaces the real parse cause instead of
degrading to a generic out-of-range error.

Local file paths are accepted too; the helper is shape-agnostic.
"""
Expand All @@ -351,14 +365,18 @@ def discover_remote_sidecar(
# dropping the sidecar level and reading a stale base-only
# pyramid. See ``load_sidecar`` for the error contract.
raise
except Exception:
# Probe succeeded but the download / parse failed. Either the
# probe URL was a false positive (a server returning 200 for
# every path), the bytes are malformed, or a transient network
# error happened during the download. Fall back to base-only
# behaviour -- the caller still gets a working read of the in-
# file IFD chain. Mirrors the silent-fail-to-base contract
# ``find_sidecar`` uses for the probe itself.
except Exception as exc:
# Probe succeeded but the download / parse failed. Route through
# the shared policy the eager CPU/GPU and fsspec metadata paths
# use: an explicit ``overview_level`` the base file alone cannot
# serve re-raises with the real parse error chained; anything
# else warns and falls back to base-only. This replaces the
# previous silent swallow so a malformed remote sidecar is no
# longer dropped without a trace. Issue #3022.
handle_sidecar_parse_failure(
exc, sidecar_path, overview_level,
base_ifd_count=len(base_ifds),
)
return list(base_ifds), None, set()
merged = list(base_ifds) + list(sidecar.ifds)
sidecar_ifd_ids = {id(ifd) for ifd in sidecar.ifds}
Expand Down
47 changes: 38 additions & 9 deletions xrspatial/geotiff/tests/integration/test_sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -1463,14 +1463,17 @@ def test_http_chunked_open_rejects_overview_past_sidecar(


# ---------------------------------------------------------------------------
# Defensive: ``discover_remote_sidecar`` swallows ``load_sidecar``
# failures (parse error, garbage bytes, transient network) and returns
# the unchanged base IFD list with ``sidecar=None``. The
# ``CloudSizeLimitError`` budget breach is the one exception that
# re-raises so a caller-set ceiling stays observable.
# Defensive: ``discover_remote_sidecar`` routes ``load_sidecar``
# failures (parse error, garbage bytes, transient network) through the
# shared ``handle_sidecar_parse_failure`` policy that the eager
# local/fsspec readers use. A malformed sidecar warns and falls back to
# the unchanged base IFD list with ``sidecar=None``; an explicit
# ``overview_level`` only the sidecar could serve re-raises the parse
# error. The ``CloudSizeLimitError`` budget breach still re-raises so a
# caller-set ceiling stays observable. Issue #3022.
# ---------------------------------------------------------------------------
def test_discover_remote_sidecar_falls_back_when_load_fails(monkeypatch):
"""A probe that succeeds but a load that raises returns base-only."""
"""A probe that succeeds but a load that raises warns and returns base-only."""
from xrspatial.geotiff import _sidecar
from xrspatial.geotiff._sidecar import discover_remote_sidecar

Expand All @@ -1484,14 +1487,40 @@ def _exploding_load(_path, **_kw):
monkeypatch.setattr(_sidecar, "load_sidecar", _exploding_load)

sentinel_ifds = [object(), object()]
merged, sidecar, sidecar_ifd_ids = discover_remote_sidecar(
"http://example/x.tif", sentinel_ifds,
)
with pytest.warns(RuntimeWarning, match="x.tif.ovr"):
merged, sidecar, sidecar_ifd_ids = discover_remote_sidecar(
"http://example/x.tif", sentinel_ifds,
)
assert merged == list(sentinel_ifds)
assert sidecar is None
assert sidecar_ifd_ids == set()


def test_discover_remote_sidecar_surfaces_parse_error_for_sidecar_level(
monkeypatch):
"""An explicit overview_level the base file can't serve re-raises the cause."""
from xrspatial.geotiff import _sidecar
from xrspatial.geotiff._sidecar import discover_remote_sidecar

monkeypatch.setattr(
_sidecar, "find_sidecar", lambda _src: "http://example/x.tif.ovr"
)

def _exploding_load(_path, **_kw):
raise RuntimeError("sidecar bytes did not parse")

monkeypatch.setattr(_sidecar, "load_sidecar", _exploding_load)

# Base file has a single IFD (level 0); level 1 can only come from the
# sidecar, so the parse failure must surface rather than degrade.
with pytest.raises(ValueError, match="x.tif.ovr") as excinfo:
discover_remote_sidecar(
"http://example/x.tif", [object()], overview_level=1,
)
assert isinstance(excinfo.value.__cause__, RuntimeError)
assert "did not parse" in str(excinfo.value.__cause__)


def test_discover_remote_sidecar_propagates_cloud_size_limit(monkeypatch):
"""The one exception the helper does NOT swallow is the budget breach."""
from xrspatial.geotiff import _sidecar
Expand Down
Loading