From a31a6edf2de867ea643eb816ca0841d724a61191 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Thu, 26 Mar 2026 13:08:24 +0100 Subject: [PATCH] Add support for collection queryables #483 To make the annoying/misleading "Property filtering with unsupported properties" warnings more correct --- CHANGELOG.md | 1 + openeo/rest/_testing.py | 3 + openeo/rest/datacube.py | 45 +++++++-- tests/rest/datacube/test_datacube100.py | 126 +++++++++++++++++++----- 4 files changed, 143 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bd027a5d..c37aef2eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `is_one_of()` to `property_filter` for compact building of an allow-list based property filter. - Add "ABDI1" and "ABDI2" to "extra-indices-dict.json" ([#875](https://github.com/Open-EO/openeo-python-client/pull/875)) +- Support for collection "queryables": more correct warnings when using property filtering in `load_collection` ([#483](https://github.com/Open-EO/openeo-python-client/issues/483)) ### Changed diff --git a/openeo/rest/_testing.py b/openeo/rest/_testing.py index 998874551..b8343cb16 100644 --- a/openeo/rest/_testing.py +++ b/openeo/rest/_testing.py @@ -432,6 +432,7 @@ def build_capabilities( basic_auth: bool = True, oidc_auth: bool = True, collections: bool = True, + collection_queryables: bool = False, processes: bool = True, sync_processing: bool = True, validation: bool = False, @@ -451,6 +452,8 @@ def build_capabilities( if collections: endpoints.append({"path": "/collections", "methods": ["GET"]}) endpoints.append({"path": "/collections/{collection_id}", "methods": ["GET"]}) + if collection_queryables: + endpoints.append({"path": "/collections/{collection_id}/queryables", "methods": ["GET"]}) if processes: endpoints.append({"path": "/processes", "methods": ["GET"]}) if sync_processing: diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index fb3f2e1aa..73ac8ae9e 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -249,7 +249,7 @@ def load_collection( properties = cls._build_load_properties_argument( properties=properties, - supported_properties=(metadata.get("summaries", default={}).keys() if metadata else None), + queryables=_Queryables.build(collection_id=collection_id, connection=connection), max_cloud_cover=max_cloud_cover, ) if properties is not None: @@ -275,7 +275,7 @@ def _build_load_properties_argument( None, ], *, - supported_properties: Optional[typing.Collection[str]] = None, + queryables: Optional[_Queryables] = None, max_cloud_cover: Optional[float] = None, ) -> Union[Dict[str, PGNode], None]: """ @@ -296,11 +296,11 @@ def _build_load_properties_argument( properties["eo:cloud_cover"] = lambda v: v <= max_cloud_cover if isinstance(properties, dict): - if supported_properties: - unsupported_properties = set(properties.keys()).difference(supported_properties) + if queryables and not queryables.additional: + unsupported_properties = set(properties.keys()).difference(queryables.properties) if unsupported_properties: warnings.warn( - f"Property filtering with properties not listed in collection/STAC metadata: {list(unsupported_properties)} (supported: {list(supported_properties)}).", + f"Property filtering with unsupported properties {sorted(unsupported_properties)} (queryables: {sorted(queryables.properties)}).", stacklevel=3, ) properties = { @@ -444,7 +444,10 @@ def load_stac( if bands is not None: arguments["bands"] = bands - properties = cls._build_load_properties_argument(properties=properties) + properties = cls._build_load_properties_argument( + properties=properties, + # TODO: possible to detect queryables here too? + ) if properties is not None: arguments["properties"] = properties @@ -3225,3 +3228,33 @@ def _get_geometry_argument( crs_name = crs geometry["crs"] = {"type": "name", "properties": {"name": crs_name}} return geometry + + +class _Queryables: + """ + Container of collection/item "queryables": + - properties: properties that can be filtered on + - additional: whether "additionalProperties" (anything goes) is enabled + """ + + __slots__ = ("properties", "additional") + + def __init__(self, properties: Iterable[str], additional: bool = False): + self.properties = set(properties) + self.additional = bool(additional) + + @classmethod + def build(cls, *, collection_id: str, connection: Optional[Connection]) -> Union[_Queryables, None]: + if connection and connection.capabilities().supports_endpoint("/collections/{collection_id}/queryables"): + path = f"/collections/{collection_id}/queryables" + try: + resp = connection.get(path, allow_redirects=True) + resp.raise_for_status() + data = resp.json() + properties = list(data.get("properties", {}).keys()) + additional = data.get("additionalProperties", False) + log.debug(f"Queryables from {path!r}: {properties=} {additional=}") + return cls(properties=properties, additional=additional) + except Exception as e: + log.warning(f"Failed to get/parse queryables of from {path}: {e!r}") + return None diff --git a/tests/rest/datacube/test_datacube100.py b/tests/rest/datacube/test_datacube100.py index 3003ac8ce..1ccafbf01 100644 --- a/tests/rest/datacube/test_datacube100.py +++ b/tests/rest/datacube/test_datacube100.py @@ -13,6 +13,7 @@ import textwrap from typing import Optional +import dirty_equals import pyproj import pytest import requests @@ -28,7 +29,7 @@ from openeo.processes import ProcessBuilder from openeo.rest import OpenEoClientException from openeo.rest.connection import Connection -from openeo.rest.datacube import THIS, UDF, DataCube +from openeo.rest.datacube import THIS, UDF, DataCube, _Queryables from openeo.utils.version import ComparableVersion from .. import get_download_graph @@ -2111,31 +2112,6 @@ def test_load_collection_max_cloud_cover_with_other_properties(con100): } -@pytest.mark.parametrize(["extra_summaries", "max_cloud_cover", "expect_warning"], [ - ({}, None, False), - ({}, 75, True), - ({"eo:cloud_cover": {"min": 0, "max": 100}}, None, False), - ({"eo:cloud_cover": {"min": 0, "max": 100}}, 75, False), -]) -def test_load_collection_max_cloud_cover_summaries_warning( - con100, requests_mock, recwarn, extra_summaries, max_cloud_cover, expect_warning, -): - s2_metadata = copy.deepcopy(DEFAULT_S2_METADATA) - s2_metadata["summaries"].update(extra_summaries) - requests_mock.get(API_URL + "/collections/S2", json=s2_metadata) - - _ = con100.load_collection("S2", max_cloud_cover=max_cloud_cover) - - if expect_warning: - assert len(recwarn.list) == 1 - assert re.search( - "Property filtering.*properties not listed.*collection.*metadata.*eo:cloud_cover", - str(recwarn.pop(UserWarning).message), - ) - else: - assert len(recwarn.list) == 0 - - def test_load_collection_with_collection_properties(con100): cube = con100.load_collection( "S2", @@ -2242,6 +2218,104 @@ def test_load_collection_with_single_collection_property_and_cloud_cover(con100) } +def _build_queryables_doc(platform: bool = True, cloud_cover: bool = True, additional: bool = True) -> dict: + """Simple helper to build dummy queryables doc""" + properties = {} + if platform: + properties["platform"] = { + "type": "string", + "enum": ["sentinel-2a", "sentinel-2b", "sentinel-2c", "sentinel-2d"], + } + if cloud_cover: + properties["eo:cloud_cover"] = { + "$ref": "https://stac-extensions.github.io/eo/v2.0.0/schema.json#/definitions/eo:cloud_cover", + "type": "number", + "maximum": 100, + "minimum": 0, + } + return { + "$id": f"{API_URL}/collection/S2/queryables", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": properties, + "additionalProperties": additional, + } + + +class TestQueryables: + + @pytest.mark.parametrize("api_capabilities", [{"collection_queryables": True}]) + def test_basic(self, con100, requests_mock, api_capabilities): + queryables_doc = _build_queryables_doc(platform=True, cloud_cover=True, additional=True) + requests_mock.get(f"{API_URL}/collections/S2/queryables", json=queryables_doc) + queryables = _Queryables.build(collection_id="S2", connection=con100) + assert queryables.properties == {"eo:cloud_cover", "platform"} + assert queryables.additional is True + + @pytest.mark.parametrize("api_capabilities", [{"collection_queryables": True}]) + def test_broken(self, con100, requests_mock, api_capabilities): + requests_mock.get(f"{API_URL}/collections/S2/queryables", status_code=500, text="nope") + queryables = _Queryables.build(collection_id="S2", connection=con100) + assert queryables is None + + +@pytest.mark.parametrize("api_capabilities", [{"collection_queryables": True}]) +@pytest.mark.parametrize( + ["queryables_doc", "expected_warnings"], + [ + ( + _build_queryables_doc(platform=False, cloud_cover=True, additional=True), + [], + ), + ( + _build_queryables_doc(platform=True, cloud_cover=True, additional=False), + [], + ), + ( + _build_queryables_doc(platform=False, cloud_cover=True, additional=False), + [dirty_equals.IsStr(regex=r".*unsupported prop.*platform.*queryables.*eo:cloud_cover.*")], + ), + ( + _build_queryables_doc(platform=False, cloud_cover=False, additional=False), + [dirty_equals.IsStr(regex=r".*unsupported prop.*eo:cloud_cover.*platform.*queryables.*\[\].*")], + ), + ], +) +def test_load_collection_with_queryables( + con100, requests_mock, api_capabilities, queryables_doc, recwarn, expected_warnings +): + requests_mock.get(f"{API_URL}/collections/S2/queryables", json=queryables_doc) + + cube = con100.load_collection( + "S2", + properties=[ + collection_property("eo:cloud_cover") <= 75, + collection_property("platform") == "Sentinel-2B", + ], + ) + assert cube.flat_graph()["loadcollection1"]["arguments"]["properties"] == { + "eo:cloud_cover": { + "process_graph": { + "lte1": { + "process_id": "lte", + "arguments": {"x": {"from_parameter": "value"}, "y": 75}, + "result": True, + } + } + }, + "platform": { + "process_graph": { + "eq1": { + "process_id": "eq", + "arguments": {"x": {"from_parameter": "value"}, "y": "Sentinel-2B"}, + "result": True, + } + } + }, + } + assert [str(w.message) for w in recwarn] == expected_warnings + + def test_load_collection_temporal_extent_process_builder_function(con100): from openeo.processes import date_shift