From 409900de1ed1b60778e9f1a0712571e102c0eb5b Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 11:46:26 +0000 Subject: [PATCH 1/8] Removed unnecessary compute() call --- src/spatialdata/_core/query/spatial_query.py | 57 +++++++------------- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index c29e3bc0e..01445df75 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -386,41 +386,21 @@ def _bounding_box_mask_points( axes: tuple[str, ...], min_coordinate: list[Number] | ArrayLike, max_coordinate: list[Number] | ArrayLike, + points_df: pd.DataFrame | None = None, # <-- new parameter ) -> list[ArrayLike]: - """Compute a mask that is true for the points inside axis-aligned bounding boxes. - - Parameters - ---------- - points - The points element to perform the query on. - axes - The axes that min_coordinate and max_coordinate refer to. - min_coordinate - PLACEHOLDER - The upper left hand corners of the bounding boxes (i.e., minimum coordinates along all dimensions). - Shape: (n_boxes, n_axes) or (n_axes,) for a single box. - {min_coordinate_docs} - max_coordinate - The lower right hand corners of the bounding boxes (i.e., the maximum coordinates along all dimensions). - Shape: (n_boxes, n_axes) or (n_axes,) for a single box. - {max_coordinate_docs} - - Returns - ------- - The masks for the points inside the bounding boxes. - """ + # TODO: add docstring back element_axes = get_axes_names(points) - min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) - - # Ensure min_coordinate and max_coordinate are 2D arrays min_coordinate = min_coordinate[np.newaxis, :] if min_coordinate.ndim == 1 else min_coordinate max_coordinate = max_coordinate[np.newaxis, :] if max_coordinate.ndim == 1 else max_coordinate + # Compute once here only if the caller hasn't already done so + if points_df is None: + points_df = points.compute() + n_boxes = min_coordinate.shape[0] in_bounding_box_masks = [] - for box in range(n_boxes): box_masks = [] for axis_index, axis_name in enumerate(axes): @@ -428,7 +408,8 @@ def _bounding_box_mask_points( continue min_value = min_coordinate[box, axis_index] max_value = max_coordinate[box, axis_index] - box_masks.append(points[axis_name].gt(min_value).compute() & points[axis_name].lt(max_value).compute()) + col = points_df[axis_name].values # <-- numpy array, no Dask + box_masks.append((col > min_value) & (col < max_value)) bounding_box_mask = np.stack(box_masks, axis=-1) in_bounding_box_masks.append(np.all(bounding_box_mask, axis=1)) return in_bounding_box_masks @@ -663,19 +644,19 @@ def _( max_coordinate_intrinsic = max_coordinate_intrinsic.data # get the points in the intrinsic coordinate bounding box + points_pd = points.compute() # <-- moved up, single materialization in_intrinsic_bounding_box = _bounding_box_mask_points( points=points, axes=intrinsic_axes, min_coordinate=min_coordinate_intrinsic, max_coordinate=max_coordinate_intrinsic, + points_df=points_pd, # <-- pass it in ) if not (len_df := len(in_intrinsic_bounding_box)) == (len_bb := len(min_coordinate)): - raise ValueError( - f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`." - ) + raise ValueError(...) points_in_intrinsic_bounding_box: list[DaskDataFrame | None] = [] - points_pd = points.compute() + attrs = points.attrs.copy() for mask_np in in_intrinsic_bounding_box: if mask_np.sum() == 0: @@ -715,22 +696,24 @@ def _( points_query_coordinate_system = transform( p, to_coordinate_system=target_coordinate_system, maintain_positioning=False ) - - # get a mask for the points in the bounding box + # Materialize once; reuse for both the mask and the final slice + transformed_pd = points_query_coordinate_system.compute() bounding_box_mask = _bounding_box_mask_points( points=points_query_coordinate_system, axes=axes, - min_coordinate=min_c, # type: ignore[arg-type] - max_coordinate=max_c, # type: ignore[arg-type] + min_coordinate=min_c, + max_coordinate=max_c, + points_df=transformed_pd, # <-- pass it in ) if len(bounding_box_mask) != 1: raise ValueError(f"Expected a single mask, got {len(bounding_box_mask)} masks. Please report this bug.") bounding_box_indices = np.where(bounding_box_mask[0])[0] - if len(bounding_box_indices) == 0: output.append(None) else: - points_df = p.compute().iloc[bounding_box_indices] + # Use the already-materialized intrinsic-space frame for the final result, + # not the transformed one (we want to return data in intrinsic coordinates) + points_df = points_pd[mask_np].iloc[bounding_box_indices] # no .compute() old_transformations = get_transformation(p, get_all=True) assert isinstance(old_transformations, dict) feature_key = p.attrs.get(ATTRS_KEY, {}).get(PointsModel.FEATURE_KEY) From 054990f2e42bd25a802b513657e417454f684589 Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 11:50:19 +0000 Subject: [PATCH 2/8] Using the R-tree for spatial querying of shapes --- src/spatialdata/_core/query/spatial_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 01445df75..3556b43bb 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -774,8 +774,8 @@ def _( ) for box_corners in intrinsic_bounding_box_corners: bounding_box_non_axes_aligned = Polygon(box_corners.data) - indices = polygons.geometry.intersects(bounding_box_non_axes_aligned) - queried = polygons[indices] + candidate_idx = polygons.sindex.query(bounding_box_non_axes_aligned, predicate="intersects") + queried = polygons.iloc[candidate_idx] if len(queried) == 0: queried_polygon = None else: From cd454dccd9039fa9740cb37ee28cc6445da3a559 Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 13:28:19 +0000 Subject: [PATCH 3/8] Cleanup --- src/spatialdata/_core/query/spatial_query.py | 42 +++++++++++++++----- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 3556b43bb..371b4642a 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -265,7 +265,7 @@ def _adjust_bounding_box_to_real_axes( return axes_bb, min_coordinate, max_coordinate -def _get_case_of_bounding_box_query( +def _get_case_ofmaskquery( m_without_c_linear: ArrayLike, input_axes_without_c: tuple[str, ...], output_axes_without_c: tuple[str, ...], @@ -386,9 +386,33 @@ def _bounding_box_mask_points( axes: tuple[str, ...], min_coordinate: list[Number] | ArrayLike, max_coordinate: list[Number] | ArrayLike, - points_df: pd.DataFrame | None = None, # <-- new parameter + points_df: pd.DataFrame | None = None, ) -> list[ArrayLike]: - # TODO: add docstring back + """Compute a mask that is true for the points inside axis-aligned bounding boxes. + + Parameters + ---------- + points + The points element to perform the query on. + axes + The axes that min_coordinate and max_coordinate refer to. + min_coordinate + PLACEHOLDER + The upper left hand corners of the bounding boxes (i.e., minimum coordinates along all dimensions). + Shape: (n_boxes, n_axes) or (n_axes,) for a single box. + {min_coordinate_docs} + max_coordinate + The lower right hand corners of the bounding boxes (i.e., the maximum coordinates along all dimensions). + Shape: (n_boxes, n_axes) or (n_axes,) for a single box. + {max_coordinate_docs} + points_df + A pre-computed pandas dataframe. Useful if the points_df has already been materialized, otherwise the methods simply + calls .compute() on the dask data frame + + Returns + ------- + The masks for the points inside the bounding boxes. + """ element_axes = get_axes_names(points) min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) @@ -408,7 +432,7 @@ def _bounding_box_mask_points( continue min_value = min_coordinate[box, axis_index] max_value = max_coordinate[box, axis_index] - col = points_df[axis_name].values # <-- numpy array, no Dask + col = points_df[axis_name].values box_masks.append((col > min_value) & (col < max_value)) bounding_box_mask = np.stack(box_masks, axis=-1) in_bounding_box_masks.append(np.all(bounding_box_mask, axis=1)) @@ -644,17 +668,17 @@ def _( max_coordinate_intrinsic = max_coordinate_intrinsic.data # get the points in the intrinsic coordinate bounding box - points_pd = points.compute() # <-- moved up, single materialization + points_pd = points.compute() in_intrinsic_bounding_box = _bounding_box_mask_points( points=points, axes=intrinsic_axes, min_coordinate=min_coordinate_intrinsic, max_coordinate=max_coordinate_intrinsic, - points_df=points_pd, # <-- pass it in + points_df=points_pd, ) if not (len_df := len(in_intrinsic_bounding_box)) == (len_bb := len(min_coordinate)): - raise ValueError(...) + raise ValueError(f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`.") points_in_intrinsic_bounding_box: list[DaskDataFrame | None] = [] attrs = points.attrs.copy() @@ -703,7 +727,7 @@ def _( axes=axes, min_coordinate=min_c, max_coordinate=max_c, - points_df=transformed_pd, # <-- pass it in + points_df=transformed_pd, ) if len(bounding_box_mask) != 1: raise ValueError(f"Expected a single mask, got {len(bounding_box_mask)} masks. Please report this bug.") @@ -713,7 +737,7 @@ def _( else: # Use the already-materialized intrinsic-space frame for the final result, # not the transformed one (we want to return data in intrinsic coordinates) - points_df = points_pd[mask_np].iloc[bounding_box_indices] # no .compute() + points_df = points_pd[mask_np].iloc[bounding_box_indices] old_transformations = get_transformation(p, get_all=True) assert isinstance(old_transformations, dict) feature_key = p.attrs.get(ATTRS_KEY, {}).get(PointsModel.FEATURE_KEY) From 8d9844012297ba5582326dd82cd03a69d580048a Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 13:48:48 +0000 Subject: [PATCH 4/8] Cleanup --- src/spatialdata/_core/query/spatial_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 371b4642a..ff0f4ebc5 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -265,7 +265,7 @@ def _adjust_bounding_box_to_real_axes( return axes_bb, min_coordinate, max_coordinate -def _get_case_ofmaskquery( +def _get_case_of_bounding_box_query( m_without_c_linear: ArrayLike, input_axes_without_c: tuple[str, ...], output_axes_without_c: tuple[str, ...], From e0f0ba79a90e2b88fa566c4c64cbb1a1e87cf215 Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 13:51:04 +0000 Subject: [PATCH 5/8] Cleanup --- src/spatialdata/_core/query/spatial_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index ff0f4ebc5..58fbdae56 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -725,8 +725,8 @@ def _( bounding_box_mask = _bounding_box_mask_points( points=points_query_coordinate_system, axes=axes, - min_coordinate=min_c, - max_coordinate=max_c, + min_coordinate=min_c, # type: ignore[arg-type] + max_coordinate=max_c, # type: ignore[arg-type] points_df=transformed_pd, ) if len(bounding_box_mask) != 1: From be2db1b0704b3f52c6b39f41258f936f60aac97b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:53:58 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata/_core/query/spatial_query.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 58fbdae56..85e868a58 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -406,7 +406,7 @@ def _bounding_box_mask_points( Shape: (n_boxes, n_axes) or (n_axes,) for a single box. {max_coordinate_docs} points_df - A pre-computed pandas dataframe. Useful if the points_df has already been materialized, otherwise the methods simply + A pre-computed pandas dataframe. Useful if the points_df has already been materialized, otherwise the methods simply calls .compute() on the dask data frame Returns @@ -678,7 +678,9 @@ def _( ) if not (len_df := len(in_intrinsic_bounding_box)) == (len_bb := len(min_coordinate)): - raise ValueError(f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`.") + raise ValueError( + f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`." + ) points_in_intrinsic_bounding_box: list[DaskDataFrame | None] = [] attrs = points.attrs.copy() From 6142d128e1aa76742fdbe305feb5b447aa0d5f42 Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Tue, 31 Mar 2026 08:08:54 +0000 Subject: [PATCH 7/8] wip: bounding box optimisation --- src/spatialdata/_core/query/spatial_query.py | 187 +++++++------------ 1 file changed, 70 insertions(+), 117 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 58fbdae56..303dbc45c 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any import dask.dataframe as dd +from matplotlib.path import Path import numpy as np from dask.dataframe import DataFrame as DaskDataFrame from geopandas import GeoDataFrame @@ -387,57 +388,56 @@ def _bounding_box_mask_points( min_coordinate: list[Number] | ArrayLike, max_coordinate: list[Number] | ArrayLike, points_df: pd.DataFrame | None = None, + polygon_corners: np.ndarray | None = None, # shape: (n_boxes, n_corners, 2) ) -> list[ArrayLike]: - """Compute a mask that is true for the points inside axis-aligned bounding boxes. - - Parameters - ---------- - points - The points element to perform the query on. - axes - The axes that min_coordinate and max_coordinate refer to. - min_coordinate - PLACEHOLDER - The upper left hand corners of the bounding boxes (i.e., minimum coordinates along all dimensions). - Shape: (n_boxes, n_axes) or (n_axes,) for a single box. - {min_coordinate_docs} - max_coordinate - The lower right hand corners of the bounding boxes (i.e., the maximum coordinates along all dimensions). - Shape: (n_boxes, n_axes) or (n_axes,) for a single box. - {max_coordinate_docs} - points_df - A pre-computed pandas dataframe. Useful if the points_df has already been materialized, otherwise the methods simply - calls .compute() on the dask data frame - - Returns - ------- - The masks for the points inside the bounding boxes. - """ element_axes = get_axes_names(points) min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) min_coordinate = min_coordinate[np.newaxis, :] if min_coordinate.ndim == 1 else min_coordinate max_coordinate = max_coordinate[np.newaxis, :] if max_coordinate.ndim == 1 else max_coordinate - # Compute once here only if the caller hasn't already done so if points_df is None: points_df = points.compute() + relevant_axes = [ax for ax in axes if ax in element_axes] + point_coords = points_df[relevant_axes].values # (n_points, 2) + n_boxes = min_coordinate.shape[0] in_bounding_box_masks = [] + for box in range(n_boxes): - box_masks = [] - for axis_index, axis_name in enumerate(axes): - if axis_name not in element_axes: - continue - min_value = min_coordinate[box, axis_index] - max_value = max_coordinate[box, axis_index] - col = points_df[axis_name].values - box_masks.append((col > min_value) & (col < max_value)) - bounding_box_mask = np.stack(box_masks, axis=-1) - in_bounding_box_masks.append(np.all(bounding_box_mask, axis=1)) - return in_bounding_box_masks + if polygon_corners is not None and len(relevant_axes) == 2: + # Exact path from the (potentially rotated/sheared) corners — + # close the polygon by appending the first corner again + corners = polygon_corners[box] # (n_corners, 2) + closed = np.vstack([corners, corners[0]]) + mask = Path(closed).contains_points(point_coords) + elif len(relevant_axes) == 2: + # Axis-aligned rectangle — still faster than per-axis boolean ops + axis_indices = [list(axes).index(ax) for ax in relevant_axes] + mins = min_coordinate[box, axis_indices] + maxs = max_coordinate[box, axis_indices] + x_min, y_min = mins + x_max, y_max = maxs + box_path = Path([ + (x_min, y_min), (x_max, y_min), + (x_max, y_max), (x_min, y_max), + (x_min, y_min), + ]) + mask = box_path.contains_points(point_coords) + else: + # Fallback for 1D or >2D + axis_indices = [list(axes).index(ax) for ax in relevant_axes] + box_masks = [] + for i, axis_name in enumerate(relevant_axes): + col = points_df[axis_name].values + box_masks.append((col > min_coordinate[box, axis_indices[i]]) & + (col < max_coordinate[box, axis_indices[i]])) + mask = np.all(np.stack(box_masks, axis=-1), axis=1) + in_bounding_box_masks.append(mask) + + return in_bounding_box_masks def _dict_query_dispatcher( elements: dict[str, SpatialElement], query_function: Callable[[SpatialElement], SpatialElement], **kwargs: Any @@ -640,12 +640,9 @@ def _( min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) - - # Ensure min_coordinate and max_coordinate are 2D arrays min_coordinate = min_coordinate[np.newaxis, :] if min_coordinate.ndim == 1 else min_coordinate max_coordinate = max_coordinate[np.newaxis, :] if max_coordinate.ndim == 1 else max_coordinate - # for triggering validation _ = BoundingBoxRequest( target_coordinate_system=target_coordinate_system, axes=axes, @@ -653,102 +650,58 @@ def _( max_coordinate=max_coordinate, ) - # get the four corners of the bounding box (2D case), or the 8 corners of the "3D bounding box" (3D case) - (intrinsic_bounding_box_corners, intrinsic_axes) = _get_bounding_box_corners_in_intrinsic_coordinates( + (intrinsic_corners, intrinsic_axes) = _get_bounding_box_corners_in_intrinsic_coordinates( element=points, axes=axes, min_coordinate=min_coordinate, max_coordinate=max_coordinate, target_coordinate_system=target_coordinate_system, ) - min_coordinate_intrinsic = intrinsic_bounding_box_corners.min(dim="corner") - max_coordinate_intrinsic = intrinsic_bounding_box_corners.max(dim="corner") - - min_coordinate_intrinsic = min_coordinate_intrinsic.data - max_coordinate_intrinsic = max_coordinate_intrinsic.data - # get the points in the intrinsic coordinate bounding box - points_pd = points.compute() - in_intrinsic_bounding_box = _bounding_box_mask_points( + # intrinsic_corners has shape (n_boxes, n_corners, n_axes) — extract the + # two spatial axes and pass the exact corner geometry to the mask function + axis_names = list(intrinsic_axes) + xy_indices = [axis_names.index("x"), axis_names.index("y")] + corners_np = intrinsic_corners.data + if corners_np.ndim == 2: + corners_np = corners_np[np.newaxis, ...] # add box dim → (1, n_corners, n_axes) + polygon_corners = corners_np[:, :, xy_indices] # (n_boxes, n_corners, 2) + + points_pd = points.compute() # single .compute() for the whole function + masks = _bounding_box_mask_points( points=points, axes=intrinsic_axes, - min_coordinate=min_coordinate_intrinsic, - max_coordinate=max_coordinate_intrinsic, + min_coordinate=intrinsic_corners.data.min(axis=1), # still needed for the fallback path + max_coordinate=intrinsic_corners.data.max(axis=1), points_df=points_pd, + polygon_corners=polygon_corners, ) - if not (len_df := len(in_intrinsic_bounding_box)) == (len_bb := len(min_coordinate)): - raise ValueError(f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`.") - points_in_intrinsic_bounding_box: list[DaskDataFrame | None] = [] + if len(masks) != len(min_coordinate): + raise ValueError( + f"Length of list of dataframes `{len(masks)}` is not equal to " + f"the number of bounding boxes `{len(min_coordinate)}`." + ) attrs = points.attrs.copy() - for mask_np in in_intrinsic_bounding_box: - if mask_np.sum() == 0: - points_in_intrinsic_bounding_box.append(None) - else: - # TODO there is a problem when mixing dask dataframe graph with dask array graph. Need to compute for now. - # we can't compute either mask or points as when we calculate either one of them - # test_query_points_multiple_partitions will fail as the mask will be used to index each partition. - # However, if we compute and then create the dask array again we get the mixed dask graph problem. - filtered_pd = points_pd[mask_np] - points_filtered = dd.from_pandas(filtered_pd, npartitions=points.npartitions) - points_filtered.attrs.update(attrs) - points_in_intrinsic_bounding_box.append(points_filtered) - if len(points_in_intrinsic_bounding_box) == 0: - return None + old_transformations = get_transformation(points, get_all=True) + assert isinstance(old_transformations, dict) + feature_key = points.attrs.get(ATTRS_KEY, {}).get(PointsModel.FEATURE_KEY) - # assert that the number of queried points is correct - assert len(points_in_intrinsic_bounding_box) == len(min_coordinate) - - # # we have to reset the index since we have subset - # # https://stackoverflow.com/questions/61395351/how-to-reset-index-on-concatenated-dataframe-in-dask - # points_in_intrinsic_bounding_box = points_in_intrinsic_bounding_box.assign(idx=1) - # points_in_intrinsic_bounding_box = points_in_intrinsic_bounding_box.set_index( - # points_in_intrinsic_bounding_box.idx.cumsum() - 1 - # ) - # points_in_intrinsic_bounding_box = points_in_intrinsic_bounding_box.map_partitions( - # lambda df: df.rename(index={"idx": None}) - # ) - # points_in_intrinsic_bounding_box = points_in_intrinsic_bounding_box.drop(columns=["idx"]) - - # transform the element to the query coordinate system output: list[DaskDataFrame | None] = [] - for p, min_c, max_c in zip(points_in_intrinsic_bounding_box, min_coordinate, max_coordinate, strict=True): - if p is None: + for mask_np in masks: + if mask_np.sum() == 0: output.append(None) else: - points_query_coordinate_system = transform( - p, to_coordinate_system=target_coordinate_system, maintain_positioning=False - ) - # Materialize once; reuse for both the mask and the final slice - transformed_pd = points_query_coordinate_system.compute() - bounding_box_mask = _bounding_box_mask_points( - points=points_query_coordinate_system, - axes=axes, - min_coordinate=min_c, # type: ignore[arg-type] - max_coordinate=max_c, # type: ignore[arg-type] - points_df=transformed_pd, - ) - if len(bounding_box_mask) != 1: - raise ValueError(f"Expected a single mask, got {len(bounding_box_mask)} masks. Please report this bug.") - bounding_box_indices = np.where(bounding_box_mask[0])[0] - if len(bounding_box_indices) == 0: - output.append(None) - else: - # Use the already-materialized intrinsic-space frame for the final result, - # not the transformed one (we want to return data in intrinsic coordinates) - points_df = points_pd[mask_np].iloc[bounding_box_indices] - old_transformations = get_transformation(p, get_all=True) - assert isinstance(old_transformations, dict) - feature_key = p.attrs.get(ATTRS_KEY, {}).get(PointsModel.FEATURE_KEY) - - output.append( - PointsModel.parse( - dd.from_pandas(points_df, npartitions=1), - transformations=old_transformations.copy(), - feature_key=feature_key, - ) + filtered_pd = points_pd[mask_np] + output.append( + PointsModel.parse( + dd.from_pandas(filtered_pd, npartitions=1), + transformations=old_transformations.copy(), + feature_key=feature_key, ) + ) + if len(output) == 0: return None if len(output) == 1: From d4fec49a1f795cba5d1454e199f66a2335cf3193 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 31 Mar 2026 08:12:08 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata/_core/query/spatial_query.py | 23 ++++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 303dbc45c..35cf38b36 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -8,10 +8,10 @@ from typing import TYPE_CHECKING, Any import dask.dataframe as dd -from matplotlib.path import Path import numpy as np from dask.dataframe import DataFrame as DaskDataFrame from geopandas import GeoDataFrame +from matplotlib.path import Path from shapely.geometry import MultiPolygon, Point, Polygon from xarray import DataArray, DataTree @@ -419,11 +419,15 @@ def _bounding_box_mask_points( maxs = max_coordinate[box, axis_indices] x_min, y_min = mins x_max, y_max = maxs - box_path = Path([ - (x_min, y_min), (x_max, y_min), - (x_max, y_max), (x_min, y_max), - (x_min, y_min), - ]) + box_path = Path( + [ + (x_min, y_min), + (x_max, y_min), + (x_max, y_max), + (x_min, y_max), + (x_min, y_min), + ] + ) mask = box_path.contains_points(point_coords) else: # Fallback for 1D or >2D @@ -431,14 +435,16 @@ def _bounding_box_mask_points( box_masks = [] for i, axis_name in enumerate(relevant_axes): col = points_df[axis_name].values - box_masks.append((col > min_coordinate[box, axis_indices[i]]) & - (col < max_coordinate[box, axis_indices[i]])) + box_masks.append( + (col > min_coordinate[box, axis_indices[i]]) & (col < max_coordinate[box, axis_indices[i]]) + ) mask = np.all(np.stack(box_masks, axis=-1), axis=1) in_bounding_box_masks.append(mask) return in_bounding_box_masks + def _dict_query_dispatcher( elements: dict[str, SpatialElement], query_function: Callable[[SpatialElement], SpatialElement], **kwargs: Any ) -> dict[str, SpatialElement]: @@ -635,7 +641,6 @@ def _( max_coordinate: list[Number] | ArrayLike, target_coordinate_system: str, ) -> DaskDataFrame | list[DaskDataFrame] | None: - from spatialdata import transform from spatialdata.transformations import get_transformation min_coordinate = _parse_list_into_array(min_coordinate)