Skip to content

Commit 8335fce

Browse files
author
peng.li24
committed
fix: array_to_double_vec() hardcoded *2 corrupts (N,≥3) arrays; add C-contiguous guard
- array_to_double_vec: use row stride (r*cols) instead of flat rows*2 copy, correctly extracting only x,y columns from (N,≥3) arrays. - Add ensure_c_contiguous() helper that checks buffer strides match C-order (row-major) layout — rejects non-contiguous views like arr[:,:2] with a clear ValueError directing caller to use .copy(). - Apply contiguity check to all 18 factory entry points (typed and generic overloads of point/linestring/polygon/linearring/multipoint/ multilinestring/multipolygon). - Fix point(py::array&) to handle both 1D [x,y] and 2D arrays directly instead of going through array_to_double_vec. - tests: add TestArrayShape3D with 24 tests covering (N,3) extraction, row/col layout consistency, non-contiguous rejection, and .copy() workaround.
1 parent 90042e1 commit 8335fce

2 files changed

Lines changed: 244 additions & 6 deletions

File tree

pycpp/geometry_py.h

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,21 +43,57 @@ py::array_t<T> native_to_array(const T* data, size_t rows, size_t cols) {
4343
return result;
4444
}
4545

46+
/// Throw if a py::buffer_info is not C-contiguous (row-major, no gaps).
47+
/// Protects against silent data corruption when callers pass sliced views
48+
/// (e.g. arr[:, :2], arr[::2], arr.T) whose memory strides don't match shape.
49+
inline void ensure_c_contiguous(const py::buffer_info& buf) {
50+
if (buf.ndim == 0) return;
51+
// Last dimension stride must equal element size
52+
if (buf.strides[buf.ndim - 1] != buf.itemsize) {
53+
throw std::invalid_argument(
54+
"array must be C-contiguous; use .copy() on sliced/transposed views "
55+
"(e.g. arr.copy() or arr[:, :2].copy())");
56+
}
57+
// Each higher dimension stride must equal next-dim-size × next-stride
58+
for (ssize_t i = buf.ndim - 2; i >= 0; --i) {
59+
ssize_t expected = buf.shape[i + 1] * buf.strides[i + 1];
60+
if (buf.strides[i] != expected) {
61+
throw std::invalid_argument(
62+
"array must be C-contiguous; use .copy() on sliced/transposed views "
63+
"(e.g. arr.copy() or arr[:, :2].copy())");
64+
}
65+
}
66+
}
67+
4668
/// Convert any py::array coordinate buffer to std::vector<double>.
69+
/// Extracts the first 2 columns per row — safe for (N,2) and (N,≥3) arrays.
4770
/// dtype double → direct copy; float32/other → cast & widen to double.
4871
inline std::vector<double> array_to_double_vec(const py::array& arr) {
4972
auto buf = arr.request();
50-
size_t sz = static_cast<size_t>(buf.shape[0]) * 2;
51-
std::vector<double> tmp(sz);
73+
if (buf.ndim != 2) {
74+
throw std::invalid_argument("array_to_double_vec: expected 2D array");
75+
}
76+
ensure_c_contiguous(buf);
77+
size_t rows = static_cast<size_t>(buf.shape[0]);
78+
size_t cols = static_cast<size_t>(buf.shape[1]);
79+
std::vector<double> tmp(rows * 2);
5280
if (arr.dtype().is(py::dtype::of<double>())) {
5381
const double* src = static_cast<const double*>(buf.ptr);
54-
std::copy(src, src + sz, tmp.begin());
82+
for (size_t r = 0; r < rows; ++r) {
83+
size_t base = r * cols;
84+
tmp[r * 2] = src[base];
85+
tmp[r * 2 + 1] = src[base + 1];
86+
}
5587
} else {
5688
// float32 or other → cast through pybind11 to float32, then widen
5789
auto f32 = py::cast<py::array_t<float>>(arr);
5890
auto fbuf = f32.request();
5991
const float* src = static_cast<const float*>(fbuf.ptr);
60-
for (size_t i = 0; i < sz; ++i) tmp[i] = static_cast<double>(src[i]);
92+
for (size_t r = 0; r < rows; ++r) {
93+
size_t base = r * cols;
94+
tmp[r * 2] = static_cast<double>(src[base]);
95+
tmp[r * 2 + 1] = static_cast<double>(src[base + 1]);
96+
}
6197
}
6298
return tmp;
6399
}
@@ -72,27 +108,43 @@ inline Point<float> point(float x, float y) { return Point<float>(x, y); }
72108

73109
inline Point<double> point(const py::array_t<double>& arr) {
74110
auto buf = arr.request();
111+
ensure_c_contiguous(buf);
75112
const double* p = static_cast<const double*>(buf.ptr);
76113
return Point<double>(buf.size > 0 ? p[0] : 0, buf.size > 1 ? p[1] : 0);
77114
}
78115
inline Point<float> point(const py::array_t<float>& arr) {
79116
auto buf = arr.request();
117+
ensure_c_contiguous(buf);
80118
const float* p = static_cast<const float*>(buf.ptr);
81119
return Point<float>(buf.size > 0 ? p[0] : 0, buf.size > 1 ? p[1] : 0);
82120
}
83121
// auto-double: accept any dtype (int, unknown), always produce Point<double>
122+
// Handles 1D [x,y] and 2D (N,≥2) arrays — takes first 2 elements.
84123
inline Point<double> point(const py::array& arr) {
85-
auto tmp = array_to_double_vec(arr);
86-
return Point<double>(tmp[0], tmp[1]);
124+
auto buf = arr.request();
125+
ensure_c_contiguous(buf);
126+
if (buf.size < 2) return Point<double>(0, 0);
127+
if (arr.dtype().is(py::dtype::of<double>())) {
128+
const double* p = static_cast<const double*>(buf.ptr);
129+
return Point<double>(p[0], p[1]);
130+
} else {
131+
// float32 or int/other → cast through pybind11 to float32, then widen
132+
auto f32 = py::cast<py::array_t<float>>(arr);
133+
auto fbuf = f32.request();
134+
const float* p = static_cast<const float*>(fbuf.ptr);
135+
return Point<double>(static_cast<double>(p[0]), static_cast<double>(p[1]));
136+
}
87137
}
88138

89139
// -- LineString --
90140
inline LineString<double> linestring(const py::array_t<double>& arr) {
91141
auto buf = arr.request();
142+
ensure_c_contiguous(buf);
92143
return LineString<double>(static_cast<const double*>(buf.ptr), buf.shape[0], buf.shape[1]);
93144
}
94145
inline LineString<float> linestring(const py::array_t<float>& arr) {
95146
auto buf = arr.request();
147+
ensure_c_contiguous(buf);
96148
return LineString<float>(static_cast<const float*>(buf.ptr), buf.shape[0], buf.shape[1]);
97149
}
98150
inline LineString<double> linestring(const py::array& arr) {
@@ -104,10 +156,12 @@ inline LineString<double> linestring(const py::array& arr) {
104156
// -- Polygon --
105157
inline Polygon<double> polygon(const py::array_t<double>& arr) {
106158
auto buf = arr.request();
159+
ensure_c_contiguous(buf);
107160
return Polygon<double>(static_cast<const double*>(buf.ptr), buf.shape[0], buf.shape[1]);
108161
}
109162
inline Polygon<float> polygon(const py::array_t<float>& arr) {
110163
auto buf = arr.request();
164+
ensure_c_contiguous(buf);
111165
return Polygon<float>(static_cast<const float*>(buf.ptr), buf.shape[0], buf.shape[1]);
112166
}
113167
inline Polygon<double> polygon(const py::array& arr) {
@@ -119,6 +173,7 @@ inline Polygon<double> polygon(const py::array& arr) {
119173
// -- LinearRing (double only for now) --
120174
inline LinearRing<double> linearring(const py::array_t<double>& arr) {
121175
auto buf = arr.request();
176+
ensure_c_contiguous(buf);
122177
return LinearRing<double>(static_cast<const double*>(buf.ptr), buf.shape[0], buf.shape[1]);
123178
}
124179
inline LinearRing<double> linearring(const py::array& arr) {
@@ -130,10 +185,12 @@ inline LinearRing<double> linearring(const py::array& arr) {
130185
// -- MultiPoint: single array of shape (n_pts, 2) --
131186
inline MultiPoint<double> multipoint(const py::array_t<double>& arr) {
132187
auto buf = arr.request();
188+
ensure_c_contiguous(buf);
133189
return MultiPoint<double>(static_cast<const double*>(buf.ptr), buf.shape[0], buf.shape[1]);
134190
}
135191
inline MultiPoint<float> multipoint(const py::array_t<float>& arr) {
136192
auto buf = arr.request();
193+
ensure_c_contiguous(buf);
137194
return MultiPoint<float>(static_cast<const float*>(buf.ptr), buf.shape[0], buf.shape[1]);
138195
}
139196
inline MultiPoint<double> multipoint(const py::array& arr) {
@@ -147,6 +204,7 @@ inline MultiLineString<double> multilinestring(const std::vector<py::array_t<dou
147204
MultiLineString<double> mls;
148205
for (auto& arr : arrays) {
149206
auto buf = arr.request();
207+
ensure_c_contiguous(buf);
150208
mls.add_line(static_cast<const double*>(buf.ptr), buf.shape[0], buf.shape[1]);
151209
}
152210
return mls;
@@ -155,6 +213,7 @@ inline MultiLineString<float> multilinestring(const std::vector<py::array_t<floa
155213
MultiLineString<float> mls;
156214
for (auto& arr : arrays) {
157215
auto buf = arr.request();
216+
ensure_c_contiguous(buf);
158217
mls.add_line(static_cast<const float*>(buf.ptr), buf.shape[0], buf.shape[1]);
159218
}
160219
return mls;
@@ -174,6 +233,7 @@ inline MultiPolygon<double> multipolygon(const std::vector<py::array_t<double>>&
174233
MultiPolygon<double> mp;
175234
for (auto& arr : arrays) {
176235
auto buf = arr.request();
236+
ensure_c_contiguous(buf);
177237
mp.add_polygon(static_cast<const double*>(buf.ptr), buf.shape[0], buf.shape[1]);
178238
}
179239
return mp;
@@ -182,6 +242,7 @@ inline MultiPolygon<float> multipolygon(const std::vector<py::array_t<float>>& a
182242
MultiPolygon<float> mp;
183243
for (auto& arr : arrays) {
184244
auto buf = arr.request();
245+
ensure_c_contiguous(buf);
185246
mp.add_polygon(static_cast<const float*>(buf.ptr), buf.shape[0], buf.shape[1]);
186247
}
187248
return mp;

tests/test_api.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,3 +1050,180 @@ def test_random_ls_poly_f64(self, cpp):
10501050
i_cpp = cpp.intersects_linestring_polygon(cpp.linestring(l), cpp.polygon(c))
10511051
i_py = PyLineString(l).intersects(PyPolygon(c))
10521052
assert i_cpp == i_py
1053+
1054+
1055+
# ==============================================================================
1056+
# 3-COLUMN ARRAY SHAPE HANDLING — verify (N,≥3) → x,y extraction
1057+
# ==============================================================================
1058+
# Regression tests for the array_to_double_vec() fix (hardcoded rows*2 → stride).
1059+
# Covers: generic py::array overload (int dtype), typed double overload, point,
1060+
# polygon, multipoint; plus row/column layout consistency and non-contiguous view guard.
1061+
1062+
class TestArrayShape3D:
1063+
"""Verify (N,≥3) arrays correctly extract x,y columns across all factory overloads."""
1064+
1065+
# -- (N,2) regression: generic overload (int32) still works ----------------------
1066+
1067+
def test_2col_generic(self, cpp):
1068+
coords = np.array([[0, 1], [2, 3], [4, 5]], dtype=np.int32)
1069+
ls = cpp.linestring(coords)
1070+
assert ls.coords() == [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)]
1071+
1072+
# -- (N,3) generic overload: z column correctly ignored -------------------------
1073+
1074+
def test_3col_generic(self, cpp):
1075+
coords = np.array([[10, 20, 999], [30, 40, 888], [50, 60, 777]], dtype=np.int32)
1076+
ls = cpp.linestring(coords)
1077+
assert ls.coords() == [(10.0, 20.0), (30.0, 40.0), (50.0, 60.0)]
1078+
1079+
# -- (N,3) typed double overload: stride by shape[1]=3 --------------------------
1080+
1081+
def test_3col_typed_double(self, cpp):
1082+
coords = np.array([[10.0, 20.0, 99.0], [30.0, 40.0, 88.0], [50.0, 60.0, 77.0]],
1083+
dtype=np.float64)
1084+
ls = cpp.linestring(coords)
1085+
assert ls.coords() == [(10.0, 20.0), (30.0, 40.0), (50.0, 60.0)]
1086+
1087+
# -- Equality: (N,3) produces identical coords to (N,2) -------------------------
1088+
1089+
def test_3col_equals_2col_generic(self, cpp):
1090+
coords_3 = np.array([[1, 2, 99], [3, 4, 88], [5, 6, 77]], dtype=np.int32)
1091+
coords_2 = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float64)
1092+
ls3 = cpp.linestring(coords_3)
1093+
ls2 = cpp.linestring(coords_2)
1094+
assert ls3.coords() == ls2.coords()
1095+
# Distance to self should also match
1096+
assert ls3.length() == ls2.length()
1097+
1098+
def test_3col_equals_2col_typed(self, cpp):
1099+
coords_3 = np.array([[1.0, 2.0, 99.0], [3.0, 4.0, 88.0], [5.0, 6.0, 77.0]],
1100+
dtype=np.float64)
1101+
coords_2 = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float64)
1102+
ls3 = cpp.linestring(coords_3)
1103+
ls2 = cpp.linestring(coords_2)
1104+
assert ls3.coords() == ls2.coords()
1105+
assert ls3.length() == ls2.length()
1106+
1107+
# -- Polygon (N,3) generic ------------------------------------------------------
1108+
1109+
def test_polygon_3col_generic(self, cpp):
1110+
coords = np.array([[0, 0, 1], [10, 0, 2], [10, 10, 3], [0, 10, 4], [0, 0, 5]],
1111+
dtype=np.int32)
1112+
poly = cpp.polygon(coords)
1113+
ext = cpp.polygon_exterior(poly)
1114+
# First 5 points must match (GEOS may auto-close to 6 points)
1115+
assert ext.tolist()[:5] == [[0.0, 0.0], [10.0, 0.0], [10.0, 10.0], [0.0, 10.0], [0.0, 0.0]]
1116+
1117+
def test_polygon_3col_typed(self, cpp):
1118+
coords = np.array([[0.0, 0.0, 1.0], [10.0, 0.0, 2.0], [10.0, 10.0, 3.0],
1119+
[0.0, 10.0, 4.0], [0.0, 0.0, 5.0]], dtype=np.float64)
1120+
poly = cpp.polygon(coords)
1121+
ext = cpp.polygon_exterior(poly)
1122+
assert ext.tolist()[:5] == [[0.0, 0.0], [10.0, 0.0], [10.0, 10.0], [0.0, 10.0], [0.0, 0.0]]
1123+
1124+
# -- Point: 1D [x,y] and 2D (N,≥2) int arrays (auto-double overload) -------------
1125+
1126+
def test_point_1d_int(self, cpp):
1127+
p = cpp.point(np.array([3, 4], dtype=np.int32))
1128+
assert p.coords() == [(3.0, 4.0)]
1129+
1130+
def test_point_2d_int(self, cpp):
1131+
p = cpp.point(np.array([[5, 6]], dtype=np.int32))
1132+
assert p.coords() == [(5.0, 6.0)]
1133+
1134+
def test_point_2d_3col_int(self, cpp):
1135+
"""Point from (N,3) int: takes first 2 elements (row 0, col 0-1)."""
1136+
p = cpp.point(np.array([[7, 8, 999], [9, 10, 888]], dtype=np.int32))
1137+
assert p.coords() == [(7.0, 8.0)]
1138+
1139+
# -- MultiPoint (N,3) generic ---------------------------------------------------
1140+
1141+
def test_multipoint_3col_generic(self, cpp):
1142+
mp = cpp.multipoint(np.array([[1, 2, 99], [4, 6, 88]], dtype=np.int32))
1143+
# Verify via distance: (1,2) should be a point in the multipoint
1144+
p = cpp.point(1.0, 2.0)
1145+
assert mp.distance(p) == 0.0
1146+
1147+
# -- LinearRing (N,3) -----------------------------------------------------------
1148+
1149+
def test_linearring_3col(self, cpp, C):
1150+
ring = C.linearring(np.array([[0, 0, 1], [1, 0, 2], [1, 1, 3], [0, 0, 4]],
1151+
dtype=np.float64))
1152+
assert ring.is_closed() == True
1153+
assert ring.is_ring() == True
1154+
1155+
# -- Row/column layout consistency ----------------------------------------------
1156+
1157+
def test_row_col_layout(self, cpp):
1158+
"""Verify Python arr[i,j] ↔ C++ ptr[i*shape[1]+j] mapping."""
1159+
# (3,4) array: row-major [1,2,3,4, 5,6,7,8, 9,10,11,12]
1160+
arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=np.float64)
1161+
ls = cpp.linestring(arr)
1162+
# C++ takes coords_[i*4+0], coords_[i*4+1] → first 2 columns per row
1163+
assert ls.coords() == [(1.0, 2.0), (5.0, 6.0), (9.0, 10.0)]
1164+
1165+
# (3,3) direct: stride=3, skip z
1166+
arr3 = np.array([[10, 20, 99], [30, 40, 88], [50, 60, 77]], dtype=np.float64)
1167+
ls3 = cpp.linestring(arr3)
1168+
assert ls3.coords() == [(10.0, 20.0), (30.0, 40.0), (50.0, 60.0)]
1169+
1170+
# -- poly_exterior output shape --------------------------------------------------
1171+
1172+
def test_exterior_shape(self, cpp):
1173+
coords = np.array([[0, 0], [10, 0], [10, 10], [0, 10]], dtype=np.float64)
1174+
poly = cpp.polygon(coords)
1175+
ext = cpp.polygon_exterior(poly)
1176+
assert ext.ndim == 2
1177+
assert ext.shape[1] == 2 # always 2 columns
1178+
assert ext.dtype == np.float64
1179+
1180+
# -- Non-contiguous view rejection (fail-fast with clear error) ----------------
1181+
1182+
def test_noncontiguous_view_rejected_typed(self, cpp):
1183+
"""Typed overloads throw on non-contiguous views (e.g. [:, :2])."""
1184+
arr = np.array([[10, 20, 99], [30, 40, 88], [50, 60, 77]], dtype=np.float64)
1185+
view = arr[:, :2]
1186+
assert not view.flags['C_CONTIGUOUS']
1187+
with pytest.raises(ValueError, match="C-contiguous"):
1188+
cpp.linestring(view)
1189+
1190+
def test_noncontiguous_view_rejected_generic(self, cpp):
1191+
"""Generic (int dtype) overloads throw on non-contiguous views."""
1192+
arr = np.array([[10, 20, 99], [30, 40, 88], [50, 60, 77]], dtype=np.int32)
1193+
view = arr[:, :2]
1194+
assert not view.flags['C_CONTIGUOUS']
1195+
with pytest.raises(ValueError, match="C-contiguous"):
1196+
cpp.linestring(view)
1197+
1198+
def test_noncontiguous_copy_works_typed(self, cpp):
1199+
""".copy() on a non-contiguous view produces correct results."""
1200+
arr = np.array([[10, 20, 99], [30, 40, 88], [50, 60, 77]], dtype=np.float64)
1201+
cpy = arr[:, :2].copy()
1202+
assert cpy.flags['C_CONTIGUOUS']
1203+
ls = cpp.linestring(cpy)
1204+
assert ls.coords() == [(10.0, 20.0), (30.0, 40.0), (50.0, 60.0)]
1205+
1206+
def test_noncontiguous_copy_works_generic(self, cpp):
1207+
""".copy() on a non-contiguous view works for generic overload."""
1208+
arr = np.array([[10, 20, 99], [30, 40, 88], [50, 60, 77]], dtype=np.int32)
1209+
cpy = arr[:, :2].copy()
1210+
assert cpy.flags['C_CONTIGUOUS']
1211+
ls = cpp.linestring(cpy)
1212+
assert ls.coords() == [(10.0, 20.0), (30.0, 40.0), (50.0, 60.0)]
1213+
1214+
@pytest.mark.parametrize("factory, arr_dtype", [
1215+
("point", np.float64),
1216+
("point", np.int32),
1217+
("polygon", np.float64),
1218+
("polygon", np.int32),
1219+
("multipoint", np.float64),
1220+
("multipoint", np.int32),
1221+
])
1222+
def test_noncontiguous_rejected_all_factories(self, cpp, factory, arr_dtype):
1223+
"""All factory functions reject non-contiguous views."""
1224+
arr = np.array([[10, 20, 99], [30, 40, 88], [50, 60, 77]], dtype=arr_dtype)
1225+
view = arr[:, :2]
1226+
assert not view.flags['C_CONTIGUOUS']
1227+
fn = getattr(cpp, factory)
1228+
with pytest.raises(ValueError, match="C-contiguous"):
1229+
fn(view)

0 commit comments

Comments
 (0)