diff --git a/src/include/query_executor.hpp b/src/include/query_executor.hpp index bf69039..8990d4c 100644 --- a/src/include/query_executor.hpp +++ b/src/include/query_executor.hpp @@ -56,7 +56,9 @@ class QueryResult { static crow::json::wvalue convertVectorIntervalToJson(const duckdb_vector &vector, const idx_t row_idx); static crow::json::wvalue convertVectorEnumToJson(const duckdb_vector &vector, const idx_t row_idx); static crow::json::wvalue convertVectorListToJson(const duckdb_vector &vector, const idx_t row_idx); + static crow::json::wvalue convertVectorArrayToJson(const duckdb_vector &vector, const idx_t row_idx); static crow::json::wvalue convertVectorStructToJson(const duckdb_vector &vector, const idx_t row_idx); + static crow::json::wvalue convertVectorUnionToJson(const duckdb_vector &vector, const idx_t row_idx); template static crow::json::wvalue convertVectorEntryToJson(const duckdb_vector &vector, const idx_t row_idx) { diff --git a/src/query_executor.cpp b/src/query_executor.cpp index 9bf4074..bb714be 100644 --- a/src/query_executor.cpp +++ b/src/query_executor.cpp @@ -288,9 +288,9 @@ crow::json::wvalue QueryResult::convertVectorEntryToJson(const duckdb_vector &ve case DUCKDB_TYPE_MAP: return convertVectorStructToJson(vector, row_idx); // Treat as struct for JSON case DUCKDB_TYPE_ARRAY: - return convertVectorListToJson(vector, row_idx); // Treat as list for JSON + return convertVectorArrayToJson(vector, row_idx); case DUCKDB_TYPE_UNION: - return convertVectorStructToJson(vector, row_idx); // Treat as struct for JSON + return convertVectorUnionToJson(vector, row_idx); default: CROW_LOG_WARNING << "Unknown type: " << type_id; return crow::json::wvalue(nullptr); @@ -422,18 +422,61 @@ crow::json::wvalue QueryResult::convertVectorEnumToJson(const duckdb_vector &vec } crow::json::wvalue QueryResult::convertVectorListToJson(const duckdb_vector &vector, const idx_t row_idx) { + auto validity = duckdb_vector_get_validity(vector); + if (!duckdb_validity_row_is_valid(validity, row_idx)) { + return crow::json::wvalue(nullptr); + } + + // A LIST column stores every row's elements back-to-back in a single child + // vector. The per-row slice is described by the list_entry at `row_idx` + // (offset + length); emitting the whole child vector instead would repeat + // every other row's elements (issue #89). + auto entries = static_cast(duckdb_vector_get_data(vector)); + auto entry = entries[row_idx]; auto child_vector = duckdb_list_vector_get_child(vector); auto child_size = duckdb_list_vector_get_size(vector); std::vector result; - for (idx_t i = 0; i < child_size; i++) { - result.push_back(convertVectorEntryToJson(child_vector, i)); + for (idx_t i = 0; i < entry.length; i++) { + idx_t child_idx = entry.offset + i; + if (child_idx >= child_size) { + break; + } + result.push_back(convertVectorEntryToJson(child_vector, child_idx)); + } + + return crow::json::wvalue(result); +} + +crow::json::wvalue QueryResult::convertVectorArrayToJson(const duckdb_vector &vector, const idx_t row_idx) { + auto validity = duckdb_vector_get_validity(vector); + if (!duckdb_validity_row_is_valid(validity, row_idx)) { + return crow::json::wvalue(nullptr); + } + + // A fixed-size ARRAY has no per-row list_entry: every row occupies a + // constant `array_size` run in the child vector, so this row's slice + // starts at row_idx * array_size (unlike LIST, which carries offsets). + auto array_type = duckdb_vector_get_column_type(vector); + auto array_size = duckdb_array_type_array_size(array_type); + duckdb_destroy_logical_type(&array_type); + + auto child_vector = duckdb_array_vector_get_child(vector); + + std::vector result; + for (idx_t i = 0; i < array_size; i++) { + result.push_back(convertVectorEntryToJson(child_vector, row_idx * array_size + i)); } return crow::json::wvalue(result); } crow::json::wvalue QueryResult::convertVectorStructToJson(const duckdb_vector &vector, const idx_t row_idx) { + auto validity = duckdb_vector_get_validity(vector); + if (!duckdb_validity_row_is_valid(validity, row_idx)) { + return crow::json::wvalue(nullptr); + } + auto struct_type = duckdb_vector_get_column_type(vector); auto child_size = duckdb_struct_type_child_count(struct_type); @@ -443,12 +486,51 @@ crow::json::wvalue QueryResult::convertVectorStructToJson(const duckdb_vector &v DuckDBString child_name_wrapper(duckdb_struct_type_child_name(struct_type, i)); auto str_name = child_name_wrapper.to_string(); + // Each struct field is a parallel child vector; read this row's entry + // (`row_idx`), not element 0, so multi-row results and structs nested + // inside a list resolve the correct element (issue #89). auto child_vector = duckdb_struct_vector_get_child(vector, i); - result[str_name] = convertVectorEntryToJson(child_vector, 0); + result[str_name] = convertVectorEntryToJson(child_vector, row_idx); } duckdb_destroy_logical_type(&struct_type); return result; } +crow::json::wvalue QueryResult::convertVectorUnionToJson(const duckdb_vector &vector, const idx_t row_idx) { + auto validity = duckdb_vector_get_validity(vector); + if (!duckdb_validity_row_is_valid(validity, row_idx)) { + return crow::json::wvalue(nullptr); + } + + // A UNION is physically a STRUCT: child 0 is the tag (uint8_t), children + // 1..n are the candidate members. Only the member selected by the row's + // tag is valid, so emit just that member as {name: value} (matching + // DuckDB's own to_json) rather than every member, which the generic + // struct path did, exposing inactive members. + auto union_type = duckdb_vector_get_column_type(vector); + auto member_count = duckdb_union_type_member_count(union_type); + + auto tag_vector = duckdb_struct_vector_get_child(vector, 0); + auto tags = static_cast(duckdb_vector_get_data(tag_vector)); + idx_t member_idx = static_cast(tags[row_idx]); + if (member_idx >= member_count) { + // Out-of-range tag should not happen for well-formed unions; fail + // safe rather than reading past the struct children. + duckdb_destroy_logical_type(&union_type); + return crow::json::wvalue(nullptr); + } + + DuckDBString member_name_wrapper(duckdb_union_type_member_name(union_type, member_idx)); + auto member_name = member_name_wrapper.to_string(); + duckdb_destroy_logical_type(&union_type); + + // Member i lives at struct child index i + 1 (the tag occupies slot 0). + auto member_vector = duckdb_struct_vector_get_child(vector, member_idx + 1); + + crow::json::wvalue result; + result[member_name] = convertVectorEntryToJson(member_vector, row_idx); + return result; +} + } // namespace flapi \ No newline at end of file diff --git a/test/cpp/query_executor_test.cpp b/test/cpp/query_executor_test.cpp index 8c78636..602b329 100644 --- a/test/cpp/query_executor_test.cpp +++ b/test/cpp/query_executor_test.cpp @@ -228,6 +228,176 @@ TEST_CASE("QueryExecutor chunk experiment", "[query_executor]") { duckdb_close(&database); } +TEST_CASE("QueryExecutor LIST/STRUCT per-row serialization", "[query_executor][list]") { + // Regression for issue #89: native LIST(STRUCT) / LIST(VARCHAR) columns + // must serialize each row's own slice of the list child vector, not the + // whole child vector, and structs nested inside a list must read their + // own element rather than always element[0]. + duckdb_database database; + REQUIRE(duckdb_open(NULL, &database) == DuckDBSuccess); + + SECTION("single-row LIST(STRUCT) emits distinct elements") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT [ + {'stage': 'prepare', 'n_dropped': 11945}, + {'stage': 'features', 'n_dropped': 57203}, + {'stage': 'postprocess', 'n_dropped': 238505} + ] AS by_stage + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 1); + REQUIRE(doc[0]["by_stage"].size() == 3); + REQUIRE(doc[0]["by_stage"][0]["stage"].s() == "prepare"); + REQUIRE(doc[0]["by_stage"][0]["n_dropped"].i() == 11945); + REQUIRE(doc[0]["by_stage"][1]["stage"].s() == "features"); + REQUIRE(doc[0]["by_stage"][1]["n_dropped"].i() == 57203); + REQUIRE(doc[0]["by_stage"][2]["stage"].s() == "postprocess"); + REQUIRE(doc[0]["by_stage"][2]["n_dropped"].i() == 238505); + } + + SECTION("multi-row LIST(VARCHAR) keeps each row's own list") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT * FROM (VALUES + (1, ['a1', 'a2', 'a3']), + (2, ['b1', 'b2', 'b3']), + (3, ['c1', 'c2', 'c3']), + (4, ['d1', 'd2', 'd3']) + ) AS t(id, flags) + ORDER BY id + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 4); + + REQUIRE(doc[0]["flags"].size() == 3); + REQUIRE(doc[0]["flags"][0].s() == "a1"); + REQUIRE(doc[0]["flags"][2].s() == "a3"); + + REQUIRE(doc[1]["flags"].size() == 3); + REQUIRE(doc[1]["flags"][0].s() == "b1"); + REQUIRE(doc[1]["flags"][2].s() == "b3"); + + REQUIRE(doc[3]["flags"].size() == 3); + REQUIRE(doc[3]["flags"][0].s() == "d1"); + REQUIRE(doc[3]["flags"][2].s() == "d3"); + } + + SECTION("multi-row LIST(STRUCT) keeps each row's own elements") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT * FROM (VALUES + (1, [{'k': 'a', 'v': 1}, {'k': 'b', 'v': 2}]), + (2, [{'k': 'c', 'v': 3}, {'k': 'd', 'v': 4}]) + ) AS t(id, items) + ORDER BY id + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 2); + + REQUIRE(doc[0]["items"].size() == 2); + REQUIRE(doc[0]["items"][0]["k"].s() == "a"); + REQUIRE(doc[0]["items"][0]["v"].i() == 1); + REQUIRE(doc[0]["items"][1]["k"].s() == "b"); + REQUIRE(doc[0]["items"][1]["v"].i() == 2); + + REQUIRE(doc[1]["items"].size() == 2); + REQUIRE(doc[1]["items"][0]["k"].s() == "c"); + REQUIRE(doc[1]["items"][0]["v"].i() == 3); + REQUIRE(doc[1]["items"][1]["k"].s() == "d"); + REQUIRE(doc[1]["items"][1]["v"].i() == 4); + } + + SECTION("multi-row STRUCT column reads its own row") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT * FROM (VALUES + (1, {'name': 'alice', 'age': 30}), + (2, {'name': 'bob', 'age': 40}) + ) AS t(id, person) + ORDER BY id + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 2); + REQUIRE(doc[0]["person"]["name"].s() == "alice"); + REQUIRE(doc[0]["person"]["age"].i() == 30); + REQUIRE(doc[1]["person"]["name"].s() == "bob"); + REQUIRE(doc[1]["person"]["age"].i() == 40); + } + + SECTION("multi-row fixed-size ARRAY keeps each row's own elements") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT * FROM (VALUES + (1, [10, 11, 12]::INTEGER[3]), + (2, [20, 21, 22]::INTEGER[3]), + (3, [30, 31, 32]::INTEGER[3]) + ) AS t(id, coords) + ORDER BY id + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 3); + + REQUIRE(doc[0]["coords"].size() == 3); + REQUIRE(doc[0]["coords"][0].i() == 10); + REQUIRE(doc[0]["coords"][2].i() == 12); + + REQUIRE(doc[2]["coords"].size() == 3); + REQUIRE(doc[2]["coords"][0].i() == 30); + REQUIRE(doc[2]["coords"][2].i() == 32); + } + + SECTION("multi-row UNION emits only the active member per row") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT id, u FROM ( + SELECT 1 AS id, union_value(num := 42)::UNION(num INTEGER, str VARCHAR) AS u + UNION ALL + SELECT 2, union_value(str := 'hi')::UNION(num INTEGER, str VARCHAR) + UNION ALL + SELECT 3, union_value(num := 7)::UNION(num INTEGER, str VARCHAR) + ) ORDER BY id + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 3); + + // Each row exposes only its active member ({name: value}), matching + // DuckDB's to_json — not every union member. + REQUIRE(doc[0]["u"]["num"].i() == 42); + REQUIRE_FALSE(doc[0]["u"].has("str")); + + REQUIRE(doc[1]["u"]["str"].s() == "hi"); + REQUIRE_FALSE(doc[1]["u"].has("num")); + + REQUIRE(doc[2]["u"]["num"].i() == 7); + REQUIRE_FALSE(doc[2]["u"].has("str")); + } + + SECTION("NULL list entry stays null") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT * FROM (VALUES + (1, ['x', 'y']), + (2, CAST(NULL AS VARCHAR[])) + ) AS t(id, flags) + ORDER BY id + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 2); + REQUIRE(doc[0]["flags"].size() == 2); + REQUIRE(doc[1]["flags"].t() == crow::json::type::Null); + } + + duckdb_close(&database); +} + TEST_CASE("QueryExecutor::executeWithBindings - prepared path", "[query_executor][prepared]") { duckdb_database database; REQUIRE(duckdb_open(NULL, &database) == DuckDBSuccess);