Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/include/query_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ class QueryResult {
static crow::json::wvalue convertVectorIntervalToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorEnumToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorListToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorArrayToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorStructToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorUnionToJson(const duckdb_vector &vector, const idx_t row_idx);

template<typename T>
static crow::json::wvalue convertVectorEntryToJson(const duckdb_vector &vector, const idx_t row_idx) {
Expand Down
92 changes: 87 additions & 5 deletions src/query_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,9 @@ crow::json::wvalue QueryResult::convertVectorEntryToJson(const duckdb_vector &ve
case DUCKDB_TYPE_MAP:
return convertVectorStructToJson(vector, row_idx); // Treat as struct for JSON
case DUCKDB_TYPE_ARRAY:
return convertVectorListToJson(vector, row_idx); // Treat as list for JSON
return convertVectorArrayToJson(vector, row_idx);
case DUCKDB_TYPE_UNION:
return convertVectorStructToJson(vector, row_idx); // Treat as struct for JSON
return convertVectorUnionToJson(vector, row_idx);
default:
CROW_LOG_WARNING << "Unknown type: " << type_id;
return crow::json::wvalue(nullptr);
Expand Down Expand Up @@ -422,18 +422,61 @@ crow::json::wvalue QueryResult::convertVectorEnumToJson(const duckdb_vector &vec
}

crow::json::wvalue QueryResult::convertVectorListToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
return crow::json::wvalue(nullptr);
}

// A LIST column stores every row's elements back-to-back in a single child
// vector. The per-row slice is described by the list_entry at `row_idx`
// (offset + length); emitting the whole child vector instead would repeat
// every other row's elements (issue #89).
auto entries = static_cast<duckdb_list_entry *>(duckdb_vector_get_data(vector));
auto entry = entries[row_idx];
auto child_vector = duckdb_list_vector_get_child(vector);
auto child_size = duckdb_list_vector_get_size(vector);

std::vector<crow::json::wvalue> result;
for (idx_t i = 0; i < child_size; i++) {
result.push_back(convertVectorEntryToJson(child_vector, i));
for (idx_t i = 0; i < entry.length; i++) {
idx_t child_idx = entry.offset + i;
if (child_idx >= child_size) {
break;
}
result.push_back(convertVectorEntryToJson(child_vector, child_idx));
}

return crow::json::wvalue(result);
}

crow::json::wvalue QueryResult::convertVectorArrayToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
return crow::json::wvalue(nullptr);
}

// A fixed-size ARRAY has no per-row list_entry: every row occupies a
// constant `array_size` run in the child vector, so this row's slice
// starts at row_idx * array_size (unlike LIST, which carries offsets).
auto array_type = duckdb_vector_get_column_type(vector);
auto array_size = duckdb_array_type_array_size(array_type);
duckdb_destroy_logical_type(&array_type);

auto child_vector = duckdb_array_vector_get_child(vector);

std::vector<crow::json::wvalue> result;
for (idx_t i = 0; i < array_size; i++) {
result.push_back(convertVectorEntryToJson(child_vector, row_idx * array_size + i));
}

return crow::json::wvalue(result);
}

crow::json::wvalue QueryResult::convertVectorStructToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
return crow::json::wvalue(nullptr);
}

auto struct_type = duckdb_vector_get_column_type(vector);
auto child_size = duckdb_struct_type_child_count(struct_type);

Expand All @@ -443,12 +486,51 @@ crow::json::wvalue QueryResult::convertVectorStructToJson(const duckdb_vector &v
DuckDBString child_name_wrapper(duckdb_struct_type_child_name(struct_type, i));
auto str_name = child_name_wrapper.to_string();

// Each struct field is a parallel child vector; read this row's entry
// (`row_idx`), not element 0, so multi-row results and structs nested
// inside a list resolve the correct element (issue #89).
auto child_vector = duckdb_struct_vector_get_child(vector, i);
result[str_name] = convertVectorEntryToJson(child_vector, 0);
result[str_name] = convertVectorEntryToJson(child_vector, row_idx);
}

duckdb_destroy_logical_type(&struct_type);
return result;
}

crow::json::wvalue QueryResult::convertVectorUnionToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
return crow::json::wvalue(nullptr);
}

// A UNION is physically a STRUCT: child 0 is the tag (uint8_t), children
// 1..n are the candidate members. Only the member selected by the row's
// tag is valid, so emit just that member as {name: value} (matching
// DuckDB's own to_json) rather than every member, which the generic
// struct path did, exposing inactive members.
auto union_type = duckdb_vector_get_column_type(vector);
auto member_count = duckdb_union_type_member_count(union_type);

auto tag_vector = duckdb_struct_vector_get_child(vector, 0);
auto tags = static_cast<uint8_t *>(duckdb_vector_get_data(tag_vector));
idx_t member_idx = static_cast<idx_t>(tags[row_idx]);
if (member_idx >= member_count) {
// Out-of-range tag should not happen for well-formed unions; fail
// safe rather than reading past the struct children.
duckdb_destroy_logical_type(&union_type);
return crow::json::wvalue(nullptr);
}

DuckDBString member_name_wrapper(duckdb_union_type_member_name(union_type, member_idx));
auto member_name = member_name_wrapper.to_string();
duckdb_destroy_logical_type(&union_type);

// Member i lives at struct child index i + 1 (the tag occupies slot 0).
auto member_vector = duckdb_struct_vector_get_child(vector, member_idx + 1);

crow::json::wvalue result;
result[member_name] = convertVectorEntryToJson(member_vector, row_idx);
return result;
}

} // namespace flapi
170 changes: 170 additions & 0 deletions test/cpp/query_executor_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,176 @@ TEST_CASE("QueryExecutor chunk experiment", "[query_executor]") {
duckdb_close(&database);
}

TEST_CASE("QueryExecutor LIST/STRUCT per-row serialization", "[query_executor][list]") {
// Regression for issue #89: native LIST(STRUCT) / LIST(VARCHAR) columns
// must serialize each row's own slice of the list child vector, not the
// whole child vector, and structs nested inside a list must read their
// own element rather than always element[0].
duckdb_database database;
REQUIRE(duckdb_open(NULL, &database) == DuckDBSuccess);

SECTION("single-row LIST(STRUCT) emits distinct elements") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT [
{'stage': 'prepare', 'n_dropped': 11945},
{'stage': 'features', 'n_dropped': 57203},
{'stage': 'postprocess', 'n_dropped': 238505}
] AS by_stage
)SQL");

auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 1);
REQUIRE(doc[0]["by_stage"].size() == 3);
REQUIRE(doc[0]["by_stage"][0]["stage"].s() == "prepare");
REQUIRE(doc[0]["by_stage"][0]["n_dropped"].i() == 11945);
REQUIRE(doc[0]["by_stage"][1]["stage"].s() == "features");
REQUIRE(doc[0]["by_stage"][1]["n_dropped"].i() == 57203);
REQUIRE(doc[0]["by_stage"][2]["stage"].s() == "postprocess");
REQUIRE(doc[0]["by_stage"][2]["n_dropped"].i() == 238505);
}

SECTION("multi-row LIST(VARCHAR) keeps each row's own list") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT * FROM (VALUES
(1, ['a1', 'a2', 'a3']),
(2, ['b1', 'b2', 'b3']),
(3, ['c1', 'c2', 'c3']),
(4, ['d1', 'd2', 'd3'])
) AS t(id, flags)
ORDER BY id
)SQL");

auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 4);

REQUIRE(doc[0]["flags"].size() == 3);
REQUIRE(doc[0]["flags"][0].s() == "a1");
REQUIRE(doc[0]["flags"][2].s() == "a3");

REQUIRE(doc[1]["flags"].size() == 3);
REQUIRE(doc[1]["flags"][0].s() == "b1");
REQUIRE(doc[1]["flags"][2].s() == "b3");

REQUIRE(doc[3]["flags"].size() == 3);
REQUIRE(doc[3]["flags"][0].s() == "d1");
REQUIRE(doc[3]["flags"][2].s() == "d3");
}

SECTION("multi-row LIST(STRUCT) keeps each row's own elements") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT * FROM (VALUES
(1, [{'k': 'a', 'v': 1}, {'k': 'b', 'v': 2}]),
(2, [{'k': 'c', 'v': 3}, {'k': 'd', 'v': 4}])
) AS t(id, items)
ORDER BY id
)SQL");

auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 2);

REQUIRE(doc[0]["items"].size() == 2);
REQUIRE(doc[0]["items"][0]["k"].s() == "a");
REQUIRE(doc[0]["items"][0]["v"].i() == 1);
REQUIRE(doc[0]["items"][1]["k"].s() == "b");
REQUIRE(doc[0]["items"][1]["v"].i() == 2);

REQUIRE(doc[1]["items"].size() == 2);
REQUIRE(doc[1]["items"][0]["k"].s() == "c");
REQUIRE(doc[1]["items"][0]["v"].i() == 3);
REQUIRE(doc[1]["items"][1]["k"].s() == "d");
REQUIRE(doc[1]["items"][1]["v"].i() == 4);
}

SECTION("multi-row STRUCT column reads its own row") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT * FROM (VALUES
(1, {'name': 'alice', 'age': 30}),
(2, {'name': 'bob', 'age': 40})
) AS t(id, person)
ORDER BY id
)SQL");

auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 2);
REQUIRE(doc[0]["person"]["name"].s() == "alice");
REQUIRE(doc[0]["person"]["age"].i() == 30);
REQUIRE(doc[1]["person"]["name"].s() == "bob");
REQUIRE(doc[1]["person"]["age"].i() == 40);
}

SECTION("multi-row fixed-size ARRAY keeps each row's own elements") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT * FROM (VALUES
(1, [10, 11, 12]::INTEGER[3]),
(2, [20, 21, 22]::INTEGER[3]),
(3, [30, 31, 32]::INTEGER[3])
) AS t(id, coords)
ORDER BY id
)SQL");

auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 3);

REQUIRE(doc[0]["coords"].size() == 3);
REQUIRE(doc[0]["coords"][0].i() == 10);
REQUIRE(doc[0]["coords"][2].i() == 12);

REQUIRE(doc[2]["coords"].size() == 3);
REQUIRE(doc[2]["coords"][0].i() == 30);
REQUIRE(doc[2]["coords"][2].i() == 32);
}

SECTION("multi-row UNION emits only the active member per row") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT id, u FROM (
SELECT 1 AS id, union_value(num := 42)::UNION(num INTEGER, str VARCHAR) AS u
UNION ALL
SELECT 2, union_value(str := 'hi')::UNION(num INTEGER, str VARCHAR)
UNION ALL
SELECT 3, union_value(num := 7)::UNION(num INTEGER, str VARCHAR)
) ORDER BY id
)SQL");

auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 3);

// Each row exposes only its active member ({name: value}), matching
// DuckDB's to_json — not every union member.
REQUIRE(doc[0]["u"]["num"].i() == 42);
REQUIRE_FALSE(doc[0]["u"].has("str"));

REQUIRE(doc[1]["u"]["str"].s() == "hi");
REQUIRE_FALSE(doc[1]["u"].has("num"));

REQUIRE(doc[2]["u"]["num"].i() == 7);
REQUIRE_FALSE(doc[2]["u"].has("str"));
}

SECTION("NULL list entry stays null") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT * FROM (VALUES
(1, ['x', 'y']),
(2, CAST(NULL AS VARCHAR[]))
) AS t(id, flags)
ORDER BY id
)SQL");

auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 2);
REQUIRE(doc[0]["flags"].size() == 2);
REQUIRE(doc[1]["flags"].t() == crow::json::type::Null);
}

duckdb_close(&database);
}

TEST_CASE("QueryExecutor::executeWithBindings - prepared path", "[query_executor][prepared]") {
duckdb_database database;
REQUIRE(duckdb_open(NULL, &database) == DuckDBSuccess);
Expand Down
Loading