From 7e152c0ddbfcf68ba7635ac8bb910ce1fbd320c5 Mon Sep 17 00:00:00 2001 From: Joachim Rosskopf Date: Fri, 26 Jun 2026 19:42:04 +0200 Subject: [PATCH] fix: Serialize MAP columns as {key: value} in REST JSON (#89) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the LIST/STRUCT/ARRAY/UNION fix. MAP columns were aliased onto the struct serializer, but a DuckDB MAP is physically LIST(STRUCT(key, value)), so they did not round-trip (serialized to null / wrong type) — reported on #89 after v26.06.25. - Add convertVectorMapToJson: slice the row's duckdb_list_entry, read key/value from the list child struct, emit {key: value} matching DuckDB's to_json. Empty map -> {}, NULL map -> null. - Add vectorEntryToMapKey: raw string for VARCHAR keys (escape-safe), JSON rendering for scalar keys (e.g. 10 -> "10"). - Fix a pre-existing logical-type leak in convertVectorDecimalToJson (now reachable via decimal map keys). - Add MAP regression tests: string keys, integer keys, empty, and NULL. Found via user follow-up + codex review. --- src/include/query_executor.hpp | 2 + src/query_executor.cpp | 63 +++++++++++++++++++++++++++++++- test/cpp/query_executor_test.cpp | 52 ++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 1 deletion(-) diff --git a/src/include/query_executor.hpp b/src/include/query_executor.hpp index 8990d4c..0ca23d9 100644 --- a/src/include/query_executor.hpp +++ b/src/include/query_executor.hpp @@ -59,6 +59,8 @@ class QueryResult { static crow::json::wvalue convertVectorArrayToJson(const duckdb_vector &vector, const idx_t row_idx); static crow::json::wvalue convertVectorStructToJson(const duckdb_vector &vector, const idx_t row_idx); static crow::json::wvalue convertVectorUnionToJson(const duckdb_vector &vector, const idx_t row_idx); + static crow::json::wvalue convertVectorMapToJson(const duckdb_vector &vector, const idx_t row_idx); + static std::string vectorEntryToMapKey(const duckdb_vector &vector, const idx_t row_idx); template static crow::json::wvalue convertVectorEntryToJson(const duckdb_vector &vector, const idx_t row_idx) { diff --git a/src/query_executor.cpp b/src/query_executor.cpp index bb714be..c24dbb5 100644 --- a/src/query_executor.cpp +++ b/src/query_executor.cpp @@ -286,7 +286,7 @@ crow::json::wvalue QueryResult::convertVectorEntryToJson(const duckdb_vector &ve case DUCKDB_TYPE_BIT: return convertVectorVarcharToJson(vector, row_idx); // Treat as string for JSON case DUCKDB_TYPE_MAP: - return convertVectorStructToJson(vector, row_idx); // Treat as struct for JSON + return convertVectorMapToJson(vector, row_idx); case DUCKDB_TYPE_ARRAY: return convertVectorArrayToJson(vector, row_idx); case DUCKDB_TYPE_UNION: @@ -348,6 +348,7 @@ crow::json::wvalue QueryResult::convertVectorDecimalToJson(const duckdb_vector & auto decimal_type = duckdb_decimal_internal_type(type); auto decimal_width = duckdb_decimal_width(type); auto decimal_scale = duckdb_decimal_scale(type); + duckdb_destroy_logical_type(&type); auto hugeint = duckdb_hugeint {0, 0}; switch (decimal_type) { @@ -533,4 +534,64 @@ crow::json::wvalue QueryResult::convertVectorUnionToJson(const duckdb_vector &ve return result; } +std::string QueryResult::vectorEntryToMapKey(const duckdb_vector &vector, const idx_t row_idx) { + auto type = duckdb_vector_get_column_type(vector); + bool is_string = duckdb_get_type_id(type) == DUCKDB_TYPE_VARCHAR; + duckdb_destroy_logical_type(&type); + + auto validity = duckdb_vector_get_validity(vector); + if (!duckdb_validity_row_is_valid(validity, row_idx)) { + return std::string(); + } + + if (is_string) { + auto data = static_cast(duckdb_vector_get_data(vector)); + return duckdb_string_is_inlined(data[row_idx]) + ? std::string(data[row_idx].value.inlined.inlined, data[row_idx].value.inlined.length) + : std::string(static_cast(data[row_idx].value.pointer.ptr), data[row_idx].value.pointer.length); + } + + // Non-string key: render the scalar and use its JSON form as the object + // key (e.g. integer 10 -> "10"), matching DuckDB's to_json for scalar + // keys. Strip the quotes a string-like rendering (date/UUID/etc.) adds. + // Composite keys (STRUCT/LIST) are rare and fall back to their JSON + // rendering rather than DuckDB's VARCHAR cast. + auto rendered = convertVectorEntryToJson(vector, row_idx); + auto dumped = rendered.dump(); + if (dumped.size() >= 2 && dumped.front() == '"' && dumped.back() == '"') { + return dumped.substr(1, dumped.size() - 2); + } + return dumped; +} + +crow::json::wvalue QueryResult::convertVectorMapToJson(const duckdb_vector &vector, const idx_t row_idx) { + auto validity = duckdb_vector_get_validity(vector); + if (!duckdb_validity_row_is_valid(validity, row_idx)) { + return crow::json::wvalue(nullptr); + } + + // A MAP is physically LIST(STRUCT(key, value)). Slice the row's entries + // from the list child via its duckdb_list_entry, then emit a JSON object + // {key: value} (DuckDB's own to_json shape). A non-null but empty map + // serializes as {} rather than null. + auto entries = static_cast(duckdb_vector_get_data(vector)); + auto entry = entries[row_idx]; + + auto kv_struct = duckdb_list_vector_get_child(vector); + auto child_size = duckdb_list_vector_get_size(vector); + auto key_vector = duckdb_struct_vector_get_child(kv_struct, 0); + auto value_vector = duckdb_struct_vector_get_child(kv_struct, 1); + + crow::json::wvalue result = crow::json::wvalue::empty_object(); + for (idx_t i = 0; i < entry.length; i++) { + idx_t child_idx = entry.offset + i; + if (child_idx >= child_size) { + break; + } + result[vectorEntryToMapKey(key_vector, child_idx)] = convertVectorEntryToJson(value_vector, child_idx); + } + + return result; +} + } // namespace flapi \ No newline at end of file diff --git a/test/cpp/query_executor_test.cpp b/test/cpp/query_executor_test.cpp index 602b329..1b0e3c7 100644 --- a/test/cpp/query_executor_test.cpp +++ b/test/cpp/query_executor_test.cpp @@ -379,6 +379,58 @@ TEST_CASE("QueryExecutor LIST/STRUCT per-row serialization", "[query_executor][l REQUIRE_FALSE(doc[2]["u"].has("str")); } + SECTION("multi-row MAP serializes as a per-row {key: value} object") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT * FROM (VALUES + (1, map_from_entries([('a', 1), ('b', 2)])), + (2, map_from_entries([('x', 9)])) + ) AS t(id, reasons) + ORDER BY id + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 2); + + // Matches DuckDB's to_json(map) shape; previously serialized to null. + REQUIRE(doc[0]["reasons"].t() == crow::json::type::Object); + REQUIRE(doc[0]["reasons"]["a"].i() == 1); + REQUIRE(doc[0]["reasons"]["b"].i() == 2); + + REQUIRE(doc[1]["reasons"].t() == crow::json::type::Object); + REQUIRE(doc[1]["reasons"]["x"].i() == 9); + REQUIRE_FALSE(doc[1]["reasons"].has("a")); + } + + SECTION("MAP with integer keys stringifies keys like to_json") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT map_from_entries([(10, 'x'), (20, 'y')]) AS m + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 1); + REQUIRE(doc[0]["m"].t() == crow::json::type::Object); + REQUIRE(doc[0]["m"]["10"].s() == "x"); + REQUIRE(doc[0]["m"]["20"].s() == "y"); + } + + SECTION("NULL and empty MAP") { + QueryExecutor executor(database); + executor.execute(R"SQL( + SELECT id, m FROM ( + SELECT 1 AS id, MAP{}::MAP(VARCHAR, INTEGER) AS m + UNION ALL + SELECT 2, CAST(NULL AS MAP(VARCHAR, INTEGER)) + ) ORDER BY id + )SQL"); + + auto doc = crow::json::load(executor.toJson().dump()); + REQUIRE(doc.size() == 2); + REQUIRE(doc[0]["m"].t() == crow::json::type::Object); // empty -> {} + REQUIRE(doc[1]["m"].t() == crow::json::type::Null); // null -> null + } + SECTION("NULL list entry stays null") { QueryExecutor executor(database); executor.execute(R"SQL(