Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/include/query_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class QueryResult {
static crow::json::wvalue convertVectorTimeToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorIntervalToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorEnumToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorUuidToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorHugeintToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorUhugeintToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorBlobToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorBitToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorListToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorArrayToJson(const duckdb_vector &vector, const idx_t row_idx);
static crow::json::wvalue convertVectorStructToJson(const duckdb_vector &vector, const idx_t row_idx);
Expand Down
147 changes: 142 additions & 5 deletions src/query_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ crow::json::wvalue QueryResult::convertVectorEntryToJson(const duckdb_vector &ve
case DUCKDB_TYPE_BIGINT:
return convertVectorEntryToJson<std::int64_t>(vector, row_idx);
case DUCKDB_TYPE_HUGEINT:
return convertVectorEntryToJson<std::int64_t>(vector, row_idx);
return convertVectorHugeintToJson(vector, row_idx);
case DUCKDB_TYPE_FLOAT:
return convertVectorEntryToJson<float>(vector, row_idx);
case DUCKDB_TYPE_DOUBLE:
Expand Down Expand Up @@ -278,13 +278,13 @@ crow::json::wvalue QueryResult::convertVectorEntryToJson(const duckdb_vector &ve
case DUCKDB_TYPE_UBIGINT:
return convertVectorEntryToJson<std::uint64_t>(vector, row_idx);
case DUCKDB_TYPE_UHUGEINT:
return convertVectorEntryToJson<std::uint64_t>(vector, row_idx); // Treat as uint64
return convertVectorUhugeintToJson(vector, row_idx);
case DUCKDB_TYPE_BLOB:
return convertVectorVarcharToJson(vector, row_idx); // Treat as string for JSON
return convertVectorBlobToJson(vector, row_idx);
case DUCKDB_TYPE_UUID:
return convertVectorVarcharToJson(vector, row_idx); // Treat as string for JSON
return convertVectorUuidToJson(vector, row_idx);
case DUCKDB_TYPE_BIT:
return convertVectorVarcharToJson(vector, row_idx); // Treat as string for JSON
return convertVectorBitToJson(vector, row_idx);
case DUCKDB_TYPE_MAP:
return convertVectorMapToJson(vector, row_idx);
case DUCKDB_TYPE_ARRAY:
Expand Down Expand Up @@ -422,6 +422,143 @@ crow::json::wvalue QueryResult::convertVectorEnumToJson(const duckdb_vector &vec
return crow::json::wvalue(str_val);
}

crow::json::wvalue QueryResult::convertVectorUuidToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
return crow::json::wvalue(nullptr);
}

// UUID is physically a 128-bit integer (hugeint), NOT a string — reading
// it via the VARCHAR path dereferences garbage and crashes (issue #89).
// Format it the way DuckDB's UUID::ToString does: flip the stored MSB
// back, then emit the canonical 8-4-4-4-12 lowercase hex form.
auto data = static_cast<duckdb_hugeint *>(duckdb_vector_get_data(vector));
auto value = data[row_idx];
uint64_t upper = static_cast<uint64_t>(value.upper) ^ (uint64_t(1) << 63);
uint64_t lower = value.lower;

static const char *HEX = "0123456789abcdef";
char buf[36];
idx_t pos = 0;
auto byte_to_hex = [&](uint64_t byte_val) {
buf[pos++] = HEX[(byte_val >> 4) & 0xf];
buf[pos++] = HEX[byte_val & 0xf];
};
byte_to_hex((upper >> 56) & 0xFF);
byte_to_hex((upper >> 48) & 0xFF);
byte_to_hex((upper >> 40) & 0xFF);
byte_to_hex((upper >> 32) & 0xFF);
buf[pos++] = '-';
byte_to_hex((upper >> 24) & 0xFF);
byte_to_hex((upper >> 16) & 0xFF);
buf[pos++] = '-';
byte_to_hex((upper >> 8) & 0xFF);
byte_to_hex(upper & 0xFF);
buf[pos++] = '-';
byte_to_hex((lower >> 56) & 0xFF);
byte_to_hex((lower >> 48) & 0xFF);
buf[pos++] = '-';
byte_to_hex((lower >> 40) & 0xFF);
byte_to_hex((lower >> 32) & 0xFF);
byte_to_hex((lower >> 24) & 0xFF);
byte_to_hex((lower >> 16) & 0xFF);
byte_to_hex((lower >> 8) & 0xFF);
byte_to_hex(lower & 0xFF);

return crow::json::wvalue(std::string(buf, pos));
}

crow::json::wvalue QueryResult::convertVectorHugeintToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
return crow::json::wvalue(nullptr);
}

// HUGEINT is 128-bit: reading it as int64 truncates and mis-strides for
// multi-row chunks (issue #89). Emit the exact decimal as a JSON string so
// values beyond 2^63 survive (JSON consumers lose precision above 2^53).
auto data = static_cast<duckdb_hugeint *>(duckdb_vector_get_data(vector));
auto value = duckdb_create_hugeint(data[row_idx]);
DuckDBString str(duckdb_value_to_string(value));
duckdb_destroy_value(&value);
return crow::json::wvalue(str.to_string());
}

crow::json::wvalue QueryResult::convertVectorUhugeintToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
return crow::json::wvalue(nullptr);
}

// UHUGEINT is unsigned 128-bit; emit as a decimal string for the same
// precision reasons as HUGEINT.
auto data = static_cast<duckdb_uhugeint *>(duckdb_vector_get_data(vector));
auto value = duckdb_create_uhugeint(data[row_idx]);
DuckDBString str(duckdb_value_to_string(value));
duckdb_destroy_value(&value);
return crow::json::wvalue(str.to_string());
}

crow::json::wvalue QueryResult::convertVectorBlobToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
return crow::json::wvalue(nullptr);
}

// A BLOB's bytes are arbitrary binary; emitting them raw via the VARCHAR
// path can produce invalid UTF-8 / invalid JSON. Render DuckDB's own blob
// string form (printable bytes as-is, others as \xNN), matching to_json /
// CAST AS VARCHAR. (duckdb_value_to_string would add a '...'::BLOB wrapper.)
auto data = static_cast<duckdb_string_t *>(duckdb_vector_get_data(vector));
auto length = duckdb_string_is_inlined(data[row_idx])
? data[row_idx].value.inlined.length
: data[row_idx].value.pointer.length;
auto bytes = duckdb_string_is_inlined(data[row_idx])
? reinterpret_cast<const uint8_t *>(data[row_idx].value.inlined.inlined)
: reinterpret_cast<const uint8_t *>(data[row_idx].value.pointer.ptr);

static const char *HEX = "0123456789ABCDEF";
std::string out;
out.reserve(length);
for (idx_t i = 0; i < length; i++) {
uint8_t c = bytes[i];
bool regular = c >= 32 && c <= 126 && c != '\\' && c != '\'' && c != '"';
if (regular) {
out.push_back(static_cast<char>(c));
} else {
out.push_back('\\');
out.push_back('x');
out.push_back(HEX[(c >> 4) & 0xF]);
out.push_back(HEX[c & 0xF]);
}
}
return crow::json::wvalue(out);
}

crow::json::wvalue QueryResult::convertVectorBitToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
return crow::json::wvalue(nullptr);
}

// BIT is stored as its internal bitstring blob; the VARCHAR path renders
// that raw storage (padding byte + bytes) as garbage. Round-trip through a
// BIT value so it serializes as a "0101" string, matching to_json.
auto data = static_cast<duckdb_string_t *>(duckdb_vector_get_data(vector));
auto length = duckdb_string_is_inlined(data[row_idx])
? data[row_idx].value.inlined.length
: data[row_idx].value.pointer.length;
auto bytes = duckdb_string_is_inlined(data[row_idx])
? reinterpret_cast<const uint8_t *>(data[row_idx].value.inlined.inlined)
: reinterpret_cast<const uint8_t *>(data[row_idx].value.pointer.ptr);

duckdb_bit bit { const_cast<uint8_t *>(bytes), length };
auto value = duckdb_create_bit(bit);
DuckDBString str(duckdb_value_to_string(value));
duckdb_destroy_value(&value);
return crow::json::wvalue(str.to_string());
}

crow::json::wvalue QueryResult::convertVectorListToJson(const duckdb_vector &vector, const idx_t row_idx) {
auto validity = duckdb_vector_get_validity(vector);
if (!duckdb_validity_row_is_valid(validity, row_idx)) {
Expand Down
84 changes: 84 additions & 0 deletions test/cpp/query_executor_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,90 @@ TEST_CASE("QueryExecutor type coverage", "[query_executor]") {
duckdb_close(&database);
}

TEST_CASE("QueryExecutor scalar type coverage", "[query_executor][scalar_types]") {
// Regression for issue #89 type audit: UUID previously crashed (read as a
// string pointer), HUGEINT/UHUGEINT silently truncated, BLOB/BIT emitted
// garbage. All now match DuckDB's own string/JSON representation.
duckdb_database database;
REQUIRE(duckdb_open(NULL, &database) == DuckDBSuccess);

SECTION("UUID serializes as canonical string (no crash)") {
QueryExecutor executor(database);
executor.execute("SELECT '12345678-1234-5678-1234-567812345678'::UUID AS v");
auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 1);
REQUIRE(doc[0]["v"].t() == crow::json::type::String);
REQUIRE(doc[0]["v"].s() == "12345678-1234-5678-1234-567812345678");
}

SECTION("multi-row UUID keeps each row's own value") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT * FROM (VALUES
('11111111-1111-1111-1111-111111111111'::UUID),
('22222222-2222-2222-2222-222222222222'::UUID)
) t(v) ORDER BY v
)SQL");
auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 2);
REQUIRE(doc[0]["v"].s() == "11111111-1111-1111-1111-111111111111");
REQUIRE(doc[1]["v"].s() == "22222222-2222-2222-2222-222222222222");
}

SECTION("HUGEINT emits exact decimal string") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT * FROM (VALUES
(1, 1::HUGEINT),
(2, 170141183460469231731687303715884105727::HUGEINT),
(3, (-9999999999999999999)::HUGEINT)
) t(id, v) ORDER BY id
)SQL");
auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc.size() == 3);
REQUIRE(doc[0]["v"].s() == "1");
REQUIRE(doc[1]["v"].s() == "170141183460469231731687303715884105727");
REQUIRE(doc[2]["v"].s() == "-9999999999999999999");
}

SECTION("UHUGEINT emits exact decimal string") {
QueryExecutor executor(database);
executor.execute("SELECT 340282366920938463463374607431768211455::UHUGEINT AS v");
auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc[0]["v"].s() == "340282366920938463463374607431768211455");
}

SECTION("BLOB emits DuckDB blob string, not garbled bytes") {
QueryExecutor executor(database);
executor.execute("SELECT '\\xAA\\xBB'::BLOB AS v");
auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc[0]["v"].t() == crow::json::type::String);
REQUIRE(doc[0]["v"].s() == "\\xAA\\xBB");
}

SECTION("BIT emits its 0/1 string") {
QueryExecutor executor(database);
executor.execute("SELECT '101010'::BIT AS v");
auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc[0]["v"].s() == "101010");
}

SECTION("NULL values of these types stay null") {
QueryExecutor executor(database);
executor.execute(R"SQL(
SELECT CAST(NULL AS UUID) AS u, CAST(NULL AS HUGEINT) AS h,
CAST(NULL AS BLOB) AS b, CAST(NULL AS BIT) AS t
)SQL");
auto doc = crow::json::load(executor.toJson().dump());
REQUIRE(doc[0]["u"].t() == crow::json::type::Null);
REQUIRE(doc[0]["h"].t() == crow::json::type::Null);
REQUIRE(doc[0]["b"].t() == crow::json::type::Null);
REQUIRE(doc[0]["t"].t() == crow::json::type::Null);
}

duckdb_close(&database);
}

TEST_CASE("QueryExecutor chunk experiment", "[query_executor]") {
duckdb_database database;
REQUIRE(duckdb_open(NULL, &database) == DuckDBSuccess);
Expand Down
Loading