From 197a947651c8e1eb228e8979141ce5e9182963ad Mon Sep 17 00:00:00 2001 From: "Nihal Z. Miaji" <81457724+nihalzp@users.noreply.github.com> Date: Wed, 15 Apr 2026 17:09:10 +0000 Subject: [PATCH 1/5] Cherry-pick of https://github.com/ClickHouse/ClickHouse/pull/101272 with unresolved conflict markers (resolution in next commit) --- Original cherry-pick message follows: Merge pull request #101272 from nihalzp/support-arrow-orc-nullable-tuple Support `Nullable(Tuple)` for `Arrow`, `ArrowStream`, `ORC`, legacy `Parquet` formats --- src/DataTypes/NestedUtils.cpp | 73 +++ src/DataTypes/NestedUtils.h | 7 + src/Formats/insertNullAsDefaultIfNeeded.cpp | 26 + .../Formats/Impl/ArrowColumnToCHColumn.cpp | 30 +- .../Impl/NativeORCBlockInputFormat.cpp | 24 +- .../Formats/Impl/ORCBlockOutputFormat.cpp | 22 +- ...llable_low_cardinality_as_dict_in_arrow.sh | 12 + ...lable_low_cardinality_as_dict_in_arrow.sql | 8 - ...s_nullable_empty_tuple_roundtrip.reference | 186 +++++++ ...formats_nullable_empty_tuple_roundtrip.sql | 115 +++++ ...ide_nullable_arrow_orc_roundtrip.reference | 456 ++++++++++++++++++ ...le_inside_nullable_arrow_orc_roundtrip.sql | 399 +++++++++++++++ ...nside_nullable_parquet_roundtrip.reference | 251 ++++++++++ ...uple_inside_nullable_parquet_roundtrip.sql | 275 +++++++++++ 14 files changed, 1862 insertions(+), 22 deletions(-) create mode 100755 tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sh delete mode 100644 tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sql create mode 100644 tests/queries/0_stateless/04019_formats_nullable_empty_tuple_roundtrip.reference create mode 100644 tests/queries/0_stateless/04019_formats_nullable_empty_tuple_roundtrip.sql create mode 100644 tests/queries/0_stateless/04064_tuple_inside_nullable_arrow_orc_roundtrip.reference create mode 100644 tests/queries/0_stateless/04064_tuple_inside_nullable_arrow_orc_roundtrip.sql create mode 100644 
tests/queries/0_stateless/04065_tuple_inside_nullable_parquet_roundtrip.reference create mode 100644 tests/queries/0_stateless/04065_tuple_inside_nullable_parquet_roundtrip.sql diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 56ebe66c2ecc..8e7ed78e88d9 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -7,11 +7,14 @@ #include #include +#include #include #include #include #include +#include +#include #include #include @@ -121,6 +124,76 @@ std::string extractTableName(const std::string & nested_name) } +ColumnWithTypeAndName unwrapNullableTuple(const ColumnWithTypeAndName & column) +{ + const auto * type_nullable = typeid_cast(column.type.get()); + if (!type_nullable) + return column; + + const auto * tuple_type = typeid_cast(type_nullable->getNestedType().get()); + if (!tuple_type) + return column; + + const auto & col_nullable = assert_cast(*column.column); + + const auto & null_map_data = col_nullable.getNullMapData(); + bool has_nulls = !memoryIsZero(null_map_data.data(), 0, null_map_data.size()); + + if (!has_nulls) + { + /// No actual nulls — just strip the Nullable wrapper. + return {col_nullable.getNestedColumnPtr(), type_nullable->getNestedType(), column.name}; + } + + /// Propagate the struct null map to each Tuple element. + const auto & inner_tuple = assert_cast(col_nullable.getNestedColumn()); + const auto & null_map_ptr = col_nullable.getNullMapColumnPtr(); + Columns new_elements; + DataTypes new_types; + for (size_t i = 0; i < tuple_type->getElements().size(); ++i) + { + auto elem_col = inner_tuple.getColumnPtr(i); + auto elem_type = tuple_type->getElement(i); + if (elem_type->isNullable()) + { + /// Element already Nullable — merge null maps (struct null OR element null). 
+ const auto & existing = assert_cast(*elem_col); + auto merged = ColumnUInt8::create(null_map_ptr->size()); + const auto & s = assert_cast(*null_map_ptr).getData(); + const auto & e = existing.getNullMapData(); + auto & m = merged->getData(); + for (size_t j = 0; j < s.size(); ++j) + m[j] = s[j] | e[j]; + new_elements.push_back(ColumnNullable::create(existing.getNestedColumnPtr(), std::move(merged))); + new_types.push_back(elem_type); + } + else if (elem_type->canBeInsideNullable()) + { + new_elements.push_back(ColumnNullable::create(elem_col, null_map_ptr)); + new_types.push_back(std::make_shared(elem_type)); + } + else + { + /// Array, Map, etc. — replace values at null positions with type defaults. + const auto & nm = col_nullable.getNullMapData(); + auto mutable_col = elem_col->cloneEmpty(); + for (size_t j = 0; j < elem_col->size(); ++j) + { + if (nm[j]) + mutable_col->insertDefault(); + else + mutable_col->insertFrom(*elem_col, j); + } + new_elements.push_back(std::move(mutable_col)); + new_types.push_back(elem_type); + } + } + + auto result_type = tuple_type->hasExplicitNames() ? std::make_shared(std::move(new_types), tuple_type->getElementNames()) + : std::make_shared(std::move(new_types)); + return {ColumnTuple::create(std::move(new_elements)), result_type, column.name}; +} + static Block flattenImpl(const Block & block, bool flatten_named_tuple) { Block res; diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index c358cb46edcf..8ee706276704 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -62,6 +62,13 @@ namespace Nested /// Convert old-style nested (single arrays with same prefix, `n.a`, `n.b`...) to subcolumns of data type Nested. NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types); + /// Unwrap Nullable(Tuple(...)) into Tuple(...) by propagating the struct-level null map + /// to each element. 
Scalar elements become Nullable(T), already-Nullable elements get merged + /// null maps, and non-nullable-compatible elements (Array, Map) get defaults at null positions. + /// When there are no actual nulls, simply strips the Nullable wrapper. + /// Used by format readers (Arrow, ORC) to convert Nullable struct elements for Nested flattening. + ColumnWithTypeAndName unwrapNullableTuple(const ColumnWithTypeAndName & column); + /// Check that sizes of arrays - elements of nested data structures - are equal. void validateArraySizes(const Block & block); diff --git a/src/Formats/insertNullAsDefaultIfNeeded.cpp b/src/Formats/insertNullAsDefaultIfNeeded.cpp index 11162303c264..d62719375d61 100644 --- a/src/Formats/insertNullAsDefaultIfNeeded.cpp +++ b/src/Formats/insertNullAsDefaultIfNeeded.cpp @@ -94,6 +94,27 @@ bool insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const Col return true; } + /// When both input and header are Nullable, unwrap and recurse into the nested types. + /// This can handle cases such as e.g. 
Nullable(Tuple(Nullable(Int32), String)) vs Nullable(Tuple(UInt32, String)) + if (input_column.type->isNullable() && header_column.type->isNullable()) + { + ColumnWithTypeAndName nested_input; + nested_input.column = assert_cast(input_column.column.get())->getNestedColumnPtr(); + nested_input.type = removeNullable(input_column.type); + + ColumnWithTypeAndName nested_header; + nested_header.column = assert_cast(header_column.column.get())->getNestedColumnPtr(); + nested_header.type = removeNullable(header_column.type); + + if (!insertNullAsDefaultIfNeeded(nested_input, nested_header, 0, nullptr)) + return false; + + input_column.column = ColumnNullable::create( + nested_input.column, assert_cast(input_column.column.get())->getNullMapColumnPtr()); + input_column.type = std::make_shared(std::move(nested_input.type)); + return true; + } + if (!isNullableOrLowCardinalityNullable(input_column.type) || isNullableOrLowCardinalityNullable(header_column.type)) return false; @@ -118,6 +139,11 @@ bool insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const Col input_column.type = std::make_shared(removeNullable(lc_type->getDictionaryType())); } + /// After stripping the outer Nullable, the inner type may also need processing. + /// For example, Nullable(Tuple(Nullable(Int), String)) -> Tuple(Nullable(Int), String) + /// still needs the Tuple elements compared against the header to strip inner Nullable. 
+ insertNullAsDefaultIfNeeded(input_column, header_column, column_i, block_missing_values); + return true; } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 1ed34febcd15..7a41c4901969 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -105,6 +105,18 @@ static ColumnWithTypeAndName readColumnWithNumericData(const std::shared_ptr buffer = chunk->data()->buffers[1]; const auto * raw_data = reinterpret_cast(buffer->data()) + chunk->offset(); column_data.insert_assume_reserved(raw_data, raw_data + chunk->length()); + + /// Values at null positions are not guaranteed to be initialized in the source buffer. + /// Zero them out because downstream code (type conversions, serialization) may read all values. + if (chunk->null_count() > 0) + { + size_t start = column_data.size() - chunk->length(); + for (int64_t i = 0; i < chunk->length(); ++i) + { + if (chunk->IsNull(i)) + column_data[start + i] = {}; + } + } } return {std::move(internal_column), std::move(internal_type), column_name}; } @@ -1160,22 +1172,27 @@ static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn( return readOffsetsFromArrowListColumn(arrow_column); } }(); - auto array_column = ColumnArray::create(nested_column.column, offsets_column); - DataTypePtr array_type; - /// If type hint is Nested, we should return Nested type, - /// because we differentiate Nested and simple Array(Tuple) + ColumnPtr array_data_column = nested_column.column; + /// If type hint is Nested and the element is a named Tuple, return the Nested type + /// so that `Nested::flatten` can decompose it into separate arrays. + /// When the element is Nullable(Tuple(...)) (e.g. from Arrow's default nullable schema), + /// unwrap it and propagate the struct null map to each element via `unwrapNullableTuple`. const auto * tuple_type = type_hint && isNested(type_hint) ? 
typeid_cast(removeNullable(nested_column.type).get()) : nullptr; if (tuple_type) { - array_type = createNested(tuple_type->getElements(), tuple_type->getElementNames()); + auto unwrapped = Nested::unwrapNullableTuple({array_data_column, nested_column.type, column_name}); + array_data_column = unwrapped.column; + const auto & result_tuple = assert_cast(*unwrapped.type); + array_type = createNested(result_tuple.getElements(), result_tuple.getElementNames()); } else { array_type = std::make_shared(nested_column.type); } + auto array_column = ColumnArray::create(array_data_column, offsets_column); return {std::move(array_column), array_type, column_name}; } case arrow::Type::STRUCT: @@ -1408,7 +1425,10 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( arrow_column->type()->id() != arrow::Type::LARGE_LIST && arrow_column->type()->id() != arrow::Type::FIXED_SIZE_LIST && arrow_column->type()->id() != arrow::Type::MAP && +<<<<<<< HEAD arrow_column->type()->id() != arrow::Type::STRUCT && +======= +>>>>>>> fc17de3cb80 (Merge pull request #101272 from nihalzp/support-arrow-orc-nullable-tuple) arrow_column->type()->id() != arrow::Type::DICTIONARY) { DataTypePtr nested_type_hint; diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index bfb8a02683d7..0ea9046a588d 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -1710,8 +1710,8 @@ ColumnWithTypeAndName ORCColumnToCHColumn::readColumnFromORCColumn( { bool skipped = false; - if (!inside_nullable && (orc_column->hasNulls || (type_hint && type_hint->isNullable())) && !orc_column->isEncoded - && (orc_type->getKind() != orc::LIST && orc_type->getKind() != orc::MAP && orc_type->getKind() != orc::STRUCT)) + if (!inside_nullable && (orc_column->hasNulls || (type_hint && isNullableOrLowCardinalityNullable(type_hint))) && !orc_column->isEncoded + && 
(orc_type->getKind() != orc::LIST && orc_type->getKind() != orc::MAP)) { DataTypePtr nested_type_hint; if (type_hint) @@ -1883,19 +1883,27 @@ ColumnWithTypeAndName ORCColumnToCHColumn::readColumnFromORCColumn( auto nested_column = readColumnFromORCColumn(orc_nested_column, orc_nested_type, column_name, false, nested_type_hint); auto offsets_column = readOffsetsFromORCListColumn(orc_list_column); - auto array_column = ColumnArray::create(nested_column.column, offsets_column); DataTypePtr array_type; - /// If type hint is Nested, we should return Nested type, - /// because we differentiate Nested and simple Array(Tuple) - if (type_hint && isNested(type_hint)) + ColumnPtr array_data_column = nested_column.column; + /// If type hint is Nested and the element is a named Tuple, return the Nested type + /// so that `Nested::flatten` can decompose it into separate arrays. + /// When the element is Nullable(Tuple(...)), unwrap it and propagate the struct null + /// map to each element via `unwrapNullableTuple`. + const auto * tuple_type = type_hint && isNested(type_hint) + ? 
typeid_cast(removeNullable(nested_column.type).get()) + : nullptr; + if (tuple_type) { - const auto & tuple_type = assert_cast(*nested_column.type); - array_type = createNested(tuple_type.getElements(), tuple_type.getElementNames()); + auto unwrapped = Nested::unwrapNullableTuple({array_data_column, nested_column.type, column_name}); + array_data_column = unwrapped.column; + const auto & result_tuple = assert_cast(*unwrapped.type); + array_type = createNested(result_tuple.getElements(), result_tuple.getElementNames()); } else { array_type = std::make_shared(nested_column.type); } + auto array_column = ColumnArray::create(array_data_column, offsets_column); return {array_column, array_type, column_name}; } case orc::STRUCT: diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 13d6cb6656ad..e5ef21abf1b6 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -494,7 +494,27 @@ void ORCBlockOutputFormat::writeColumn( const auto & tuple_column = assert_cast(column); auto nested_types = assert_cast(type.get())->getElements(); for (size_t i = 0; i != tuple_column.tupleSize(); ++i) - writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], nullptr); + { + if (null_bytemap && nested_types[i]->isNullable()) + { + /// When both the struct and the element are nullable, we need to merge the two null bitmaps: + /// a child value is null if either the struct row is null OR the element itself is null. 
+ const auto & nullable_col = assert_cast(tuple_column.getColumn(i)); + const auto & element_null_map = nullable_col.getNullMapData(); + PaddedPODArray merged_null_map(element_null_map.size()); + for (size_t j = 0; j < element_null_map.size(); ++j) + merged_null_map[j] = element_null_map[j] | (*null_bytemap)[j]; + + auto nested_type = removeNullable(nested_types[i]); + writeColumn(*struct_orc_column.fields[i], nullable_col.getNestedColumn(), nested_type, &merged_null_map); + } + else + { + /// Propagate the struct-level null_bytemap to children so the ORC library correctly handles + /// null struct rows (child values at null positions must also be marked null). + writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], null_bytemap); + } + } break; } case TypeIndex::Map: diff --git a/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sh b/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sh new file mode 100755 index 000000000000..c54c1831a1b4 --- /dev/null +++ b/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# no-fasttest: Arrow format is not available in fasttest builds + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toNullable('abc')) as lc format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_string_as_string=0" | $CLICKHOUSE_LOCAL --input-format=Arrow --table=test -q "desc test" +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toNullable('abc')) as lc format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_string_as_string=0" | $CLICKHOUSE_LOCAL --input-format=Arrow --table=test -q "select * from test" +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toNullable('abc')) as lc format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_string_as_string=1" | $CLICKHOUSE_LOCAL --input-format=Arrow --table=test -q "desc test" +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toNullable('abc')) as lc format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_string_as_string=1" | $CLICKHOUSE_LOCAL --input-format=Arrow --table=test -q "select * from test" diff --git a/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sql b/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sql deleted file mode 100644 index 975e7fb88267..000000000000 --- a/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sql +++ /dev/null @@ -1,8 +0,0 @@ --- Tags: no-fasttest - -insert into function file(02384_data.arrow) select toLowCardinality(toNullable('abc')) as lc settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_string_as_string=0, engine_file_truncate_on_insert=1; -desc file(02384_data.arrow); -select * from file(02384_data.arrow); -insert into function file(02384_data.arrow) select toLowCardinality(toNullable('abc')) as lc settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_string_as_string=1, engine_file_truncate_on_insert=1; -desc file(02384_data.arrow); 
-select * from file(02384_data.arrow); diff --git a/tests/queries/0_stateless/04019_formats_nullable_empty_tuple_roundtrip.reference b/tests/queries/0_stateless/04019_formats_nullable_empty_tuple_roundtrip.reference new file mode 100644 index 000000000000..2f627fd884b2 --- /dev/null +++ b/tests/queries/0_stateless/04019_formats_nullable_empty_tuple_roundtrip.reference @@ -0,0 +1,186 @@ +-- { echo } + +SET allow_experimental_nullable_tuple_type = 1; +SET engine_file_truncate_on_insert = 1; +DROP TABLE IF EXISTS test_nullable_empty_tuple; +CREATE TABLE test_nullable_empty_tuple (c0 Nullable(Tuple())) ENGINE = Memory; +INSERT INTO TABLE test_nullable_empty_tuple (c0) VALUES (()), (NULL), (()); +SELECT 'CSV'; +CSV +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.csv', 'CSV', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.csv', 'CSV', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'TabSeparated'; +TabSeparated +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.tsv', 'TabSeparated', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.tsv', 'TabSeparated', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'TabSeparatedRaw'; +TabSeparatedRaw +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.tsvraw', 'TabSeparatedRaw', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.tsvraw', 'TabSeparatedRaw', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSONEachRow'; +JSONEachRow +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.json', 'JSONEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.json', 'JSONEachRow', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSONCompactEachRow'; +JSONCompactEachRow +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsoncompact', 
'JSONCompactEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsoncompact', 'JSONCompactEachRow', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSONStringsEachRow'; +JSONStringsEachRow +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsonstr', 'JSONStringsEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsonstr', 'JSONStringsEachRow', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSONCompactStringsEachRow'; +JSONCompactStringsEachRow +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsoncstr', 'JSONCompactStringsEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsoncstr', 'JSONCompactStringsEachRow', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'Values'; +Values +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.values', 'Values', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.values', 'Values', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'TSKV'; +TSKV +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.tskv', 'TSKV', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.tskv', 'TSKV', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'CustomSeparated'; +CustomSeparated +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.custom', 'CustomSeparated', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.custom', 'CustomSeparated', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'Native'; +Native +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.native', 'Native', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.native', 
'Native', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'RowBinary'; +RowBinary +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.rowbin', 'RowBinary', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.rowbin', 'RowBinary', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'Avro'; +Avro +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.avro', 'Avro', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.avro', 'Avro', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'MsgPack'; +MsgPack +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.msgpack', 'MsgPack', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.msgpack', 'MsgPack', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'BSONEachRow'; +BSONEachRow +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.bson', 'BSONEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.bson', 'BSONEachRow', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSON'; +JSON +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsonall', 'JSON', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsonall', 'JSON', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSONCompact'; +JSONCompact +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsoncompactall', 'JSONCompact', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsoncompactall', 'JSONCompact', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSONColumns'; +JSONColumns +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsoncols', 'JSONColumns', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM 
file(currentDatabase() || '_04019.jsoncols', 'JSONColumns', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSONCompactColumns'; +JSONCompactColumns +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsonccols', 'JSONCompactColumns', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsonccols', 'JSONCompactColumns', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSONColumnsWithMetadata'; +JSONColumnsWithMetadata +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsoncolsmeta', 'JSONColumnsWithMetadata', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsoncolsmeta', 'JSONColumnsWithMetadata', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'JSONObjectEachRow'; +JSONObjectEachRow +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsonobj', 'JSONObjectEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsonobj', 'JSONObjectEachRow', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'Buffers'; +Buffers +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.buf', 'Buffers', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.buf', 'Buffers', 'c0 Nullable(Tuple())'); +() +\N +() +-- Parquet doesn't support empty tuples by design +SELECT 'Parquet'; +Parquet +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.parquet', 'Parquet', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; -- { serverError BAD_ARGUMENTS } +SELECT 'Arrow'; +Arrow +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.arrow', 'Arrow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.arrow', 'Arrow', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'ArrowStream'; +ArrowStream +INSERT INTO TABLE FUNCTION 
file(currentDatabase() || '_04019.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple())'); +() +\N +() +SELECT 'ORC'; +ORC +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.orc', 'ORC', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.orc', 'ORC', 'c0 Nullable(Tuple())'); +() +\N +() diff --git a/tests/queries/0_stateless/04019_formats_nullable_empty_tuple_roundtrip.sql b/tests/queries/0_stateless/04019_formats_nullable_empty_tuple_roundtrip.sql new file mode 100644 index 000000000000..ee194259f764 --- /dev/null +++ b/tests/queries/0_stateless/04019_formats_nullable_empty_tuple_roundtrip.sql @@ -0,0 +1,115 @@ +-- Tags: no-fasttest +-- no-fasttest: Some formats not available in fasttest environment + +-- { echo } + +SET allow_experimental_nullable_tuple_type = 1; +SET engine_file_truncate_on_insert = 1; + +DROP TABLE IF EXISTS test_nullable_empty_tuple; +CREATE TABLE test_nullable_empty_tuple (c0 Nullable(Tuple())) ENGINE = Memory; +INSERT INTO TABLE test_nullable_empty_tuple (c0) VALUES (()), (NULL), (()); + +SELECT 'CSV'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.csv', 'CSV', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.csv', 'CSV', 'c0 Nullable(Tuple())'); + +SELECT 'TabSeparated'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.tsv', 'TabSeparated', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.tsv', 'TabSeparated', 'c0 Nullable(Tuple())'); + +SELECT 'TabSeparatedRaw'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.tsvraw', 'TabSeparatedRaw', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || 
'_04019.tsvraw', 'TabSeparatedRaw', 'c0 Nullable(Tuple())'); + +SELECT 'JSONEachRow'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.json', 'JSONEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.json', 'JSONEachRow', 'c0 Nullable(Tuple())'); + +SELECT 'JSONCompactEachRow'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsoncompact', 'JSONCompactEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsoncompact', 'JSONCompactEachRow', 'c0 Nullable(Tuple())'); + +SELECT 'JSONStringsEachRow'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsonstr', 'JSONStringsEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsonstr', 'JSONStringsEachRow', 'c0 Nullable(Tuple())'); + +SELECT 'JSONCompactStringsEachRow'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsoncstr', 'JSONCompactStringsEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsoncstr', 'JSONCompactStringsEachRow', 'c0 Nullable(Tuple())'); + +SELECT 'Values'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.values', 'Values', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.values', 'Values', 'c0 Nullable(Tuple())'); + +SELECT 'TSKV'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.tskv', 'TSKV', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.tskv', 'TSKV', 'c0 Nullable(Tuple())'); + +SELECT 'CustomSeparated'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.custom', 'CustomSeparated', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || 
'_04019.custom', 'CustomSeparated', 'c0 Nullable(Tuple())'); + +SELECT 'Native'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.native', 'Native', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.native', 'Native', 'c0 Nullable(Tuple())'); + +SELECT 'RowBinary'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.rowbin', 'RowBinary', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.rowbin', 'RowBinary', 'c0 Nullable(Tuple())'); + +SELECT 'Avro'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.avro', 'Avro', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.avro', 'Avro', 'c0 Nullable(Tuple())'); + +SELECT 'MsgPack'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.msgpack', 'MsgPack', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.msgpack', 'MsgPack', 'c0 Nullable(Tuple())'); + +SELECT 'BSONEachRow'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.bson', 'BSONEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.bson', 'BSONEachRow', 'c0 Nullable(Tuple())'); + +SELECT 'JSON'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsonall', 'JSON', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsonall', 'JSON', 'c0 Nullable(Tuple())'); + +SELECT 'JSONCompact'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsoncompactall', 'JSONCompact', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsoncompactall', 'JSONCompact', 'c0 Nullable(Tuple())'); + +SELECT 'JSONColumns'; +INSERT INTO TABLE FUNCTION 
file(currentDatabase() || '_04019.jsoncols', 'JSONColumns', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsoncols', 'JSONColumns', 'c0 Nullable(Tuple())'); + +SELECT 'JSONCompactColumns'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsonccols', 'JSONCompactColumns', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsonccols', 'JSONCompactColumns', 'c0 Nullable(Tuple())'); + +SELECT 'JSONColumnsWithMetadata'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsoncolsmeta', 'JSONColumnsWithMetadata', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsoncolsmeta', 'JSONColumnsWithMetadata', 'c0 Nullable(Tuple())'); + +SELECT 'JSONObjectEachRow'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.jsonobj', 'JSONObjectEachRow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.jsonobj', 'JSONObjectEachRow', 'c0 Nullable(Tuple())'); + +SELECT 'Buffers'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.buf', 'Buffers', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.buf', 'Buffers', 'c0 Nullable(Tuple())'); + +-- Parquet doesn't support empty tuples by design +SELECT 'Parquet'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.parquet', 'Parquet', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; -- { serverError BAD_ARGUMENTS } + +SELECT 'Arrow'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.arrow', 'Arrow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.arrow', 'Arrow', 'c0 Nullable(Tuple())'); + +SELECT 'ArrowStream'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || 
'_04019.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple())'); + +SELECT 'ORC'; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04019.orc', 'ORC', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_empty_tuple; +SELECT c0 FROM file(currentDatabase() || '_04019.orc', 'ORC', 'c0 Nullable(Tuple())'); diff --git a/tests/queries/0_stateless/04064_tuple_inside_nullable_arrow_orc_roundtrip.reference b/tests/queries/0_stateless/04064_tuple_inside_nullable_arrow_orc_roundtrip.reference new file mode 100644 index 000000000000..5ade702466c0 --- /dev/null +++ b/tests/queries/0_stateless/04064_tuple_inside_nullable_arrow_orc_roundtrip.reference @@ -0,0 +1,456 @@ +-- { echo } + +SET allow_experimental_nullable_tuple_type = 1; +SET engine_file_truncate_on_insert = 1; +-- Nullable struct with non-nullable elements +DROP TABLE IF EXISTS test_nullable_tuple_basic; +CREATE TABLE test_nullable_tuple_basic (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_basic VALUES ((1, 'a')), (NULL), ((3, 'c')); +-- Arrow +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_basic; +SELECT c0 FROM file(currentDatabase() || '_04064.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))'); +(1,'a') +\N +(3,'c') +-- ArrowStream +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_basic; +SELECT c0 FROM file(currentDatabase() || '_04064.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple(UInt32, String))'); +(1,'a') +\N +(3,'c') +-- ORC +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_basic; +SELECT c0 FROM 
file(currentDatabase() || '_04064.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))'); +(1,'a') +\N +(3,'c') +-- ORC legacy (Arrow-based) reader +SELECT c0 FROM file(currentDatabase() || '_04064.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'a') +\N +(3,'c') +DROP TABLE test_nullable_tuple_basic; +-- Nullable empty tuple +DROP TABLE IF EXISTS test_nullable_tuple_empty; +CREATE TABLE test_nullable_tuple_empty (c0 Nullable(Tuple())) ENGINE = Memory; +INSERT INTO test_nullable_tuple_empty VALUES (()), (NULL), (()); +-- Arrow empty +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_empty.arrow', 'Arrow', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_tuple_empty; +SELECT c0 FROM file(currentDatabase() || '_04064_empty.arrow', 'Arrow', 'c0 Nullable(Tuple())'); +() +\N +() +-- ArrowStream empty +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_empty.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_tuple_empty; +SELECT c0 FROM file(currentDatabase() || '_04064_empty.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple())'); +() +\N +() +-- ORC empty +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_empty.orc', 'ORC', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_tuple_empty; +SELECT c0 FROM file(currentDatabase() || '_04064_empty.orc', 'ORC', 'c0 Nullable(Tuple())'); +() +\N +() +-- ORC legacy empty +SELECT c0 FROM file(currentDatabase() || '_04064_empty.orc', 'ORC', 'c0 Nullable(Tuple())') SETTINGS input_format_orc_use_fast_decoder = 0; +() +\N +() +DROP TABLE test_nullable_tuple_empty; +-- Both struct and element nullable: Nullable(Tuple(Nullable(UInt32), String)) +DROP TABLE IF EXISTS test_nullable_tuple_both; +CREATE TABLE test_nullable_tuple_both (c0 Nullable(Tuple(Nullable(UInt32), String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_both VALUES ((1, 'a')), (NULL), ((NULL, 'c')), ((4, 'd')); +-- Arrow both nullable +INSERT INTO 
TABLE FUNCTION file(currentDatabase() || '_04064_both.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_both; +SELECT c0 FROM file(currentDatabase() || '_04064_both.arrow', 'Arrow', 'c0 Nullable(Tuple(Nullable(UInt32), String))'); +(1,'a') +\N +(NULL,'c') +(4,'d') +-- ArrowStream both nullable +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_both.arrowstream', 'ArrowStream') SELECT c0 FROM test_nullable_tuple_both; +SELECT c0 FROM file(currentDatabase() || '_04064_both.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple(Nullable(UInt32), String))'); +(1,'a') +\N +(NULL,'c') +(4,'d') +-- ORC both nullable +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_both.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_both; +SELECT c0 FROM file(currentDatabase() || '_04064_both.orc', 'ORC', 'c0 Nullable(Tuple(Nullable(UInt32), String))'); +(1,'a') +\N +(NULL,'c') +(4,'d') +-- ORC legacy both nullable +SELECT c0 FROM file(currentDatabase() || '_04064_both.orc', 'ORC', 'c0 Nullable(Tuple(Nullable(UInt32), String))') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'a') +\N +(NULL,'c') +(4,'d') +DROP TABLE test_nullable_tuple_both; +-- Non-nullable struct with nullable elements (should be unchanged) +DROP TABLE IF EXISTS test_nullable_tuple_elem; +CREATE TABLE test_nullable_tuple_elem (c0 Tuple(Nullable(UInt32), String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_elem VALUES ((1, 'a')), ((NULL, 'b')); +-- Arrow nullable elements +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_elem.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_elem; +SELECT c0 FROM file(currentDatabase() || '_04064_elem.arrow', 'Arrow', 'c0 Tuple(Nullable(UInt32), String)'); +(1,'a') +(NULL,'b') +-- ORC nullable elements +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_elem.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_elem; +SELECT c0 FROM file(currentDatabase() || '_04064_elem.orc', 'ORC', 'c0 Tuple(Nullable(UInt32), String)'); +(1,'a') 
+(NULL,'b') +-- ORC legacy nullable elements +SELECT c0 FROM file(currentDatabase() || '_04064_elem.orc', 'ORC', 'c0 Tuple(Nullable(UInt32), String)') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'a') +(NULL,'b') +DROP TABLE test_nullable_tuple_elem; +-- Plain non-nullable tuple (baseline, should be unchanged) +DROP TABLE IF EXISTS test_nullable_tuple_plain; +CREATE TABLE test_nullable_tuple_plain (c0 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_plain VALUES ((1, 'a')), ((2, 'b')); +-- Arrow plain +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_plain.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_plain; +SELECT c0 FROM file(currentDatabase() || '_04064_plain.arrow', 'Arrow', 'c0 Tuple(UInt32, String)'); +(1,'a') +(2,'b') +-- ORC plain +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_plain.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_plain; +SELECT c0 FROM file(currentDatabase() || '_04064_plain.orc', 'ORC', 'c0 Tuple(UInt32, String)'); +(1,'a') +(2,'b') +-- ORC legacy plain +SELECT c0 FROM file(currentDatabase() || '_04064_plain.orc', 'ORC', 'c0 Tuple(UInt32, String)') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'a') +(2,'b') +DROP TABLE test_nullable_tuple_plain; +-- Nested tuple inside nullable struct +DROP TABLE IF EXISTS test_nullable_tuple_nested; +CREATE TABLE test_nullable_tuple_nested (c0 Nullable(Tuple(Tuple(UInt32, String), UInt64))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_nested VALUES (((1, 'a'), 10)), (NULL), (((3, 'c'), 30)); +-- Arrow nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_nested.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_nested; +SELECT c0 FROM file(currentDatabase() || '_04064_nested.arrow', 'Arrow', 'c0 Nullable(Tuple(Tuple(UInt32, String), UInt64))'); +((1,'a'),10) +\N +((3,'c'),30) +-- ORC nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_nested.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_nested; 
+SELECT c0 FROM file(currentDatabase() || '_04064_nested.orc', 'ORC', 'c0 Nullable(Tuple(Tuple(UInt32, String), UInt64))'); +((1,'a'),10) +\N +((3,'c'),30) +-- ORC legacy nested +SELECT c0 FROM file(currentDatabase() || '_04064_nested.orc', 'ORC', 'c0 Nullable(Tuple(Tuple(UInt32, String), UInt64))') SETTINGS input_format_orc_use_fast_decoder = 0; +((1,'a'),10) +\N +((3,'c'),30) +DROP TABLE test_nullable_tuple_nested; +-- Schema inference without type hint +DROP TABLE IF EXISTS test_nullable_tuple_infer; +CREATE TABLE test_nullable_tuple_infer (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_infer VALUES ((1, 'a')), (NULL), ((3, 'c')); +-- Arrow infer +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_infer.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_infer; +SELECT c0 FROM file(currentDatabase() || '_04064_infer.arrow', 'Arrow'); +(1,'a') +\N +(3,'c') +-- ORC infer +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_infer.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_infer; +SELECT c0 FROM file(currentDatabase() || '_04064_infer.orc', 'ORC'); +(1,'a') +\N +(3,'c') +-- ORC legacy infer +SELECT c0 FROM file(currentDatabase() || '_04064_infer.orc', 'ORC') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'a') +\N +(3,'c') +DROP TABLE test_nullable_tuple_infer; +-- Named tuple +DROP TABLE IF EXISTS test_nullable_tuple_named; +CREATE TABLE test_nullable_tuple_named (c0 Nullable(Tuple(a UInt32, b String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_named VALUES ((1, 'x')), (NULL), ((3, 'z')); +-- Arrow named +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_named.arrow', 'Arrow', 'c0 Nullable(Tuple(a UInt32, b String))') SELECT c0 FROM test_nullable_tuple_named; +SELECT c0 FROM file(currentDatabase() || '_04064_named.arrow', 'Arrow', 'c0 Nullable(Tuple(a UInt32, b String))'); +(1,'x') +\N +(3,'z') +-- ORC named +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_named.orc', 
'ORC', 'c0 Nullable(Tuple(a UInt32, b String))') SELECT c0 FROM test_nullable_tuple_named; +SELECT c0 FROM file(currentDatabase() || '_04064_named.orc', 'ORC', 'c0 Nullable(Tuple(a UInt32, b String))'); +(1,'x') +\N +(3,'z') +-- ORC legacy named +SELECT c0 FROM file(currentDatabase() || '_04064_named.orc', 'ORC', 'c0 Nullable(Tuple(a UInt32, b String))') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'x') +\N +(3,'z') +DROP TABLE test_nullable_tuple_named; +-- All-NULL column +DROP TABLE IF EXISTS test_nullable_tuple_allnull; +CREATE TABLE test_nullable_tuple_allnull (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_allnull VALUES (NULL), (NULL), (NULL); +-- Arrow all null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_allnull.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_allnull; +SELECT c0 FROM file(currentDatabase() || '_04064_allnull.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))'); +\N +\N +\N +-- ORC all null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_allnull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_allnull; +SELECT c0 FROM file(currentDatabase() || '_04064_allnull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))'); +\N +\N +\N +-- ORC legacy all null +SELECT c0 FROM file(currentDatabase() || '_04064_allnull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_orc_use_fast_decoder = 0; +\N +\N +\N +DROP TABLE test_nullable_tuple_allnull; +-- No-NULL column (nullable type, zero actual NULLs) +DROP TABLE IF EXISTS test_nullable_tuple_nonull; +CREATE TABLE test_nullable_tuple_nonull (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_nonull VALUES ((1, 'a')), ((2, 'b')), ((3, 'c')); +-- Arrow no null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_nonull.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM 
test_nullable_tuple_nonull; +SELECT c0 FROM file(currentDatabase() || '_04064_nonull.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))'); +(1,'a') +(2,'b') +(3,'c') +-- ORC no null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_nonull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_nonull; +SELECT c0 FROM file(currentDatabase() || '_04064_nonull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))'); +(1,'a') +(2,'b') +(3,'c') +-- ORC legacy no null +SELECT c0 FROM file(currentDatabase() || '_04064_nonull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'a') +(2,'b') +(3,'c') +DROP TABLE test_nullable_tuple_nonull; +-- Single-element tuple +DROP TABLE IF EXISTS test_nullable_tuple_single; +CREATE TABLE test_nullable_tuple_single (c0 Nullable(Tuple(UInt32))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_single VALUES ((1,)), (NULL), ((3,)); +-- Arrow single +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_single.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32))') SELECT c0 FROM test_nullable_tuple_single; +SELECT c0 FROM file(currentDatabase() || '_04064_single.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32))'); +(1) +\N +(3) +-- ORC single +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_single.orc', 'ORC', 'c0 Nullable(Tuple(UInt32))') SELECT c0 FROM test_nullable_tuple_single; +SELECT c0 FROM file(currentDatabase() || '_04064_single.orc', 'ORC', 'c0 Nullable(Tuple(UInt32))'); +(1) +\N +(3) +-- ORC legacy single +SELECT c0 FROM file(currentDatabase() || '_04064_single.orc', 'ORC', 'c0 Nullable(Tuple(UInt32))') SETTINGS input_format_orc_use_fast_decoder = 0; +(1) +\N +(3) +DROP TABLE test_nullable_tuple_single; +-- Deeply nested: nullable tuple inside nullable tuple +DROP TABLE IF EXISTS test_nullable_tuple_deep; +CREATE TABLE test_nullable_tuple_deep (c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))) ENGINE = Memory; +INSERT 
INTO test_nullable_tuple_deep VALUES (((1, 'a'), 10)), (NULL), ((NULL, 20)), (((4, 'd'), 40)); +-- Arrow deep nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_deep.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_deep; +SELECT c0 FROM file(currentDatabase() || '_04064_deep.arrow', 'Arrow', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))'); +((1,'a'),10) +\N +(NULL,20) +((4,'d'),40) +-- ORC deep nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_deep.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_deep; +SELECT c0 FROM file(currentDatabase() || '_04064_deep.orc', 'ORC', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))'); +((1,'a'),10) +\N +(NULL,20) +((4,'d'),40) +-- ORC legacy deep nested +SELECT c0 FROM file(currentDatabase() || '_04064_deep.orc', 'ORC', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))') SETTINGS input_format_orc_use_fast_decoder = 0; +((1,'a'),10) +\N +(NULL,20) +((4,'d'),40) +DROP TABLE test_nullable_tuple_deep; +-- Nullable tuple with Array element +DROP TABLE IF EXISTS test_nullable_tuple_arr; +CREATE TABLE test_nullable_tuple_arr (c0 Nullable(Tuple(Array(UInt32), String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr VALUES (([1, 2], 'a')), (NULL), (([3], 'c')); +-- Arrow array elem +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_arr; +SELECT c0 FROM file(currentDatabase() || '_04064_arr.arrow', 'Arrow', 'c0 Nullable(Tuple(Array(UInt32), String))'); +([1,2],'a') +\N +([3],'c') +-- ORC array elem +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_arr; +SELECT c0 FROM file(currentDatabase() || '_04064_arr.orc', 'ORC', 'c0 Nullable(Tuple(Array(UInt32), String))'); +([1,2],'a') +\N +([3],'c') +-- ORC legacy array elem +SELECT c0 FROM file(currentDatabase() || '_04064_arr.orc', 'ORC', 'c0 Nullable(Tuple(Array(UInt32), String))') 
SETTINGS input_format_orc_use_fast_decoder = 0; +([1,2],'a') +\N +([3],'c') +DROP TABLE test_nullable_tuple_arr; +-- Multiple nullable tuple columns +DROP TABLE IF EXISTS test_nullable_tuple_multi; +CREATE TABLE test_nullable_tuple_multi (c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_multi VALUES ((1, 'a'), (1.5)), (NULL, (2.5)), ((3, 'c'), NULL); +-- Arrow multi col +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_multi.arrow', 'Arrow') SELECT c0, c1 FROM test_nullable_tuple_multi; +SELECT c0, c1 FROM file(currentDatabase() || '_04064_multi.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))'); +(1,'a') (1.5) +\N (2.5) +(3,'c') \N +-- ORC multi col +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_multi.orc', 'ORC') SELECT c0, c1 FROM test_nullable_tuple_multi; +SELECT c0, c1 FROM file(currentDatabase() || '_04064_multi.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))'); +(1,'a') (1.5) +\N (2.5) +(3,'c') \N +-- ORC legacy multi col +SELECT c0, c1 FROM file(currentDatabase() || '_04064_multi.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'a') (1.5) +\N (2.5) +(3,'c') \N +DROP TABLE test_nullable_tuple_multi; +-- Type hint mismatch: file has Nullable(Tuple(...)), read as Tuple(...) 
(strip nullable, NULLs become defaults) +DROP TABLE IF EXISTS test_nullable_tuple_mismatch1; +CREATE TABLE test_nullable_tuple_mismatch1 (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_mismatch1 VALUES ((1, 'a')), (NULL), ((3, 'c')); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_mismatch1.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_mismatch1; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_mismatch1.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_mismatch1; +-- Arrow: read nullable file as non-nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch1.arrow', 'Arrow', 'c0 Tuple(UInt32, String)'); +(1,'a') Tuple(UInt32, String) +(0,'') Tuple(UInt32, String) +(3,'c') Tuple(UInt32, String) +-- ORC: read nullable file as non-nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch1.orc', 'ORC', 'c0 Tuple(UInt32, String)'); +(1,'a') Tuple(UInt32, String) +(0,'') Tuple(UInt32, String) +(3,'c') Tuple(UInt32, String) +-- ORC legacy: read nullable file as non-nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch1.orc', 'ORC', 'c0 Tuple(UInt32, String)') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'a') Tuple(UInt32, String) +(0,'') Tuple(UInt32, String) +(3,'c') Tuple(UInt32, String) +DROP TABLE test_nullable_tuple_mismatch1; +-- Type hint mismatch: file has Tuple(...), read as Nullable(Tuple(...)) (add nullable wrapper) +DROP TABLE IF EXISTS test_nullable_tuple_mismatch2; +CREATE TABLE test_nullable_tuple_mismatch2 (c0 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_mismatch2 VALUES ((1, 'a')), ((2, 'b')); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_mismatch2.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_mismatch2; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_mismatch2.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_mismatch2; +-- Arrow: read 
non-nullable file as nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch2.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))'); +(1,'a') Nullable(Tuple(UInt32, String)) +(2,'b') Nullable(Tuple(UInt32, String)) +-- ORC: read non-nullable file as nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch2.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))'); +(1,'a') Nullable(Tuple(UInt32, String)) +(2,'b') Nullable(Tuple(UInt32, String)) +-- ORC legacy: read non-nullable file as nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch2.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_orc_use_fast_decoder = 0; +(1,'a') Nullable(Tuple(UInt32, String)) +(2,'b') Nullable(Tuple(UInt32, String)) +DROP TABLE test_nullable_tuple_mismatch2; +-- Schema inference: DESCRIBE without type hint shows inferred type +DROP TABLE IF EXISTS test_nullable_tuple_describe; +CREATE TABLE test_nullable_tuple_describe (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_describe VALUES ((1, 'a')), (NULL), ((3, 'c')); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_describe.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_describe; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_describe.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_describe; +-- Arrow: inferred type +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_describe.arrow', 'Arrow'); +(1,'a') Nullable(Tuple(`1` UInt32, `2` String)) +\N Nullable(Tuple(`1` UInt32, `2` String)) +(3,'c') Nullable(Tuple(`1` UInt32, `2` String)) +-- ORC: inferred type +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_describe.orc', 'ORC'); +(1,'a') Nullable(Tuple(`1` Nullable(Int32), `2` Nullable(String))) +\N Nullable(Tuple(`1` Nullable(Int32), `2` Nullable(String))) +(3,'c') Nullable(Tuple(`1` Nullable(Int32), `2` Nullable(String))) +DROP TABLE 
test_nullable_tuple_describe; +-- Array(Nullable(Tuple)) flattened via import_nested: struct-level NULLs should propagate to elements +DROP TABLE IF EXISTS test_nullable_tuple_import_nested; +CREATE TABLE test_nullable_tuple_import_nested (c0 Array(Nullable(Tuple(a UInt32, b String)))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_import_nested VALUES ([(1, 'a'), NULL, (3, 'c')]); +-- Arrow import_nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_import_nested.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_import_nested; +SELECT * FROM file(currentDatabase() || '_04064_import_nested.arrow', 'Arrow', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_arrow_import_nested = 1; +[1,NULL,3] ['a',NULL,'c'] +-- ORC import_nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_import_nested.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_import_nested; +SELECT * FROM file(currentDatabase() || '_04064_import_nested.orc', 'ORC', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_orc_import_nested = 1; +[1,NULL,3] ['a',NULL,'c'] +-- ORC legacy import_nested +SELECT * FROM file(currentDatabase() || '_04064_import_nested.orc', 'ORC', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_orc_import_nested = 1, input_format_orc_use_fast_decoder = 0; +[1,NULL,3] ['a',NULL,'c'] +DROP TABLE test_nullable_tuple_import_nested; +-- Array(Nullable(Tuple)) without named elements: round-trip as a single column, no flattening +DROP TABLE IF EXISTS test_nullable_tuple_arr_unnamed; +CREATE TABLE test_nullable_tuple_arr_unnamed (c0 Array(Nullable(Tuple(UInt32, String)))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr_unnamed VALUES ([(1, 'a'), NULL, (3, 'c')]); +-- Arrow unnamed +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr_unnamed.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_arr_unnamed; +SELECT c0 FROM 
file(currentDatabase() || '_04064_arr_unnamed.arrow', 'Arrow', 'c0 Array(Nullable(Tuple(UInt32, String)))'); +[(1,'a'),NULL,(3,'c')] +-- ORC unnamed +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr_unnamed.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_arr_unnamed; +SELECT c0 FROM file(currentDatabase() || '_04064_arr_unnamed.orc', 'ORC', 'c0 Array(Nullable(Tuple(UInt32, String)))'); +[(1,'a'),NULL,(3,'c')] +-- ORC legacy unnamed +SELECT c0 FROM file(currentDatabase() || '_04064_arr_unnamed.orc', 'ORC', 'c0 Array(Nullable(Tuple(UInt32, String)))') SETTINGS input_format_orc_use_fast_decoder = 0; +[(1,'a'),NULL,(3,'c')] +DROP TABLE test_nullable_tuple_arr_unnamed; +-- Array(Nullable(Tuple)) with Array element inside: import_nested flattens, Array defaults to [] at null positions +DROP TABLE IF EXISTS test_nullable_tuple_arr_nested_elem; +CREATE TABLE test_nullable_tuple_arr_nested_elem (c0 Array(Nullable(Tuple(a UInt32, b Array(UInt32))))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr_nested_elem VALUES ([(1, [10, 20]), NULL, (3, [30])]); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr_nested_elem.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_arr_nested_elem; +-- Arrow import_nested: scalar becomes Nullable, Array defaults to [] at null struct positions +SELECT * FROM file(currentDatabase() || '_04064_arr_nested_elem.arrow', 'Arrow', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_arrow_import_nested = 1; +[1,NULL,3] [[10,20],[],[30]] +-- ORC import_nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr_nested_elem.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_arr_nested_elem; +SELECT * FROM file(currentDatabase() || '_04064_arr_nested_elem.orc', 'ORC', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_orc_import_nested = 1; +[1,NULL,3] [[10,20],[],[30]] +-- ORC legacy import_nested +SELECT * FROM file(currentDatabase() || 
'_04064_arr_nested_elem.orc', 'ORC', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_orc_import_nested = 1, input_format_orc_use_fast_decoder = 0; +[1,NULL,3] [[10,20],[],[30]] +DROP TABLE test_nullable_tuple_arr_nested_elem; +-- LowCardinality(Nullable(String)) hint with no physical nulls in the file: the ORC reader must still wrap the column as nullable +DROP TABLE IF EXISTS test_nullable_tuple_lc_string; +CREATE TABLE test_nullable_tuple_lc_string (c0 String) ENGINE = Memory; +INSERT INTO test_nullable_tuple_lc_string VALUES ('hello'), ('world'); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_lc_str.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_lc_string; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_lc_str.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_lc_string; +-- Arrow: no physical nulls, LowCardinality(Nullable(String)) hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_lc_str.arrow', 'Arrow', 'c0 LowCardinality(Nullable(String))'); +hello LowCardinality(Nullable(String)) +world LowCardinality(Nullable(String)) +-- ORC: no physical nulls, LowCardinality(Nullable(String)) hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_lc_str.orc', 'ORC', 'c0 LowCardinality(Nullable(String))'); +hello LowCardinality(Nullable(String)) +world LowCardinality(Nullable(String)) +-- ORC legacy: no physical nulls, LowCardinality(Nullable(String)) hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_lc_str.orc', 'ORC', 'c0 LowCardinality(Nullable(String))') SETTINGS input_format_orc_use_fast_decoder = 0; +hello LowCardinality(Nullable(String)) +world LowCardinality(Nullable(String)) +DROP TABLE test_nullable_tuple_lc_string; diff --git a/tests/queries/0_stateless/04064_tuple_inside_nullable_arrow_orc_roundtrip.sql b/tests/queries/0_stateless/04064_tuple_inside_nullable_arrow_orc_roundtrip.sql new file mode 100644 index 000000000000..694c075b68e9 
--- /dev/null +++ b/tests/queries/0_stateless/04064_tuple_inside_nullable_arrow_orc_roundtrip.sql @@ -0,0 +1,399 @@ +-- Tags: no-fasttest +-- no-fasttest: Arrow and ORC formats are not available in fasttest builds + +-- { echo } + +SET allow_experimental_nullable_tuple_type = 1; +SET engine_file_truncate_on_insert = 1; + +-- Nullable struct with non-nullable elements +DROP TABLE IF EXISTS test_nullable_tuple_basic; +CREATE TABLE test_nullable_tuple_basic (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_basic VALUES ((1, 'a')), (NULL), ((3, 'c')); + +-- Arrow +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_basic; +SELECT c0 FROM file(currentDatabase() || '_04064.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))'); + +-- ArrowStream +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_basic; +SELECT c0 FROM file(currentDatabase() || '_04064.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple(UInt32, String))'); + +-- ORC +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_basic; +SELECT c0 FROM file(currentDatabase() || '_04064.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))'); + +-- ORC legacy (Arrow-based) reader +SELECT c0 FROM file(currentDatabase() || '_04064.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_basic; + +-- Nullable empty tuple +DROP TABLE IF EXISTS test_nullable_tuple_empty; +CREATE TABLE test_nullable_tuple_empty (c0 Nullable(Tuple())) ENGINE = Memory; +INSERT INTO test_nullable_tuple_empty VALUES (()), (NULL), (()); + +-- Arrow empty +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_empty.arrow', 'Arrow', 'c0 
Nullable(Tuple())') SELECT c0 FROM test_nullable_tuple_empty; +SELECT c0 FROM file(currentDatabase() || '_04064_empty.arrow', 'Arrow', 'c0 Nullable(Tuple())'); + +-- ArrowStream empty +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_empty.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_tuple_empty; +SELECT c0 FROM file(currentDatabase() || '_04064_empty.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple())'); + +-- ORC empty +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_empty.orc', 'ORC', 'c0 Nullable(Tuple())') SELECT c0 FROM test_nullable_tuple_empty; +SELECT c0 FROM file(currentDatabase() || '_04064_empty.orc', 'ORC', 'c0 Nullable(Tuple())'); + +-- ORC legacy empty +SELECT c0 FROM file(currentDatabase() || '_04064_empty.orc', 'ORC', 'c0 Nullable(Tuple())') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_empty; + +-- Both struct and element nullable: Nullable(Tuple(Nullable(UInt32), String)) +DROP TABLE IF EXISTS test_nullable_tuple_both; +CREATE TABLE test_nullable_tuple_both (c0 Nullable(Tuple(Nullable(UInt32), String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_both VALUES ((1, 'a')), (NULL), ((NULL, 'c')), ((4, 'd')); + +-- Arrow both nullable +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_both.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_both; +SELECT c0 FROM file(currentDatabase() || '_04064_both.arrow', 'Arrow', 'c0 Nullable(Tuple(Nullable(UInt32), String))'); + +-- ArrowStream both nullable +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_both.arrowstream', 'ArrowStream') SELECT c0 FROM test_nullable_tuple_both; +SELECT c0 FROM file(currentDatabase() || '_04064_both.arrowstream', 'ArrowStream', 'c0 Nullable(Tuple(Nullable(UInt32), String))'); + +-- ORC both nullable +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_both.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_both; +SELECT c0 FROM 
file(currentDatabase() || '_04064_both.orc', 'ORC', 'c0 Nullable(Tuple(Nullable(UInt32), String))'); + +-- ORC legacy both nullable +SELECT c0 FROM file(currentDatabase() || '_04064_both.orc', 'ORC', 'c0 Nullable(Tuple(Nullable(UInt32), String))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_both; + +-- Non-nullable struct with nullable elements (should be unchanged) +DROP TABLE IF EXISTS test_nullable_tuple_elem; +CREATE TABLE test_nullable_tuple_elem (c0 Tuple(Nullable(UInt32), String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_elem VALUES ((1, 'a')), ((NULL, 'b')); + +-- Arrow nullable elements +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_elem.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_elem; +SELECT c0 FROM file(currentDatabase() || '_04064_elem.arrow', 'Arrow', 'c0 Tuple(Nullable(UInt32), String)'); + +-- ORC nullable elements +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_elem.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_elem; +SELECT c0 FROM file(currentDatabase() || '_04064_elem.orc', 'ORC', 'c0 Tuple(Nullable(UInt32), String)'); + +-- ORC legacy nullable elements +SELECT c0 FROM file(currentDatabase() || '_04064_elem.orc', 'ORC', 'c0 Tuple(Nullable(UInt32), String)') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_elem; + +-- Plain non-nullable tuple (baseline, should be unchanged) +DROP TABLE IF EXISTS test_nullable_tuple_plain; +CREATE TABLE test_nullable_tuple_plain (c0 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_plain VALUES ((1, 'a')), ((2, 'b')); + +-- Arrow plain +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_plain.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_plain; +SELECT c0 FROM file(currentDatabase() || '_04064_plain.arrow', 'Arrow', 'c0 Tuple(UInt32, String)'); + +-- ORC plain +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_plain.orc', 'ORC') SELECT c0 FROM 
test_nullable_tuple_plain; +SELECT c0 FROM file(currentDatabase() || '_04064_plain.orc', 'ORC', 'c0 Tuple(UInt32, String)'); + +-- ORC legacy plain +SELECT c0 FROM file(currentDatabase() || '_04064_plain.orc', 'ORC', 'c0 Tuple(UInt32, String)') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_plain; + +-- Nested tuple inside nullable struct +DROP TABLE IF EXISTS test_nullable_tuple_nested; +CREATE TABLE test_nullable_tuple_nested (c0 Nullable(Tuple(Tuple(UInt32, String), UInt64))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_nested VALUES (((1, 'a'), 10)), (NULL), (((3, 'c'), 30)); + +-- Arrow nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_nested.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_nested; +SELECT c0 FROM file(currentDatabase() || '_04064_nested.arrow', 'Arrow', 'c0 Nullable(Tuple(Tuple(UInt32, String), UInt64))'); + +-- ORC nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_nested.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_nested; +SELECT c0 FROM file(currentDatabase() || '_04064_nested.orc', 'ORC', 'c0 Nullable(Tuple(Tuple(UInt32, String), UInt64))'); + +-- ORC legacy nested +SELECT c0 FROM file(currentDatabase() || '_04064_nested.orc', 'ORC', 'c0 Nullable(Tuple(Tuple(UInt32, String), UInt64))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_nested; + +-- Schema inference without type hint +DROP TABLE IF EXISTS test_nullable_tuple_infer; +CREATE TABLE test_nullable_tuple_infer (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_infer VALUES ((1, 'a')), (NULL), ((3, 'c')); + +-- Arrow infer +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_infer.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_infer; +SELECT c0 FROM file(currentDatabase() || '_04064_infer.arrow', 'Arrow'); + +-- ORC infer +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_infer.orc', 'ORC') SELECT c0 FROM 
test_nullable_tuple_infer; +SELECT c0 FROM file(currentDatabase() || '_04064_infer.orc', 'ORC'); + +-- ORC legacy infer +SELECT c0 FROM file(currentDatabase() || '_04064_infer.orc', 'ORC') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_infer; + +-- Named tuple +DROP TABLE IF EXISTS test_nullable_tuple_named; +CREATE TABLE test_nullable_tuple_named (c0 Nullable(Tuple(a UInt32, b String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_named VALUES ((1, 'x')), (NULL), ((3, 'z')); + +-- Arrow named +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_named.arrow', 'Arrow', 'c0 Nullable(Tuple(a UInt32, b String))') SELECT c0 FROM test_nullable_tuple_named; +SELECT c0 FROM file(currentDatabase() || '_04064_named.arrow', 'Arrow', 'c0 Nullable(Tuple(a UInt32, b String))'); + +-- ORC named +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_named.orc', 'ORC', 'c0 Nullable(Tuple(a UInt32, b String))') SELECT c0 FROM test_nullable_tuple_named; +SELECT c0 FROM file(currentDatabase() || '_04064_named.orc', 'ORC', 'c0 Nullable(Tuple(a UInt32, b String))'); + +-- ORC legacy named +SELECT c0 FROM file(currentDatabase() || '_04064_named.orc', 'ORC', 'c0 Nullable(Tuple(a UInt32, b String))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_named; + +-- All-NULL column +DROP TABLE IF EXISTS test_nullable_tuple_allnull; +CREATE TABLE test_nullable_tuple_allnull (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_allnull VALUES (NULL), (NULL), (NULL); + +-- Arrow all null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_allnull.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_allnull; +SELECT c0 FROM file(currentDatabase() || '_04064_allnull.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))'); + +-- ORC all null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_allnull.orc', 'ORC', 'c0 
Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_allnull; +SELECT c0 FROM file(currentDatabase() || '_04064_allnull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))'); + +-- ORC legacy all null +SELECT c0 FROM file(currentDatabase() || '_04064_allnull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_allnull; + +-- No-NULL column (nullable type, zero actual NULLs) +DROP TABLE IF EXISTS test_nullable_tuple_nonull; +CREATE TABLE test_nullable_tuple_nonull (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_nonull VALUES ((1, 'a')), ((2, 'b')), ((3, 'c')); + +-- Arrow no null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_nonull.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_nonull; +SELECT c0 FROM file(currentDatabase() || '_04064_nonull.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))'); + +-- ORC no null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_nonull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_nonull; +SELECT c0 FROM file(currentDatabase() || '_04064_nonull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))'); + +-- ORC legacy no null +SELECT c0 FROM file(currentDatabase() || '_04064_nonull.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_nonull; + +-- Single-element tuple +DROP TABLE IF EXISTS test_nullable_tuple_single; +CREATE TABLE test_nullable_tuple_single (c0 Nullable(Tuple(UInt32))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_single VALUES ((1,)), (NULL), ((3,)); + +-- Arrow single +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_single.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32))') SELECT c0 FROM test_nullable_tuple_single; +SELECT c0 FROM file(currentDatabase() || '_04064_single.arrow', 'Arrow', 
'c0 Nullable(Tuple(UInt32))'); + +-- ORC single +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_single.orc', 'ORC', 'c0 Nullable(Tuple(UInt32))') SELECT c0 FROM test_nullable_tuple_single; +SELECT c0 FROM file(currentDatabase() || '_04064_single.orc', 'ORC', 'c0 Nullable(Tuple(UInt32))'); + +-- ORC legacy single +SELECT c0 FROM file(currentDatabase() || '_04064_single.orc', 'ORC', 'c0 Nullable(Tuple(UInt32))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_single; + +-- Deeply nested: nullable tuple inside nullable tuple +DROP TABLE IF EXISTS test_nullable_tuple_deep; +CREATE TABLE test_nullable_tuple_deep (c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_deep VALUES (((1, 'a'), 10)), (NULL), ((NULL, 20)), (((4, 'd'), 40)); + +-- Arrow deep nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_deep.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_deep; +SELECT c0 FROM file(currentDatabase() || '_04064_deep.arrow', 'Arrow', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))'); + +-- ORC deep nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_deep.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_deep; +SELECT c0 FROM file(currentDatabase() || '_04064_deep.orc', 'ORC', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))'); + +-- ORC legacy deep nested +SELECT c0 FROM file(currentDatabase() || '_04064_deep.orc', 'ORC', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_deep; + +-- Nullable tuple with Array element +DROP TABLE IF EXISTS test_nullable_tuple_arr; +CREATE TABLE test_nullable_tuple_arr (c0 Nullable(Tuple(Array(UInt32), String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr VALUES (([1, 2], 'a')), (NULL), (([3], 'c')); + +-- Arrow array elem +INSERT INTO TABLE FUNCTION file(currentDatabase() || 
'_04064_arr.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_arr; +SELECT c0 FROM file(currentDatabase() || '_04064_arr.arrow', 'Arrow', 'c0 Nullable(Tuple(Array(UInt32), String))'); + +-- ORC array elem +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_arr; +SELECT c0 FROM file(currentDatabase() || '_04064_arr.orc', 'ORC', 'c0 Nullable(Tuple(Array(UInt32), String))'); + +-- ORC legacy array elem +SELECT c0 FROM file(currentDatabase() || '_04064_arr.orc', 'ORC', 'c0 Nullable(Tuple(Array(UInt32), String))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_arr; + +-- Multiple nullable tuple columns +DROP TABLE IF EXISTS test_nullable_tuple_multi; +CREATE TABLE test_nullable_tuple_multi (c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_multi VALUES ((1, 'a'), (1.5)), (NULL, (2.5)), ((3, 'c'), NULL); + +-- Arrow multi col +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_multi.arrow', 'Arrow') SELECT c0, c1 FROM test_nullable_tuple_multi; +SELECT c0, c1 FROM file(currentDatabase() || '_04064_multi.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))'); + +-- ORC multi col +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_multi.orc', 'ORC') SELECT c0, c1 FROM test_nullable_tuple_multi; +SELECT c0, c1 FROM file(currentDatabase() || '_04064_multi.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))'); + +-- ORC legacy multi col +SELECT c0, c1 FROM file(currentDatabase() || '_04064_multi.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_multi; + +-- Type hint mismatch: file has Nullable(Tuple(...)), read as Tuple(...) 
(strip nullable, NULLs become defaults) +DROP TABLE IF EXISTS test_nullable_tuple_mismatch1; +CREATE TABLE test_nullable_tuple_mismatch1 (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_mismatch1 VALUES ((1, 'a')), (NULL), ((3, 'c')); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_mismatch1.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_mismatch1; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_mismatch1.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_mismatch1; + +-- Arrow: read nullable file as non-nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch1.arrow', 'Arrow', 'c0 Tuple(UInt32, String)'); + +-- ORC: read nullable file as non-nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch1.orc', 'ORC', 'c0 Tuple(UInt32, String)'); + +-- ORC legacy: read nullable file as non-nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch1.orc', 'ORC', 'c0 Tuple(UInt32, String)') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_mismatch1; + +-- Type hint mismatch: file has Tuple(...), read as Nullable(Tuple(...)) (add nullable wrapper) +DROP TABLE IF EXISTS test_nullable_tuple_mismatch2; +CREATE TABLE test_nullable_tuple_mismatch2 (c0 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_mismatch2 VALUES ((1, 'a')), ((2, 'b')); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_mismatch2.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_mismatch2; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_mismatch2.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_mismatch2; + +-- Arrow: read non-nullable file as nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch2.arrow', 'Arrow', 'c0 Nullable(Tuple(UInt32, String))'); + +-- ORC: read non-nullable file as nullable +SELECT c0, toTypeName(c0) FROM 
file(currentDatabase() || '_04064_mismatch2.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))'); + +-- ORC legacy: read non-nullable file as nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_mismatch2.orc', 'ORC', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_mismatch2; + +-- Schema inference: DESCRIBE without type hint shows inferred type +DROP TABLE IF EXISTS test_nullable_tuple_describe; +CREATE TABLE test_nullable_tuple_describe (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_describe VALUES ((1, 'a')), (NULL), ((3, 'c')); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_describe.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_describe; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_describe.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_describe; + +-- Arrow: inferred type +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_describe.arrow', 'Arrow'); + +-- ORC: inferred type +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_describe.orc', 'ORC'); + +DROP TABLE test_nullable_tuple_describe; + +-- Array(Nullable(Tuple)) flattened via import_nested: struct-level NULLs should propagate to elements +DROP TABLE IF EXISTS test_nullable_tuple_import_nested; +CREATE TABLE test_nullable_tuple_import_nested (c0 Array(Nullable(Tuple(a UInt32, b String)))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_import_nested VALUES ([(1, 'a'), NULL, (3, 'c')]); + +-- Arrow import_nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_import_nested.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_import_nested; +SELECT * FROM file(currentDatabase() || '_04064_import_nested.arrow', 'Arrow', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_arrow_import_nested = 1; + +-- ORC import_nested +INSERT INTO TABLE FUNCTION 
file(currentDatabase() || '_04064_import_nested.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_import_nested; +SELECT * FROM file(currentDatabase() || '_04064_import_nested.orc', 'ORC', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_orc_import_nested = 1; + +-- ORC legacy import_nested +SELECT * FROM file(currentDatabase() || '_04064_import_nested.orc', 'ORC', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_orc_import_nested = 1, input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_import_nested; + +-- Array(Nullable(Tuple)) without named elements: round-trip as a single column, no flattening +DROP TABLE IF EXISTS test_nullable_tuple_arr_unnamed; +CREATE TABLE test_nullable_tuple_arr_unnamed (c0 Array(Nullable(Tuple(UInt32, String)))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr_unnamed VALUES ([(1, 'a'), NULL, (3, 'c')]); + +-- Arrow unnamed +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr_unnamed.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_arr_unnamed; +SELECT c0 FROM file(currentDatabase() || '_04064_arr_unnamed.arrow', 'Arrow', 'c0 Array(Nullable(Tuple(UInt32, String)))'); + +-- ORC unnamed +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr_unnamed.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_arr_unnamed; +SELECT c0 FROM file(currentDatabase() || '_04064_arr_unnamed.orc', 'ORC', 'c0 Array(Nullable(Tuple(UInt32, String)))'); + +-- ORC legacy unnamed +SELECT c0 FROM file(currentDatabase() || '_04064_arr_unnamed.orc', 'ORC', 'c0 Array(Nullable(Tuple(UInt32, String)))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_arr_unnamed; + +-- Array(Nullable(Tuple)) with Array element inside: import_nested flattens, Array defaults to [] at null positions +DROP TABLE IF EXISTS test_nullable_tuple_arr_nested_elem; +CREATE TABLE test_nullable_tuple_arr_nested_elem (c0 Array(Nullable(Tuple(a 
UInt32, b Array(UInt32))))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr_nested_elem VALUES ([(1, [10, 20]), NULL, (3, [30])]); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr_nested_elem.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_arr_nested_elem; + +-- Arrow import_nested: scalar becomes Nullable, Array defaults to [] at null struct positions +SELECT * FROM file(currentDatabase() || '_04064_arr_nested_elem.arrow', 'Arrow', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_arrow_import_nested = 1; + +-- ORC import_nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_arr_nested_elem.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_arr_nested_elem; +SELECT * FROM file(currentDatabase() || '_04064_arr_nested_elem.orc', 'ORC', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_orc_import_nested = 1; + +-- ORC legacy import_nested +SELECT * FROM file(currentDatabase() || '_04064_arr_nested_elem.orc', 'ORC', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_orc_import_nested = 1, input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_arr_nested_elem; + +-- LowCardinality(Nullable(String)) hint with no physical nulls in the file: the ORC reader must still wrap the column as nullable +DROP TABLE IF EXISTS test_nullable_tuple_lc_string; +CREATE TABLE test_nullable_tuple_lc_string (c0 String) ENGINE = Memory; +INSERT INTO test_nullable_tuple_lc_string VALUES ('hello'), ('world'); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_lc_str.arrow', 'Arrow') SELECT c0 FROM test_nullable_tuple_lc_string; +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04064_lc_str.orc', 'ORC') SELECT c0 FROM test_nullable_tuple_lc_string; + +-- Arrow: no physical nulls, LowCardinality(Nullable(String)) hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_lc_str.arrow', 'Arrow', 'c0 
LowCardinality(Nullable(String))'); + +-- ORC: no physical nulls, LowCardinality(Nullable(String)) hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_lc_str.orc', 'ORC', 'c0 LowCardinality(Nullable(String))'); + +-- ORC legacy: no physical nulls, LowCardinality(Nullable(String)) hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04064_lc_str.orc', 'ORC', 'c0 LowCardinality(Nullable(String))') SETTINGS input_format_orc_use_fast_decoder = 0; + +DROP TABLE test_nullable_tuple_lc_string; diff --git a/tests/queries/0_stateless/04065_tuple_inside_nullable_parquet_roundtrip.reference b/tests/queries/0_stateless/04065_tuple_inside_nullable_parquet_roundtrip.reference new file mode 100644 index 000000000000..9ea83fc503fe --- /dev/null +++ b/tests/queries/0_stateless/04065_tuple_inside_nullable_parquet_roundtrip.reference @@ -0,0 +1,251 @@ +-- { echo } + +SET allow_experimental_nullable_tuple_type = 1; +SET engine_file_truncate_on_insert = 1; +-- Nullable struct with non-nullable elements +DROP TABLE IF EXISTS test_nullable_tuple_basic; +CREATE TABLE test_nullable_tuple_basic (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_basic VALUES ((1, 'a')), (NULL), ((3, 'c')); +-- Parquet Arrow reader +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_basic; +SELECT c0 FROM file(currentDatabase() || '_04065.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') +\N +(3,'c') +-- Parquet V3 native reader (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_basic; +-- Both struct and element nullable: Nullable(Tuple(Nullable(UInt32), String)) +DROP 
TABLE IF EXISTS test_nullable_tuple_both; +CREATE TABLE test_nullable_tuple_both (c0 Nullable(Tuple(Nullable(UInt32), String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_both VALUES ((1, 'a')), (NULL), ((NULL, 'c')), ((4, 'd')); +-- Parquet Arrow reader both nullable +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_both.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_both; +SELECT c0 FROM file(currentDatabase() || '_04065_both.parquet', 'Parquet', 'c0 Nullable(Tuple(Nullable(UInt32), String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') +\N +(NULL,'c') +(4,'d') +-- Parquet V3 native reader (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_both.parquet', 'Parquet', 'c0 Nullable(Tuple(Nullable(UInt32), String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_both; +-- Non-nullable struct with nullable elements +DROP TABLE IF EXISTS test_nullable_tuple_elem; +CREATE TABLE test_nullable_tuple_elem (c0 Tuple(Nullable(UInt32), String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_elem VALUES ((1, 'a')), ((NULL, 'b')); +-- Parquet Arrow reader nullable elements +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_elem.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_elem; +SELECT c0 FROM file(currentDatabase() || '_04065_elem.parquet', 'Parquet', 'c0 Tuple(Nullable(UInt32), String)') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') +(NULL,'b') +-- Parquet V3 native reader nullable elements +SELECT c0 FROM file(currentDatabase() || '_04065_elem.parquet', 'Parquet', 'c0 Tuple(Nullable(UInt32), String)') SETTINGS input_format_parquet_use_native_reader_v3 = 1; +(1,'a') +(NULL,'b') +DROP TABLE test_nullable_tuple_elem; +-- Plain non-nullable tuple +DROP TABLE IF EXISTS test_nullable_tuple_plain; +CREATE TABLE test_nullable_tuple_plain (c0 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO 
test_nullable_tuple_plain VALUES ((1, 'a')), ((2, 'b')); +-- Parquet Arrow reader plain +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_plain.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_plain; +SELECT c0 FROM file(currentDatabase() || '_04065_plain.parquet', 'Parquet', 'c0 Tuple(UInt32, String)') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') +(2,'b') +-- Parquet V3 native reader plain +SELECT c0 FROM file(currentDatabase() || '_04065_plain.parquet', 'Parquet', 'c0 Tuple(UInt32, String)') SETTINGS input_format_parquet_use_native_reader_v3 = 1; +(1,'a') +(2,'b') +DROP TABLE test_nullable_tuple_plain; +-- Named tuple +DROP TABLE IF EXISTS test_nullable_tuple_named; +CREATE TABLE test_nullable_tuple_named (c0 Nullable(Tuple(a UInt32, b String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_named VALUES ((1, 'x')), (NULL), ((3, 'z')); +-- Parquet Arrow reader named +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_named.parquet', 'Parquet', 'c0 Nullable(Tuple(a UInt32, b String))') SELECT c0 FROM test_nullable_tuple_named; +SELECT c0 FROM file(currentDatabase() || '_04065_named.parquet', 'Parquet', 'c0 Nullable(Tuple(a UInt32, b String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'x') +\N +(3,'z') +-- Parquet V3 native reader named (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_named.parquet', 'Parquet', 'c0 Nullable(Tuple(a UInt32, b String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_named; +-- All-NULL column +DROP TABLE IF EXISTS test_nullable_tuple_allnull; +CREATE TABLE test_nullable_tuple_allnull (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_allnull VALUES (NULL), (NULL), (NULL); +-- Parquet Arrow reader all null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_allnull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') 
SELECT c0 FROM test_nullable_tuple_allnull; +SELECT c0 FROM file(currentDatabase() || '_04065_allnull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +\N +\N +\N +-- Parquet V3 native reader all null (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_allnull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_allnull; +-- No-NULL column (nullable type, zero actual NULLs) +DROP TABLE IF EXISTS test_nullable_tuple_nonull; +CREATE TABLE test_nullable_tuple_nonull (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_nonull VALUES ((1, 'a')), ((2, 'b')), ((3, 'c')); +-- Parquet Arrow reader no null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_nonull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_nonull; +SELECT c0 FROM file(currentDatabase() || '_04065_nonull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') +(2,'b') +(3,'c') +-- Parquet V3 native reader no null (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_nonull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_nonull; +-- Single-element tuple +DROP TABLE IF EXISTS test_nullable_tuple_single; +CREATE TABLE test_nullable_tuple_single (c0 Nullable(Tuple(UInt32))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_single VALUES ((1,)), (NULL), ((3,)); +-- Parquet Arrow reader single +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_single.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32))') SELECT c0 FROM test_nullable_tuple_single; +SELECT c0 FROM file(currentDatabase() || 
'_04065_single.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1) +\N +(3) +-- Parquet V3 native reader single (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_single.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_single; +-- Deeply nested: nullable tuple inside nullable tuple +DROP TABLE IF EXISTS test_nullable_tuple_deep; +CREATE TABLE test_nullable_tuple_deep (c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_deep VALUES (((1, 'a'), 10)), (NULL), ((NULL, 20)), (((4, 'd'), 40)); +-- Parquet Arrow reader deep nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_deep.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_deep; +SELECT c0 FROM file(currentDatabase() || '_04065_deep.parquet', 'Parquet', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +((1,'a'),10) +\N +(NULL,20) +((4,'d'),40) +-- Parquet V3 native reader deep nested (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_deep.parquet', 'Parquet', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_deep; +-- Nullable tuple with Array element +DROP TABLE IF EXISTS test_nullable_tuple_arr; +CREATE TABLE test_nullable_tuple_arr (c0 Nullable(Tuple(Array(UInt32), String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr VALUES (([1, 2], 'a')), (NULL), (([3], 'c')); +-- Parquet Arrow reader array elem +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_arr.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_arr; +SELECT c0 FROM file(currentDatabase() || '_04065_arr.parquet', 'Parquet', 'c0 
Nullable(Tuple(Array(UInt32), String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +([1,2],'a') +\N +([3],'c') +-- Parquet V3 native reader array elem (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_arr.parquet', 'Parquet', 'c0 Nullable(Tuple(Array(UInt32), String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_arr; +-- Multiple nullable tuple columns +DROP TABLE IF EXISTS test_nullable_tuple_multi; +CREATE TABLE test_nullable_tuple_multi (c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_multi VALUES ((1, 'a'), (1.5)), (NULL, (2.5)), ((3, 'c'), NULL); +-- Parquet Arrow reader multi col +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_multi.parquet', 'Parquet') SELECT c0, c1 FROM test_nullable_tuple_multi; +SELECT c0, c1 FROM file(currentDatabase() || '_04065_multi.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') (1.5) +\N (2.5) +(3,'c') \N +-- Parquet V3 native reader multi col (not yet supported) +SELECT c0, c1 FROM file(currentDatabase() || '_04065_multi.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_multi; +-- Schema inference without type hint (works for both readers, but V3 loses struct-level NULL) +DROP TABLE IF EXISTS test_nullable_tuple_infer; +CREATE TABLE test_nullable_tuple_infer (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_infer VALUES ((1, 'a')), (NULL), ((3, 'c')); +-- Parquet Arrow reader infer +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_infer.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_infer; +SELECT c0 FROM 
file(currentDatabase() || '_04065_infer.parquet', 'Parquet') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') +\N +(3,'c') +DROP TABLE test_nullable_tuple_infer; +-- Type hint mismatch: file has Nullable(Tuple(...)), read as Tuple(...) (strip nullable, NULLs become defaults) +DROP TABLE IF EXISTS test_nullable_tuple_mismatch1; +CREATE TABLE test_nullable_tuple_mismatch1 (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_mismatch1 VALUES ((1, 'a')), (NULL), ((3, 'c')); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_mismatch1.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_mismatch1; +-- Parquet Arrow reader: read nullable file as non-nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_mismatch1.parquet', 'Parquet', 'c0 Tuple(UInt32, String)') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') Tuple(UInt32, String) +(0,'') Tuple(UInt32, String) +(3,'c') Tuple(UInt32, String) +DROP TABLE test_nullable_tuple_mismatch1; +-- Type hint mismatch: file has Tuple(...), read as Nullable(Tuple(...)) (add nullable wrapper) +DROP TABLE IF EXISTS test_nullable_tuple_mismatch2; +CREATE TABLE test_nullable_tuple_mismatch2 (c0 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_mismatch2 VALUES ((1, 'a')), ((2, 'b')); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_mismatch2.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_mismatch2; +-- Parquet Arrow reader: read non-nullable file as nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_mismatch2.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') Nullable(Tuple(UInt32, String)) +(2,'b') Nullable(Tuple(UInt32, String)) +-- Parquet V3 native reader: read non-nullable file as nullable (not yet supported) +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_mismatch2.parquet', 
'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_mismatch2; +-- Schema inference: inferred type with toTypeName +DROP TABLE IF EXISTS test_nullable_tuple_describe; +CREATE TABLE test_nullable_tuple_describe (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_describe VALUES ((1, 'a')), (NULL), ((3, 'c')); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_describe.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_describe; +-- Parquet Arrow reader: inferred type +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_describe.parquet', 'Parquet') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +(1,'a') Nullable(Tuple(`1` UInt32, `2` String)) +\N Nullable(Tuple(`1` UInt32, `2` String)) +(3,'c') Nullable(Tuple(`1` UInt32, `2` String)) +-- Parquet V3 native reader: inferred type (struct-level NULL not supported, becomes (NULL,NULL)) +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_describe.parquet', 'Parquet') SETTINGS input_format_parquet_use_native_reader_v3 = 1; +(1,'a') Tuple(\n `1` Nullable(UInt32),\n `2` Nullable(String)) +(NULL,NULL) Tuple(\n `1` Nullable(UInt32),\n `2` Nullable(String)) +(3,'c') Tuple(\n `1` Nullable(UInt32),\n `2` Nullable(String)) +DROP TABLE test_nullable_tuple_describe; +-- Array(Nullable(Tuple)) flattened via import_nested: struct-level NULLs should propagate to elements +DROP TABLE IF EXISTS test_nullable_tuple_import_nested; +CREATE TABLE test_nullable_tuple_import_nested (c0 Array(Nullable(Tuple(a UInt32, b String)))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_import_nested VALUES ([(1, 'a'), NULL, (3, 'c')]); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_import_nested.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_import_nested; +-- Parquet Arrow reader import_nested +SELECT * FROM 
file(currentDatabase() || '_04065_import_nested.parquet', 'Parquet', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0, input_format_parquet_import_nested = 1; +[1,NULL,3] ['a',NULL,'c'] +-- Parquet V3 native reader import_nested +-- This works because V3 reader sees the already-flattened column names (c0.a, c0.b), not the Nullable(Tuple(...)) +SELECT * FROM file(currentDatabase() || '_04065_import_nested.parquet', 'Parquet', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1, input_format_parquet_import_nested = 1; +[1,NULL,3] ['a',NULL,'c'] +DROP TABLE test_nullable_tuple_import_nested; +-- Array(Nullable(Tuple)) without named elements: round-trip as a single column, no flattening +DROP TABLE IF EXISTS test_nullable_tuple_arr_unnamed; +CREATE TABLE test_nullable_tuple_arr_unnamed (c0 Array(Nullable(Tuple(UInt32, String)))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr_unnamed VALUES ([(1, 'a'), NULL, (3, 'c')]); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_arr_unnamed.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_arr_unnamed; +-- Parquet Arrow reader unnamed +SELECT c0 FROM file(currentDatabase() || '_04065_arr_unnamed.parquet', 'Parquet', 'c0 Array(Nullable(Tuple(UInt32, String)))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +[(1,'a'),NULL,(3,'c')] +-- Parquet V3 native reader unnamed (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_arr_unnamed.parquet', 'Parquet', 'c0 Array(Nullable(Tuple(UInt32, String)))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } +DROP TABLE test_nullable_tuple_arr_unnamed; +-- Array(Nullable(Tuple)) with Array element inside: import_nested flattens, Array defaults to [] at null positions +DROP TABLE IF EXISTS test_nullable_tuple_arr_nested_elem; +CREATE TABLE 
test_nullable_tuple_arr_nested_elem (c0 Array(Nullable(Tuple(a UInt32, b Array(UInt32))))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr_nested_elem VALUES ([(1, [10, 20]), NULL, (3, [30])]); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_arr_nested_elem.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_arr_nested_elem; +-- Parquet Arrow reader import_nested: scalar becomes Nullable, Array defaults to [] at null struct positions +SELECT * FROM file(currentDatabase() || '_04065_arr_nested_elem.parquet', 'Parquet', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_parquet_use_native_reader_v3 = 0, input_format_parquet_import_nested = 1; +[1,NULL,3] [[10,20],[],[30]] +-- Parquet V3 native reader import_nested +SELECT * FROM file(currentDatabase() || '_04065_arr_nested_elem.parquet', 'Parquet', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_parquet_use_native_reader_v3 = 1, input_format_parquet_import_nested = 1; +[1,NULL,3] [[10,20],[],[30]] +DROP TABLE test_nullable_tuple_arr_nested_elem; +-- LowCardinality(Nullable(String)) hint with no physical nulls in the file: the reader must still wrap the column as nullable +DROP TABLE IF EXISTS test_nullable_tuple_lc_string; +CREATE TABLE test_nullable_tuple_lc_string (c0 String) ENGINE = Memory; +INSERT INTO test_nullable_tuple_lc_string VALUES ('hello'), ('world'); +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_lc_str.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_lc_string; +-- Parquet Arrow reader: no physical nulls, LowCardinality(Nullable(String)) hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_lc_str.parquet', 'Parquet', 'c0 LowCardinality(Nullable(String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; +hello LowCardinality(Nullable(String)) +world LowCardinality(Nullable(String)) +-- Parquet V3 native reader: no physical nulls, LowCardinality(Nullable(String)) 
hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_lc_str.parquet', 'Parquet', 'c0 LowCardinality(Nullable(String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; +hello LowCardinality(Nullable(String)) +world LowCardinality(Nullable(String)) +DROP TABLE test_nullable_tuple_lc_string; diff --git a/tests/queries/0_stateless/04065_tuple_inside_nullable_parquet_roundtrip.sql b/tests/queries/0_stateless/04065_tuple_inside_nullable_parquet_roundtrip.sql new file mode 100644 index 000000000000..048970dfe0a4 --- /dev/null +++ b/tests/queries/0_stateless/04065_tuple_inside_nullable_parquet_roundtrip.sql @@ -0,0 +1,275 @@ +-- Tags: no-fasttest +-- no-fasttest: Parquet format is not available in fasttest builds + +-- { echo } + +SET allow_experimental_nullable_tuple_type = 1; +SET engine_file_truncate_on_insert = 1; + +-- Nullable struct with non-nullable elements +DROP TABLE IF EXISTS test_nullable_tuple_basic; +CREATE TABLE test_nullable_tuple_basic (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_basic VALUES ((1, 'a')), (NULL), ((3, 'c')); + +-- Parquet Arrow reader +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_basic; +SELECT c0 FROM file(currentDatabase() || '_04065.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_basic; + +-- Both struct and element nullable: Nullable(Tuple(Nullable(UInt32), String)) +DROP TABLE IF EXISTS test_nullable_tuple_both; +CREATE TABLE test_nullable_tuple_both (c0 Nullable(Tuple(Nullable(UInt32), String))) ENGINE = Memory; 
+INSERT INTO test_nullable_tuple_both VALUES ((1, 'a')), (NULL), ((NULL, 'c')), ((4, 'd')); + +-- Parquet Arrow reader both nullable +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_both.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_both; +SELECT c0 FROM file(currentDatabase() || '_04065_both.parquet', 'Parquet', 'c0 Nullable(Tuple(Nullable(UInt32), String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_both.parquet', 'Parquet', 'c0 Nullable(Tuple(Nullable(UInt32), String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_both; + +-- Non-nullable struct with nullable elements +DROP TABLE IF EXISTS test_nullable_tuple_elem; +CREATE TABLE test_nullable_tuple_elem (c0 Tuple(Nullable(UInt32), String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_elem VALUES ((1, 'a')), ((NULL, 'b')); + +-- Parquet Arrow reader nullable elements +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_elem.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_elem; +SELECT c0 FROM file(currentDatabase() || '_04065_elem.parquet', 'Parquet', 'c0 Tuple(Nullable(UInt32), String)') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader nullable elements +SELECT c0 FROM file(currentDatabase() || '_04065_elem.parquet', 'Parquet', 'c0 Tuple(Nullable(UInt32), String)') SETTINGS input_format_parquet_use_native_reader_v3 = 1; + +DROP TABLE test_nullable_tuple_elem; + +-- Plain non-nullable tuple +DROP TABLE IF EXISTS test_nullable_tuple_plain; +CREATE TABLE test_nullable_tuple_plain (c0 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_plain VALUES ((1, 'a')), ((2, 'b')); + +-- Parquet Arrow reader plain +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_plain.parquet', 'Parquet') SELECT c0 FROM 
test_nullable_tuple_plain; +SELECT c0 FROM file(currentDatabase() || '_04065_plain.parquet', 'Parquet', 'c0 Tuple(UInt32, String)') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader plain +SELECT c0 FROM file(currentDatabase() || '_04065_plain.parquet', 'Parquet', 'c0 Tuple(UInt32, String)') SETTINGS input_format_parquet_use_native_reader_v3 = 1; + +DROP TABLE test_nullable_tuple_plain; + +-- Named tuple +DROP TABLE IF EXISTS test_nullable_tuple_named; +CREATE TABLE test_nullable_tuple_named (c0 Nullable(Tuple(a UInt32, b String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_named VALUES ((1, 'x')), (NULL), ((3, 'z')); + +-- Parquet Arrow reader named +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_named.parquet', 'Parquet', 'c0 Nullable(Tuple(a UInt32, b String))') SELECT c0 FROM test_nullable_tuple_named; +SELECT c0 FROM file(currentDatabase() || '_04065_named.parquet', 'Parquet', 'c0 Nullable(Tuple(a UInt32, b String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader named (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_named.parquet', 'Parquet', 'c0 Nullable(Tuple(a UInt32, b String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_named; + +-- All-NULL column +DROP TABLE IF EXISTS test_nullable_tuple_allnull; +CREATE TABLE test_nullable_tuple_allnull (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_allnull VALUES (NULL), (NULL), (NULL); + +-- Parquet Arrow reader all null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_allnull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_allnull; +SELECT c0 FROM file(currentDatabase() || '_04065_allnull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 
native reader all null (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_allnull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_allnull; + +-- No-NULL column (nullable type, zero actual NULLs) +DROP TABLE IF EXISTS test_nullable_tuple_nonull; +CREATE TABLE test_nullable_tuple_nonull (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_nonull VALUES ((1, 'a')), ((2, 'b')), ((3, 'c')); + +-- Parquet Arrow reader no null +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_nonull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SELECT c0 FROM test_nullable_tuple_nonull; +SELECT c0 FROM file(currentDatabase() || '_04065_nonull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader no null (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_nonull.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_nonull; + +-- Single-element tuple +DROP TABLE IF EXISTS test_nullable_tuple_single; +CREATE TABLE test_nullable_tuple_single (c0 Nullable(Tuple(UInt32))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_single VALUES ((1,)), (NULL), ((3,)); + +-- Parquet Arrow reader single +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_single.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32))') SELECT c0 FROM test_nullable_tuple_single; +SELECT c0 FROM file(currentDatabase() || '_04065_single.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader single (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_single.parquet', 'Parquet', 'c0 
Nullable(Tuple(UInt32))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_single; + +-- Deeply nested: nullable tuple inside nullable tuple +DROP TABLE IF EXISTS test_nullable_tuple_deep; +CREATE TABLE test_nullable_tuple_deep (c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_deep VALUES (((1, 'a'), 10)), (NULL), ((NULL, 20)), (((4, 'd'), 40)); + +-- Parquet Arrow reader deep nested +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_deep.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_deep; +SELECT c0 FROM file(currentDatabase() || '_04065_deep.parquet', 'Parquet', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader deep nested (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_deep.parquet', 'Parquet', 'c0 Nullable(Tuple(Nullable(Tuple(UInt32, String)), UInt64))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_deep; + +-- Nullable tuple with Array element +DROP TABLE IF EXISTS test_nullable_tuple_arr; +CREATE TABLE test_nullable_tuple_arr (c0 Nullable(Tuple(Array(UInt32), String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr VALUES (([1, 2], 'a')), (NULL), (([3], 'c')); + +-- Parquet Arrow reader array elem +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_arr.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_arr; +SELECT c0 FROM file(currentDatabase() || '_04065_arr.parquet', 'Parquet', 'c0 Nullable(Tuple(Array(UInt32), String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader array elem (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_arr.parquet', 'Parquet', 'c0 Nullable(Tuple(Array(UInt32), String))') SETTINGS 
input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_arr; + +-- Multiple nullable tuple columns +DROP TABLE IF EXISTS test_nullable_tuple_multi; +CREATE TABLE test_nullable_tuple_multi (c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_multi VALUES ((1, 'a'), (1.5)), (NULL, (2.5)), ((3, 'c'), NULL); + +-- Parquet Arrow reader multi col +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_multi.parquet', 'Parquet') SELECT c0, c1 FROM test_nullable_tuple_multi; +SELECT c0, c1 FROM file(currentDatabase() || '_04065_multi.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader multi col (not yet supported) +SELECT c0, c1 FROM file(currentDatabase() || '_04065_multi.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String)), c1 Nullable(Tuple(Float64))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_multi; + +-- Schema inference without type hint (works for both readers, but V3 loses struct-level NULL) +DROP TABLE IF EXISTS test_nullable_tuple_infer; +CREATE TABLE test_nullable_tuple_infer (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_infer VALUES ((1, 'a')), (NULL), ((3, 'c')); + +-- Parquet Arrow reader infer +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_infer.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_infer; +SELECT c0 FROM file(currentDatabase() || '_04065_infer.parquet', 'Parquet') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +DROP TABLE test_nullable_tuple_infer; + +-- Type hint mismatch: file has Nullable(Tuple(...)), read as Tuple(...) 
(strip nullable, NULLs become defaults) +DROP TABLE IF EXISTS test_nullable_tuple_mismatch1; +CREATE TABLE test_nullable_tuple_mismatch1 (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_mismatch1 VALUES ((1, 'a')), (NULL), ((3, 'c')); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_mismatch1.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_mismatch1; + +-- Parquet Arrow reader: read nullable file as non-nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_mismatch1.parquet', 'Parquet', 'c0 Tuple(UInt32, String)') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +DROP TABLE test_nullable_tuple_mismatch1; + +-- Type hint mismatch: file has Tuple(...), read as Nullable(Tuple(...)) (add nullable wrapper) +DROP TABLE IF EXISTS test_nullable_tuple_mismatch2; +CREATE TABLE test_nullable_tuple_mismatch2 (c0 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO test_nullable_tuple_mismatch2 VALUES ((1, 'a')), ((2, 'b')); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_mismatch2.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_mismatch2; + +-- Parquet Arrow reader: read non-nullable file as nullable +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_mismatch2.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader: read non-nullable file as nullable (not yet supported) +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_mismatch2.parquet', 'Parquet', 'c0 Nullable(Tuple(UInt32, String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_mismatch2; + +-- Schema inference: inferred type with toTypeName +DROP TABLE IF EXISTS test_nullable_tuple_describe; +CREATE TABLE test_nullable_tuple_describe (c0 Nullable(Tuple(UInt32, String))) ENGINE = Memory; +INSERT INTO 
test_nullable_tuple_describe VALUES ((1, 'a')), (NULL), ((3, 'c')); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_describe.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_describe; + +-- Parquet Arrow reader: inferred type +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_describe.parquet', 'Parquet') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader: inferred type (struct-level NULL not supported, becomes (NULL,NULL)) +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_describe.parquet', 'Parquet') SETTINGS input_format_parquet_use_native_reader_v3 = 1; + +DROP TABLE test_nullable_tuple_describe; + +-- Array(Nullable(Tuple)) flattened via import_nested: struct-level NULLs should propagate to elements +DROP TABLE IF EXISTS test_nullable_tuple_import_nested; +CREATE TABLE test_nullable_tuple_import_nested (c0 Array(Nullable(Tuple(a UInt32, b String)))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_import_nested VALUES ([(1, 'a'), NULL, (3, 'c')]); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_import_nested.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_import_nested; + +-- Parquet Arrow reader import_nested +SELECT * FROM file(currentDatabase() || '_04065_import_nested.parquet', 'Parquet', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0, input_format_parquet_import_nested = 1; + +-- Parquet V3 native reader import_nested +-- This works because V3 reader sees the already-flattened column names (c0.a, c0.b), not the Nullable(Tuple(...)) +SELECT * FROM file(currentDatabase() || '_04065_import_nested.parquet', 'Parquet', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Nullable(String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1, input_format_parquet_import_nested = 1; + +DROP TABLE test_nullable_tuple_import_nested; + +-- Array(Nullable(Tuple)) without named 
elements: round-trip as a single column, no flattening +DROP TABLE IF EXISTS test_nullable_tuple_arr_unnamed; +CREATE TABLE test_nullable_tuple_arr_unnamed (c0 Array(Nullable(Tuple(UInt32, String)))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr_unnamed VALUES ([(1, 'a'), NULL, (3, 'c')]); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_arr_unnamed.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_arr_unnamed; + +-- Parquet Arrow reader unnamed +SELECT c0 FROM file(currentDatabase() || '_04065_arr_unnamed.parquet', 'Parquet', 'c0 Array(Nullable(Tuple(UInt32, String)))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader unnamed (not yet supported) +SELECT c0 FROM file(currentDatabase() || '_04065_arr_unnamed.parquet', 'Parquet', 'c0 Array(Nullable(Tuple(UInt32, String)))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; -- { serverError TYPE_MISMATCH } + +DROP TABLE test_nullable_tuple_arr_unnamed; + +-- Array(Nullable(Tuple)) with Array element inside: import_nested flattens, Array defaults to [] at null positions +DROP TABLE IF EXISTS test_nullable_tuple_arr_nested_elem; +CREATE TABLE test_nullable_tuple_arr_nested_elem (c0 Array(Nullable(Tuple(a UInt32, b Array(UInt32))))) ENGINE = Memory; +INSERT INTO test_nullable_tuple_arr_nested_elem VALUES ([(1, [10, 20]), NULL, (3, [30])]); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_arr_nested_elem.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_arr_nested_elem; + +-- Parquet Arrow reader import_nested: scalar becomes Nullable, Array defaults to [] at null struct positions +SELECT * FROM file(currentDatabase() || '_04065_arr_nested_elem.parquet', 'Parquet', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_parquet_use_native_reader_v3 = 0, input_format_parquet_import_nested = 1; + +-- Parquet V3 native reader import_nested +SELECT * FROM file(currentDatabase() || 
'_04065_arr_nested_elem.parquet', 'Parquet', '`c0.a` Array(Nullable(UInt32)), `c0.b` Array(Array(UInt32))') SETTINGS input_format_parquet_use_native_reader_v3 = 1, input_format_parquet_import_nested = 1; + +DROP TABLE test_nullable_tuple_arr_nested_elem; + +-- LowCardinality(Nullable(String)) hint with no physical nulls in the file: the reader must still wrap the column as nullable +DROP TABLE IF EXISTS test_nullable_tuple_lc_string; +CREATE TABLE test_nullable_tuple_lc_string (c0 String) ENGINE = Memory; +INSERT INTO test_nullable_tuple_lc_string VALUES ('hello'), ('world'); + +INSERT INTO TABLE FUNCTION file(currentDatabase() || '_04065_lc_str.parquet', 'Parquet') SELECT c0 FROM test_nullable_tuple_lc_string; + +-- Parquet Arrow reader: no physical nulls, LowCardinality(Nullable(String)) hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_lc_str.parquet', 'Parquet', 'c0 LowCardinality(Nullable(String))') SETTINGS input_format_parquet_use_native_reader_v3 = 0; + +-- Parquet V3 native reader: no physical nulls, LowCardinality(Nullable(String)) hint +SELECT c0, toTypeName(c0) FROM file(currentDatabase() || '_04065_lc_str.parquet', 'Parquet', 'c0 LowCardinality(Nullable(String))') SETTINGS input_format_parquet_use_native_reader_v3 = 1; + +DROP TABLE test_nullable_tuple_lc_string; From 973f0110da208e542c024ef86c287c3a7b89bebd Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Fri, 15 May 2026 21:29:11 +0200 Subject: [PATCH 2/5] Resolve conflicts in cherry-pick of #101272 --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 7a41c4901969..ec0aada93897 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1425,10 +1425,6 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( arrow_column->type()->id() 
!= arrow::Type::LARGE_LIST && arrow_column->type()->id() != arrow::Type::FIXED_SIZE_LIST && arrow_column->type()->id() != arrow::Type::MAP && -<<<<<<< HEAD - arrow_column->type()->id() != arrow::Type::STRUCT && -======= ->>>>>>> fc17de3cb80 (Merge pull request #101272 from nihalzp/support-arrow-orc-nullable-tuple) arrow_column->type()->id() != arrow::Type::DICTIONARY) { DataTypePtr nested_type_hint; From 8b48e5d321da048403af0c997d115fd308815fb6 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Sun, 17 May 2026 19:14:08 +0200 Subject: [PATCH 3/5] Fix CI: support `Nullable(Tuple)` in `MsgPackRowInputFormat` The cherry-pick of upstream PR #101272 added test `04019_formats_nullable_empty_tuple_roundtrip` (which exercises `MsgPack` with `Nullable(Tuple())`) but missed the matching change to `MsgPackVisitor::start_array` from upstream commit acf78216b6c. As a result the `SELECT` half of the `MsgPack` roundtrip throws `ILLEGAL_COLUMN: Cannot insert MessagePack array into column with type Nullable(Tuple())`. Apply the upstream fix verbatim: unwrap the `Nullable` before dispatching on `isTuple`, mark the row non-null in the null map, and descend into the inner `ColumnTuple`. Addresses 1 failing test in Stateless tests (amd_asan, distributed plan, parallel, 1/4) on https://github.com/Altinity/ClickHouse/pull/1802. 
Failure report: https://altinity-build-artifacts.s3.amazonaws.com/json.html?PR=1802&sha=4dccbce9dfb174c622d7a6cd95a7aba252e20e9f&name_0=PR&name_1=Stateless%20tests%20%28amd_asan%2C%20distributed%20plan%2C%20parallel%2C%201%2F4%29 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Formats/Impl/MsgPackRowInputFormat.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 0823edfcf1d7..a8ac9a28a2b4 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -398,14 +398,26 @@ bool MsgPackVisitor::start_array(size_t size) // NOLINT if (size > 0) info_stack.push(Info{nested_column, nested_type, false, size, nullptr}); } - else if (isTuple(info_stack.top().type)) + else if (isTuple(removeNullable(info_stack.top().type))) { - const auto & tuple_type = assert_cast(*info_stack.top().type); + /// If the type is Nullable, reaching start_array means the value + /// is non-null (for nulls, the parser calls visit_nil instead). + /// So we can safely unwrap the Nullable to work with the inner + /// ColumnTuple directly. 
+ IColumn * column_ptr = &info_stack.top().column; + if (info_stack.top().type->isNullable()) + { + auto & nullable_column = assert_cast(*column_ptr); + nullable_column.getNullMapColumn().insertValue(0); + column_ptr = &nullable_column.getNestedColumn(); + } + + const auto & tuple_type = assert_cast(*removeNullable(info_stack.top().type)); const auto & nested_types = tuple_type.getElements(); if (size != nested_types.size()) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack array with size {} into Tuple column with {} elements", size, nested_types.size()); - ColumnTuple & column_tuple = assert_cast(info_stack.top().column); + ColumnTuple & column_tuple = assert_cast(*column_ptr); /// Push nested columns into stack in reverse order. for (ssize_t i = static_cast(nested_types.size()) - 1; i >= 0; --i) info_stack.push(Info{column_tuple.getColumn(i), nested_types[i], true, std::nullopt, nullptr}); From 36ae78193451a1aaf81d238b7b3bcb25fab3ee3d Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Tue, 19 May 2026 12:21:34 +0200 Subject: [PATCH 4/5] Fix CI: support `Nullable(Tuple)` in `CHColumnToArrowColumn` The cherry-pick of upstream PR #101272 added tests `04019_formats_nullable_empty_tuple_roundtrip` and `04064_tuple_inside_nullable_arrow_orc_roundtrip` (which exercise `Arrow`/`ArrowStream` roundtrip of `Nullable(Tuple(...))`) but missed the matching change to `fillArrowArrayWithTupleColumnData` from upstream commit `1e5b4344275` ("Add `Nullable(Tuple)` Arrow writing support"). As a result, the Arrow writer ignored the struct-level null bytemap: `NULL` rows roundtripped as default-valued tuples (`()` instead of `\N`, `((0,''),0)` instead of `((NULL,20))`, etc.). Apply the upstream fix verbatim: pass `nullptr` for the children's null bytemap (struct-level and child-level nulls are independent in Arrow) and use `builder.AppendNull` for null struct rows. 
Addresses failing tests in `Stateless tests (amd_asan, distributed plan, parallel, 1/4)`, `Stateless tests (amd_asan, distributed plan, parallel, 3/4)`, `Stateless tests (amd_debug, parallel)`, and `Stateless tests (arm_binary, parallel)` on https://github.com/Altinity/ClickHouse/pull/1802. Failure report: https://altinity-build-artifacts.s3.amazonaws.com/json.html?PR=1802&sha=e77fa9f7de8f967c20e9e55efa8be5ac4e5bb482&name_0=PR Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 5acfa96721c6..c9306bafca1a 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -300,9 +300,12 @@ namespace DB for (size_t i = 0; i != column_tuple->tupleSize(); ++i) { ColumnPtr nested_column = column_tuple->getColumnPtr(i); + /// Do not propagate the struct-level null_bytemap to child fields. + /// In Arrow, struct-level nulls and child-level nulls are independent; + /// child values at null struct positions are undefined. fillArrowArray( column_name + "." + nested_names[i], - nested_column, nested_types[i], null_bytemap, + nested_column, nested_types[i], nullptr, builder.field_builder(static_cast(i)), format_name, start, end, @@ -312,7 +315,7 @@ namespace DB for (size_t i = start; i != end; ++i) { - auto status = builder.Append(); + auto status = (null_bytemap && (*null_bytemap)[i]) ? 
builder.AppendNull() : builder.Append(); checkStatus(status, column->getName(), format_name); } } From ec4b12420c50d755d150c35ff69ae1aedb58d72b Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Tue, 19 May 2026 15:24:43 +0200 Subject: [PATCH 5/5] Fix CI: support `Nullable(Tuple())` CSV roundtrip `CSVFormatReader::readField` treats an empty unquoted field as the default value when `input_format_csv_empty_as_default` is enabled (the default). For a `Nullable(Tuple())` column the default is `NULL`, so the empty-tuple values written by `SerializationTuple::serializeTextCSV` (which produces an empty field for a tuple with zero elements) are read back as `NULL` instead of `()`. This breaks the `04019_formats_nullable_empty_tuple_roundtrip` test added in the cherry-pick of upstream PR #101272. The fix is the same as upstream commit `29f6f23cafe` from upstream PR \#100038 ("Fix `Nullable(Tuple)` not working with `CSV`, `MsgPack` format properly"), which #101272 depends on: special-case `Nullable(Tuple())` (zero-element tuple) so the empty field falls through to normal deserialization instead of being replaced with `NULL`. Applied verbatim (diff matches upstream byte-for-byte). Addresses CSV section failure in test `04019_formats_nullable_empty_tuple_roundtrip` on https://github.com/Altinity/ClickHouse/pull/1802. 
Failure report: https://altinity-build-artifacts.s3.amazonaws.com/json.html?PR=1802&sha=36ae78193451a1aaf81d238b7b3bcb25fab3ee3d&name_0=PR&name_1=Stateless+tests+%28amd_asan%2C+distributed+plan%2C+parallel%2C+1%2F4%29&name_2=Tests Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Formats/Impl/CSVRowInputFormat.cpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index d4984a08e99f..87a2e689ba9a 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB @@ -377,8 +378,23 @@ bool CSVFormatReader::readField( /// commas, which might be also used as delimiters. However, /// they do not contain empty unquoted fields, so this check /// works for tuples as well. - column.insertDefault(); - return false; + /// + /// Exception: `Nullable(Tuple())` with zero elements serializes to + /// an empty field in CSV, so an empty value is its only valid + /// representation. Let it fall through to normal deserialization + /// instead of inserting NULL as the default. + bool is_nullable_empty_tuple = false; + if (type->isNullable()) + { + if (const auto * tuple_type = typeid_cast(removeNullable(type).get())) + is_nullable_empty_tuple = tuple_type->getElements().empty(); + } + + if (!is_nullable_empty_tuple) + { + column.insertDefault(); + return false; + } } if (format_settings.csv.use_default_on_bad_values)