From be49be12cf00161b966d1cb6b61f3e7a6dab8e67 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Sat, 28 Feb 2026 15:13:47 +0800 Subject: [PATCH] [fix](variant) fix variant column data_serdes not synced after wrapp_array_nullable and ensure_root_node_type When `wrapp_array_nullable()` wraps a ColumnArray with ColumnNullable, it updates `data_types[0]` but not `data_serdes[0]`, leaving a stale serde. Similarly, `ensure_root_node_type()` casts the root column (e.g. INT32 -> JSONB) and updates `data[0]` and `data_types[0]`, but not `data_serdes[0]`. This causes a BE crash during MV refresh with row store enabled: when `serialize_text_json()` uses the stale `data_serdes[0]` (e.g. INT32 serde) on the actual column data (e.g. JSONB ColumnString), `assert_cast` fails. Fix: sync `data_serdes[0]` in both `wrapp_array_nullable()` and `ensure_root_node_type()` after updating `data_types[0]`. --- be/src/vec/columns/column_variant.cpp | 2 + .../mv/variant_mv_rowstore_crash.out | 11 +++ .../mv/variant_mv_rowstore_crash.groovy | 93 +++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 regression-test/data/variant_p0/mv/variant_mv_rowstore_crash.out create mode 100644 regression-test/suites/variant_p0/mv/variant_mv_rowstore_crash.groovy diff --git a/be/src/vec/columns/column_variant.cpp b/be/src/vec/columns/column_variant.cpp index 1129196fc93fbb..98a54a092142eb 100644 --- a/be/src/vec/columns/column_variant.cpp +++ b/be/src/vec/columns/column_variant.cpp @@ -1421,6 +1421,7 @@ void ColumnVariant::Subcolumn::wrapp_array_nullable() { } result_column = ColumnNullable::create(std::move(result_column), std::move(new_null_map)); data_types[0] = make_nullable(data_types[0]); + data_serdes[0] = generate_data_serdes(data_types[0], is_root); least_common_type = LeastCommonType {data_types[0], is_root}; } } @@ -2042,6 +2043,7 @@ void ColumnVariant::ensure_root_node_type(const DataTypePtr& expected_root_type) expected_root_type, &casted_column)); root.data[0] = casted_column; 
root.data_types[0] = expected_root_type; + root.data_serdes[0] = Subcolumn::generate_data_serdes(expected_root_type, true); root.least_common_type = Subcolumn::LeastCommonType {expected_root_type, true}; root.num_rows = casted_column->size(); } diff --git a/regression-test/data/variant_p0/mv/variant_mv_rowstore_crash.out b/regression-test/data/variant_p0/mv/variant_mv_rowstore_crash.out new file mode 100644 index 00000000000000..77086f5235b57b --- /dev/null +++ b/regression-test/data/variant_p0/mv/variant_mv_rowstore_crash.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !mv -- +1 1 [{"x":1},{"x":2}] +2 2 [{"x":3}] +3 3 [{"x":4},{"x":5},{"x":6}] + +-- !direct -- +1 [{"x":1},{"x":2}] +2 [{"x":3}] +3 [{"x":4},{"x":5},{"x":6}] + diff --git a/regression-test/suites/variant_p0/mv/variant_mv_rowstore_crash.groovy b/regression-test/suites/variant_p0/mv/variant_mv_rowstore_crash.groovy new file mode 100644 index 00000000000000..e57858e86dfdc0 --- /dev/null +++ b/regression-test/suites/variant_p0/mv/variant_mv_rowstore_crash.groovy @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// Regression test for variant column with rowstore MV crash +// Bug: wrapp_array_nullable() updates data_types but not data_serdes, +// and ensure_root_node_type() updates data/data_types but not data_serdes, +// causing assert_cast failure during row store serialization in MV refresh. + +suite("variant_mv_rowstore_crash", "variant_type") { + + def tbl = "var_mv_rs_tbl" + def mv_name = "var_mv_rs_mv" + + sql "DROP MATERIALIZED VIEW IF EXISTS ${mv_name}" + sql "DROP TABLE IF EXISTS ${tbl}" + + // Test 1: MV with variant array extraction + rowstore + sql """ + CREATE TABLE ${tbl} ( + k int, + v variant NULL + ) ENGINE=OLAP + DUPLICATE KEY(k) + DISTRIBUTED BY HASH(k) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1", + "store_row_column" = "true" + ); + """ + + // All rows have non-null array values in a single batch + sql """INSERT INTO ${tbl} VALUES + (1, '{"a":1,"arr":[{"x":1},{"x":2}]}'), + (2, '{"a":2,"arr":[{"x":3}]}'), + (3, '{"a":3,"arr":[{"x":4},{"x":5},{"x":6}]}')""" + + sql "DROP MATERIALIZED VIEW IF EXISTS ${mv_name}" + sql """ + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH AUTO ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_num" = "1", + "store_row_column" = "true" + ) + AS SELECT k, v['a'] AS a, v['arr'] AS arr FROM ${tbl}; + """ + + String db = context.config.getDbNameByFile(context.file) + def job_name = getJobName(db, mv_name) + waitingMTMVTaskFinished(job_name) + + order_qt_mv "SELECT * FROM ${mv_name} ORDER BY k" + + // Test 2: INSERT INTO ... 
SELECT variant subcolumn into rowstore table + // This ensures variant goes through parse_and_materialize_variant_columns + // with ensure_root_node_type path + def tbl3 = "var_rs_target_tbl" + sql "DROP TABLE IF EXISTS ${tbl3}" + sql """ + CREATE TABLE ${tbl3} ( + k int, + arr variant NULL + ) ENGINE=OLAP + DUPLICATE KEY(k) + DISTRIBUTED BY HASH(k) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1", + "store_row_column" = "true" + ); + """ + // Insert variant subcolumn (array type) into rowstore table + sql """INSERT INTO ${tbl3} SELECT k, v['arr'] FROM ${tbl}""" + order_qt_direct "SELECT * FROM ${tbl3} ORDER BY k" + + sql "DROP MATERIALIZED VIEW IF EXISTS ${mv_name}" + sql "DROP TABLE IF EXISTS ${tbl}" + sql "DROP TABLE IF EXISTS ${tbl3}" +}