From d3746175ccb0a5d6108a5bcd98a548abe111e054 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 2 Jun 2026 15:51:38 +0800 Subject: [PATCH 01/22] [refactor](be) Avoid rebinding storage expression column ids ### What problem does this PR solve? Issue Number: N/A Related PR: N/A Problem Summary: Storage expression pushdown previously rebound slot refs to the expanded reader schema by mutating expression column ids inside segment iterators. That violates the assumption that pushed-down expressions are immutable and can produce surprising behavior when the same expression objects are reused. This change keeps the full scan schema unchanged, adds a project schema aligned with the final projected columns, and evaluates pushed-down expressions against a temporary project block. Slot ordinal to tablet column id mapping now uses the project schema, so no expression mutation is required. ### Release note None ### Check List (For Author) - Test: Manual test - ./build.sh --be - Behavior changed: No - Does this need documentation: No --- be/src/storage/iterators.h | 2 + be/src/storage/rowset/beta_rowset_reader.cpp | 3 + be/src/storage/segment/segment_iterator.cpp | 247 +++++++------------ be/src/storage/segment/segment_iterator.h | 5 + 4 files changed, 103 insertions(+), 154 deletions(-) diff --git a/be/src/storage/iterators.h b/be/src/storage/iterators.h index 7c0b78959d79bc..2292a898edd0f8 100644 --- a/be/src/storage/iterators.h +++ b/be/src/storage/iterators.h @@ -128,6 +128,8 @@ class StorageReadOptions { std::vector* read_orderby_key_columns = nullptr; io::IOContext io_ctx; VExprContextSPtrs common_expr_ctxs_push_down; + // Final scan project columns before storage-side read schema expansion. 
+ const std::vector* project_columns = nullptr; const std::set* output_columns = nullptr; // runtime state RuntimeState* runtime_state = nullptr; diff --git a/be/src/storage/rowset/beta_rowset_reader.cpp b/be/src/storage/rowset/beta_rowset_reader.cpp index 717a555264a0d1..afd6545f98842f 100644 --- a/be/src/storage/rowset/beta_rowset_reader.cpp +++ b/be/src/storage/rowset/beta_rowset_reader.cpp @@ -100,6 +100,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.stats = _stats; _read_options.push_down_agg_type_opt = _read_context->push_down_agg_type_opt; _read_options.common_expr_ctxs_push_down = _read_context->common_expr_ctxs_push_down; + _read_options.project_columns = _read_context->origin_return_columns != nullptr + ? _read_context->origin_return_columns + : _read_context->return_columns; _read_options.virtual_column_exprs = _read_context->virtual_column_exprs; _read_options.all_access_paths = _read_context->all_access_paths; diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 1bf4dc261db777..53970ddd7db2b3 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -69,7 +69,6 @@ #include "io/cache/cached_remote_file_reader.h" #include "io/fs/file_reader.h" #include "io/io_common.h" -#include "runtime/descriptors.h" #include "runtime/query_context.h" #include "runtime/runtime_predicate.h" #include "runtime/runtime_state.h" @@ -109,127 +108,11 @@ #include "storage/utils.h" #include "util/concurrency_stats.h" #include "util/defer_op.h" -#include "util/json/path_in_data.h" #include "util/simd/bits.h" namespace doris { using namespace ErrorCode; namespace segment_v2 { -namespace { - -Status tablet_column_id_by_slot(const TabletSchemaSPtr& tablet_schema, const SlotDescriptor* slot, - ColumnId* cid) { - int32_t field_index = -1; - if (slot->type()->get_primitive_type() == PrimitiveType::TYPE_VARIANT) { - field_index 
= tablet_schema->field_index( - PathInData(tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case(), - slot->column_paths())); - } else { - field_index = slot->col_unique_id() >= 0 ? tablet_schema->field_index(slot->col_unique_id()) - : tablet_schema->field_index(slot->col_name()); - } - if (field_index < 0) { - return Status::InternalError( - "field name is invalid. field={}, field_name_to_index={}, col_unique_id={}", - slot->col_name(), tablet_schema->get_all_field_names(), slot->col_unique_id()); - } - *cid = field_index; - return Status::OK(); -} - -Status rebind_storage_expr_to_reader_schema( - const StorageReadOptions& opts, const VExprSPtr& expr, - const std::unordered_map& cid_to_pos) { - DORIS_CHECK(expr != nullptr); - - if (expr->is_slot_ref()) { - auto slot_ref = std::static_pointer_cast(expr); - auto* slot = opts.runtime_state->desc_tbl().get_slot_descriptor(slot_ref->slot_id()); - if (slot == nullptr) { - return Status::InternalError("slot {} is not found in descriptor table", - slot_ref->slot_id()); - } - - ColumnId cid = 0; - RETURN_IF_ERROR(tablet_column_id_by_slot(opts.tablet_schema, slot, &cid)); - auto pos_it = cid_to_pos.find(cid); - if (pos_it == cid_to_pos.end()) { - return Status::InternalError("slot {} column {} with cid {} is not in reader schema", - slot_ref->slot_id(), slot->col_name(), cid); - } - slot_ref->set_column_id(cast_set(pos_it->second)); - } else if (expr->is_virtual_slot_ref()) { - auto virtual_slot_ref = std::static_pointer_cast(expr); - auto* slot = - opts.runtime_state->desc_tbl().get_slot_descriptor(virtual_slot_ref->slot_id()); - if (slot == nullptr) { - return Status::InternalError("slot {} is not found in descriptor table", - virtual_slot_ref->slot_id()); - } - - ColumnId cid = 0; - RETURN_IF_ERROR(tablet_column_id_by_slot(opts.tablet_schema, slot, &cid)); - auto pos_it = cid_to_pos.find(cid); - if (pos_it == cid_to_pos.end()) { - return Status::InternalError( - "virtual slot {} column {} with cid {} is not 
in reader schema", - virtual_slot_ref->slot_id(), slot->col_name(), cid); - } - virtual_slot_ref->set_column_id(cast_set(pos_it->second)); - // A virtual slot has its own output position in the reader block, and its - // materialization expression may also contain real slot refs. Rebind both - // sides so evaluating the virtual expression reads from the same block - // layout used by SegmentIterator. - RETURN_IF_ERROR(rebind_storage_expr_to_reader_schema( - opts, virtual_slot_ref->get_virtual_column_expr(), cid_to_pos)); - } - - for (const auto& child : expr->children()) { - RETURN_IF_ERROR(rebind_storage_expr_to_reader_schema(opts, child, cid_to_pos)); - } - return Status::OK(); -} - -Status rebind_storage_exprs_to_reader_schema(const StorageReadOptions& opts, const Schema& schema, - const VExprContextSPtrs& common_exprs, - std::map& virtual_exprs) { - if (common_exprs.empty() && virtual_exprs.empty()) { - return Status::OK(); - } - DORIS_CHECK(opts.runtime_state != nullptr); - DORIS_CHECK(opts.tablet_schema != nullptr); - - const auto keys_type = opts.tablet_schema->keys_type(); - if (keys_type == KeysType::DUP_KEYS || - (keys_type == KeysType::UNIQUE_KEYS && opts.enable_unique_key_merge_on_write)) { - return Status::OK(); - } - - // Storage exprs are prepared with RowDescriptor, so VSlotRef/VirtualSlotRef column_id points to - // the scan tuple column ordinal. SegmentIterator evaluates cloned exprs on a block built from - // the reader schema instead. AGG_KEYS and non-MOW UNIQUE_KEYS readers may expand the reader - // schema, for example by filling all key columns before merging/aggregating rows, so the scan - // tuple ordinal is not always the same as the runtime block ordinal. - // - // DUP_KEYS and UNIQUE_KEYS MOW use direct readers for query scans, so their reader block keeps - // the scan tuple layout and can skip this per-segment expression-tree traversal. 
For merge/agg - // readers, the reader schema is the source of truth: map tablet column id to reader-block - // position and rebind every storage expr slot to that position. - std::unordered_map cid_to_pos; - for (size_t pos = 0; pos < schema.num_column_ids(); ++pos) { - cid_to_pos.emplace(schema.column_id(cast_set(pos)), pos); - } - - for (const auto& ctx : common_exprs) { - RETURN_IF_ERROR(rebind_storage_expr_to_reader_schema(opts, ctx->root(), cid_to_pos)); - } - for (const auto& [_, ctx] : virtual_exprs) { - RETURN_IF_ERROR(rebind_storage_expr_to_reader_schema(opts, ctx->root(), cid_to_pos)); - } - return Status::OK(); -} - -} // namespace SegmentIterator::~SegmentIterator() = default; @@ -529,6 +412,7 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _vir_cid_to_idx_in_block = _opts.vir_cid_to_idx_in_block; _score_runtime = _opts.score_runtime; _ann_topn_runtime = _opts.ann_topn_runtime; + RETURN_IF_ERROR(_init_project_schema()); if (opts.output_columns != nullptr) { _output_columns = *(opts.output_columns); @@ -586,6 +470,55 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { return Status::OK(); } +Status SegmentIterator::_init_project_schema() { + _schema_block_id_map.assign(_schema->columns().size(), -1); + for (int i = 0; i < _schema->num_column_ids(); i++) { + auto cid = _schema->column_id(i); + _schema_block_id_map[cid] = i; + } + + _project_schema = _opts.project_columns != nullptr + ? 
std::make_shared(_schema->columns(), *_opts.project_columns) + : _schema; + return Status::OK(); +} + +Status SegmentIterator::_build_project_block(Block* block, uint16_t selected_size, + Block* project_block) { + project_block->clear(); + DORIS_CHECK(_project_schema != nullptr); + for (auto cid : _project_schema->column_ids()) { + auto loc = _schema_block_id_map[cid]; + auto& output_column = block->get_by_position(loc); + auto type = output_column.type; + auto column = output_column.column; + auto virtual_it = _vir_cid_to_idx_in_block.find(cid); + if (virtual_it != _vir_cid_to_idx_in_block.end()) { + auto type_it = _opts.vir_col_idx_to_type.find(virtual_it->second); + DORIS_CHECK(type_it != _opts.vir_col_idx_to_type.end()); + type = type_it->second; + if (!column || check_and_get_column(column.get()) || + column->size() != selected_size) { + column = ColumnNothing::create(selected_size); + } + } else { + if (!type) { + type = Schema::get_data_type_ptr(*_schema->column(cid)); + } + if (!column) { + return Status::InternalError( + "project column {} is not materialized before project block build", cid); + } + if (column->size() != selected_size) { + return Status::InternalError("project column {} has {} rows, expected {}", cid, + column->size(), selected_size); + } + } + project_block->insert({std::move(column), type, _schema->column(cid)->name()}); + } + return Status::OK(); +} + void SegmentIterator::_initialize_predicate_results() { // Initialize from _col_predicates for (auto pred : _col_predicates) { @@ -997,7 +930,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() { VLOG_DEBUG << fmt::format("Try apply ann topn: {}", _ann_topn_runtime->debug_string()); size_t src_col_idx = _ann_topn_runtime->get_src_column_idx(); - ColumnId src_cid = _schema->column_id(src_col_idx); + ColumnId src_cid = _project_schema->column_id(src_col_idx); IndexIterator* ann_index_iterator = _index_iterators[src_cid].get(); bool has_ann_index = _column_has_ann_index(src_cid); bool 
has_common_expr_push_down = !_common_expr_ctxs_push_down.empty(); @@ -1118,7 +1051,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() { _opts.stats->ann_index_topn_search_cnt += 1; _opts.stats->ann_index_cache_hits += ann_index_stats.topn_cache_hits.value(); const size_t dst_col_idx = _ann_topn_runtime->get_dest_column_idx(); - ColumnIterator* column_iter = _column_iterators[_schema->column_id(dst_col_idx)].get(); + ColumnIterator* column_iter = _column_iterators[_project_schema->column_id(dst_col_idx)].get(); DCHECK(column_iter != nullptr); VirtualColumnIterator* virtual_column_iter = dynamic_cast(column_iter); DCHECK(virtual_column_iter != nullptr); @@ -1263,8 +1196,9 @@ Status SegmentIterator::_extract_common_expr_columns(const VExprSPtr& expr) { auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { auto slot_expr = std::dynamic_pointer_cast(expr); - _is_common_expr_column[_schema->column_id(slot_expr->column_id())] = true; - _common_expr_columns.insert(_schema->column_id(slot_expr->column_id())); + auto cid = _project_schema->column_id(slot_expr->column_id()); + _is_common_expr_column[cid] = true; + _common_expr_columns.insert(cid); } else if (node_type == TExprNodeType::VIRTUAL_SLOT_REF) { std::shared_ptr virtual_slot_ref = std::dynamic_pointer_cast(expr); @@ -1365,7 +1299,7 @@ Status SegmentIterator::_apply_index_expr() { size_t origin_rows = _row_bitmap.cardinality(); bool ann_range_search_executed = false; RETURN_IF_ERROR(expr_ctx->evaluate_ann_range_search( - _index_iterators, _schema->column_ids(), _column_iterators, + _index_iterators, _project_schema->column_ids(), _column_iterators, _common_expr_to_slotref_map, _row_bitmap, ann_index_stats, enable_ann_index_result_cache, &ann_range_search_executed)); if (ann_range_search_executed) { @@ -2055,7 +1989,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { // ColumnId to column index in block // ColumnId will contail all columns in tablet schema, including virtual 
columns and global rowid column, - _schema_block_id_map.resize(_schema->columns().size(), -1); + _schema_block_id_map.assign(_schema->columns().size(), -1); // Use cols read by query to initialize _schema_block_id_map. // We need to know the index of each column in the block. // There is an assumption here that the columns in the block are in the same order as in the read schema. @@ -2085,7 +2019,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { } for (auto pair : _vir_cid_to_idx_in_block) { - _columns_to_filter.push_back(cast_set(pair.second)); + _columns_to_filter.push_back(_schema_block_id_map[pair.first]); } } } @@ -2824,8 +2758,8 @@ Status SegmentIterator::next_batch(Block* block) { // So a replacement of nothing column with real column is needed. const auto& idx_to_datatype = _opts.vir_col_idx_to_type; for (const auto& pair : _vir_cid_to_idx_in_block) { - size_t idx = pair.second; - auto type = idx_to_datatype.find(idx)->second; + auto idx = _schema_block_id_map[pair.first]; + auto type = idx_to_datatype.find(pair.second)->second; block->replace_by_position(idx, type->create_column()); } @@ -3174,12 +3108,6 @@ Status SegmentIterator::_process_common_expr(uint16_t* sel_rowid_idx, uint16_t& _selected_size)); } - std::vector common_ctxs; - common_ctxs.reserve(_common_expr_ctxs_push_down.size()); - for (auto& ctx : _common_expr_ctxs_push_down) { - common_ctxs.push_back(ctx.get()); - } - _output_index_result_column(common_ctxs, _sel_rowid_idx.data(), _selected_size, block); RETURN_IF_ERROR(_execute_common_expr(_sel_rowid_idx.data(), _selected_size, block)); if (need_mock_col) { @@ -3195,14 +3123,27 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& Block* block) { SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); DCHECK(!_common_expr_ctxs_push_down.empty()); - DCHECK(block->rows() != 0); - int prev_columns = block->columns(); + Block project_block; + RETURN_IF_ERROR(_build_project_block(block, selected_size, 
&project_block)); + std::vector common_ctxs; + common_ctxs.reserve(_common_expr_ctxs_push_down.size()); + for (auto& ctx : _common_expr_ctxs_push_down) { + common_ctxs.push_back(ctx.get()); + } + _output_index_result_column(common_ctxs, sel_rowid_idx, selected_size, &project_block); + + DCHECK(project_block.rows() != 0); + int prev_columns = project_block.columns(); uint16_t original_size = selected_size; _opts.stats->expr_cond_input_rows += original_size; IColumn::Filter filter; + std::vector expr_columns_to_filter(prev_columns); + std::iota(expr_columns_to_filter.begin(), expr_columns_to_filter.end(), 0); RETURN_IF_ERROR(VExprContext::execute_conjuncts_and_filter_block( - _common_expr_ctxs_push_down, block, _columns_to_filter, prev_columns, filter)); + _common_expr_ctxs_push_down, &project_block, expr_columns_to_filter, prev_columns, + filter)); + RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, _columns_to_filter, filter)); selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); _opts.stats->rows_expr_cond_filtered += original_size - selected_size; @@ -3366,7 +3307,7 @@ Status SegmentIterator::_construct_compound_expr_context() { .io_ctx = _opts.io_ctx, }; auto inverted_index_context = std::make_shared( - _schema->column_ids(), _index_iterators, _storage_name_and_type, + _project_schema->column_ids(), _index_iterators, _storage_name_and_type, _common_expr_index_exec_status, _score_runtime, _segment.get(), iter_opts); inverted_index_context->set_index_query_context(_index_query_context); for (const auto& expr_ctx : _opts.common_expr_ctxs_push_down) { @@ -3386,8 +3327,6 @@ Status SegmentIterator::_construct_compound_expr_context() { context->set_index_context(inverted_index_context); expr_ctx = context; } - RETURN_IF_ERROR(rebind_storage_exprs_to_reader_schema( - _opts, *_schema, _common_expr_ctxs_push_down, _virtual_column_exprs)); return Status::OK(); } @@ -3425,8 +3364,9 @@ void 
SegmentIterator::_calculate_common_expr_index_exec_status() { for (const auto& vir_child : vir_node->children()) { if (vir_child->is_slot_ref()) { auto* inner_slot_ref = assert_cast(vir_child.get()); - _common_expr_index_exec_status[_schema->column_id( - inner_slot_ref->column_id())][expr.get()] = false; + auto cid = + _project_schema->column_id(inner_slot_ref->column_id()); + _common_expr_index_exec_status[cid][expr.get()] = false; _common_expr_to_slotref_map[root_expr_ctx.get()] [inner_slot_ref->column_id()] = expr.get(); @@ -3443,8 +3383,8 @@ void SegmentIterator::_calculate_common_expr_index_exec_status() { auto expr_without_cast = VExpr::expr_without_cast(child); if (expr_without_cast->is_slot_ref() && expr->op() != TExprOpcode::CAST) { auto* column_slot_ref = assert_cast(expr_without_cast.get()); - _common_expr_index_exec_status[_schema->column_id(column_slot_ref->column_id())] - [expr.get()] = false; + auto cid = _project_schema->column_id(column_slot_ref->column_id()); + _common_expr_index_exec_status[cid][expr.get()] = false; _common_expr_to_slotref_map[root_expr_ctx.get()][column_slot_ref->column_id()] = expr.get(); } @@ -3547,7 +3487,8 @@ bool SegmentIterator::_can_opt_limit_reads() { // Before get next batch. make sure all virtual columns in block has type ColumnNothing. void SegmentIterator::_init_virtual_columns(Block* block) { for (const auto& pair : _vir_cid_to_idx_in_block) { - auto& col_with_type_and_name = block->get_by_position(pair.second); + auto idx = _schema_block_id_map[pair.first]; + auto& col_with_type_and_name = block->get_by_position(idx); col_with_type_and_name.column = ColumnNothing::create(0); col_with_type_and_name.type = _opts.vir_col_idx_to_type[pair.second]; } @@ -3558,7 +3499,8 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { // So materialize virtual column in advance to avoid errors. 
if (block->rows() == 0) { for (const auto& pair : _vir_cid_to_idx_in_block) { - auto& col_with_type_and_name = block->get_by_position(pair.second); + auto idx = _schema_block_id_map[pair.first]; + auto& col_with_type_and_name = block->get_by_position(idx); col_with_type_and_name.column = _opts.vir_col_idx_to_type[pair.second]->create_column(); col_with_type_and_name.type = _opts.vir_col_idx_to_type[pair.second]; } @@ -3568,14 +3510,8 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto column_expr = cid_and_expr.second; - size_t idx_in_block = _vir_cid_to_idx_in_block[cid]; - if (block->columns() <= idx_in_block) { - return Status::InternalError( - "Virtual column index {} is out of range, block columns {}, " - "virtual columns size {}, virtual column expr {}", - idx_in_block, block->columns(), _vir_cid_to_idx_in_block.size(), - column_expr->root()->debug_string()); - } else if (block->get_by_position(idx_in_block).column.get() == nullptr) { + auto idx_in_block = _schema_block_id_map[cid]; + if (block->get_by_position(idx_in_block).column.get() == nullptr) { return Status::InternalError( "Virtual column index {} is null, block columns {}, virtual columns size {}, " "virtual column expr {}", @@ -3587,7 +3523,10 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", cid, idx_in_block); ColumnPtr result_column; - RETURN_IF_ERROR(column_expr->execute(block, result_column)); + Block project_block; + RETURN_IF_ERROR( + _build_project_block(block, cast_set(block->rows()), &project_block)); + RETURN_IF_ERROR(column_expr->execute(&project_block, result_column)); block->replace_by_position(idx_in_block, std::move(result_column)); if (block->get_by_position(idx_in_block).column->size() == 0) { @@ -3623,7 +3562,7 @@ void 
SegmentIterator::_prepare_score_column_materialization() { result_row_ids, filter); } const size_t dst_col_idx = _score_runtime->get_dest_column_idx(); - auto* column_iter = _column_iterators[_schema->column_id(dst_col_idx)].get(); + auto* column_iter = _column_iterators[_project_schema->column_id(dst_col_idx)].get(); auto* virtual_column_iter = dynamic_cast(column_iter); virtual_column_iter->prepare_materialization( std::move(result_column), diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 8fd143867ed97b..f34487bc0896e5 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -273,6 +273,9 @@ class SegmentIterator : public RowwiseIterator { bool _can_evaluated_by_vectorized(std::shared_ptr predicate); + [[nodiscard]] Status _init_project_schema(); + [[nodiscard]] Status _build_project_block(Block* block, uint16_t selected_size, + Block* project_block); [[nodiscard]] Status _extract_common_expr_columns(const VExprSPtr& expr); // same with _extract_common_expr_columns, but only extract columns that can be used for index [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, @@ -348,6 +351,8 @@ class SegmentIterator : public RowwiseIterator { std::shared_ptr _segment; // read schema from scanner SchemaSPtr _schema; + // final scan project schema before storage-side read schema expansion + SchemaSPtr _project_schema; // storage type schema related to _schema, since column in segment may be different with type in _schema std::vector _storage_name_and_type; // vector idx -> column iterarator From f8bc485c0802b3a79c66e80d47b87a3dcf46266e Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 2 Jun 2026 18:41:33 +0800 Subject: [PATCH 02/22] update --- be/src/storage/segment/segment_iterator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/storage/segment/segment_iterator.cpp 
b/be/src/storage/segment/segment_iterator.cpp index 53970ddd7db2b3..673eea4af1cddf 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -478,7 +478,8 @@ Status SegmentIterator::_init_project_schema() { } _project_schema = _opts.project_columns != nullptr - ? std::make_shared(_schema->columns(), *_opts.project_columns) + ? std::make_shared(_opts.tablet_schema->columns(), + *_opts.project_columns) : _schema; return Status::OK(); } From 66d261a34b2c2d2d2cab2a975bfca1af9ea4cba6 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 2 Jun 2026 23:01:39 +0800 Subject: [PATCH 03/22] update --- be/src/storage/segment/segment_iterator.cpp | 94 ++++++--------------- be/src/storage/segment/segment_iterator.h | 6 +- 2 files changed, 27 insertions(+), 73 deletions(-) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 673eea4af1cddf..02ec6305e9b02f 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -484,8 +484,7 @@ Status SegmentIterator::_init_project_schema() { return Status::OK(); } -Status SegmentIterator::_build_project_block(Block* block, uint16_t selected_size, - Block* project_block) { +Status SegmentIterator::_build_project_block(Block* block, Block* project_block) { project_block->clear(); DORIS_CHECK(_project_schema != nullptr); for (auto cid : _project_schema->column_ids()) { @@ -498,22 +497,6 @@ Status SegmentIterator::_build_project_block(Block* block, uint16_t selected_siz auto type_it = _opts.vir_col_idx_to_type.find(virtual_it->second); DORIS_CHECK(type_it != _opts.vir_col_idx_to_type.end()); type = type_it->second; - if (!column || check_and_get_column(column.get()) || - column->size() != selected_size) { - column = ColumnNothing::create(selected_size); - } - } else { - if (!type) { - type = Schema::get_data_type_ptr(*_schema->column(cid)); - } - if (!column) { - return 
Status::InternalError( - "project column {} is not materialized before project block build", cid); - } - if (column->size() != selected_size) { - return Status::InternalError("project column {} has {} rows, expected {}", cid, - column->size(), selected_size); - } } project_block->insert({std::move(column), type, _schema->column(cid)->name()}); } @@ -3003,7 +2986,7 @@ Status SegmentIterator::_next_batch_internal(Block* block) { for (auto& [cid, ctx] : _virtual_column_exprs) { vir_ctxs.push_back(ctx.get()); } - _output_index_result_column(vir_ctxs, sel_rowid_idx, _selected_size, block); + _output_index_result_column(vir_ctxs, sel_rowid_idx, _selected_size); } RETURN_IF_ERROR(_materialization_of_virtual_column(block)); if (_opts.read_limit > 0) { @@ -3094,26 +3077,10 @@ Status SegmentIterator::_process_eof(Block* block) { Status SegmentIterator::_process_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, Block* block) { - // Here we just use col0 as row_number indicator. when reach here, we will calculate the predicates first. - // then use the result to reduce our data read(that is, expr push down). there's now row in block means the first - // column is not in common expr. so it's safe to replace it temporarily to provide correct `selected_size`. VLOG_DEBUG << fmt::format("Execute common expr. block rows {}, selected size {}", block->rows(), _selected_size); - bool need_mock_col = block->rows() != selected_size; - MutableColumnPtr col0; - if (need_mock_col) { - col0 = std::move(*block->get_by_position(0).column).mutate(); - block->replace_by_position( - 0, block->get_by_position(0).type->create_column_const_with_default_value( - _selected_size)); - } - - RETURN_IF_ERROR(_execute_common_expr(_sel_rowid_idx.data(), _selected_size, block)); - - if (need_mock_col) { - block->replace_by_position(0, std::move(col0)); - } + RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block)); VLOG_DEBUG << fmt::format("Execute common expr end. 
block rows {}, selected size {}", block->rows(), _selected_size); @@ -3125,25 +3092,26 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); DCHECK(!_common_expr_ctxs_push_down.empty()); Block project_block; - RETURN_IF_ERROR(_build_project_block(block, selected_size, &project_block)); + RETURN_IF_ERROR(_build_project_block(block, &project_block)); std::vector common_ctxs; common_ctxs.reserve(_common_expr_ctxs_push_down.size()); for (auto& ctx : _common_expr_ctxs_push_down) { common_ctxs.push_back(ctx.get()); } - _output_index_result_column(common_ctxs, sel_rowid_idx, selected_size, &project_block); + _output_index_result_column(common_ctxs, sel_rowid_idx, selected_size); - DCHECK(project_block.rows() != 0); - int prev_columns = project_block.columns(); uint16_t original_size = selected_size; _opts.stats->expr_cond_input_rows += original_size; - IColumn::Filter filter; - std::vector expr_columns_to_filter(prev_columns); - std::iota(expr_columns_to_filter.begin(), expr_columns_to_filter.end(), 0); - RETURN_IF_ERROR(VExprContext::execute_conjuncts_and_filter_block( - _common_expr_ctxs_push_down, &project_block, expr_columns_to_filter, prev_columns, - filter)); + IColumn::Filter filter(selected_size, 1); + bool can_filter_all = false; + for (const auto& ctx : _common_expr_ctxs_push_down) { + RETURN_IF_ERROR(ctx->execute_filter(&project_block, filter.data(), selected_size, false, + &can_filter_all)); + if (can_filter_all) { + break; + } + } RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, _columns_to_filter, filter)); selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); @@ -3194,10 +3162,9 @@ uint16_t SegmentIterator::_evaluate_common_expr_filter(uint16_t* sel_rowid_idx, } void SegmentIterator::_output_index_result_column(const std::vector& expr_ctxs, - uint16_t* sel_rowid_idx, uint16_t select_size, - Block* block) { + uint16_t* sel_rowid_idx, 
uint16_t select_size) { SCOPED_RAW_TIMER(&_opts.stats->output_index_result_column_timer); - if (block->rows() == 0) { + if (select_size == 0) { return; } for (auto* expr_ctx_ptr : expr_ctxs) { @@ -3211,7 +3178,7 @@ void SegmentIterator::_output_index_result_column(const std::vectorget_data(); - vec_match_pred.resize(block->rows()); + vec_match_pred.resize(select_size); std::fill(vec_match_pred.begin(), vec_match_pred.end(), 0); const auto& null_bitmap = result_bitmap.get_null_bitmap(); @@ -3223,7 +3190,7 @@ void SegmentIterator::_output_index_result_column(const std::vectorget_data(); - null_map_vec.resize(block->rows()); + null_map_vec.resize(select_size); std::fill(null_map_vec.begin(), null_map_vec.end(), 0); null_map_data = &null_map_column->get_data(); } @@ -3240,7 +3207,7 @@ void SegmentIterator::_output_index_result_column(const std::vectorrows() == vec_match_pred.size()); + DCHECK(select_size == vec_match_pred.size()); if (null_map_column) { index_ctx->set_index_result_column_for_expr( @@ -3498,7 +3465,7 @@ void SegmentIterator::_init_virtual_columns(Block* block) { Status SegmentIterator::_materialization_of_virtual_column(Block* block) { // Some expr can not process empty block, such as function `element_at`. // So materialize virtual column in advance to avoid errors. 
- if (block->rows() == 0) { + if (_selected_size == 0) { for (const auto& pair : _vir_cid_to_idx_in_block) { auto idx = _schema_block_id_map[pair.first]; auto& col_with_type_and_name = block->get_by_position(idx); @@ -3512,28 +3479,17 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { auto cid = cid_and_expr.first; auto column_expr = cid_and_expr.second; auto idx_in_block = _schema_block_id_map[cid]; - if (block->get_by_position(idx_in_block).column.get() == nullptr) { - return Status::InternalError( - "Virtual column index {} is null, block columns {}, virtual columns size {}, " - "virtual column expr {}", - idx_in_block, block->columns(), _vir_cid_to_idx_in_block.size(), - column_expr->root()->debug_string()); - } - if (check_and_get_column( - block->get_by_position(idx_in_block).column.get())) { + auto& column = block->get_by_position(idx_in_block).column; + if (check_and_get_column(column.get())) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", cid, idx_in_block); ColumnPtr result_column; Block project_block; - RETURN_IF_ERROR( - _build_project_block(block, cast_set(block->rows()), &project_block)); - RETURN_IF_ERROR(column_expr->execute(&project_block, result_column)); + RETURN_IF_ERROR(_build_project_block(block, &project_block)); + RETURN_IF_ERROR(column_expr->root()->execute_column( + column_expr.get(), &project_block, nullptr, _selected_size, result_column)); block->replace_by_position(idx_in_block, std::move(result_column)); - if (block->get_by_position(idx_in_block).column->size() == 0) { - LOG_WARNING("Result of expr column {} is empty. 
cid {}, idx_in_block {}", - column_expr->root()->debug_string(), cid, idx_in_block); - } } } return Status::OK(); diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index f34487bc0896e5..37baba95e51856 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -274,10 +274,8 @@ class SegmentIterator : public RowwiseIterator { bool _can_evaluated_by_vectorized(std::shared_ptr predicate); [[nodiscard]] Status _init_project_schema(); - [[nodiscard]] Status _build_project_block(Block* block, uint16_t selected_size, - Block* project_block); + [[nodiscard]] Status _build_project_block(Block* block, Block* project_block); [[nodiscard]] Status _extract_common_expr_columns(const VExprSPtr& expr); - // same with _extract_common_expr_columns, but only extract columns that can be used for index [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, Block* block); Status _process_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, Block* block); @@ -294,7 +292,7 @@ class SegmentIterator : public RowwiseIterator { bool _check_apply_by_inverted_index(std::shared_ptr pred); void _output_index_result_column(const std::vector& expr_ctxs, - uint16_t* sel_rowid_idx, uint16_t select_size, Block* block); + uint16_t* sel_rowid_idx, uint16_t select_size); bool _need_read_data(ColumnId cid); bool _prune_column(ColumnId cid, MutableColumnPtr& column, bool fill_defaults, From efaaad49cf67ac3e7bec83698805050dc671b13c Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Fri, 5 Jun 2026 18:40:12 +0800 Subject: [PATCH 04/22] [refactor](be) Refine storage project block handling ### What problem does this PR solve? Issue Number: None Related PR: #64010 Problem Summary: Address review comments for avoiding storage expression column id rebinding. 
The project-schema column-id to block-position map was initialized in multiple places, and project-block construction still ran on ordinary scan paths where the project schema matched the reader schema. Virtual column materialization also rebuilt the project block for each expression. This change centralizes schema block mapping initialization, reuses the original block when the project schema is identical to the reader schema, reuses the projected block during virtual column materialization, and keeps expression exceptions converted to Status at the virtual-column evaluation boundary. ### Release note None ### Check List (For Author) - Test: Unit Test - ./build.sh --be - build-support/run-clang-tidy.sh attempted; it reported pre-existing/analysis diagnostics in segment_iterator.cpp, segment_iterator.h, and be/src/util/jni-util.h unrelated to the changed logic. - Behavior changed: No - Does this need documentation: No --- be/src/storage/segment/segment_iterator.cpp | 81 ++++++++++++++------- be/src/storage/segment/segment_iterator.h | 5 +- 2 files changed, 57 insertions(+), 29 deletions(-) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 02ec6305e9b02f..2397f01b60b74d 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -470,23 +470,35 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { return Status::OK(); } -Status SegmentIterator::_init_project_schema() { +void SegmentIterator::_init_schema_block_id_map() { _schema_block_id_map.assign(_schema->columns().size(), -1); for (int i = 0; i < _schema->num_column_ids(); i++) { auto cid = _schema->column_id(i); _schema_block_id_map[cid] = i; } +} - _project_schema = _opts.project_columns != nullptr - ? 
std::make_shared(_opts.tablet_schema->columns(), - *_opts.project_columns) - : _schema; +Status SegmentIterator::_init_project_schema() { + _init_schema_block_id_map(); + if (_opts.project_columns == nullptr || *_opts.project_columns == _schema->column_ids()) { + _project_schema = _schema; + } else { + _project_schema = + std::make_shared(_opts.tablet_schema->columns(), *_opts.project_columns); + } return Status::OK(); } -Status SegmentIterator::_build_project_block(Block* block, Block* project_block) { - project_block->clear(); +Status SegmentIterator::_build_project_block(Block* block, Block* project_block, + Block** project_block_or_origin) { + DORIS_CHECK(project_block_or_origin != nullptr); DORIS_CHECK(_project_schema != nullptr); + if (_project_schema == _schema) { + *project_block_or_origin = block; + return Status::OK(); + } + + project_block->clear(); for (auto cid : _project_schema->column_ids()) { auto loc = _schema_block_id_map[cid]; auto& output_column = block->get_by_position(loc); @@ -500,9 +512,22 @@ Status SegmentIterator::_build_project_block(Block* block, Block* project_block) } project_block->insert({std::move(column), type, _schema->column(cid)->name()}); } + *project_block_or_origin = project_block; return Status::OK(); } +void SegmentIterator::_sync_project_block_column(Block* project_block, ColumnId cid, + const ColumnPtr& column) { + if (_project_schema == _schema) { + return; + } + const auto& project_column_ids = _project_schema->column_ids(); + auto it = std::find(project_column_ids.begin(), project_column_ids.end(), cid); + DORIS_CHECK(it != project_column_ids.end()); + project_block->replace_by_position( + static_cast(std::distance(project_column_ids.begin(), it)), column); +} + void SegmentIterator::_initialize_predicate_results() { // Initialize from _col_predicates for (auto pred : _col_predicates) { @@ -1971,19 +1996,6 @@ Status SegmentIterator::_vec_init_lazy_materialization() { _is_need_short_eval = true; } - // ColumnId to 
column index in block - // ColumnId will contail all columns in tablet schema, including virtual columns and global rowid column, - _schema_block_id_map.assign(_schema->columns().size(), -1); - // Use cols read by query to initialize _schema_block_id_map. - // We need to know the index of each column in the block. - // There is an assumption here that the columns in the block are in the same order as in the read schema. - // TODO: A probelm is that, delete condition columns will exist in _schema->column_ids but not in block if - // delete column is not read by the query. - for (int i = 0; i < _schema->num_column_ids(); i++) { - auto cid = _schema->column_id(i); - _schema_block_id_map[cid] = i; - } - // Step2: extract columns that can execute expr context _is_common_expr_column.resize(_schema->columns().size(), false); if (!_common_expr_ctxs_push_down.empty()) { @@ -3092,7 +3104,8 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); DCHECK(!_common_expr_ctxs_push_down.empty()); Block project_block; - RETURN_IF_ERROR(_build_project_block(block, &project_block)); + Block* project_block_or_origin = nullptr; + RETURN_IF_ERROR(_build_project_block(block, &project_block, &project_block_or_origin)); std::vector common_ctxs; common_ctxs.reserve(_common_expr_ctxs_push_down.size()); for (auto& ctx : _common_expr_ctxs_push_down) { @@ -3106,8 +3119,8 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& IColumn::Filter filter(selected_size, 1); bool can_filter_all = false; for (const auto& ctx : _common_expr_ctxs_push_down) { - RETURN_IF_ERROR(ctx->execute_filter(&project_block, filter.data(), selected_size, false, - &can_filter_all)); + RETURN_IF_ERROR(ctx->execute_filter(project_block_or_origin, filter.data(), selected_size, + false, &can_filter_all)); if (can_filter_all) { break; } @@ -3475,6 +3488,9 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* 
block) { return Status::OK(); } + Block project_block; + Block* project_block_or_origin = nullptr; + bool project_block_ready = false; for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto column_expr = cid_and_expr.second; @@ -3483,13 +3499,22 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { if (check_and_get_column(column.get())) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", cid, idx_in_block); + if (!project_block_ready) { + RETURN_IF_ERROR( + _build_project_block(block, &project_block, &project_block_or_origin)); + project_block_ready = true; + } ColumnPtr result_column; - Block project_block; - RETURN_IF_ERROR(_build_project_block(block, &project_block)); - RETURN_IF_ERROR(column_expr->root()->execute_column( - column_expr.get(), &project_block, nullptr, _selected_size, result_column)); - + Status st; + RETURN_IF_CATCH_EXCEPTION({ + st = column_expr->root()->execute_column(column_expr.get(), project_block_or_origin, + nullptr, _selected_size, result_column); + }); + RETURN_IF_ERROR(st); + + auto project_column = result_column; block->replace_by_position(idx_in_block, std::move(result_column)); + _sync_project_block_column(&project_block, cid, project_column); } } return Status::OK(); diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 37baba95e51856..12969c468a5833 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -273,8 +273,11 @@ class SegmentIterator : public RowwiseIterator { bool _can_evaluated_by_vectorized(std::shared_ptr predicate); + void _init_schema_block_id_map(); [[nodiscard]] Status _init_project_schema(); - [[nodiscard]] Status _build_project_block(Block* block, Block* project_block); + [[nodiscard]] Status _build_project_block(Block* block, Block* project_block, + Block** project_block_or_origin); + void 
_sync_project_block_column(Block* project_block, ColumnId cid, const ColumnPtr& column); [[nodiscard]] Status _extract_common_expr_columns(const VExprSPtr& expr); [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, Block* block); From 6498df9e89ec296861f361ddb67d5b48565a4ea7 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 8 Jun 2026 17:13:30 +0800 Subject: [PATCH 05/22] update --- be/src/storage/segment/segment_iterator.cpp | 67 +++++++++------------ be/src/storage/segment/segment_iterator.h | 6 +- 2 files changed, 31 insertions(+), 42 deletions(-) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 2397f01b60b74d..3e02177068a084 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -412,7 +412,7 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _vir_cid_to_idx_in_block = _opts.vir_cid_to_idx_in_block; _score_runtime = _opts.score_runtime; _ann_topn_runtime = _opts.ann_topn_runtime; - RETURN_IF_ERROR(_init_project_schema()); + _init_project_schema(); if (opts.output_columns != nullptr) { _output_columns = *(opts.output_columns); @@ -478,7 +478,7 @@ void SegmentIterator::_init_schema_block_id_map() { } } -Status SegmentIterator::_init_project_schema() { +void SegmentIterator::_init_project_schema() { _init_schema_block_id_map(); if (_opts.project_columns == nullptr || *_opts.project_columns == _schema->column_ids()) { _project_schema = _schema; @@ -486,16 +486,12 @@ Status SegmentIterator::_init_project_schema() { _project_schema = std::make_shared(_opts.tablet_schema->columns(), *_opts.project_columns); } - return Status::OK(); } -Status SegmentIterator::_build_project_block(Block* block, Block* project_block, - Block** project_block_or_origin) { - DORIS_CHECK(project_block_or_origin != nullptr); +Block* SegmentIterator::_build_project_block(Block* block, Block* project_block) { 
DORIS_CHECK(_project_schema != nullptr); if (_project_schema == _schema) { - *project_block_or_origin = block; - return Status::OK(); + return block; } project_block->clear(); @@ -512,20 +508,7 @@ Status SegmentIterator::_build_project_block(Block* block, Block* project_block, } project_block->insert({std::move(column), type, _schema->column(cid)->name()}); } - *project_block_or_origin = project_block; - return Status::OK(); -} - -void SegmentIterator::_sync_project_block_column(Block* project_block, ColumnId cid, - const ColumnPtr& column) { - if (_project_schema == _schema) { - return; - } - const auto& project_column_ids = _project_schema->column_ids(); - auto it = std::find(project_column_ids.begin(), project_column_ids.end(), cid); - DORIS_CHECK(it != project_column_ids.end()); - project_block->replace_by_position( - static_cast(std::distance(project_column_ids.begin(), it)), column); + return project_block; } void SegmentIterator::_initialize_predicate_results() { @@ -3104,8 +3087,7 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); DCHECK(!_common_expr_ctxs_push_down.empty()); Block project_block; - Block* project_block_or_origin = nullptr; - RETURN_IF_ERROR(_build_project_block(block, &project_block, &project_block_or_origin)); + Block* expr_block = _build_project_block(block, &project_block); std::vector common_ctxs; common_ctxs.reserve(_common_expr_ctxs_push_down.size()); for (auto& ctx : _common_expr_ctxs_push_down) { @@ -3119,8 +3101,8 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& IColumn::Filter filter(selected_size, 1); bool can_filter_all = false; for (const auto& ctx : _common_expr_ctxs_push_down) { - RETURN_IF_ERROR(ctx->execute_filter(project_block_or_origin, filter.data(), selected_size, - false, &can_filter_all)); + RETURN_IF_ERROR(ctx->execute_filter(expr_block, filter.data(), selected_size, false, + &can_filter_all)); if 
(can_filter_all) { break; } @@ -3487,36 +3469,45 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { } return Status::OK(); } + if (_virtual_column_exprs.empty()) { + return Status::OK(); + } Block project_block; - Block* project_block_or_origin = nullptr; - bool project_block_ready = false; + Block* materialize_block = _build_project_block(block, &project_block); for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto column_expr = cid_and_expr.second; + auto vir_it = _vir_cid_to_idx_in_block.find(cid); + DORIS_CHECK(vir_it != _vir_cid_to_idx_in_block.end()); auto idx_in_block = _schema_block_id_map[cid]; - auto& column = block->get_by_position(idx_in_block).column; + auto materialized_pos = _project_schema == _schema ? idx_in_block : vir_it->second; + auto& column = materialize_block->get_by_position(materialized_pos).column; if (check_and_get_column(column.get())) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", cid, idx_in_block); - if (!project_block_ready) { - RETURN_IF_ERROR( - _build_project_block(block, &project_block, &project_block_or_origin)); - project_block_ready = true; - } ColumnPtr result_column; Status st; RETURN_IF_CATCH_EXCEPTION({ - st = column_expr->root()->execute_column(column_expr.get(), project_block_or_origin, + st = column_expr->root()->execute_column(column_expr.get(), materialize_block, nullptr, _selected_size, result_column); }); RETURN_IF_ERROR(st); - auto project_column = result_column; - block->replace_by_position(idx_in_block, std::move(result_column)); - _sync_project_block_column(&project_block, cid, project_column); + materialize_block->replace_by_position(materialized_pos, std::move(result_column)); } } + if (materialize_block == block) { + return Status::OK(); + } + for (const auto& cid_and_expr : _virtual_column_exprs) { + auto cid = cid_and_expr.first; + auto idx_in_block = _schema_block_id_map[cid]; + auto 
materialized_pos = _vir_cid_to_idx_in_block.at(cid); + const auto& column = project_block.get_by_position(materialized_pos).column; + DORIS_CHECK(!check_and_get_column(column.get())); + block->replace_by_position(idx_in_block, column); + } return Status::OK(); } diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 12969c468a5833..cdca1015da5074 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -274,10 +274,8 @@ class SegmentIterator : public RowwiseIterator { bool _can_evaluated_by_vectorized(std::shared_ptr predicate); void _init_schema_block_id_map(); - [[nodiscard]] Status _init_project_schema(); - [[nodiscard]] Status _build_project_block(Block* block, Block* project_block, - Block** project_block_or_origin); - void _sync_project_block_column(Block* project_block, ColumnId cid, const ColumnPtr& column); + void _init_project_schema(); + Block* _build_project_block(Block* block, Block* project_block); [[nodiscard]] Status _extract_common_expr_columns(const VExprSPtr& expr); [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, Block* block); From 7fef88ba13f11f67da831aa1cf14aad445308d45 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 9 Jun 2026 10:45:46 +0800 Subject: [PATCH 06/22] update --- be/src/storage/segment/segment_iterator.cpp | 28 ++++++++++----------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 3e02177068a084..ca7431f547feaa 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -3475,17 +3475,16 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { Block project_block; Block* materialize_block = _build_project_block(block, &project_block); + const bool materialize_on_project_block = _project_schema != _schema; 
for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto column_expr = cid_and_expr.second; - auto vir_it = _vir_cid_to_idx_in_block.find(cid); - DORIS_CHECK(vir_it != _vir_cid_to_idx_in_block.end()); - auto idx_in_block = _schema_block_id_map[cid]; - auto materialized_pos = _project_schema == _schema ? idx_in_block : vir_it->second; + auto materialized_pos = materialize_on_project_block ? _vir_cid_to_idx_in_block.at(cid) + : _schema_block_id_map[cid]; auto& column = materialize_block->get_by_position(materialized_pos).column; if (check_and_get_column(column.get())) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", - cid, idx_in_block); + cid, materialized_pos); ColumnPtr result_column; Status st; RETURN_IF_CATCH_EXCEPTION({ @@ -3497,16 +3496,15 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { materialize_block->replace_by_position(materialized_pos, std::move(result_column)); } } - if (materialize_block == block) { - return Status::OK(); - } - for (const auto& cid_and_expr : _virtual_column_exprs) { - auto cid = cid_and_expr.first; - auto idx_in_block = _schema_block_id_map[cid]; - auto materialized_pos = _vir_cid_to_idx_in_block.at(cid); - const auto& column = project_block.get_by_position(materialized_pos).column; - DORIS_CHECK(!check_and_get_column(column.get())); - block->replace_by_position(idx_in_block, column); + if (materialize_block != block) { + for (const auto& cid_and_expr : _virtual_column_exprs) { + auto cid = cid_and_expr.first; + auto idx_in_block = _schema_block_id_map[cid]; + auto materialized_pos = _vir_cid_to_idx_in_block.at(cid); + const auto& column = project_block.get_by_position(materialized_pos).column; + DORIS_CHECK(!check_and_get_column(column.get())); + block->replace_by_position(idx_in_block, column); + } } return Status::OK(); } From 7dfe3912be1b4477eea30462938aa1894b07bf44 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 9 
Jun 2026 11:18:31 +0800 Subject: [PATCH 07/22] update --- be/src/storage/segment/segment_iterator.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index ca7431f547feaa..95fda9e0a4afb4 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -495,9 +495,10 @@ Block* SegmentIterator::_build_project_block(Block* block, Block* project_block) } project_block->clear(); - for (auto cid : _project_schema->column_ids()) { - auto loc = _schema_block_id_map[cid]; - auto& output_column = block->get_by_position(loc); + const auto& project_column_ids = _project_schema->column_ids(); + for (size_t i = 0; i < project_column_ids.size(); ++i) { + auto cid = project_column_ids[i]; + auto& output_column = block->get_by_position(i); auto type = output_column.type; auto column = output_column.column; auto virtual_it = _vir_cid_to_idx_in_block.find(cid); @@ -1998,7 +1999,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { } for (auto pair : _vir_cid_to_idx_in_block) { - _columns_to_filter.push_back(_schema_block_id_map[pair.first]); + _columns_to_filter.push_back(cast_set(pair.second)); } } } @@ -2737,9 +2738,8 @@ Status SegmentIterator::next_batch(Block* block) { // So a replacement of nothing column with real column is needed. const auto& idx_to_datatype = _opts.vir_col_idx_to_type; for (const auto& pair : _vir_cid_to_idx_in_block) { - auto idx = _schema_block_id_map[pair.first]; auto type = idx_to_datatype.find(pair.second)->second; - block->replace_by_position(idx, type->create_column()); + block->replace_by_position(pair.second, type->create_column()); } if (_opts.condition_cache_digest && !_find_condition_cache) { @@ -3450,8 +3450,7 @@ bool SegmentIterator::_can_opt_limit_reads() { // Before get next batch. make sure all virtual columns in block has type ColumnNothing. 
void SegmentIterator::_init_virtual_columns(Block* block) { for (const auto& pair : _vir_cid_to_idx_in_block) { - auto idx = _schema_block_id_map[pair.first]; - auto& col_with_type_and_name = block->get_by_position(idx); + auto& col_with_type_and_name = block->get_by_position(pair.second); col_with_type_and_name.column = ColumnNothing::create(0); col_with_type_and_name.type = _opts.vir_col_idx_to_type[pair.second]; } @@ -3462,8 +3461,7 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { // So materialize virtual column in advance to avoid errors. if (_selected_size == 0) { for (const auto& pair : _vir_cid_to_idx_in_block) { - auto idx = _schema_block_id_map[pair.first]; - auto& col_with_type_and_name = block->get_by_position(idx); + auto& col_with_type_and_name = block->get_by_position(pair.second); col_with_type_and_name.column = _opts.vir_col_idx_to_type[pair.second]->create_column(); col_with_type_and_name.type = _opts.vir_col_idx_to_type[pair.second]; } @@ -3499,11 +3497,10 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { if (materialize_block != block) { for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; - auto idx_in_block = _schema_block_id_map[cid]; auto materialized_pos = _vir_cid_to_idx_in_block.at(cid); const auto& column = project_block.get_by_position(materialized_pos).column; DORIS_CHECK(!check_and_get_column(column.get())); - block->replace_by_position(idx_in_block, column); + block->replace_by_position(materialized_pos, column); } } return Status::OK(); From 1120a619d9ef6c806ae3ecd75bbfb69173fd4bc7 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 9 Jun 2026 11:30:57 +0800 Subject: [PATCH 08/22] update --- be/src/storage/segment/segment_iterator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 95fda9e0a4afb4..71a1cf2b15876e 100644 --- 
a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -3494,7 +3494,7 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { materialize_block->replace_by_position(materialized_pos, std::move(result_column)); } } - if (materialize_block != block) { + if (materialize_on_project_block) { for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto materialized_pos = _vir_cid_to_idx_in_block.at(cid); From 9bdd717ad77852d5bdb422ff61255e1d3d53ba07 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 9 Jun 2026 11:39:02 +0800 Subject: [PATCH 09/22] update --- be/src/storage/segment/segment_iterator.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 71a1cf2b15876e..c04327916a694e 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -496,9 +496,9 @@ Block* SegmentIterator::_build_project_block(Block* block, Block* project_block) project_block->clear(); const auto& project_column_ids = _project_schema->column_ids(); - for (size_t i = 0; i < project_column_ids.size(); ++i) { - auto cid = project_column_ids[i]; - auto& output_column = block->get_by_position(i); + for (auto cid : project_column_ids) { + auto loc = _schema_block_id_map[cid]; + auto& output_column = block->get_by_position(loc); auto type = output_column.type; auto column = output_column.column; auto virtual_it = _vir_cid_to_idx_in_block.find(cid); @@ -1999,7 +1999,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { } for (auto pair : _vir_cid_to_idx_in_block) { - _columns_to_filter.push_back(cast_set(pair.second)); + _columns_to_filter.push_back(_schema_block_id_map[pair.first]); } } } @@ -2738,8 +2738,9 @@ Status SegmentIterator::next_batch(Block* block) { // So a replacement of nothing column with real 
column is needed. const auto& idx_to_datatype = _opts.vir_col_idx_to_type; for (const auto& pair : _vir_cid_to_idx_in_block) { + auto idx = _schema_block_id_map[pair.first]; auto type = idx_to_datatype.find(pair.second)->second; - block->replace_by_position(pair.second, type->create_column()); + block->replace_by_position(idx, type->create_column()); } if (_opts.condition_cache_digest && !_find_condition_cache) { @@ -3450,7 +3451,8 @@ bool SegmentIterator::_can_opt_limit_reads() { // Before get next batch. make sure all virtual columns in block has type ColumnNothing. void SegmentIterator::_init_virtual_columns(Block* block) { for (const auto& pair : _vir_cid_to_idx_in_block) { - auto& col_with_type_and_name = block->get_by_position(pair.second); + auto idx = _schema_block_id_map[pair.first]; + auto& col_with_type_and_name = block->get_by_position(idx); col_with_type_and_name.column = ColumnNothing::create(0); col_with_type_and_name.type = _opts.vir_col_idx_to_type[pair.second]; } @@ -3461,7 +3463,8 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { // So materialize virtual column in advance to avoid errors. 
if (_selected_size == 0) { for (const auto& pair : _vir_cid_to_idx_in_block) { - auto& col_with_type_and_name = block->get_by_position(pair.second); + auto idx = _schema_block_id_map[pair.first]; + auto& col_with_type_and_name = block->get_by_position(idx); col_with_type_and_name.column = _opts.vir_col_idx_to_type[pair.second]->create_column(); col_with_type_and_name.type = _opts.vir_col_idx_to_type[pair.second]; } @@ -3497,10 +3500,11 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { if (materialize_on_project_block) { for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; + auto idx_in_block = _schema_block_id_map[cid]; auto materialized_pos = _vir_cid_to_idx_in_block.at(cid); const auto& column = project_block.get_by_position(materialized_pos).column; DORIS_CHECK(!check_and_get_column(column.get())); - block->replace_by_position(materialized_pos, column); + block->replace_by_position(idx_in_block, column); } } return Status::OK(); From f6dfc44495268fc2203e38ad59960fab2254793a Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Wed, 10 Jun 2026 17:35:07 +0800 Subject: [PATCH 10/22] [refactor](be) Tighten project schema invariants ### What problem does this PR solve? Issue Number: N/A Related PR: #64010 Problem Summary: Review feedback pointed out that the segment iterator should work from an explicit project schema, and that the project block helper should fill caller-owned storage instead of returning a selected block pointer. Follow-up coverage showed that direct RowsetReader and Segment::new_iterator callers do not pass the TabletReader-expanded project schema fields. The reader now keeps the SegmentIterator project_columns invariant, while BetaRowsetReader maps direct callers to return_columns and Segment::new_iterator maps direct callers to schema column IDs. The project block helper remains caller-owned and the ANN source column ordinal translation is documented. 
### Release note None ### Check List (For Author) - Test: Unit Test - build-support/clang-format.sh - git diff --check - ./run-be-ut.sh --run --filter=TestDeltaWriterClusterKey.vec_sequence_col - Behavior changed: No - Does this need documentation: No --- be/src/storage/rowset/beta_rowset_reader.cpp | 3 ++ be/src/storage/segment/segment.cpp | 43 +++++++++++--------- be/src/storage/segment/segment_iterator.cpp | 23 +++++++---- be/src/storage/segment/segment_iterator.h | 2 +- 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/be/src/storage/rowset/beta_rowset_reader.cpp b/be/src/storage/rowset/beta_rowset_reader.cpp index afd6545f98842f..30c29d4852a871 100644 --- a/be/src/storage/rowset/beta_rowset_reader.cpp +++ b/be/src/storage/rowset/beta_rowset_reader.cpp @@ -100,6 +100,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.stats = _stats; _read_options.push_down_agg_type_opt = _read_context->push_down_agg_type_opt; _read_options.common_expr_ctxs_push_down = _read_context->common_expr_ctxs_push_down; + DORIS_CHECK(_read_context->return_columns != nullptr); + // Direct RowsetReader users do not expand return_columns, so return_columns is already + // the project layout. TabletReader sets origin_return_columns before any expansion. _read_options.project_columns = _read_context->origin_return_columns != nullptr ? 
_read_context->origin_return_columns : _read_context->return_columns; diff --git a/be/src/storage/segment/segment.cpp b/be/src/storage/segment/segment.cpp index 680195f7c73ae7..9493ef9e627ec5 100644 --- a/be/src/storage/segment/segment.cpp +++ b/be/src/storage/segment/segment.cpp @@ -254,19 +254,23 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o if (read_options.runtime_state != nullptr) { _be_exec_version = read_options.runtime_state->be_exec_version(); } - RETURN_IF_ERROR(_create_column_meta_once(read_options.stats)); + auto iterator_read_options = read_options; + if (iterator_read_options.project_columns == nullptr) { + iterator_read_options.project_columns = &schema->column_ids(); + } + RETURN_IF_ERROR(_create_column_meta_once(iterator_read_options.stats)); - read_options.stats->total_segment_number++; + iterator_read_options.stats->total_segment_number++; // trying to prune the current segment by segment-level zone map - for (const auto& entry : read_options.col_id_to_predicates) { + for (const auto& entry : iterator_read_options.col_id_to_predicates) { int32_t column_id = entry.first; // schema change if (_tablet_schema->num_columns() <= column_id) { continue; } - const TabletColumn& col = read_options.tablet_schema->column(column_id); + const TabletColumn& col = iterator_read_options.tablet_schema->column(column_id); std::shared_ptr reader; - Status st = get_column_reader(col, &reader, read_options.stats); + Status st = get_column_reader(col, &reader, iterator_read_options.stats); // not found in this segment, skip if (st.is()) { continue; @@ -277,41 +281,42 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o if (!reader->has_zone_map()) { continue; } - if (read_options.col_id_to_predicates.contains(column_id) && + if (iterator_read_options.col_id_to_predicates.contains(column_id) && can_apply_predicate_safely(column_id, *schema, - read_options.target_cast_type_for_variants, read_options)) { 
+ iterator_read_options.target_cast_type_for_variants, + iterator_read_options)) { bool matched = true; RETURN_IF_ERROR(reader->match_condition(entry.second.get(), &matched)); if (!matched) { // any condition not satisfied, return. *iter = std::make_unique(*schema); - read_options.stats->filtered_segment_number++; + iterator_read_options.stats->filtered_segment_number++; return Status::OK(); } } } { - SCOPED_RAW_TIMER(&read_options.stats->segment_load_index_timer_ns); - RETURN_IF_ERROR(load_index(read_options.stats)); + SCOPED_RAW_TIMER(&iterator_read_options.stats->segment_load_index_timer_ns); + RETURN_IF_ERROR(load_index(iterator_read_options.stats)); } - if (read_options.delete_condition_predicates->num_of_column_predicate() == 0 && - read_options.push_down_agg_type_opt != TPushAggOp::NONE && - read_options.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) { + if (iterator_read_options.delete_condition_predicates->num_of_column_predicate() == 0 && + iterator_read_options.push_down_agg_type_opt != TPushAggOp::NONE && + iterator_read_options.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) { iter->reset(new_vstatistics_iterator(this->shared_from_this(), *schema)); } else { *iter = std::make_unique(this->shared_from_this(), schema); } // TODO: Valid the opt not only in ReaderType::READER_QUERY - if (read_options.io_ctx.reader_type == ReaderType::READER_QUERY && - !read_options.column_predicates.empty()) { - auto pruned_predicates = read_options.column_predicates; + if (iterator_read_options.io_ctx.reader_type == ReaderType::READER_QUERY && + !iterator_read_options.column_predicates.empty()) { + auto pruned_predicates = iterator_read_options.column_predicates; auto pruned = false; for (auto& it : _column_reader_cache->get_available_readers(false)) { const auto uid = it.first; - const auto column_id = read_options.tablet_schema->field_index(uid); + const auto column_id = iterator_read_options.tablet_schema->field_index(uid); bool tmp_pruned = false; 
RETURN_IF_ERROR(it.second->prune_predicates_by_zone_map(pruned_predicates, column_id, &tmp_pruned)); @@ -319,7 +324,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o } if (pruned) { - auto options_with_pruned_predicates = read_options; + auto options_with_pruned_predicates = iterator_read_options; options_with_pruned_predicates.column_predicates = pruned_predicates; //because column_predicates is changed, we need to rebuild col_id_to_predicates so that inverted index will not go through it. options_with_pruned_predicates.col_id_to_predicates.clear(); @@ -335,7 +340,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o return iter->get()->init(options_with_pruned_predicates); } } - return iter->get()->init(read_options); + return iter->get()->init(iterator_read_options); } Status Segment::_write_error_file(size_t file_size, size_t offset, size_t bytes_read, char* data, diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index c04327916a694e..e05d6c9c304231 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -480,7 +480,8 @@ void SegmentIterator::_init_schema_block_id_map() { void SegmentIterator::_init_project_schema() { _init_schema_block_id_map(); - if (_opts.project_columns == nullptr || *_opts.project_columns == _schema->column_ids()) { + DORIS_CHECK(_opts.project_columns != nullptr); + if (*_opts.project_columns == _schema->column_ids()) { _project_schema = _schema; } else { _project_schema = @@ -488,11 +489,9 @@ void SegmentIterator::_init_project_schema() { } } -Block* SegmentIterator::_build_project_block(Block* block, Block* project_block) { +void SegmentIterator::_build_project_block(Block* block, Block* project_block) { DORIS_CHECK(_project_schema != nullptr); - if (_project_schema == _schema) { - return block; - } + DORIS_CHECK(_project_schema != _schema); project_block->clear(); 
const auto& project_column_ids = _project_schema->column_ids(); @@ -509,7 +508,6 @@ Block* SegmentIterator::_build_project_block(Block* block, Block* project_block) } project_block->insert({std::move(column), type, _schema->column(cid)->name()}); } - return project_block; } void SegmentIterator::_initialize_predicate_results() { @@ -923,6 +921,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() { VLOG_DEBUG << fmt::format("Try apply ann topn: {}", _ann_topn_runtime->debug_string()); size_t src_col_idx = _ann_topn_runtime->get_src_column_idx(); + // AnnTopNRuntime keeps VSlotRef::column_id(), which is the project block ordinal. ColumnId src_cid = _project_schema->column_id(src_col_idx); IndexIterator* ann_index_iterator = _index_iterators[src_cid].get(); bool has_ann_index = _column_has_ann_index(src_cid); @@ -3088,7 +3087,11 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); DCHECK(!_common_expr_ctxs_push_down.empty()); Block project_block; - Block* expr_block = _build_project_block(block, &project_block); + Block* expr_block = block; + if (_project_schema != _schema) { + _build_project_block(block, &project_block); + expr_block = &project_block; + } std::vector common_ctxs; common_ctxs.reserve(_common_expr_ctxs_push_down.size()); for (auto& ctx : _common_expr_ctxs_push_down) { @@ -3475,8 +3478,12 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { } Block project_block; - Block* materialize_block = _build_project_block(block, &project_block); const bool materialize_on_project_block = _project_schema != _schema; + Block* materialize_block = block; + if (materialize_on_project_block) { + _build_project_block(block, &project_block); + materialize_block = &project_block; + } for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto column_expr = cid_and_expr.second; diff --git a/be/src/storage/segment/segment_iterator.h 
b/be/src/storage/segment/segment_iterator.h index cdca1015da5074..0378acb962ae7f 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -275,7 +275,7 @@ class SegmentIterator : public RowwiseIterator { void _init_schema_block_id_map(); void _init_project_schema(); - Block* _build_project_block(Block* block, Block* project_block); + void _build_project_block(Block* block, Block* project_block); [[nodiscard]] Status _extract_common_expr_columns(const VExprSPtr& expr); [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, Block* block); From 320259dac08eaf460d4a8e4e0f6a24c007df2396 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Thu, 11 Jun 2026 14:24:25 +0800 Subject: [PATCH 11/22] update --- be/src/storage/segment/segment.cpp | 43 +++++++++------------ be/src/storage/segment/segment_iterator.cpp | 8 ++-- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/be/src/storage/segment/segment.cpp b/be/src/storage/segment/segment.cpp index 9493ef9e627ec5..680195f7c73ae7 100644 --- a/be/src/storage/segment/segment.cpp +++ b/be/src/storage/segment/segment.cpp @@ -254,23 +254,19 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o if (read_options.runtime_state != nullptr) { _be_exec_version = read_options.runtime_state->be_exec_version(); } - auto iterator_read_options = read_options; - if (iterator_read_options.project_columns == nullptr) { - iterator_read_options.project_columns = &schema->column_ids(); - } - RETURN_IF_ERROR(_create_column_meta_once(iterator_read_options.stats)); + RETURN_IF_ERROR(_create_column_meta_once(read_options.stats)); - iterator_read_options.stats->total_segment_number++; + read_options.stats->total_segment_number++; // trying to prune the current segment by segment-level zone map - for (const auto& entry : iterator_read_options.col_id_to_predicates) { + for (const auto& entry : read_options.col_id_to_predicates) { 
int32_t column_id = entry.first; // schema change if (_tablet_schema->num_columns() <= column_id) { continue; } - const TabletColumn& col = iterator_read_options.tablet_schema->column(column_id); + const TabletColumn& col = read_options.tablet_schema->column(column_id); std::shared_ptr reader; - Status st = get_column_reader(col, &reader, iterator_read_options.stats); + Status st = get_column_reader(col, &reader, read_options.stats); // not found in this segment, skip if (st.is()) { continue; @@ -281,42 +277,41 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o if (!reader->has_zone_map()) { continue; } - if (iterator_read_options.col_id_to_predicates.contains(column_id) && + if (read_options.col_id_to_predicates.contains(column_id) && can_apply_predicate_safely(column_id, *schema, - iterator_read_options.target_cast_type_for_variants, - iterator_read_options)) { + read_options.target_cast_type_for_variants, read_options)) { bool matched = true; RETURN_IF_ERROR(reader->match_condition(entry.second.get(), &matched)); if (!matched) { // any condition not satisfied, return. 
*iter = std::make_unique(*schema); - iterator_read_options.stats->filtered_segment_number++; + read_options.stats->filtered_segment_number++; return Status::OK(); } } } { - SCOPED_RAW_TIMER(&iterator_read_options.stats->segment_load_index_timer_ns); - RETURN_IF_ERROR(load_index(iterator_read_options.stats)); + SCOPED_RAW_TIMER(&read_options.stats->segment_load_index_timer_ns); + RETURN_IF_ERROR(load_index(read_options.stats)); } - if (iterator_read_options.delete_condition_predicates->num_of_column_predicate() == 0 && - iterator_read_options.push_down_agg_type_opt != TPushAggOp::NONE && - iterator_read_options.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) { + if (read_options.delete_condition_predicates->num_of_column_predicate() == 0 && + read_options.push_down_agg_type_opt != TPushAggOp::NONE && + read_options.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) { iter->reset(new_vstatistics_iterator(this->shared_from_this(), *schema)); } else { *iter = std::make_unique(this->shared_from_this(), schema); } // TODO: Valid the opt not only in ReaderType::READER_QUERY - if (iterator_read_options.io_ctx.reader_type == ReaderType::READER_QUERY && - !iterator_read_options.column_predicates.empty()) { - auto pruned_predicates = iterator_read_options.column_predicates; + if (read_options.io_ctx.reader_type == ReaderType::READER_QUERY && + !read_options.column_predicates.empty()) { + auto pruned_predicates = read_options.column_predicates; auto pruned = false; for (auto& it : _column_reader_cache->get_available_readers(false)) { const auto uid = it.first; - const auto column_id = iterator_read_options.tablet_schema->field_index(uid); + const auto column_id = read_options.tablet_schema->field_index(uid); bool tmp_pruned = false; RETURN_IF_ERROR(it.second->prune_predicates_by_zone_map(pruned_predicates, column_id, &tmp_pruned)); @@ -324,7 +319,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o } if (pruned) { - auto 
options_with_pruned_predicates = iterator_read_options; + auto options_with_pruned_predicates = read_options; options_with_pruned_predicates.column_predicates = pruned_predicates; //because column_predicates is changed, we need to rebuild col_id_to_predicates so that inverted index will not go through it. options_with_pruned_predicates.col_id_to_predicates.clear(); @@ -340,7 +335,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o return iter->get()->init(options_with_pruned_predicates); } } - return iter->get()->init(iterator_read_options); + return iter->get()->init(read_options); } Status Segment::_write_error_file(size_t file_size, size_t offset, size_t bytes_read, char* data, diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index e05d6c9c304231..27d06f02aa5ca8 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -480,12 +480,14 @@ void SegmentIterator::_init_schema_block_id_map() { void SegmentIterator::_init_project_schema() { _init_schema_block_id_map(); - DORIS_CHECK(_opts.project_columns != nullptr); - if (*_opts.project_columns == _schema->column_ids()) { + // Direct Segment::new_iterator callers use the input schema as the project layout. + const auto& project_column_ids = + _opts.project_columns != nullptr ? 
*_opts.project_columns : _schema->column_ids(); + if (project_column_ids == _schema->column_ids()) { _project_schema = _schema; } else { _project_schema = - std::make_shared(_opts.tablet_schema->columns(), *_opts.project_columns); + std::make_shared(_opts.tablet_schema->columns(), project_column_ids); } } From 0038652b121a6155875982eb549a2fd84114e5c1 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Thu, 11 Jun 2026 16:02:49 +0800 Subject: [PATCH 12/22] [improvement](query) Align olap scan schema with storage keys Issue Number: N/A Related PR: N/A Problem Summary: Avoid maintaining separate execution and storage schemas in the BE segment iterator for pushed-down storage expressions. The planner expands AGG-key and non-MOW unique-key olap scan tuples with missing storage key columns and marks the extra key slots in thrift. The scan node projects back to the original output tuple, while BE maps the marked slots to tablet column ids and, in direct reader mode, fills those storage-only key columns without reading them. Merge and aggregation reader paths still read real key columns. 
None - Test: Manual test - ./build.sh --fe - ./build.sh --be - git diff --check - build-support/clang-format.sh - build-support/run-clang-tidy.sh --build-dir be/build_Release --base 23e21f44f0080e25deff5dcdda2e61af3efc9480 (attempted; failed on existing file-level clang-tidy diagnostics before producing a clean changed-line gate) - Behavior changed: No - Does this need documentation: No --- be/src/exec/operator/olap_scan_operator.cpp | 5 + be/src/exec/operator/olap_scan_operator.h | 1 + be/src/exec/scan/olap_scanner.cpp | 7 ++ be/src/exec/scan/olap_scanner.h | 2 + be/src/storage/iterators.h | 2 + be/src/storage/rowset/beta_rowset_reader.cpp | 1 + be/src/storage/rowset/rowset_reader_context.h | 2 + be/src/storage/segment/segment_iterator.cpp | 41 ++++++-- be/src/storage/segment/segment_iterator.h | 3 + be/src/storage/tablet/tablet_reader.cpp | 1 + be/src/storage/tablet/tablet_reader.h | 1 + .../translator/PhysicalPlanTranslator.java | 99 ++++++++++++++++++- .../LogicalOlapScanToPhysicalOlapScan.java | 1 + .../plans/physical/PhysicalOlapScan.java | 70 +++++++++---- .../apache/doris/planner/OlapScanNode.java | 12 +++ gensrc/thrift/PlanNodes.thrift | 2 + 16 files changed, 217 insertions(+), 33 deletions(-) diff --git a/be/src/exec/operator/olap_scan_operator.cpp b/be/src/exec/operator/olap_scan_operator.cpp index 6e7946a082b249..8b1c521c216aa7 100644 --- a/be/src/exec/operator/olap_scan_operator.cpp +++ b/be/src/exec/operator/olap_scan_operator.cpp @@ -594,6 +594,11 @@ Status OlapScanLocalState::_init_scanners(std::list* scanners) { for (auto uid : p._olap_scan_node.output_column_unique_ids) { _output_column_ids.emplace(uid); } + if (p._olap_scan_node.__isset.filled_key_column_slot_ids) { + for (auto slot_id : p._olap_scan_node.filled_key_column_slot_ids) { + _filled_key_column_slot_ids.emplace(slot_id); + } + } // Step 3: convert accumulated scan key pairs into OlapScanRange objects. // Each OlapScanRange carries real begin/end OlapTuples with has_lower_bound = true. 
diff --git a/be/src/exec/operator/olap_scan_operator.h b/be/src/exec/operator/olap_scan_operator.h index d22aac75052ff2..f882ef0083a00e 100644 --- a/be/src/exec/operator/olap_scan_operator.h +++ b/be/src/exec/operator/olap_scan_operator.h @@ -136,6 +136,7 @@ class OlapScanLocalState final : public ScanLocalState { OlapScanKeys _scan_keys; // If column id in this set, indicate that we need to read data after index filtering std::set _output_column_ids; + std::set _filled_key_column_slot_ids; std::unique_ptr _segment_profile; std::unique_ptr _index_filter_profile; diff --git a/be/src/exec/scan/olap_scanner.cpp b/be/src/exec/scan/olap_scanner.cpp index 33f225864da7cf..5402b648ee4f48 100644 --- a/be/src/exec/scan/olap_scanner.cpp +++ b/be/src/exec/scan/olap_scanner.cpp @@ -89,6 +89,7 @@ OlapScanner::OlapScanner(ScanLocalStateBase* parent, OlapScanner::Params&& param .rs_splits {}, .return_columns {}, .output_columns {}, + .filled_columns {}, .common_expr_ctxs_push_down {}, .topn_filter_source_node_ids {}, .key_group_cluster_key_idxes {}, @@ -393,6 +394,7 @@ Status OlapScanner::_init_tablet_reader_params( if (_tablet_reader_params.direct_mode) { _tablet_reader_params.return_columns = _return_columns; + _tablet_reader_params.filled_columns = _filled_columns; } else { // we need to fetch all key columns to do the right aggregation on storage engine side. for (size_t i = 0; i < tablet_schema->num_key_columns(); ++i) { @@ -609,6 +611,11 @@ Status OlapScanner::_init_return_columns() { } const auto& column = tablet_schema->column(index); + auto* olap_local_state = static_cast(_local_state); + if (olap_local_state->_filled_key_column_slot_ids.contains(slot->id())) { + DORIS_CHECK(column.is_key()); + _filled_columns.insert(index); + } int32_t unique_id = column.unique_id() >= 0 ? 
column.unique_id() : column.parent_unique_id(); if (!slot->all_access_paths().empty()) { diff --git a/be/src/exec/scan/olap_scanner.h b/be/src/exec/scan/olap_scanner.h index e7c9598b864573..f1e8b5c719b08a 100644 --- a/be/src/exec/scan/olap_scanner.h +++ b/be/src/exec/scan/olap_scanner.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,7 @@ class OlapScanner : public Scanner { public: std::vector _return_columns; + std::set _filled_columns; std::unordered_set _tablet_columns_convert_to_null_set; diff --git a/be/src/storage/iterators.h b/be/src/storage/iterators.h index 2292a898edd0f8..8afc73eecd56c6 100644 --- a/be/src/storage/iterators.h +++ b/be/src/storage/iterators.h @@ -19,6 +19,7 @@ #include #include +#include #include "common/status.h" #include "core/block/block.h" @@ -131,6 +132,7 @@ class StorageReadOptions { // Final scan project columns before storage-side read schema expansion. const std::vector* project_columns = nullptr; const std::set* output_columns = nullptr; + std::set filled_columns; // runtime state RuntimeState* runtime_state = nullptr; RowsetId rowset_id; diff --git a/be/src/storage/rowset/beta_rowset_reader.cpp b/be/src/storage/rowset/beta_rowset_reader.cpp index 30c29d4852a871..3b1f3e4a37b208 100644 --- a/be/src/storage/rowset/beta_rowset_reader.cpp +++ b/be/src/storage/rowset/beta_rowset_reader.cpp @@ -157,6 +157,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context // create segment iterators VLOG_NOTICE << "read columns size: " << read_columns.size(); _input_schema = std::make_shared(_read_context->tablet_schema->columns(), read_columns); + _read_options.filled_columns = _read_context->filled_columns; // output_schema only contains return_columns (excludes extra columns like delete-predicate columns). // It is used by merge/union iterators to determine how many columns to copy to the output block. 
_output_schema = std::make_shared(_read_context->tablet_schema->columns(), diff --git a/be/src/storage/rowset/rowset_reader_context.h b/be/src/storage/rowset/rowset_reader_context.h index d82488424a64c8..6fcbef4d39b94f 100644 --- a/be/src/storage/rowset/rowset_reader_context.h +++ b/be/src/storage/rowset/rowset_reader_context.h @@ -18,6 +18,7 @@ #ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_CONTEXT_H #define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_CONTEXT_H +#include #include #include "exprs/score_runtime.h" @@ -89,6 +90,7 @@ struct RowsetReaderContext { RowIdConversion* rowid_conversion = nullptr; bool is_key_column_group = false; const std::set* output_columns = nullptr; + std::set filled_columns; RowsetId rowset_id; // slots that cast may be eliminated in storage layer std::map target_cast_type_for_variants; diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 27d06f02aa5ca8..c3436520d1a5ac 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -2111,15 +2111,8 @@ bool SegmentIterator::_can_evaluated_by_vectorized(std::shared_ptris_nullable()) { auto nullable_col_ptr = reinterpret_cast(column.get()); nullable_col_ptr->get_null_map_column().insert_many_defaults(num_of_defaults); nullable_col_ptr->get_nested_column_ptr()->insert_many_defaults(num_of_defaults); @@ -2127,6 +2120,36 @@ bool SegmentIterator::_prune_column(ColumnId cid, MutableColumnPtr& column, bool // assert(column->is_const()); column->insert_many_defaults(num_of_defaults); } +} + +bool SegmentIterator::_fill_filled_column(ColumnId cid, MutableColumnPtr& column, + bool fill_defaults, size_t num_of_defaults) { + if (!_opts.filled_columns.contains(cid)) { + return false; + } + DORIS_CHECK(!_vir_cid_to_idx_in_block.contains(cid)); + DORIS_CHECK(!_has_delete_predicate(cid)); + DORIS_CHECK(cid < _is_pred_column.size()); + DORIS_CHECK(!_is_pred_column[cid]); + DORIS_CHECK(cid < 
_is_common_expr_column.size()); + DORIS_CHECK(!_is_common_expr_column[cid]); + if (fill_defaults) { + _fill_default_column(column, num_of_defaults); + } + return true; +} + +bool SegmentIterator::_prune_column(ColumnId cid, MutableColumnPtr& column, bool fill_defaults, + size_t num_of_defaults) { + if (_fill_filled_column(cid, column, fill_defaults, num_of_defaults)) { + return true; + } + if (_need_read_data(cid)) { + return false; + } + if (fill_defaults) { + _fill_default_column(column, num_of_defaults); + } return true; } diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 0378acb962ae7f..7f972ed4ce3995 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -296,6 +296,9 @@ class SegmentIterator : public RowwiseIterator { uint16_t* sel_rowid_idx, uint16_t select_size); bool _need_read_data(ColumnId cid); + void _fill_default_column(MutableColumnPtr& column, size_t num_of_defaults); + bool _fill_filled_column(ColumnId cid, MutableColumnPtr& column, bool fill_defaults, + size_t num_of_defaults); bool _prune_column(ColumnId cid, MutableColumnPtr& column, bool fill_defaults, size_t num_of_defaults); diff --git a/be/src/storage/tablet/tablet_reader.cpp b/be/src/storage/tablet/tablet_reader.cpp index 5a7b9a58bb9420..5b90967f3c3812 100644 --- a/be/src/storage/tablet/tablet_reader.cpp +++ b/be/src/storage/tablet/tablet_reader.cpp @@ -184,6 +184,7 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { _reader_context.is_key_column_group = read_params.is_key_column_group; _reader_context.common_expr_ctxs_push_down = read_params.common_expr_ctxs_push_down; _reader_context.output_columns = &read_params.output_columns; + _reader_context.filled_columns = read_params.filled_columns; _reader_context.push_down_agg_type_opt = read_params.push_down_agg_type_opt; _reader_context.ttl_seconds = _tablet->ttl_seconds(); _reader_context.score_runtime = 
read_params.score_runtime; diff --git a/be/src/storage/tablet/tablet_reader.h b/be/src/storage/tablet/tablet_reader.h index 911bdf4fe50198..6d8bd5e0d3962f 100644 --- a/be/src/storage/tablet/tablet_reader.h +++ b/be/src/storage/tablet/tablet_reader.h @@ -158,6 +158,7 @@ class TabletReader { std::vector return_columns; // output_columns only contain columns in OrderByExprs and outputExprs std::set output_columns; + std::set filled_columns; RuntimeProfile* profile = nullptr; RuntimeState* runtime_state = nullptr; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index a4212918c38e9f..aa31a4c171036b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -36,6 +36,7 @@ import org.apache.doris.catalog.AliasFunction; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Config; @@ -868,11 +869,15 @@ public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTransla } private PlanFragment computePhysicalOlapScan(PhysicalOlapScan olapScan, PlanTranslatorContext context) { - List slots = olapScan.getOutput(); + List outputSlots = olapScan.getOutput(); + StorageAlignedScanSlots storageAlignedScanSlots = computeStorageAlignedScanSlots(olapScan); + List slots = storageAlignedScanSlots.scanSlots; OlapTable olapTable = olapScan.getTable(); // generate real output tuple TupleDescriptor tupleDescriptor = generateTupleDesc(slots, olapTable, context); List slotDescriptors = tupleDescriptor.getSlots(); + Map exprIdToSlotDescriptor = slotDescriptors.stream() + .collect(Collectors.toMap(s -> 
context.findExprId(s.getId()), s -> s)); // put virtual column expr into slot desc Map slotToVirtualColumnMap = olapScan.getSlotToVirtualColumnMap(); @@ -892,6 +897,12 @@ private PlanFragment computePhysicalOlapScan(PhysicalOlapScan olapScan, PlanTran OlapScanNode olapScanNode = new OlapScanNode(context.nextPlanNodeId(), tupleDescriptor, "OlapScanNode", context.getScanContext()); + Set filledKeyColumnSlotIds = storageAlignedScanSlots.filledKeyExprIds.stream() + .map(exprId -> Objects.requireNonNull(exprIdToSlotDescriptor.get(exprId), + "missing filled key slot descriptor for " + exprId)) + .map(slotDescriptor -> slotDescriptor.getId().asInt()) + .collect(Collectors.toSet()); + olapScanNode.setFilledKeyColumnSlotIds(filledKeyColumnSlotIds); olapScanNode.setNereidsId(olapScan.getId()); context.getNereidsIdToPlanNodeIdMap().put(olapScan.getId(), olapScanNode.getId()); @@ -925,15 +936,18 @@ private PlanFragment computePhysicalOlapScan(PhysicalOlapScan olapScan, PlanTran // because it is whole table cardinality and will break block rules. 
// olapScanNode.setCardinality((long) olapScan.getStats().getRowCount()); if (context.getSessionVariable() != null && context.getSessionVariable().forbidUnknownColStats) { - for (int i = 0; i < slots.size(); i++) { - SlotReference slot = (SlotReference) slots.get(i); + for (Slot outputSlot : outputSlots) { + SlotReference slot = (SlotReference) outputSlot; boolean inVisibleCol = slot.getOriginalColumn().isPresent() && StatisticConstants.shouldIgnoreCol(olapTable, slot.getOriginalColumn().get()); if (olapScan.getStats().findColumnStatistics(slot).isUnKnown() && !isComplexDataType(slot.getDataType()) && !StatisticConstants.isSystemTable(olapTable) && !inVisibleCol) { - context.addUnknownStatsColumn(olapScanNode, slotDescriptors.get(i).getId()); + SlotDescriptor slotDescriptor = Objects.requireNonNull( + exprIdToSlotDescriptor.get(slot.getExprId()), + "missing output slot descriptor for " + slot.getExprId()); + context.addUnknownStatsColumn(olapScanNode, slotDescriptor.getId()); } } } @@ -964,6 +978,14 @@ private PlanFragment computePhysicalOlapScan(PhysicalOlapScan olapScan, PlanTran context.addScanNode(olapScanNode, olapScan); translateRuntimeFilter(olapScan, olapScanNode, context); + if (!storageAlignedScanSlots.filledKeyExprIds.isEmpty()) { + List projectionExprs = outputSlots.stream() + .map(slot -> context.findSlotRef(slot.getExprId())) + .collect(Collectors.toList()); + TupleDescriptor projectionTuple = generateTupleDesc(outputSlots, olapTable, context); + olapScanNode.setProjectList(projectionExprs); + olapScanNode.setOutputTupleDesc(projectionTuple); + } olapScanNode.setPushDownAggNoGrouping(context.getRelationPushAggOp(olapScan.getRelationId())); // Create PlanFragment @@ -985,6 +1007,70 @@ private PlanFragment computePhysicalOlapScan(PhysicalOlapScan olapScan, PlanTran return planFragment; } + private StorageAlignedScanSlots computeStorageAlignedScanSlots(PhysicalOlapScan olapScan) { + if (!shouldAlignScanSlotsToStorageSchema(olapScan)) { + return new 
StorageAlignedScanSlots(olapScan.getOutput(), Collections.emptySet()); + } + + Set outputExprIds = olapScan.getOutput().stream() + .map(Slot::getExprId) + .collect(Collectors.toSet()); + Map slotByColumnUniqueId = Stream.concat( + olapScan.getSelectedIndexOutputs().stream(), olapScan.getOutput().stream()) + .filter(slot -> ((SlotReference) slot).getOriginalColumn().isPresent()) + .collect(Collectors.toMap( + slot -> ((SlotReference) slot).getOriginalColumn().get().getUniqueId(), + slot -> slot, + (left, right) -> right)); + + List storageSlots = new ArrayList<>(); + Set storageExprIds = new HashSet<>(); + Set filledKeyExprIds = new HashSet<>(); + long selectedIndexId = olapScan.getSelectedIndexId() == -1 + ? olapScan.getTable().getBaseIndexId() + : olapScan.getSelectedIndexId(); + for (Column column : olapScan.getTable().getSchemaByIndexId(selectedIndexId, true)) { + if (!column.isKey()) { + break; + } + Slot slot = Objects.requireNonNull(slotByColumnUniqueId.get(column.getUniqueId()), + "missing scan slot for storage key column " + column.getName()); + if (storageExprIds.add(slot.getExprId())) { + storageSlots.add(slot); + } + if (!outputExprIds.contains(slot.getExprId())) { + filledKeyExprIds.add(slot.getExprId()); + } + } + for (Slot slot : olapScan.getOutput()) { + if (storageExprIds.add(slot.getExprId())) { + storageSlots.add(slot); + } + } + if (filledKeyExprIds.isEmpty()) { + return new StorageAlignedScanSlots(olapScan.getOutput(), Collections.emptySet()); + } + return new StorageAlignedScanSlots(storageSlots, filledKeyExprIds); + } + + private boolean shouldAlignScanSlotsToStorageSchema(PhysicalOlapScan olapScan) { + KeysType keysType = olapScan.getSelectedIndexId() == -1 + ? 
olapScan.getTable().getKeysType() + : olapScan.getTable().getIndexMetaByIndexId(olapScan.getSelectedIndexId()).getKeysType(); + return keysType == KeysType.AGG_KEYS + || (keysType == KeysType.UNIQUE_KEYS && !olapScan.getTable().getEnableUniqueKeyMergeOnWrite()); + } + + private static class StorageAlignedScanSlots { + private final List scanSlots; + private final Set filledKeyExprIds; + + StorageAlignedScanSlots(List scanSlots, Set filledKeyExprIds) { + this.scanSlots = scanSlots; + this.filledKeyExprIds = filledKeyExprIds; + } + } + private void translateRuntimeFilter(PhysicalRelation physicalRelation, ScanNode scanNode, PlanTranslatorContext context) { if (context.getRuntimeTranslator().isPresent()) { @@ -2912,6 +2998,11 @@ private void updateScanSlotsMaterialization(ScanNode scanNode, Set requiredSlotIdSet, Set requiredByProjectSlotIdSet, PlanTranslatorContext context) { Set requiredWithVirtualColumns = Sets.newHashSet(requiredSlotIdSet); + if (scanNode instanceof OlapScanNode) { + ((OlapScanNode) scanNode).getFilledKeyColumnSlotIds().stream() + .map(SlotId::new) + .forEach(requiredWithVirtualColumns::add); + } for (SlotDescriptor virtualSlot : scanNode.getTupleDesc().getSlots()) { Expr virtualColumn = virtualSlot.getVirtualColumn(); if (virtualColumn == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalOlapScanToPhysicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalOlapScanToPhysicalOlapScan.java index 48ff1674709245..3d13e1bfa95587 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalOlapScanToPhysicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalOlapScanToPhysicalOlapScan.java @@ -58,6 +58,7 @@ public Rule build() { convertDistribution(olapScan), olapScan.getPreAggStatus(), olapScan.getOutputByIndex(olapScan.getTable().getBaseIndexId()), + 
olapScan.getOutputByIndex(olapScan.getSelectedIndexId()), Optional.empty(), olapScan.getLogicalProperties(), null, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java index 9205f008cf4b1d..94661d638d81c7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java @@ -66,6 +66,7 @@ public class PhysicalOlapScan extends PhysicalCatalogRelation implements OlapSca private final boolean hasPartitionPredicate; private final PreAggStatus preAggStatus; private final List baseOutputs; + private final List selectedIndexOutputs; private final Optional tableSample; private final ImmutableList operativeSlots; @@ -100,7 +101,7 @@ public PhysicalOlapScan(RelationId id, OlapTable olapTable, List qualifi List annOrderKeys, Optional annLimit) { this(id, olapTable, qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, false, distributionSpec, - preAggStatus, baseOutputs, + preAggStatus, baseOutputs, baseOutputs, groupExpression, logicalProperties, null, null, tableSample, operativeSlots, virtualColumns, scoreOrderKeys, scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, ""); @@ -119,7 +120,7 @@ public PhysicalOlapScan(RelationId id, OlapTable olapTable, List qualifi List scoreOrderKeys, Optional scoreLimit, Optional scoreRangeInfo, List annOrderKeys, Optional annLimit) { this(id, olapTable, qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, false, - distributionSpec, preAggStatus, baseOutputs, groupExpression, logicalProperties, + distributionSpec, preAggStatus, baseOutputs, baseOutputs, groupExpression, logicalProperties, physicalProperties, statistics, tableSample, operativeSlots, virtualColumns, scoreOrderKeys, scoreLimit, scoreRangeInfo, annOrderKeys, 
annLimit, ""); } @@ -137,7 +138,7 @@ public PhysicalOlapScan(RelationId id, OlapTable olapTable, List qualifi List scoreOrderKeys, Optional scoreLimit, Optional scoreRangeInfo, List annOrderKeys, Optional annLimit, String tableAlias) { this(id, olapTable, qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, false, - distributionSpec, preAggStatus, baseOutputs, groupExpression, logicalProperties, + distributionSpec, preAggStatus, baseOutputs, baseOutputs, groupExpression, logicalProperties, physicalProperties, statistics, tableSample, operativeSlots, virtualColumns, scoreOrderKeys, scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias); } @@ -155,20 +156,39 @@ public PhysicalOlapScan(RelationId id, OlapTable olapTable, List qualifi List scoreOrderKeys, Optional scoreLimit, Optional scoreRangeInfo, List annOrderKeys, Optional annLimit, String tableAlias) { this(id, olapTable, qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, - hasPartitionPredicate, distributionSpec, preAggStatus, baseOutputs, groupExpression, + hasPartitionPredicate, distributionSpec, preAggStatus, baseOutputs, baseOutputs, groupExpression, logicalProperties, physicalProperties, statistics, tableSample, operativeSlots, virtualColumns, scoreOrderKeys, scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias, Optional.empty()); } + /** + * Constructor for PhysicalOlapScan. 
+ */ + public PhysicalOlapScan(RelationId id, OlapTable olapTable, List qualifier, long selectedIndexId, + List selectedTabletIds, List selectedPartitionIds, boolean hasPartitionPredicate, + DistributionSpec distributionSpec, PreAggStatus preAggStatus, List baseOutputs, + List selectedIndexOutputs, Optional groupExpression, + LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics, + Optional tableSample, Collection operativeSlots, + List virtualColumns, List scoreOrderKeys, Optional scoreLimit, + Optional scoreRangeInfo, List annOrderKeys, Optional annLimit, + String tableAlias) { + this(id, olapTable, qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, + hasPartitionPredicate, distributionSpec, preAggStatus, baseOutputs, selectedIndexOutputs, + groupExpression, logicalProperties, physicalProperties, statistics, tableSample, operativeSlots, + virtualColumns, scoreOrderKeys, scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias, + Optional.empty()); + } + /** * Ultimate constructor for PhysicalOlapScan. 
*/ public PhysicalOlapScan(RelationId id, OlapTable olapTable, List qualifier, long selectedIndexId, List selectedTabletIds, List selectedPartitionIds, boolean hasPartitionPredicate, DistributionSpec distributionSpec, PreAggStatus preAggStatus, List baseOutputs, - Optional groupExpression, LogicalProperties logicalProperties, - PhysicalProperties physicalProperties, Statistics statistics, + List selectedIndexOutputs, Optional groupExpression, + LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics, Optional tableSample, Collection operativeSlots, List virtualColumns, List scoreOrderKeys, Optional scoreLimit, Optional scoreRangeInfo, @@ -183,6 +203,7 @@ public PhysicalOlapScan(RelationId id, OlapTable olapTable, List qualifi this.distributionSpec = distributionSpec; this.preAggStatus = preAggStatus; this.baseOutputs = ImmutableList.copyOf(baseOutputs); + this.selectedIndexOutputs = ImmutableList.copyOf(selectedIndexOutputs); this.tableSample = tableSample; this.operativeSlots = ImmutableList.copyOf(operativeSlots); this.virtualColumns = ImmutableList.copyOf(virtualColumns); @@ -228,9 +249,10 @@ public PhysicalOlapScan withPartitionPrunablePredicates( Optional partitionPrunablePredicates) { return AbstractPlan.copyWithSameId(this, () -> new PhysicalOlapScan(relationId, getTable(), qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, hasPartitionPredicate, - distributionSpec, preAggStatus, baseOutputs, groupExpression, getLogicalProperties(), - getPhysicalProperties(), statistics, tableSample, operativeSlots, virtualColumns, scoreOrderKeys, - scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias, partitionPrunablePredicates)); + distributionSpec, preAggStatus, baseOutputs, selectedIndexOutputs, groupExpression, + getLogicalProperties(), getPhysicalProperties(), statistics, tableSample, operativeSlots, + virtualColumns, scoreOrderKeys, scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias, + 
partitionPrunablePredicates)); } @Override @@ -256,6 +278,10 @@ public List getBaseOutputs() { return baseOutputs; } + public List getSelectedIndexOutputs() { + return selectedIndexOutputs; + } + public List getVirtualColumns() { return virtualColumns; } @@ -351,6 +377,7 @@ public boolean equals(Object o) { && Objects.equals(selectedPartitionIds, olapScan.selectedPartitionIds) && Objects.equals(preAggStatus, olapScan.preAggStatus) && Objects.equals(baseOutputs, olapScan.baseOutputs) + && Objects.equals(selectedIndexOutputs, olapScan.selectedIndexOutputs) && Objects.equals(tableSample, olapScan.tableSample) && Objects.equals(operativeSlots, olapScan.operativeSlots) && Objects.equals(virtualColumns, olapScan.virtualColumns) @@ -376,8 +403,9 @@ public R accept(PlanVisitor visitor, C context) { public PhysicalOlapScan withGroupExpression(Optional groupExpression) { return AbstractPlan.copyWithSameId(this, () -> new PhysicalOlapScan(relationId, getTable(), qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, hasPartitionPredicate, - distributionSpec, preAggStatus, baseOutputs, groupExpression, getLogicalProperties(), null, null, - tableSample, operativeSlots, virtualColumns, scoreOrderKeys, scoreLimit, scoreRangeInfo, + distributionSpec, preAggStatus, baseOutputs, selectedIndexOutputs, groupExpression, + getLogicalProperties(), null, null, tableSample, operativeSlots, virtualColumns, scoreOrderKeys, + scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias, partitionPrunablePredicates)); } @@ -386,8 +414,9 @@ public Plan withGroupExprLogicalPropChildren(Optional groupExpr Optional logicalProperties, List children) { return AbstractPlan.copyWithSameId(this, () -> new PhysicalOlapScan(relationId, getTable(), qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, hasPartitionPredicate, - distributionSpec, preAggStatus, baseOutputs, groupExpression, logicalProperties.get(), null, null, - tableSample, operativeSlots, virtualColumns, 
scoreOrderKeys, scoreLimit, scoreRangeInfo, + distributionSpec, preAggStatus, baseOutputs, selectedIndexOutputs, groupExpression, + logicalProperties.get(), null, null, tableSample, operativeSlots, virtualColumns, scoreOrderKeys, + scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias, partitionPrunablePredicates)); } @@ -396,9 +425,10 @@ public PhysicalOlapScan withPhysicalPropertiesAndStats( PhysicalProperties physicalProperties, Statistics statistics) { return AbstractPlan.copyWithSameId(this, () -> new PhysicalOlapScan(relationId, getTable(), qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, hasPartitionPredicate, - distributionSpec, preAggStatus, baseOutputs, groupExpression, getLogicalProperties(), - physicalProperties, statistics, tableSample, operativeSlots, virtualColumns, scoreOrderKeys, - scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias, partitionPrunablePredicates)); + distributionSpec, preAggStatus, baseOutputs, selectedIndexOutputs, groupExpression, + getLogicalProperties(), physicalProperties, statistics, tableSample, operativeSlots, virtualColumns, + scoreOrderKeys, scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias, + partitionPrunablePredicates)); } @Override @@ -423,10 +453,10 @@ public Optional getTableSample() { public CatalogRelation withOperativeSlots(Collection operativeSlots) { return AbstractPlan.copyWithSameId(this, () -> new PhysicalOlapScan(relationId, (OlapTable) table, qualifier, selectedIndexId, selectedTabletIds, selectedPartitionIds, hasPartitionPredicate, - distributionSpec, preAggStatus, baseOutputs, groupExpression, getLogicalProperties(), - getPhysicalProperties(), statistics, tableSample, operativeSlots, virtualColumns, scoreOrderKeys, - scoreLimit, - scoreRangeInfo, annOrderKeys, annLimit, tableAlias, partitionPrunablePredicates)); + distributionSpec, preAggStatus, baseOutputs, selectedIndexOutputs, groupExpression, + getLogicalProperties(), getPhysicalProperties(), 
statistics, tableSample, operativeSlots, + virtualColumns, scoreOrderKeys, scoreLimit, scoreRangeInfo, annOrderKeys, annLimit, tableAlias, + partitionPrunablePredicates)); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 11f8a24cca1ec9..9b99f08b8ab74e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -178,6 +178,7 @@ public class OlapScanNode extends ScanNode { private SortInfo sortInfo = null; private Set outputColumnUniqueIds = new HashSet<>(); + private Set filledKeyColumnSlotIds = new HashSet<>(); // When scan match sort_info, we can push limit into OlapScanNode. // It's limit for scanner instead of scanNode so we add a new limit. @@ -256,6 +257,14 @@ public void setTableSample(TableSample tSample) { this.tableSample = tSample; } + public void setFilledKeyColumnSlotIds(Set filledKeyColumnSlotIds) { + this.filledKeyColumnSlotIds = filledKeyColumnSlotIds; + } + + public Set getFilledKeyColumnSlotIds() { + return filledKeyColumnSlotIds; + } + public void setNereidsPrunedTabletIds(Set nereidsPrunedTabletIds) { this.nereidsPrunedTabletIds = nereidsPrunedTabletIds; } @@ -1283,6 +1292,9 @@ protected void toThrift(TPlanNode msg) { if (outputColumnUniqueIds != null) { msg.olap_scan_node.setOutputColumnUniqueIds(outputColumnUniqueIds); } + if (filledKeyColumnSlotIds != null) { + msg.olap_scan_node.setFilledKeyColumnSlotIds(filledKeyColumnSlotIds); + } msg.olap_scan_node.setDistributeColumnIds(new ArrayList<>(distributionColumnIds)); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index c7954c423af73c..c0248be53412e1 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -996,6 +996,8 @@ struct TOlapScanNode { // Only partitions that are candidates for pruning are included; partitions FE // 
does not want pruned (e.g. default catch-all) are omitted from this list. 27: optional list partition_boundaries + // Slot ids of storage key columns filled by FE only to align the scan tuple with storage schema. + 28: optional set filled_key_column_slot_ids } struct TEqJoinCondition { From 88c36158b59506fe7d72dd19de68a900d7b71d2b Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Thu, 11 Jun 2026 17:19:10 +0800 Subject: [PATCH 13/22] [refactor](be) Remove segment project schema state ### What problem does this PR solve? Issue Number: None Related PR: #64413 Problem Summary: SegmentIterator no longer needs to maintain a second Schema object for the expression slot layout after the FE-side scan schema is expanded to match storage keys. This change removes the project schema state and replaces it with a lightweight expression ordinal to storage column id mapping. The mapping still preserves the old FE compatibility path when TabletReader expands return columns before segment iteration, while the common aligned case is normalized to the storage schema pointer so per-batch checks stay O(1). 
### Release note None ### Check List (For Author) - Test: Manual test - git diff --check - ./build.sh --be - build-support/run-clang-tidy.sh (fails on existing diagnostics in touched Doris headers/functions, including jni-util.h static_assert diagnostics and pre-existing complexity/modernize warnings) - Behavior changed: No - Does this need documentation: No --- be/src/storage/iterators.h | 4 +- be/src/storage/rowset/beta_rowset_reader.cpp | 6 +- be/src/storage/segment/segment_iterator.cpp | 90 +++++++++++--------- be/src/storage/segment/segment_iterator.h | 11 ++- 4 files changed, 61 insertions(+), 50 deletions(-) diff --git a/be/src/storage/iterators.h b/be/src/storage/iterators.h index 8afc73eecd56c6..ef8c90f03237fd 100644 --- a/be/src/storage/iterators.h +++ b/be/src/storage/iterators.h @@ -129,8 +129,8 @@ class StorageReadOptions { std::vector* read_orderby_key_columns = nullptr; io::IOContext io_ctx; VExprContextSPtrs common_expr_ctxs_push_down; - // Final scan project columns before storage-side read schema expansion. - const std::vector* project_columns = nullptr; + // Column ids in the slot ordinal layout used by storage-side expressions. + const std::vector* expr_column_ids = nullptr; const std::set* output_columns = nullptr; std::set filled_columns; // runtime state diff --git a/be/src/storage/rowset/beta_rowset_reader.cpp b/be/src/storage/rowset/beta_rowset_reader.cpp index 3b1f3e4a37b208..5eb8627c05933e 100644 --- a/be/src/storage/rowset/beta_rowset_reader.cpp +++ b/be/src/storage/rowset/beta_rowset_reader.cpp @@ -101,9 +101,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.push_down_agg_type_opt = _read_context->push_down_agg_type_opt; _read_options.common_expr_ctxs_push_down = _read_context->common_expr_ctxs_push_down; DORIS_CHECK(_read_context->return_columns != nullptr); - // Direct RowsetReader users do not expand return_columns, so return_columns is already - // the project layout. 
TabletReader sets origin_return_columns before any expansion. - _read_options.project_columns = _read_context->origin_return_columns != nullptr + // Direct RowsetReader users do not expand return_columns, so return_columns already matches + // expression slot ordinals. TabletReader sets origin_return_columns before any expansion. + _read_options.expr_column_ids = _read_context->origin_return_columns != nullptr ? _read_context->origin_return_columns : _read_context->return_columns; _read_options.virtual_column_exprs = _read_context->virtual_column_exprs; diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index c3436520d1a5ac..6feecb4be4da5d 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -412,7 +412,7 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _vir_cid_to_idx_in_block = _opts.vir_cid_to_idx_in_block; _score_runtime = _opts.score_runtime; _ann_topn_runtime = _opts.ann_topn_runtime; - _init_project_schema(); + _init_column_id_mappings(); if (opts.output_columns != nullptr) { _output_columns = *(opts.output_columns); @@ -478,26 +478,35 @@ void SegmentIterator::_init_schema_block_id_map() { } } -void SegmentIterator::_init_project_schema() { +void SegmentIterator::_init_column_id_mappings() { _init_schema_block_id_map(); - // Direct Segment::new_iterator callers use the input schema as the project layout. - const auto& project_column_ids = - _opts.project_columns != nullptr ? *_opts.project_columns : _schema->column_ids(); - if (project_column_ids == _schema->column_ids()) { - _project_schema = _schema; - } else { - _project_schema = - std::make_shared(_opts.tablet_schema->columns(), project_column_ids); - } + // Direct Segment::new_iterator callers use the input schema as the expression slot layout. + const auto* expr_column_ids = + _opts.expr_column_ids != nullptr ? 
_opts.expr_column_ids : &_schema->column_ids(); + _expr_column_ids = + *expr_column_ids == _schema->column_ids() ? &_schema->column_ids() : expr_column_ids; +} + +const std::vector& SegmentIterator::_expr_column_ids_ref() const { + DORIS_CHECK(_expr_column_ids != nullptr); + return *_expr_column_ids; +} + +ColumnId SegmentIterator::_expr_column_id(size_t ordinal) const { + const auto& expr_column_ids = _expr_column_ids_ref(); + DORIS_CHECK(ordinal < expr_column_ids.size()); + return expr_column_ids[ordinal]; +} + +bool SegmentIterator::_expr_column_ids_match_schema() const { + return _expr_column_ids == &_schema->column_ids(); } -void SegmentIterator::_build_project_block(Block* block, Block* project_block) { - DORIS_CHECK(_project_schema != nullptr); - DORIS_CHECK(_project_schema != _schema); +void SegmentIterator::_build_expr_block(Block* block, Block* expr_block) { + DORIS_CHECK(!_expr_column_ids_match_schema()); - project_block->clear(); - const auto& project_column_ids = _project_schema->column_ids(); - for (auto cid : project_column_ids) { + expr_block->clear(); + for (auto cid : _expr_column_ids_ref()) { auto loc = _schema_block_id_map[cid]; auto& output_column = block->get_by_position(loc); auto type = output_column.type; @@ -508,7 +517,7 @@ void SegmentIterator::_build_project_block(Block* block, Block* project_block) { DORIS_CHECK(type_it != _opts.vir_col_idx_to_type.end()); type = type_it->second; } - project_block->insert({std::move(column), type, _schema->column(cid)->name()}); + expr_block->insert({std::move(column), type, _schema->column(cid)->name()}); } } @@ -923,8 +932,8 @@ Status SegmentIterator::_apply_ann_topn_predicate() { VLOG_DEBUG << fmt::format("Try apply ann topn: {}", _ann_topn_runtime->debug_string()); size_t src_col_idx = _ann_topn_runtime->get_src_column_idx(); - // AnnTopNRuntime keeps VSlotRef::column_id(), which is the project block ordinal. 
- ColumnId src_cid = _project_schema->column_id(src_col_idx); + // AnnTopNRuntime keeps VSlotRef::column_id(), which is the expression block ordinal. + ColumnId src_cid = _expr_column_id(src_col_idx); IndexIterator* ann_index_iterator = _index_iterators[src_cid].get(); bool has_ann_index = _column_has_ann_index(src_cid); bool has_common_expr_push_down = !_common_expr_ctxs_push_down.empty(); @@ -1045,7 +1054,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() { _opts.stats->ann_index_topn_search_cnt += 1; _opts.stats->ann_index_cache_hits += ann_index_stats.topn_cache_hits.value(); const size_t dst_col_idx = _ann_topn_runtime->get_dest_column_idx(); - ColumnIterator* column_iter = _column_iterators[_project_schema->column_id(dst_col_idx)].get(); + ColumnIterator* column_iter = _column_iterators[_expr_column_id(dst_col_idx)].get(); DCHECK(column_iter != nullptr); VirtualColumnIterator* virtual_column_iter = dynamic_cast(column_iter); DCHECK(virtual_column_iter != nullptr); @@ -1190,7 +1199,7 @@ Status SegmentIterator::_extract_common_expr_columns(const VExprSPtr& expr) { auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { auto slot_expr = std::dynamic_pointer_cast(expr); - auto cid = _project_schema->column_id(slot_expr->column_id()); + auto cid = _expr_column_id(slot_expr->column_id()); _is_common_expr_column[cid] = true; _common_expr_columns.insert(cid); } else if (node_type == TExprNodeType::VIRTUAL_SLOT_REF) { @@ -1293,7 +1302,7 @@ Status SegmentIterator::_apply_index_expr() { size_t origin_rows = _row_bitmap.cardinality(); bool ann_range_search_executed = false; RETURN_IF_ERROR(expr_ctx->evaluate_ann_range_search( - _index_iterators, _project_schema->column_ids(), _column_iterators, + _index_iterators, _expr_column_ids_ref(), _column_iterators, _common_expr_to_slotref_map, _row_bitmap, ann_index_stats, enable_ann_index_result_cache, &ann_range_search_executed)); if (ann_range_search_executed) { @@ -3111,11 +3120,11 @@ Status 
SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& Block* block) { SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); DCHECK(!_common_expr_ctxs_push_down.empty()); - Block project_block; + Block expr_layout_block; Block* expr_block = block; - if (_project_schema != _schema) { - _build_project_block(block, &project_block); - expr_block = &project_block; + if (!_expr_column_ids_match_schema()) { + _build_expr_block(block, &expr_layout_block); + expr_block = &expr_layout_block; } std::vector common_ctxs; common_ctxs.reserve(_common_expr_ctxs_push_down.size()); @@ -3299,7 +3308,7 @@ Status SegmentIterator::_construct_compound_expr_context() { .io_ctx = _opts.io_ctx, }; auto inverted_index_context = std::make_shared( - _project_schema->column_ids(), _index_iterators, _storage_name_and_type, + _expr_column_ids_ref(), _index_iterators, _storage_name_and_type, _common_expr_index_exec_status, _score_runtime, _segment.get(), iter_opts); inverted_index_context->set_index_query_context(_index_query_context); for (const auto& expr_ctx : _opts.common_expr_ctxs_push_down) { @@ -3356,8 +3365,7 @@ void SegmentIterator::_calculate_common_expr_index_exec_status() { for (const auto& vir_child : vir_node->children()) { if (vir_child->is_slot_ref()) { auto* inner_slot_ref = assert_cast(vir_child.get()); - auto cid = - _project_schema->column_id(inner_slot_ref->column_id()); + auto cid = _expr_column_id(inner_slot_ref->column_id()); _common_expr_index_exec_status[cid][expr.get()] = false; _common_expr_to_slotref_map[root_expr_ctx.get()] [inner_slot_ref->column_id()] = @@ -3375,7 +3383,7 @@ void SegmentIterator::_calculate_common_expr_index_exec_status() { auto expr_without_cast = VExpr::expr_without_cast(child); if (expr_without_cast->is_slot_ref() && expr->op() != TExprOpcode::CAST) { auto* column_slot_ref = assert_cast(expr_without_cast.get()); - auto cid = _project_schema->column_id(column_slot_ref->column_id()); + auto cid = 
_expr_column_id(column_slot_ref->column_id()); _common_expr_index_exec_status[cid][expr.get()] = false; _common_expr_to_slotref_map[root_expr_ctx.get()][column_slot_ref->column_id()] = expr.get(); @@ -3502,18 +3510,18 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { return Status::OK(); } - Block project_block; - const bool materialize_on_project_block = _project_schema != _schema; + Block expr_layout_block; + const bool materialize_on_expr_block = !_expr_column_ids_match_schema(); Block* materialize_block = block; - if (materialize_on_project_block) { - _build_project_block(block, &project_block); - materialize_block = &project_block; + if (materialize_on_expr_block) { + _build_expr_block(block, &expr_layout_block); + materialize_block = &expr_layout_block; } for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto column_expr = cid_and_expr.second; - auto materialized_pos = materialize_on_project_block ? _vir_cid_to_idx_in_block.at(cid) - : _schema_block_id_map[cid]; + auto materialized_pos = materialize_on_expr_block ? 
_vir_cid_to_idx_in_block.at(cid) + : _schema_block_id_map[cid]; auto& column = materialize_block->get_by_position(materialized_pos).column; if (check_and_get_column(column.get())) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", @@ -3529,12 +3537,12 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { materialize_block->replace_by_position(materialized_pos, std::move(result_column)); } } - if (materialize_on_project_block) { + if (materialize_on_expr_block) { for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto idx_in_block = _schema_block_id_map[cid]; auto materialized_pos = _vir_cid_to_idx_in_block.at(cid); - const auto& column = project_block.get_by_position(materialized_pos).column; + const auto& column = expr_layout_block.get_by_position(materialized_pos).column; DORIS_CHECK(!check_and_get_column(column.get())); block->replace_by_position(idx_in_block, column); } @@ -3566,7 +3574,7 @@ void SegmentIterator::_prepare_score_column_materialization() { result_row_ids, filter); } const size_t dst_col_idx = _score_runtime->get_dest_column_idx(); - auto* column_iter = _column_iterators[_project_schema->column_id(dst_col_idx)].get(); + auto* column_iter = _column_iterators[_expr_column_id(dst_col_idx)].get(); auto* virtual_column_iter = dynamic_cast(column_iter); virtual_column_iter->prepare_materialization( std::move(result_column), diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 7f972ed4ce3995..c2ea231acc4827 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -274,8 +274,11 @@ class SegmentIterator : public RowwiseIterator { bool _can_evaluated_by_vectorized(std::shared_ptr predicate); void _init_schema_block_id_map(); - void _init_project_schema(); - void _build_project_block(Block* block, Block* project_block); + void _init_column_id_mappings(); + 
const std::vector& _expr_column_ids_ref() const; + ColumnId _expr_column_id(size_t ordinal) const; + bool _expr_column_ids_match_schema() const; + void _build_expr_block(Block* block, Block* expr_block); [[nodiscard]] Status _extract_common_expr_columns(const VExprSPtr& expr); [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, Block* block); @@ -353,8 +356,8 @@ class SegmentIterator : public RowwiseIterator { std::shared_ptr _segment; // read schema from scanner SchemaSPtr _schema; - // final scan project schema before storage-side read schema expansion - SchemaSPtr _project_schema; + // expr slot ordinal -> storage column id before storage-side read schema expansion + const std::vector* _expr_column_ids = nullptr; // storage type schema related to _schema, since column in segment may be different with type in _schema std::vector _storage_name_and_type; // vector idx -> column iterarator From 4798f7c5ee23f68d33d64f41bbae8bc7f70c3921 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Thu, 11 Jun 2026 18:48:09 +0800 Subject: [PATCH 14/22] [refactor](be) Drop segment expression layout fallback ### What problem does this PR solve? Issue Number: None Related PR: #64413 Problem Summary: FE now aligns scan slots to the storage schema and marks filled key columns explicitly. SegmentIterator no longer needs to keep a separate expression-column layout for pushed-down expressions, so this change removes the expression layout fallback from StorageReadOptions, RowsetReaderContext, and SegmentIterator. Pushed-down expression, ANN, index, score, and virtual-column paths now use the scan schema column ids directly.
### Release note None ### Check List (For Author) - Test: Manual test - ./build.sh --be - git diff --check - build-support/run-clang-tidy.sh --base HEAD (fails on pre-existing diagnostics in touched files, including jni-util.h static_assert(false), deprecated headers, and existing function complexity warnings) - Behavior changed: No - Does this need documentation: No --- be/src/storage/iterators.h | 2 - be/src/storage/rowset/beta_rowset_reader.cpp | 5 - be/src/storage/rowset/rowset_reader_context.h | 3 - be/src/storage/segment/segment_iterator.cpp | 101 +++--------------- be/src/storage/segment/segment_iterator.h | 7 -- be/src/storage/tablet/tablet_reader.cpp | 4 - 6 files changed, 17 insertions(+), 105 deletions(-) diff --git a/be/src/storage/iterators.h b/be/src/storage/iterators.h index ef8c90f03237fd..2e175a7ba90e66 100644 --- a/be/src/storage/iterators.h +++ b/be/src/storage/iterators.h @@ -129,8 +129,6 @@ class StorageReadOptions { std::vector* read_orderby_key_columns = nullptr; io::IOContext io_ctx; VExprContextSPtrs common_expr_ctxs_push_down; - // Column ids in the slot ordinal layout used by storage-side expressions. - const std::vector* expr_column_ids = nullptr; const std::set* output_columns = nullptr; std::set filled_columns; // runtime state diff --git a/be/src/storage/rowset/beta_rowset_reader.cpp b/be/src/storage/rowset/beta_rowset_reader.cpp index 5eb8627c05933e..21980384eee6ad 100644 --- a/be/src/storage/rowset/beta_rowset_reader.cpp +++ b/be/src/storage/rowset/beta_rowset_reader.cpp @@ -101,11 +101,6 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.push_down_agg_type_opt = _read_context->push_down_agg_type_opt; _read_options.common_expr_ctxs_push_down = _read_context->common_expr_ctxs_push_down; DORIS_CHECK(_read_context->return_columns != nullptr); - // Direct RowsetReader users do not expand return_columns, so return_columns already matches - // expression slot ordinals. 
TabletReader sets origin_return_columns before any expansion. - _read_options.expr_column_ids = _read_context->origin_return_columns != nullptr - ? _read_context->origin_return_columns - : _read_context->return_columns; _read_options.virtual_column_exprs = _read_context->virtual_column_exprs; _read_options.all_access_paths = _read_context->all_access_paths; diff --git a/be/src/storage/rowset/rowset_reader_context.h b/be/src/storage/rowset/rowset_reader_context.h index 6fcbef4d39b94f..9ce6e5f2611dd7 100644 --- a/be/src/storage/rowset/rowset_reader_context.h +++ b/be/src/storage/rowset/rowset_reader_context.h @@ -77,9 +77,6 @@ struct RowsetReaderContext { // Effective adaptive batch size byte budget. 0 means disabled internally. size_t preferred_block_size_bytes = 8388608UL; - // Points to the "true" output column list before non-direct-mode expansion. - // Used by BlockReader to map expanded storage columns back to the requested output layout. - const std::vector* origin_return_columns = nullptr; bool is_unique = false; //record row num merged in generic iterator uint64_t* merged_rows = nullptr; diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 6feecb4be4da5d..75b64f0480e8ee 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -412,7 +412,7 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _vir_cid_to_idx_in_block = _opts.vir_cid_to_idx_in_block; _score_runtime = _opts.score_runtime; _ann_topn_runtime = _opts.ann_topn_runtime; - _init_column_id_mappings(); + _init_schema_block_id_map(); if (opts.output_columns != nullptr) { _output_columns = *(opts.output_columns); @@ -478,49 +478,6 @@ void SegmentIterator::_init_schema_block_id_map() { } } -void SegmentIterator::_init_column_id_mappings() { - _init_schema_block_id_map(); - // Direct Segment::new_iterator callers use the input schema as the expression slot layout. 
- const auto* expr_column_ids = - _opts.expr_column_ids != nullptr ? _opts.expr_column_ids : &_schema->column_ids(); - _expr_column_ids = - *expr_column_ids == _schema->column_ids() ? &_schema->column_ids() : expr_column_ids; -} - -const std::vector& SegmentIterator::_expr_column_ids_ref() const { - DORIS_CHECK(_expr_column_ids != nullptr); - return *_expr_column_ids; -} - -ColumnId SegmentIterator::_expr_column_id(size_t ordinal) const { - const auto& expr_column_ids = _expr_column_ids_ref(); - DORIS_CHECK(ordinal < expr_column_ids.size()); - return expr_column_ids[ordinal]; -} - -bool SegmentIterator::_expr_column_ids_match_schema() const { - return _expr_column_ids == &_schema->column_ids(); -} - -void SegmentIterator::_build_expr_block(Block* block, Block* expr_block) { - DORIS_CHECK(!_expr_column_ids_match_schema()); - - expr_block->clear(); - for (auto cid : _expr_column_ids_ref()) { - auto loc = _schema_block_id_map[cid]; - auto& output_column = block->get_by_position(loc); - auto type = output_column.type; - auto column = output_column.column; - auto virtual_it = _vir_cid_to_idx_in_block.find(cid); - if (virtual_it != _vir_cid_to_idx_in_block.end()) { - auto type_it = _opts.vir_col_idx_to_type.find(virtual_it->second); - DORIS_CHECK(type_it != _opts.vir_col_idx_to_type.end()); - type = type_it->second; - } - expr_block->insert({std::move(column), type, _schema->column(cid)->name()}); - } -} - void SegmentIterator::_initialize_predicate_results() { // Initialize from _col_predicates for (auto pred : _col_predicates) { @@ -932,8 +889,8 @@ Status SegmentIterator::_apply_ann_topn_predicate() { VLOG_DEBUG << fmt::format("Try apply ann topn: {}", _ann_topn_runtime->debug_string()); size_t src_col_idx = _ann_topn_runtime->get_src_column_idx(); - // AnnTopNRuntime keeps VSlotRef::column_id(), which is the expression block ordinal. - ColumnId src_cid = _expr_column_id(src_col_idx); + // AnnTopNRuntime keeps VSlotRef::column_id(), which is the scan schema ordinal. 
+ ColumnId src_cid = _schema->column_id(src_col_idx); IndexIterator* ann_index_iterator = _index_iterators[src_cid].get(); bool has_ann_index = _column_has_ann_index(src_cid); bool has_common_expr_push_down = !_common_expr_ctxs_push_down.empty(); @@ -1054,7 +1011,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() { _opts.stats->ann_index_topn_search_cnt += 1; _opts.stats->ann_index_cache_hits += ann_index_stats.topn_cache_hits.value(); const size_t dst_col_idx = _ann_topn_runtime->get_dest_column_idx(); - ColumnIterator* column_iter = _column_iterators[_expr_column_id(dst_col_idx)].get(); + ColumnIterator* column_iter = _column_iterators[_schema->column_id(dst_col_idx)].get(); DCHECK(column_iter != nullptr); VirtualColumnIterator* virtual_column_iter = dynamic_cast(column_iter); DCHECK(virtual_column_iter != nullptr); @@ -1199,7 +1156,7 @@ Status SegmentIterator::_extract_common_expr_columns(const VExprSPtr& expr) { auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { auto slot_expr = std::dynamic_pointer_cast(expr); - auto cid = _expr_column_id(slot_expr->column_id()); + auto cid = _schema->column_id(slot_expr->column_id()); _is_common_expr_column[cid] = true; _common_expr_columns.insert(cid); } else if (node_type == TExprNodeType::VIRTUAL_SLOT_REF) { @@ -1302,7 +1259,7 @@ Status SegmentIterator::_apply_index_expr() { size_t origin_rows = _row_bitmap.cardinality(); bool ann_range_search_executed = false; RETURN_IF_ERROR(expr_ctx->evaluate_ann_range_search( - _index_iterators, _expr_column_ids_ref(), _column_iterators, + _index_iterators, _schema->column_ids(), _column_iterators, _common_expr_to_slotref_map, _row_bitmap, ann_index_stats, enable_ann_index_result_cache, &ann_range_search_executed)); if (ann_range_search_executed) { @@ -3120,12 +3077,6 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& Block* block) { SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); 
DCHECK(!_common_expr_ctxs_push_down.empty()); - Block expr_layout_block; - Block* expr_block = block; - if (!_expr_column_ids_match_schema()) { - _build_expr_block(block, &expr_layout_block); - expr_block = &expr_layout_block; - } std::vector common_ctxs; common_ctxs.reserve(_common_expr_ctxs_push_down.size()); for (auto& ctx : _common_expr_ctxs_push_down) { @@ -3139,8 +3090,8 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& IColumn::Filter filter(selected_size, 1); bool can_filter_all = false; for (const auto& ctx : _common_expr_ctxs_push_down) { - RETURN_IF_ERROR(ctx->execute_filter(expr_block, filter.data(), selected_size, false, - &can_filter_all)); + RETURN_IF_ERROR( + ctx->execute_filter(block, filter.data(), selected_size, false, &can_filter_all)); if (can_filter_all) { break; } @@ -3308,7 +3259,7 @@ Status SegmentIterator::_construct_compound_expr_context() { .io_ctx = _opts.io_ctx, }; auto inverted_index_context = std::make_shared( - _expr_column_ids_ref(), _index_iterators, _storage_name_and_type, + _schema->column_ids(), _index_iterators, _storage_name_and_type, _common_expr_index_exec_status, _score_runtime, _segment.get(), iter_opts); inverted_index_context->set_index_query_context(_index_query_context); for (const auto& expr_ctx : _opts.common_expr_ctxs_push_down) { @@ -3365,7 +3316,7 @@ void SegmentIterator::_calculate_common_expr_index_exec_status() { for (const auto& vir_child : vir_node->children()) { if (vir_child->is_slot_ref()) { auto* inner_slot_ref = assert_cast(vir_child.get()); - auto cid = _expr_column_id(inner_slot_ref->column_id()); + auto cid = _schema->column_id(inner_slot_ref->column_id()); _common_expr_index_exec_status[cid][expr.get()] = false; _common_expr_to_slotref_map[root_expr_ctx.get()] [inner_slot_ref->column_id()] = @@ -3383,7 +3334,7 @@ void SegmentIterator::_calculate_common_expr_index_exec_status() { auto expr_without_cast = VExpr::expr_without_cast(child); if 
(expr_without_cast->is_slot_ref() && expr->op() != TExprOpcode::CAST) { auto* column_slot_ref = assert_cast(expr_without_cast.get()); - auto cid = _expr_column_id(column_slot_ref->column_id()); + auto cid = _schema->column_id(column_slot_ref->column_id()); _common_expr_index_exec_status[cid][expr.get()] = false; _common_expr_to_slotref_map[root_expr_ctx.get()][column_slot_ref->column_id()] = expr.get(); @@ -3510,41 +3461,23 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { return Status::OK(); } - Block expr_layout_block; - const bool materialize_on_expr_block = !_expr_column_ids_match_schema(); - Block* materialize_block = block; - if (materialize_on_expr_block) { - _build_expr_block(block, &expr_layout_block); - materialize_block = &expr_layout_block; - } for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto column_expr = cid_and_expr.second; - auto materialized_pos = materialize_on_expr_block ? _vir_cid_to_idx_in_block.at(cid) - : _schema_block_id_map[cid]; - auto& column = materialize_block->get_by_position(materialized_pos).column; + auto materialized_pos = _schema_block_id_map[cid]; + auto& column = block->get_by_position(materialized_pos).column; if (check_and_get_column(column.get())) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", cid, materialized_pos); ColumnPtr result_column; Status st; RETURN_IF_CATCH_EXCEPTION({ - st = column_expr->root()->execute_column(column_expr.get(), materialize_block, - nullptr, _selected_size, result_column); + st = column_expr->root()->execute_column(column_expr.get(), block, nullptr, + _selected_size, result_column); }); RETURN_IF_ERROR(st); - materialize_block->replace_by_position(materialized_pos, std::move(result_column)); - } - } - if (materialize_on_expr_block) { - for (const auto& cid_and_expr : _virtual_column_exprs) { - auto cid = cid_and_expr.first; - auto idx_in_block = _schema_block_id_map[cid]; - auto 
materialized_pos = _vir_cid_to_idx_in_block.at(cid); - const auto& column = expr_layout_block.get_by_position(materialized_pos).column; - DORIS_CHECK(!check_and_get_column(column.get())); - block->replace_by_position(idx_in_block, column); + block->replace_by_position(materialized_pos, std::move(result_column)); } } return Status::OK(); @@ -3574,7 +3507,7 @@ void SegmentIterator::_prepare_score_column_materialization() { result_row_ids, filter); } const size_t dst_col_idx = _score_runtime->get_dest_column_idx(); - auto* column_iter = _column_iterators[_expr_column_id(dst_col_idx)].get(); + auto* column_iter = _column_iterators[_schema->column_id(dst_col_idx)].get(); auto* virtual_column_iter = dynamic_cast(column_iter); virtual_column_iter->prepare_materialization( std::move(result_column), diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index c2ea231acc4827..43e2efab74af3f 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -274,11 +274,6 @@ class SegmentIterator : public RowwiseIterator { bool _can_evaluated_by_vectorized(std::shared_ptr predicate); void _init_schema_block_id_map(); - void _init_column_id_mappings(); - const std::vector& _expr_column_ids_ref() const; - ColumnId _expr_column_id(size_t ordinal) const; - bool _expr_column_ids_match_schema() const; - void _build_expr_block(Block* block, Block* expr_block); [[nodiscard]] Status _extract_common_expr_columns(const VExprSPtr& expr); [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, Block* block); @@ -356,8 +351,6 @@ class SegmentIterator : public RowwiseIterator { std::shared_ptr _segment; // read schema from scanner SchemaSPtr _schema; - // expr slot ordinal -> storage column id before storage-side read schema expansion - const std::vector* _expr_column_ids = nullptr; // storage type schema related to _schema, since column in segment may be different with 
type in _schema std::vector _storage_name_and_type; // vector idx -> column iterarator diff --git a/be/src/storage/tablet/tablet_reader.cpp b/be/src/storage/tablet/tablet_reader.cpp index 5b90967f3c3812..92778d71e0ebaa 100644 --- a/be/src/storage/tablet/tablet_reader.cpp +++ b/be/src/storage/tablet/tablet_reader.cpp @@ -202,10 +202,6 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { // Propagate general read limit for DUP_KEYS and UNIQUE_KEYS with MOW _reader_context.general_read_limit = read_params.general_read_limit; - // Preserve the original requested output layout so BlockReader can map expanded storage - // columns (for non-direct AGG/UNIQUE paths) back to the final output block. - _reader_context.origin_return_columns = read_params.origin_return_columns; - return Status::OK(); } From acb846a712d8f6b4f56999110a76465782f0d676 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Thu, 11 Jun 2026 21:40:02 +0800 Subject: [PATCH 15/22] [refactor](be) Simplify segment virtual column layout ### What problem does this PR solve? Issue Number: None Related PR: #64413 Problem Summary: FE-filled key column schema keeps scan output aligned with the storage schema, so BE no longer needs to carry virtual-column block index/type mappings through scan and storage readers. This removes those mapping fields from OlapScanner, TabletReader, RowsetReaderContext, StorageReadOptions, and SegmentIterator, derives virtual column type from its expression context, routes common expression filtering through VExprContext::execute_conjuncts, and materializes virtual columns through VExprContext::execute. 
### Release note None ### Check List (For Author) - Test: Manual test - build-support/clang-format.sh - git diff --check - ./build.sh --be - build-support/run-clang-tidy.sh --base HEAD (stopped after noisy existing full-file/header diagnostics; fixed the reported changed-line use-after-move in vcollect_iterator.cpp) - Behavior changed: No - Does this need documentation: No --- be/src/exec/operator/olap_scan_operator.cpp | 10 -- be/src/exec/operator/olap_scan_operator.h | 3 - be/src/exec/scan/olap_scanner.cpp | 17 +--- be/src/exec/scan/olap_scanner.h | 8 +- be/src/exec/scan/scanner_scheduler.cpp | 11 +-- be/src/storage/iterator/vcollect_iterator.cpp | 22 ++--- be/src/storage/iterators.h | 2 - be/src/storage/rowset/beta_rowset_reader.cpp | 2 - be/src/storage/rowset/rowset_reader_context.h | 2 - be/src/storage/segment/segment_iterator.cpp | 99 ++++++++----------- be/src/storage/segment/segment_iterator.h | 5 +- be/src/storage/tablet/tablet_reader.cpp | 2 - be/src/storage/tablet/tablet_reader.h | 2 - 13 files changed, 60 insertions(+), 125 deletions(-) diff --git a/be/src/exec/operator/olap_scan_operator.cpp b/be/src/exec/operator/olap_scan_operator.cpp index 8b1c521c216aa7..347ef6a100d871 100644 --- a/be/src/exec/operator/olap_scan_operator.cpp +++ b/be/src/exec/operator/olap_scan_operator.cpp @@ -980,16 +980,6 @@ Status OlapScanLocalState::open(RuntimeState* state) { RETURN_IF_ERROR(virtual_column_expr_ctx->open(state)); _slot_id_to_virtual_column_expr[slot_desc->id()] = virtual_column_expr_ctx; - _slot_id_to_col_type[slot_desc->id()] = slot_desc->get_data_type_ptr(); - int col_pos = p.intermediate_row_desc().get_column_id(slot_desc->id()); - if (col_pos < 0) { - return Status::InternalError( - "Invalid virtual slot, can not find its information. 
Slot desc:\n{}\nRow " - "desc:\n{}", - slot_desc->debug_string(), p.row_desc().debug_string()); - } else { - _slot_id_to_index_in_block[slot_desc->id()] = col_pos; - } } } diff --git a/be/src/exec/operator/olap_scan_operator.h b/be/src/exec/operator/olap_scan_operator.h index f882ef0083a00e..e3345e69e30244 100644 --- a/be/src/exec/operator/olap_scan_operator.h +++ b/be/src/exec/operator/olap_scan_operator.h @@ -325,9 +325,6 @@ class OlapScanLocalState final : public ScanLocalState { std::vector _read_sources; std::map _slot_id_to_virtual_column_expr; - std::map _slot_id_to_index_in_block; - // this map is needed for scanner opening. - std::map _slot_id_to_col_type; // ---- Runtime-filter partition pruning ---- // Attaches this per-instance pruner to the shared parse result owned by diff --git a/be/src/exec/scan/olap_scanner.cpp b/be/src/exec/scan/olap_scanner.cpp index 5402b648ee4f48..2d13a80b265fce 100644 --- a/be/src/exec/scan/olap_scanner.cpp +++ b/be/src/exec/scan/olap_scanner.cpp @@ -94,8 +94,6 @@ OlapScanner::OlapScanner(ScanLocalStateBase* parent, OlapScanner::Params&& param .topn_filter_source_node_ids {}, .key_group_cluster_key_idxes {}, .virtual_column_exprs {}, - .vir_cid_to_idx_in_block {}, - .vir_col_idx_to_type {}, .score_runtime {}, .collection_statistics {}, .ann_topn_runtime {}, @@ -174,8 +172,6 @@ Status OlapScanner::_prepare_impl() { _slot_id_to_virtual_column_expr[pair.first] = context; } - _slot_id_to_index_in_block = local_state->_slot_id_to_index_in_block; - _slot_id_to_col_type = local_state->_slot_id_to_col_type; _score_runtime = local_state->_score_runtime; // All scanners share the same ann_topn_runtime. 
_ann_topn_runtime = local_state->_ann_topn_runtime; @@ -338,8 +334,6 @@ Status OlapScanner::_init_tablet_reader_params( _tablet_reader_params.common_expr_ctxs_push_down = _common_expr_ctxs_push_down; _tablet_reader_params.virtual_column_exprs = _virtual_column_exprs; - _tablet_reader_params.vir_cid_to_idx_in_block = _vir_cid_to_idx_in_block; - _tablet_reader_params.vir_col_idx_to_type = _vir_col_idx_to_type; _tablet_reader_params.score_runtime = _score_runtime; _tablet_reader_params.output_columns = ((OlapScanLocalState*)_local_state)->_output_column_ids; _tablet_reader_params.ann_topn_runtime = _ann_topn_runtime; @@ -600,14 +594,9 @@ Status OlapScanner::_init_return_columns() { if (slot->get_virtual_column_expr()) { ColumnId virtual_column_cid = index; _virtual_column_exprs[virtual_column_cid] = _slot_id_to_virtual_column_expr[slot->id()]; - size_t idx_in_block = _slot_id_to_index_in_block[slot->id()]; - _vir_cid_to_idx_in_block[virtual_column_cid] = idx_in_block; - _vir_col_idx_to_type[idx_in_block] = _slot_id_to_col_type[slot->id()]; - - VLOG_DEBUG << fmt::format( - "Virtual column, slot id: {}, cid {}, column index: {}, type: {}", slot->id(), - virtual_column_cid, _vir_cid_to_idx_in_block[virtual_column_cid], - _vir_col_idx_to_type[idx_in_block]->get_name()); + + VLOG_DEBUG << fmt::format("Virtual column, slot id: {}, cid {}, type: {}", slot->id(), + virtual_column_cid, slot->get_data_type_ptr()->get_name()); } const auto& column = tablet_schema->column(index); diff --git a/be/src/exec/scan/olap_scanner.h b/be/src/exec/scan/olap_scanner.h index f1e8b5c719b08a..0ea7d41c5c14d1 100644 --- a/be/src/exec/scan/olap_scanner.h +++ b/be/src/exec/scan/olap_scanner.h @@ -115,17 +115,11 @@ class OlapScanner : public Scanner { std::unordered_set _tablet_columns_convert_to_null_set; - // This three fields are copied from OlapScanLocalState. + // This field is copied from OlapScanLocalState. 
std::map _slot_id_to_virtual_column_expr; - std::map _slot_id_to_index_in_block; - std::map _slot_id_to_col_type; // ColumnId of virtual column to its expr context std::map _virtual_column_exprs; - // ColumnId of virtual column to its index in block - std::map _vir_cid_to_idx_in_block; - // The idx of vir_col in block to its data type. - std::map _vir_col_idx_to_type; std::shared_ptr _score_runtime; std::shared_ptr _ann_topn_runtime; diff --git a/be/src/exec/scan/scanner_scheduler.cpp b/be/src/exec/scan/scanner_scheduler.cpp index 7e1deb2bae7f54..230a54048a2813 100644 --- a/be/src/exec/scan/scanner_scheduler.cpp +++ b/be/src/exec/scan/scanner_scheduler.cpp @@ -362,18 +362,17 @@ void ScannerScheduler::_make_sure_virtual_col_is_materialized( continue; } - std::vector vcid_to_idx; - - for (const auto& pair : olap_scanner->_vir_cid_to_idx_in_block) { - vcid_to_idx.push_back(fmt::format("{}-{}", pair.first, pair.second)); + std::vector virtual_column_ids; + for (const auto& pair : olap_scanner->_virtual_column_exprs) { + virtual_column_ids.push_back(pair.first); } std::string error_msg = fmt::format( "Column in idx {} is nothing, block columns {}, normal_columns " "{}, " - "vir_cid_to_idx_in_block_msg {}", + "virtual_column_ids [{}]", idx, free_block->columns(), olap_scanner->_return_columns.size(), - fmt::format("_vir_cid_to_idx_in_block:[{}]", fmt::join(vcid_to_idx, ","))); + fmt::join(virtual_column_ids, ",")); throw doris::Exception(ErrorCode::INTERNAL_ERROR, error_msg); } #endif diff --git a/be/src/storage/iterator/vcollect_iterator.cpp b/be/src/storage/iterator/vcollect_iterator.cpp index 11c74a6cf7affc..e0e56375852ec6 100644 --- a/be/src/storage/iterator/vcollect_iterator.cpp +++ b/be/src/storage/iterator/vcollect_iterator.cpp @@ -284,22 +284,20 @@ Status VCollectIterator::_topn_next(Block* block) { } auto clone_block = block->clone_empty(); - // Initialize virtual slot columns by schema (avoid runtime type checks): - // use _reader_context.vir_col_idx_to_type to 
construct real columns for those positions. - if (!_reader->_reader_context.vir_col_idx_to_type.empty()) { - const auto& idx_to_type = _reader->_reader_context.vir_col_idx_to_type; - for (const auto& kv : idx_to_type) { - size_t idx = kv.first; - if (idx < clone_block.columns()) { - clone_block.get_by_position(idx).column = kv.second->create_column(); - } - } - } + // Initialize virtual slot columns by schema (avoid runtime type checks). + for (const auto& [cid, expr_ctx] : _reader->_reader_context.virtual_column_exprs) { + auto it = std::find(_reader->_return_columns.begin(), _reader->_return_columns.end(), cid); + DORIS_CHECK(it != _reader->_return_columns.end()); + auto idx = cast_set(std::distance(_reader->_return_columns.begin(), it)); + DORIS_CHECK(idx < clone_block.columns()); + clone_block.get_by_position(idx).column = expr_ctx->root()->data_type()->create_column(); + } + const size_t clone_block_columns = clone_block.columns(); MutableBlock mutable_block = MutableBlock::build_mutable_block(std::move(clone_block)); const std::vector* sort_columns = _reader->_reader_context.read_orderby_key_columns; for (auto column_idx : *sort_columns) { - DORIS_CHECK(column_idx < clone_block.columns()); + DORIS_CHECK(column_idx < clone_block_columns); } size_t first_sort_column_idx = (*sort_columns)[0]; diff --git a/be/src/storage/iterators.h b/be/src/storage/iterators.h index 2e175a7ba90e66..42ec5ccc4ba6a3 100644 --- a/be/src/storage/iterators.h +++ b/be/src/storage/iterators.h @@ -149,8 +149,6 @@ class StorageReadOptions { std::map virtual_column_exprs; std::shared_ptr ann_topn_runtime; - std::map vir_cid_to_idx_in_block; - std::map vir_col_idx_to_type; std::map all_access_paths; std::map predicate_access_paths; diff --git a/be/src/storage/rowset/beta_rowset_reader.cpp b/be/src/storage/rowset/beta_rowset_reader.cpp index 21980384eee6ad..f5b0019c449938 100644 --- a/be/src/storage/rowset/beta_rowset_reader.cpp +++ b/be/src/storage/rowset/beta_rowset_reader.cpp @@ -107,8 
+107,6 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.predicate_access_paths = _read_context->predicate_access_paths; _read_options.ann_topn_runtime = _read_context->ann_topn_runtime; - _read_options.vir_cid_to_idx_in_block = _read_context->vir_cid_to_idx_in_block; - _read_options.vir_col_idx_to_type = _read_context->vir_col_idx_to_type; _read_options.score_runtime = _read_context->score_runtime; _read_options.collection_statistics = _read_context->collection_statistics; _read_options.rowset_id = _rowset->rowset_id(); diff --git a/be/src/storage/rowset/rowset_reader_context.h b/be/src/storage/rowset/rowset_reader_context.h index 9ce6e5f2611dd7..5b53dba5d5a18c 100644 --- a/be/src/storage/rowset/rowset_reader_context.h +++ b/be/src/storage/rowset/rowset_reader_context.h @@ -94,8 +94,6 @@ struct RowsetReaderContext { int64_t ttl_seconds = 0; std::map virtual_column_exprs; - std::map vir_cid_to_idx_in_block; - std::map vir_col_idx_to_type; std::map all_access_paths; std::map predicate_access_paths; diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 75b64f0480e8ee..a4daacdd427382 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -409,7 +409,6 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { } _virtual_column_exprs = _opts.virtual_column_exprs; - _vir_cid_to_idx_in_block = _opts.vir_cid_to_idx_in_block; _score_runtime = _opts.score_runtime; _ann_topn_runtime = _opts.ann_topn_runtime; _init_schema_block_id_map(); @@ -462,10 +461,8 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { RETURN_IF_ERROR(_construct_compound_expr_context()); VLOG_DEBUG << fmt::format( - "Segment iterator init, virtual_column_exprs size: {}, " - "_vir_cid_to_idx_in_block size: {}, common_expr_pushdown size: {}", - _opts.virtual_column_exprs.size(), _opts.vir_cid_to_idx_in_block.size(), - 
_common_expr_ctxs_push_down.size()); + "Segment iterator init, virtual_column_exprs size: {}, common_expr_pushdown size: {}", + _opts.virtual_column_exprs.size(), _common_expr_ctxs_push_down.size()); _initialize_predicate_results(); return Status::OK(); } @@ -1383,7 +1380,7 @@ bool SegmentIterator::_need_read_data(ColumnId cid) { return true; } // this is a virtual column, we always need to read data - if (this->_vir_cid_to_idx_in_block.contains(cid)) { + if (_virtual_column_exprs.contains(cid)) { return true; } @@ -1548,8 +1545,8 @@ Status SegmentIterator::_init_return_column_iterators() { } #ifndef NDEBUG - for (auto pair : _vir_cid_to_idx_in_block) { - ColumnId vir_col_cid = pair.first; + for (const auto& entry : _virtual_column_exprs) { + ColumnId vir_col_cid = entry.first; DCHECK(_column_iterators[vir_col_cid] != nullptr) << "Virtual column iterator for " << vir_col_cid << " should not be null"; ColumnIterator* column_iter = _column_iterators[vir_col_cid].get(); @@ -1965,8 +1962,8 @@ Status SegmentIterator::_vec_init_lazy_materialization() { } } - for (auto pair : _vir_cid_to_idx_in_block) { - _columns_to_filter.push_back(_schema_block_id_map[pair.first]); + for (const auto& entry : _virtual_column_exprs) { + _columns_to_filter.push_back(_schema_block_id_map[entry.first]); } } } @@ -2093,7 +2090,7 @@ bool SegmentIterator::_fill_filled_column(ColumnId cid, MutableColumnPtr& column if (!_opts.filled_columns.contains(cid)) { return false; } - DORIS_CHECK(!_vir_cid_to_idx_in_block.contains(cid)); + DORIS_CHECK(!_virtual_column_exprs.contains(cid)); DORIS_CHECK(!_has_delete_predicate(cid)); DORIS_CHECK(cid < _is_pred_column.size()); DORIS_CHECK(!_is_pred_column[cid]); @@ -2178,7 +2175,7 @@ Status SegmentIterator::_init_current_block(Block* block, } } - for (auto entry : _virtual_column_exprs) { + for (const auto& entry : _virtual_column_exprs) { auto cid = entry.first; current_columns[cid] = ColumnNothing::create(0); current_columns[cid]->reserve(nrows_read_limit); 
@@ -2212,7 +2209,7 @@ Status SegmentIterator::_output_non_pred_columns(Block* block) { if (loc < block->columns()) { bool column_in_block_is_nothing = check_and_get_column( block->get_by_position(loc).column.get()); - bool column_is_normal = !_vir_cid_to_idx_in_block.contains(cid); + bool column_is_normal = !_virtual_column_exprs.contains(cid); bool return_column_is_nothing = check_and_get_column(_current_return_columns[cid].get()); VLOG_DEBUG << fmt::format( @@ -2726,10 +2723,9 @@ Status SegmentIterator::next_batch(Block* block) { if (res.is()) { // Since we have a type check at the caller. // So a replacement of nothing column with real column is needed. - const auto& idx_to_datatype = _opts.vir_col_idx_to_type; - for (const auto& pair : _vir_cid_to_idx_in_block) { - auto idx = _schema_block_id_map[pair.first]; - auto type = idx_to_datatype.find(pair.second)->second; + for (const auto& [cid, expr_ctx] : _virtual_column_exprs) { + auto idx = _schema_block_id_map[cid]; + auto type = expr_ctx->root()->data_type(); block->replace_by_position(idx, type->create_column()); } @@ -2967,10 +2963,10 @@ Status SegmentIterator::_next_batch_internal(Block* block) { if (!_virtual_column_exprs.empty()) { bool use_sel = _is_need_vec_eval || _is_need_short_eval || _is_need_expr_eval; uint16_t* sel_rowid_idx = use_sel ? _sel_rowid_idx.data() : nullptr; - std::vector vir_ctxs; + VExprContextSPtrs vir_ctxs; vir_ctxs.reserve(_virtual_column_exprs.size()); for (auto& [cid, ctx] : _virtual_column_exprs) { - vir_ctxs.push_back(ctx.get()); + vir_ctxs.push_back(ctx); } _output_index_result_column(vir_ctxs, sel_rowid_idx, _selected_size); } @@ -2995,12 +2991,10 @@ void SegmentIterator::_fill_column_nothing() { // Because: // 1. Before each batch, _init_return_columns is called to initialize _current_return_columns, and virtual columns in _current_return_columns are initialized as ColumnNothing. // 2. 
When select_size == 0, the read method of VirtualColumnIterator will definitely not be called, so the corresponding Column remains a ColumnNothing - for (const auto pair : _vir_cid_to_idx_in_block) { - auto cid = pair.first; - auto pos = pair.second; + for (const auto& [cid, expr_ctx] : _virtual_column_exprs) { [[maybe_unused]] const auto* nothing_col = assert_cast(_current_return_columns[cid].get()); - _current_return_columns[cid] = _opts.vir_col_idx_to_type[pos]->create_column(); + _current_return_columns[cid] = expr_ctx->root()->data_type()->create_column(); } } @@ -3016,17 +3010,15 @@ Status SegmentIterator::_check_output_block(Block* block) { idx, block->columns(), _schema->num_column_ids(), _virtual_column_exprs.size()); } else if (check_and_get_column(entry.column.get())) { if (rows > 0) { - std::vector vcid_to_idx; - for (const auto& pair : _vir_cid_to_idx_in_block) { - vcid_to_idx.push_back(fmt::format("{}-{}", pair.first, pair.second)); + std::vector virtual_column_ids; + for (const auto& pair : _virtual_column_exprs) { + virtual_column_ids.push_back(pair.first); } - std::string vir_cid_to_idx_in_block_msg = - fmt::format("_vir_cid_to_idx_in_block:[{}]", fmt::join(vcid_to_idx, ",")); return Status::InternalError( "Column in idx {} is nothing, block columns {}, normal_columns {}, " - "vir_cid_to_idx_in_block_msg {}", + "virtual_column_ids [{}]", idx, block->columns(), _schema->num_column_ids(), - vir_cid_to_idx_in_block_msg); + fmt::join(virtual_column_ids, ",")); } } else if (entry.column->size() != rows) { return Status::InternalError( @@ -3077,25 +3069,16 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& Block* block) { SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); DCHECK(!_common_expr_ctxs_push_down.empty()); - std::vector common_ctxs; - common_ctxs.reserve(_common_expr_ctxs_push_down.size()); - for (auto& ctx : _common_expr_ctxs_push_down) { - common_ctxs.push_back(ctx.get()); - } - 
_output_index_result_column(common_ctxs, sel_rowid_idx, selected_size); + _output_index_result_column(_common_expr_ctxs_push_down, sel_rowid_idx, selected_size); uint16_t original_size = selected_size; _opts.stats->expr_cond_input_rows += original_size; - IColumn::Filter filter(selected_size, 1); + DCHECK_EQ(block->rows(), selected_size); + IColumn::Filter filter(block->rows(), 1); bool can_filter_all = false; - for (const auto& ctx : _common_expr_ctxs_push_down) { - RETURN_IF_ERROR( - ctx->execute_filter(block, filter.data(), selected_size, false, &can_filter_all)); - if (can_filter_all) { - break; - } - } + RETURN_IF_ERROR(VExprContext::execute_conjuncts(_common_expr_ctxs_push_down, nullptr, block, + &filter, &can_filter_all)); RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, _columns_to_filter, filter)); selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); @@ -3145,14 +3128,14 @@ uint16_t SegmentIterator::_evaluate_common_expr_filter(uint16_t* sel_rowid_idx, } } -void SegmentIterator::_output_index_result_column(const std::vector& expr_ctxs, +void SegmentIterator::_output_index_result_column(const VExprContextSPtrs& expr_ctxs, uint16_t* sel_rowid_idx, uint16_t select_size) { SCOPED_RAW_TIMER(&_opts.stats->output_index_result_column_timer); if (select_size == 0) { return; } - for (auto* expr_ctx_ptr : expr_ctxs) { - auto index_ctx = expr_ctx_ptr->get_index_context(); + for (const auto& expr_ctx : expr_ctxs) { + auto index_ctx = expr_ctx->get_index_context(); if (index_ctx == nullptr) { continue; } @@ -3437,11 +3420,11 @@ bool SegmentIterator::_can_opt_limit_reads() { // Before get next batch. make sure all virtual columns in block has type ColumnNothing. 
void SegmentIterator::_init_virtual_columns(Block* block) { - for (const auto& pair : _vir_cid_to_idx_in_block) { - auto idx = _schema_block_id_map[pair.first]; + for (const auto& [cid, expr_ctx] : _virtual_column_exprs) { + auto idx = _schema_block_id_map[cid]; auto& col_with_type_and_name = block->get_by_position(idx); col_with_type_and_name.column = ColumnNothing::create(0); - col_with_type_and_name.type = _opts.vir_col_idx_to_type[pair.second]; + col_with_type_and_name.type = expr_ctx->root()->data_type(); } } @@ -3449,11 +3432,11 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { // Some expr can not process empty block, such as function `element_at`. // So materialize virtual column in advance to avoid errors. if (_selected_size == 0) { - for (const auto& pair : _vir_cid_to_idx_in_block) { - auto idx = _schema_block_id_map[pair.first]; + for (const auto& [cid, expr_ctx] : _virtual_column_exprs) { + auto idx = _schema_block_id_map[cid]; auto& col_with_type_and_name = block->get_by_position(idx); - col_with_type_and_name.column = _opts.vir_col_idx_to_type[pair.second]->create_column(); - col_with_type_and_name.type = _opts.vir_col_idx_to_type[pair.second]; + col_with_type_and_name.column = expr_ctx->root()->data_type()->create_column(); + col_with_type_and_name.type = expr_ctx->root()->data_type(); } return Status::OK(); } @@ -3461,6 +3444,7 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { return Status::OK(); } + DCHECK_EQ(block->rows(), _selected_size); for (const auto& cid_and_expr : _virtual_column_exprs) { auto cid = cid_and_expr.first; auto column_expr = cid_and_expr.second; @@ -3470,12 +3454,7 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", cid, materialized_pos); ColumnPtr result_column; - Status st; - RETURN_IF_CATCH_EXCEPTION({ - st = 
column_expr->root()->execute_column(column_expr.get(), block, nullptr, - _selected_size, result_column); - }); - RETURN_IF_ERROR(st); + RETURN_IF_ERROR(column_expr->execute(block, result_column)); block->replace_by_position(materialized_pos, std::move(result_column)); } diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 43e2efab74af3f..e0d3893385a5d4 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -290,8 +290,8 @@ class SegmentIterator : public RowwiseIterator { bool _check_apply_by_inverted_index(std::shared_ptr pred); - void _output_index_result_column(const std::vector& expr_ctxs, - uint16_t* sel_rowid_idx, uint16_t select_size); + void _output_index_result_column(const VExprContextSPtrs& expr_ctxs, uint16_t* sel_rowid_idx, + uint16_t select_size); bool _need_read_data(ColumnId cid); void _fill_default_column(MutableColumnPtr& column, size_t num_of_defaults); @@ -474,7 +474,6 @@ class SegmentIterator : public RowwiseIterator { // cid to virtual column expr std::map _virtual_column_exprs; - std::map _vir_cid_to_idx_in_block; IndexQueryContextPtr _index_query_context; diff --git a/be/src/storage/tablet/tablet_reader.cpp b/be/src/storage/tablet/tablet_reader.cpp index 92778d71e0ebaa..e1f329b7902660 100644 --- a/be/src/storage/tablet/tablet_reader.cpp +++ b/be/src/storage/tablet/tablet_reader.cpp @@ -191,8 +191,6 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { _reader_context.collection_statistics = read_params.collection_statistics; _reader_context.virtual_column_exprs = read_params.virtual_column_exprs; - _reader_context.vir_cid_to_idx_in_block = read_params.vir_cid_to_idx_in_block; - _reader_context.vir_col_idx_to_type = read_params.vir_col_idx_to_type; _reader_context.ann_topn_runtime = read_params.ann_topn_runtime; _reader_context.condition_cache_digest = read_params.condition_cache_digest; diff --git 
a/be/src/storage/tablet/tablet_reader.h b/be/src/storage/tablet/tablet_reader.h index 6d8bd5e0d3962f..ba398197b5474f 100644 --- a/be/src/storage/tablet/tablet_reader.h +++ b/be/src/storage/tablet/tablet_reader.h @@ -204,8 +204,6 @@ class TabletReader { int64_t batch_size = -1; std::map virtual_column_exprs; - std::map vir_cid_to_idx_in_block; - std::map vir_col_idx_to_type; std::shared_ptr score_runtime; CollectionStatisticsPtr collection_statistics; From f530e95783a30b13939c0071a75945aae5860b4a Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Thu, 11 Jun 2026 22:15:53 +0800 Subject: [PATCH 16/22] [refactor](be) Remove filled key slot cache ### What problem does this PR solve? Issue Number: None Related PR: #64413 Problem Summary: FE-filled key columns are carried in the scan node thrift only when non-empty, and BE no longer keeps a separate LocalState copy of those filled key slot ids. OlapScanner reads the thrift field directly while initializing return columns and still records filled storage column ids for direct-mode readers to avoid reading FE-added key columns when storage merge is unnecessary. 
### Release note None ### Check List (For Author) - Test: Manual test - build-support/clang-format.sh - git diff --check - ./build.sh --be --fe - build-support/run-clang-tidy.sh --base HEAD (failed on existing full-function/header/JNI static_assert diagnostics in the touched translation units) - Behavior changed: No - Does this need documentation: No --- be/src/exec/operator/olap_scan_operator.cpp | 5 ----- be/src/exec/operator/olap_scan_operator.h | 1 - be/src/exec/scan/olap_scanner.cpp | 4 +++- .../src/main/java/org/apache/doris/planner/OlapScanNode.java | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/be/src/exec/operator/olap_scan_operator.cpp b/be/src/exec/operator/olap_scan_operator.cpp index 347ef6a100d871..ae4fca5c8c2363 100644 --- a/be/src/exec/operator/olap_scan_operator.cpp +++ b/be/src/exec/operator/olap_scan_operator.cpp @@ -594,11 +594,6 @@ Status OlapScanLocalState::_init_scanners(std::list* scanners) { for (auto uid : p._olap_scan_node.output_column_unique_ids) { _output_column_ids.emplace(uid); } - if (p._olap_scan_node.__isset.filled_key_column_slot_ids) { - for (auto slot_id : p._olap_scan_node.filled_key_column_slot_ids) { - _filled_key_column_slot_ids.emplace(slot_id); - } - } // Step 3: convert accumulated scan key pairs into OlapScanRange objects. // Each OlapScanRange carries real begin/end OlapTuples with has_lower_bound = true. 
diff --git a/be/src/exec/operator/olap_scan_operator.h b/be/src/exec/operator/olap_scan_operator.h index e3345e69e30244..7e69b8a33dfb9f 100644 --- a/be/src/exec/operator/olap_scan_operator.h +++ b/be/src/exec/operator/olap_scan_operator.h @@ -136,7 +136,6 @@ class OlapScanLocalState final : public ScanLocalState { OlapScanKeys _scan_keys; // If column id in this set, indicate that we need to read data after index filtering std::set _output_column_ids; - std::set _filled_key_column_slot_ids; std::unique_ptr _segment_profile; std::unique_ptr _index_filter_profile; diff --git a/be/src/exec/scan/olap_scanner.cpp b/be/src/exec/scan/olap_scanner.cpp index 2d13a80b265fce..7a5e875515cc41 100644 --- a/be/src/exec/scan/olap_scanner.cpp +++ b/be/src/exec/scan/olap_scanner.cpp @@ -601,7 +601,9 @@ Status OlapScanner::_init_return_columns() { const auto& column = tablet_schema->column(index); auto* olap_local_state = static_cast(_local_state); - if (olap_local_state->_filled_key_column_slot_ids.contains(slot->id())) { + const auto& olap_scan_node = olap_local_state->olap_scan_node(); + if (olap_scan_node.__isset.filled_key_column_slot_ids && + olap_scan_node.filled_key_column_slot_ids.contains(slot->id())) { DORIS_CHECK(column.is_key()); _filled_columns.insert(index); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 9b99f08b8ab74e..92d3a7361be936 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -1292,7 +1292,7 @@ protected void toThrift(TPlanNode msg) { if (outputColumnUniqueIds != null) { msg.olap_scan_node.setOutputColumnUniqueIds(outputColumnUniqueIds); } - if (filledKeyColumnSlotIds != null) { + if (!filledKeyColumnSlotIds.isEmpty()) { msg.olap_scan_node.setFilledKeyColumnSlotIds(filledKeyColumnSlotIds); } From 7c930f7fe9179b38071c4e915d35e3bfecd8d2dc Mon Sep 
17 00:00:00 2001 From: BiteTheDDDDt Date: Thu, 11 Jun 2026 23:53:35 +0800 Subject: [PATCH 17/22] [refactor](be) Simplify filled key column pruning ### What problem does this PR solve? Issue Number: None Related PR: #64413 Problem Summary: After aligning scan slots with the storage key schema, the scan path still kept a separate filled-key scanner cache and a dedicated SegmentIterator helper for filled columns. This removes the redundant scanner member, records filled key columns directly in ReaderParams only for direct reads, and folds filled-column default generation into the generic prune path. Other BlockReader and SegmentIterator mapping state remains because it still maps expanded storage-read blocks, delete-predicate columns, virtual columns, and expression columns back to caller blocks. ### Release note None ### Check List (For Author) - Test: Manual test - build-support/clang-format.sh be/src/exec/scan/olap_scanner.cpp be/src/exec/scan/olap_scanner.h be/src/storage/segment/segment_iterator.cpp be/src/storage/segment/segment_iterator.h - git diff --check - ./build.sh --be - Behavior changed: No - Does this need documentation: No --- be/src/exec/scan/olap_scanner.cpp | 6 ++-- be/src/exec/scan/olap_scanner.h | 4 +-- be/src/storage/segment/segment_iterator.cpp | 34 +++++---------------- be/src/storage/segment/segment_iterator.h | 11 +++---- 4 files changed, 17 insertions(+), 38 deletions(-) diff --git a/be/src/exec/scan/olap_scanner.cpp b/be/src/exec/scan/olap_scanner.cpp index 7a5e875515cc41..1475869ea1261a 100644 --- a/be/src/exec/scan/olap_scanner.cpp +++ b/be/src/exec/scan/olap_scanner.cpp @@ -327,6 +327,7 @@ Status OlapScanner::_init_tablet_reader_params( push_down_agg_type != TPushAggOp::COUNT_ON_INDEX); } + _tablet_reader_params.filled_columns.clear(); RETURN_IF_ERROR(_init_variant_columns()); RETURN_IF_ERROR(_init_return_columns()); @@ -388,7 +389,6 @@ Status OlapScanner::_init_tablet_reader_params( if (_tablet_reader_params.direct_mode) { 
_tablet_reader_params.return_columns = _return_columns; - _tablet_reader_params.filled_columns = _filled_columns; } else { // we need to fetch all key columns to do the right aggregation on storage engine side. for (size_t i = 0; i < tablet_schema->num_key_columns(); ++i) { @@ -605,7 +605,9 @@ Status OlapScanner::_init_return_columns() { if (olap_scan_node.__isset.filled_key_column_slot_ids && olap_scan_node.filled_key_column_slot_ids.contains(slot->id())) { DORIS_CHECK(column.is_key()); - _filled_columns.insert(index); + if (_tablet_reader_params.direct_mode) { + _tablet_reader_params.filled_columns.insert(index); + } } int32_t unique_id = column.unique_id() >= 0 ? column.unique_id() : column.parent_unique_id(); diff --git a/be/src/exec/scan/olap_scanner.h b/be/src/exec/scan/olap_scanner.h index 0ea7d41c5c14d1..a9bc9b933e705f 100644 --- a/be/src/exec/scan/olap_scanner.h +++ b/be/src/exec/scan/olap_scanner.h @@ -18,12 +18,11 @@ #pragma once #include -#include #include +#include #include #include -#include #include #include #include @@ -111,7 +110,6 @@ class OlapScanner : public Scanner { public: std::vector _return_columns; - std::set _filled_columns; std::unordered_set _tablet_columns_convert_to_null_set; diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index a4daacdd427382..8b39801a71ffa0 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -17,7 +17,6 @@ #include "storage/segment/segment_iterator.h" -#include #include #include #include @@ -25,6 +24,7 @@ #include #include +#include #include #include #include @@ -2085,34 +2085,16 @@ void SegmentIterator::_fill_default_column(MutableColumnPtr& column, size_t num_ } } -bool SegmentIterator::_fill_filled_column(ColumnId cid, MutableColumnPtr& column, - bool fill_defaults, size_t num_of_defaults) { - if (!_opts.filled_columns.contains(cid)) { - return false; - } - 
DORIS_CHECK(!_virtual_column_exprs.contains(cid)); - DORIS_CHECK(!_has_delete_predicate(cid)); - DORIS_CHECK(cid < _is_pred_column.size()); - DORIS_CHECK(!_is_pred_column[cid]); - DORIS_CHECK(cid < _is_common_expr_column.size()); - DORIS_CHECK(!_is_common_expr_column[cid]); - if (fill_defaults) { - _fill_default_column(column, num_of_defaults); - } - return true; -} - -bool SegmentIterator::_prune_column(ColumnId cid, MutableColumnPtr& column, bool fill_defaults, +bool SegmentIterator::_prune_column(ColumnId cid, MutableColumnPtr& column, size_t num_of_defaults) { - if (_fill_filled_column(cid, column, fill_defaults, num_of_defaults)) { + if (_opts.filled_columns.contains(cid)) { + _fill_default_column(column, num_of_defaults); return true; } if (_need_read_data(cid)) { return false; } - if (fill_defaults) { - _fill_default_column(column, num_of_defaults); - } + _fill_default_column(column, num_of_defaults); return true; } @@ -2121,7 +2103,7 @@ Status SegmentIterator::_read_columns(const std::vector& column_ids, for (auto cid : column_ids) { auto& column = column_block[cid]; size_t rows_read = nrows; - if (_prune_column(cid, column, true, rows_read)) { + if (_prune_column(cid, column, rows_read)) { continue; } RETURN_IF_ERROR(_column_iterators[cid]->next_batch(&rows_read, column)); @@ -2278,7 +2260,7 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint16 VLOG_DEBUG << fmt::format("Column {} no need to read.", cid); continue; } - if (_prune_column(cid, column, true, nrows_read)) { + if (_prune_column(cid, column, nrows_read)) { VLOG_DEBUG << fmt::format("Column {} is pruned. 
No need to read data.", cid); continue; } @@ -2662,7 +2644,7 @@ Status SegmentIterator::_read_columns_by_rowids(std::vector& read_colu if (_no_need_read_key_data(cid, colunm, select_size)) { continue; } - if (_prune_column(cid, colunm, true, select_size)) { + if (_prune_column(cid, colunm, select_size)) { continue; } diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index e0d3893385a5d4..74f628c4f7a29e 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -18,9 +18,9 @@ #pragma once #include -#include -#include +#include +#include #include #include #include @@ -295,10 +295,7 @@ class SegmentIterator : public RowwiseIterator { bool _need_read_data(ColumnId cid); void _fill_default_column(MutableColumnPtr& column, size_t num_of_defaults); - bool _fill_filled_column(ColumnId cid, MutableColumnPtr& column, bool fill_defaults, - size_t num_of_defaults); - bool _prune_column(ColumnId cid, MutableColumnPtr& column, bool fill_defaults, - size_t num_of_defaults); + bool _prune_column(ColumnId cid, MutableColumnPtr& column, size_t num_of_defaults); Status _construct_compound_expr_context(); @@ -307,7 +304,7 @@ class SegmentIterator : public RowwiseIterator { for (auto cid : col_ids) { auto ord = key.field(cid) <=> (*_seek_block[cid])[0]; if (ord != std::strong_ordering::equal) { - return ord < 0 ? -1 : 1; + return ord == std::strong_ordering::less ? -1 : 1; } } return 0; From 9e0f26233bd0c4e0b40da5e8ca202fe7c190ada1 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Fri, 12 Jun 2026 10:16:06 +0800 Subject: [PATCH 18/22] [fix](be) Fix common expr filtering with filled keys ### What problem does this PR solve? Issue Number: None Related PR: None Problem Summary: The FE-filled key column path can leave some scan output columns empty until after SegmentIterator finishes common expression filtering. 
The previous cleanup let filled columns be synthesized even when they participate in predicate/common-expression evaluation, and common expr filtering derived its batch size from Block::rows(), which returns the first materialized column size. A P0 pipeline hit DCHECK block->rows() == selected_size in SegmentIterator::_execute_common_expr when selected rows existed but the first output column was still empty. This change only default-fills filled columns when they are not needed by predicate/common/virtual/delete expression paths, and evaluates common expr filters with SegmentIterator's selected row count instead of Block::rows(). ### Release note None ### Check List (For Author) - Test: - Manual test: build-support/clang-format.sh be/src/storage/segment/segment_iterator.cpp - Manual test: git diff --check - Manual test: ./build.sh --be - Behavior changed: No - Does this need documentation: No --- be/src/storage/segment/segment_iterator.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 8b39801a71ffa0..0971b2a24fc406 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -2087,7 +2087,10 @@ void SegmentIterator::_fill_default_column(MutableColumnPtr& column, size_t num_ bool SegmentIterator::_prune_column(ColumnId cid, MutableColumnPtr& column, size_t num_of_defaults) { - if (_opts.filled_columns.contains(cid)) { + DCHECK_LT(cid, _is_pred_column.size()); + DCHECK_LT(cid, _is_common_expr_column.size()); + if (_opts.filled_columns.contains(cid) && !_virtual_column_exprs.contains(cid) && + !_has_delete_predicate(cid) && !_is_pred_column[cid] && !_is_common_expr_column[cid]) { _fill_default_column(column, num_of_defaults); return true; } @@ -3056,11 +3059,18 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& uint16_t original_size = selected_size; 
_opts.stats->expr_cond_input_rows += original_size; - DCHECK_EQ(block->rows(), selected_size); - IColumn::Filter filter(block->rows(), 1); + // Some output columns may stay empty until after common expr filtering. Use the + // selected row count instead of Block::rows(), which is derived from the first column. + IColumn::Filter filter(selected_size, 1); bool can_filter_all = false; - RETURN_IF_ERROR(VExprContext::execute_conjuncts(_common_expr_ctxs_push_down, nullptr, block, - &filter, &can_filter_all)); + auto* __restrict filter_data = filter.data(); + for (const auto& expr_ctx : _common_expr_ctxs_push_down) { + RETURN_IF_ERROR(expr_ctx->execute_filter(block, filter_data, selected_size, false, + &can_filter_all)); + if (can_filter_all) { + break; + } + } RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, _columns_to_filter, filter)); selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); From b33e5eaae76b20c33b7c5edcb6ab5eb683a2977f Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Fri, 12 Jun 2026 10:35:58 +0800 Subject: [PATCH 19/22] [fix](fe) Preserve filled keys for lazy scan projection ### What problem does this PR solve? Issue Number: None Related PR: None Problem Summary: Storage-aligned OLAP scan planning adds filled key slots to the scan tuple and projects back to the execution output. The lazy materialize OLAP scan path prunes the original scan tuple after computePhysicalOlapScan returns. If the storage-aligned projection remaps PlanTranslatorContext before that pruning, lazy materialization can prune with projection slot ids instead of the original scan tuple slot ids. The lazy path also needs to keep storage-only filled key slots in the underlying scan tuple while hiding them from its public output. This change skips the shared projection remap for lazy scan construction, preserves filled key slots during lazy tuple pruning, and then adds a lazy-scan projection only for the public lazy output. 
### Release note None ### Check List (For Author) - Test: - Manual test: git diff --check - Manual test: ./build.sh --fe - Behavior changed: No - Does this need documentation: No --- .../translator/PhysicalPlanTranslator.java | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index aa31a4c171036b..22fe6e874a9745 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -865,10 +865,11 @@ private PlanFragment getPlanFragmentForPhysicalFileScan(PhysicalFileScan fileSca @Override public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTranslatorContext context) { - return computePhysicalOlapScan(olapScan, context); + return computePhysicalOlapScan(olapScan, context, true); } - private PlanFragment computePhysicalOlapScan(PhysicalOlapScan olapScan, PlanTranslatorContext context) { + private PlanFragment computePhysicalOlapScan(PhysicalOlapScan olapScan, PlanTranslatorContext context, + boolean projectStorageAlignedScanOutput) { List outputSlots = olapScan.getOutput(); StorageAlignedScanSlots storageAlignedScanSlots = computeStorageAlignedScanSlots(olapScan); List slots = storageAlignedScanSlots.scanSlots; @@ -978,7 +979,7 @@ private PlanFragment computePhysicalOlapScan(PhysicalOlapScan olapScan, PlanTran context.addScanNode(olapScanNode, olapScan); translateRuntimeFilter(olapScan, olapScanNode, context); - if (!storageAlignedScanSlots.filledKeyExprIds.isEmpty()) { + if (projectStorageAlignedScanOutput && !storageAlignedScanSlots.filledKeyExprIds.isEmpty()) { List projectionExprs = outputSlots.stream() .map(slot -> context.findSlotRef(slot.getExprId())) .collect(Collectors.toList()); 
@@ -2921,7 +2922,7 @@ public PlanFragment visitPhysicalLazyMaterializeTVFScan(PhysicalLazyMaterializeT @Override public PlanFragment visitPhysicalLazyMaterializeOlapScan(PhysicalLazyMaterializeOlapScan lazyScan, PlanTranslatorContext context) { - PlanFragment planFragment = computePhysicalOlapScan(lazyScan.getScan(), context); + PlanFragment planFragment = computePhysicalOlapScan(lazyScan.getScan(), context, false); OlapScanNode olapScanNode = (OlapScanNode) planFragment.getPlanRoot(); // set lazy materialized context olapScanNode.setIsTopnLazyMaterialize(true); @@ -2929,9 +2930,18 @@ public PlanFragment visitPhysicalLazyMaterializeOlapScan(PhysicalLazyMaterialize Set scanIds = lazyScan.getOutput().stream().map(NamedExpression::getExprId) .map(context::findSlotRef).filter(Objects::nonNull).map(SlotRef::getSlotId) .collect(Collectors.toSet()); + olapScanNode.getFilledKeyColumnSlotIds().stream().map(SlotId::new).forEach(scanIds::add); olapScanNode.getTupleDesc().getSlots().removeIf(slot -> !scanIds.contains(slot.getId())); context.createSlotDesc(olapScanNode.getTupleDesc(), lazyScan.getRowId()); + if (!olapScanNode.getFilledKeyColumnSlotIds().isEmpty()) { + List projectionExprs = lazyScan.getOutput().stream() + .map(slot -> context.findSlotRef(slot.getExprId())) + .collect(Collectors.toList()); + TupleDescriptor projectionTuple = generateTupleDesc(lazyScan.getOutput(), lazyScan.getTable(), context); + olapScanNode.setProjectList(projectionExprs); + olapScanNode.setOutputTupleDesc(projectionTuple); + } for (Slot slot : lazyScan.getOutput()) { if (((SlotReference) slot).getOriginalColumn().isPresent()) { olapScanNode.addTopnLazyMaterializeOutputColumns(((SlotReference) slot).getOriginalColumn().get()); From 0629b7f130c63bc270e9aa2d14d65264ece45414 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Fri, 12 Jun 2026 14:39:40 +0800 Subject: [PATCH 20/22] [fix](be) Avoid filled key pruning before lazy init ### What problem does this PR solve? 
Issue Number: None Related PR: #64413 Problem Summary: Filled key pruning can run from the segment seek path before lazy materialization initializes predicate and common expression column state. The unconditional state access trips a DCHECK during key range lookup. Only use the filled-key default-column shortcut after the lazy materialization column state has been initialized, so seek-key reads still use real storage data. ### Release note None ### Check List (For Author) - Test: Manual test - ./build.sh --be - git diff --check - Behavior changed: No - Does this need documentation: No --- be/src/storage/segment/segment_iterator.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 0971b2a24fc406..1481f2d50a740d 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -2087,12 +2087,14 @@ void SegmentIterator::_fill_default_column(MutableColumnPtr& column, size_t num_ bool SegmentIterator::_prune_column(ColumnId cid, MutableColumnPtr& column, size_t num_of_defaults) { - DCHECK_LT(cid, _is_pred_column.size()); - DCHECK_LT(cid, _is_common_expr_column.size()); - if (_opts.filled_columns.contains(cid) && !_virtual_column_exprs.contains(cid) && - !_has_delete_predicate(cid) && !_is_pred_column[cid] && !_is_common_expr_column[cid]) { - _fill_default_column(column, num_of_defaults); - return true; + if (_opts.filled_columns.contains(cid) && !_is_pred_column.empty()) { + DCHECK_EQ(_is_pred_column.size(), _is_common_expr_column.size()); + DCHECK_LT(cid, _is_pred_column.size()); + if (!_virtual_column_exprs.contains(cid) && !_has_delete_predicate(cid) && + !_is_pred_column[cid] && !_is_common_expr_column[cid]) { + _fill_default_column(column, num_of_defaults); + return true; + } } if (_need_read_data(cid)) { return false; From 5332485e5143b509000e5df73d2c7e28caf6917c Mon Sep 17 00:00:00 2001
From: BiteTheDDDDt Date: Fri, 12 Jun 2026 15:52:10 +0800 Subject: [PATCH 21/22] [fix](fe) Match storage key slots without unique ids ### What problem does this PR solve? Issue Number: None Related PR: #64413 Problem Summary: FE expands scan slots to match storage key columns for AGG and non-MOW UNIQUE tables. The storage key slot lookup previously keyed only by column unique id, but old metadata or tables with light schema change disabled can keep all column unique ids at the init value. That collapses multiple key columns into one map entry and can leave filled key columns unresolved. Use column unique id only when it is initialized, and otherwise fall back to matching by column name in the selected index schema. ### Release note None ### Check List (For Author) - Test: Build / Manual test - ./build.sh --fe - git diff --check - Behavior changed: No - Does this need documentation: No --- .../translator/PhysicalPlanTranslator.java | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 22fe6e874a9745..ac38a1782102ef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -1016,13 +1016,20 @@ private StorageAlignedScanSlots computeStorageAlignedScanSlots(PhysicalOlapScan Set outputExprIds = olapScan.getOutput().stream() .map(Slot::getExprId) .collect(Collectors.toSet()); - Map slotByColumnUniqueId = Stream.concat( - olapScan.getSelectedIndexOutputs().stream(), olapScan.getOutput().stream()) - .filter(slot -> ((SlotReference) slot).getOriginalColumn().isPresent()) - .collect(Collectors.toMap( - slot -> ((SlotReference) slot).getOriginalColumn().get().getUniqueId(), - slot -> slot, - (left, 
right) -> right)); + Map slotByColumnUniqueId = new HashMap<>(); + Map slotByColumnName = new HashMap<>(); + Stream.concat(olapScan.getSelectedIndexOutputs().stream(), olapScan.getOutput().stream()) + .forEach(slot -> { + Optional originalColumn = ((SlotReference) slot).getOriginalColumn(); + if (originalColumn.isPresent()) { + Column column = originalColumn.get(); + if (column.getUniqueId() == Column.COLUMN_UNIQUE_ID_INIT_VALUE) { + slotByColumnName.put(column.getName(), slot); + } else { + slotByColumnUniqueId.put(column.getUniqueId(), slot); + } + } + }); List storageSlots = new ArrayList<>(); Set storageExprIds = new HashSet<>(); @@ -1034,8 +1041,10 @@ private StorageAlignedScanSlots computeStorageAlignedScanSlots(PhysicalOlapScan if (!column.isKey()) { break; } - Slot slot = Objects.requireNonNull(slotByColumnUniqueId.get(column.getUniqueId()), - "missing scan slot for storage key column " + column.getName()); + Slot slot = column.getUniqueId() == Column.COLUMN_UNIQUE_ID_INIT_VALUE + ? slotByColumnName.get(column.getName()) + : slotByColumnUniqueId.get(column.getUniqueId()); + slot = Objects.requireNonNull(slot, "missing scan slot for storage key column " + column.getName()); if (storageExprIds.add(slot.getExprId())) { storageSlots.add(slot); } From 7f62b027cf30a45adb571e7e3e35b674b08e56a6 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Fri, 12 Jun 2026 17:28:04 +0800 Subject: [PATCH 22/22] [test](fe) Keep olap translator test slots column-backed ### What problem does this PR solve? Issue Number: None Related PR: #64413 Problem Summary: PhysicalPlanTranslatorTest manually builds an OLAP scan over an AGG_KEYS mock table. After scan slots are aligned with storage key columns, the test slots must carry their original table columns so the translator can match storage keys by unique id or column name. Build the mock slots from the table schema instead of using unrelated names without original column metadata. 
### Release note None ### Check List (For Author) - Test: Unit Test - ./run-fe-ut.sh --run org.apache.doris.nereids.glue.translator.PhysicalPlanTranslatorTest#testOlapPrune - git diff --check - Behavior changed: No - Does this need documentation: No --- .../glue/translator/PhysicalPlanTranslatorTest.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java index c31f96792f28e7..21c10d07470970 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java @@ -102,9 +102,12 @@ public void testOlapPrune() throws Exception { List qualifier = new ArrayList<>(); qualifier.add("test"); List t1Output = new ArrayList<>(); - SlotReference col1 = new SlotReference("col1", IntegerType.INSTANCE); - SlotReference col2 = new SlotReference("col2", IntegerType.INSTANCE); - SlotReference col3 = new SlotReference("col2", IntegerType.INSTANCE); + SlotReference col1 = SlotReference.fromColumn(StatementScopeIdGenerator.newExprId(), + t1, t1.getBaseSchema().get(0), qualifier); + SlotReference col2 = SlotReference.fromColumn(StatementScopeIdGenerator.newExprId(), + t1, t1.getBaseSchema().get(1), qualifier); + SlotReference col3 = SlotReference.fromColumn(StatementScopeIdGenerator.newExprId(), + t1, t1.getBaseSchema().get(1), qualifier); t1Output.add(col1); t1Output.add(col2); t1Output.add(col3);