Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions be/src/core/block/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,7 @@ std::string Block::dump_data_json(size_t begin, size_t row_limit, bool allow_nul

// This value-extraction logic is preserved from the original function
// to maintain consistency, especially for handling nullability mismatches.
if (data[i].column && data[i].type->is_nullable() &&
!data[i].column->is_concrete_nullable()) {
if (data[i].column && data[i].type->is_nullable() && !data[i].column->is_nullable()) {
// This branch handles a specific internal representation of nullable columns.
// The original code would assert here if allow_null_mismatch is false.
assert(allow_null_mismatch);
Expand Down Expand Up @@ -530,7 +529,7 @@ std::string Block::dump_data(size_t begin, size_t row_limit, bool allow_null_mis
std::string s;
if (data[i].column) { // column may be const
// for code inside `default_implementation_for_nulls`, the type may be nullable while the column is not
if (data[i].type->is_nullable() && !data[i].column->is_concrete_nullable()) {
if (data[i].type->is_nullable() && !data[i].column->is_nullable()) {
assert(allow_null_mismatch);
s = assert_cast<const DataTypeNullable*>(data[i].type.get())
->get_nested_type()
Expand Down
4 changes: 1 addition & 3 deletions be/src/core/column/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -611,10 +611,8 @@ class IColumn : public COW<IColumn> {

/// Various properties on behaviour of column type.

/// It's true for ColumnNullable only.
/// It's true for ColumnNullable and Const(ColumnNullable).
virtual bool is_nullable() const { return false; }
/// It's true for ColumnNullable, can be true or false for ColumnConst, etc.
virtual bool is_concrete_nullable() const { return false; }

// true if column has null element
virtual bool has_null() const { return false; }
Expand Down
3 changes: 1 addition & 2 deletions be/src/core/column/column_const.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,7 @@ class ColumnConst final : public COWHelper<IColumn, ColumnConst> {
return false;
}

// ColumnConst is not nullable, but may be concrete nullable.
bool is_concrete_nullable() const override { return is_column_nullable(*data); }
bool is_nullable() const override { return is_column_nullable(*data); }
Comment thread
zclllyybb marked this conversation as resolved.
bool only_null() const override { return data->is_null_at(0); }
StringRef get_raw_data() const override { return data->get_raw_data(); }

Expand Down
1 change: 0 additions & 1 deletion be/src/core/column/column_nullable.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
}

bool is_nullable() const override { return true; }
bool is_concrete_nullable() const override { return true; }
bool is_column_string() const override { return get_nested_column().is_column_string(); }

bool is_exclusive() const override {
Expand Down
8 changes: 4 additions & 4 deletions be/src/exec/common/hash_table/hash_map_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ struct MethodStringNoCache : public MethodBase<TData> {
DorisVector<StringRef>& stored_keys) {
const IColumn& column = *key_columns[0];
const auto& nested_column =
column.is_nullable()
is_column_nullable(column)
? assert_cast<const ColumnNullable&>(column).get_nested_column()
: column;
auto serialized_str = [](const auto& column_string, DorisVector<StringRef>& stored_keys) {
Expand Down Expand Up @@ -744,7 +744,7 @@ struct MethodOneNumber : public MethodBase<TData> {
void init_serialized_keys(const ColumnRawPtrs& key_columns, uint32_t num_rows,
const uint8_t* null_map = nullptr, bool is_join = false,
bool is_build = false, uint32_t bucket_size = 0) override {
Base::keys = (FieldType*)(key_columns[0]->is_nullable()
Base::keys = (FieldType*)(is_column_nullable(*key_columns[0])
? assert_cast<const ColumnNullable*>(key_columns[0])
->get_nested_column_ptr()
->get_raw_data()
Expand Down Expand Up @@ -782,7 +782,7 @@ struct MethodOneNumberDirect : public MethodOneNumber<FieldType, TData> {
void init_serialized_keys(const ColumnRawPtrs& key_columns, uint32_t num_rows,
const uint8_t* null_map = nullptr, bool is_join = false,
bool is_build = false, uint32_t bucket_size = 0) override {
Base::keys = (FieldType*)(key_columns[0]->is_nullable()
Base::keys = (FieldType*)(is_column_nullable(*key_columns[0])
? assert_cast<const ColumnNullable*>(key_columns[0])
->get_nested_column_ptr()
->get_raw_data()
Expand Down Expand Up @@ -1174,4 +1174,4 @@ struct MethodSingleNullableColumn : public SingleColumnMethod {
}
}
};
} // namespace doris
} // namespace doris
2 changes: 1 addition & 1 deletion be/src/exec/common/join_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ void primary_to_direct_mapping(Method* context, const ColumnRawPtrs& key_columns
FieldType min_key = std::numeric_limits<FieldType>::max();

size_t num_rows = key_columns[0]->size();
if (key_columns[0]->is_nullable()) {
if (is_column_nullable(*key_columns[0])) {
const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
->get_nested_column_ptr()
->get_raw_data()
Expand Down
8 changes: 4 additions & 4 deletions be/src/exec/common/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,12 @@ class VectorizedUtils {

// Helper function to extract null map from column (including ColumnConst cases)
static const NullMap* get_null_map(const ColumnPtr& col) {
if (col->is_nullable()) {
return &static_cast<const ColumnNullable&>(*col).get_null_map_data();
if (const auto* nullable = check_and_get_column<ColumnNullable>(col.get())) {
return &nullable->get_null_map_data();
}
// Handle Const(Nullable) case
if (const auto* const_col = check_and_get_column<ColumnConst>(col.get());
const_col != nullptr && const_col->is_concrete_nullable()) {
const_col != nullptr && const_col->is_nullable()) {
return &static_cast<const ColumnNullable&>(const_col->get_data_column())
.get_null_map_data();
}
Expand Down Expand Up @@ -274,7 +274,7 @@ inline size_t calculate_false_number(ColumnPtr column) {
return calculate_false_number(
assert_cast<const ColumnConst*>(column.get())->get_data_column_ptr()) *
rows;
} else if (column->is_nullable()) {
} else if (is_column_nullable(*column)) {
const auto* nullable = assert_cast<const ColumnNullable*>(column.get());
const auto* data = assert_cast<const ColumnUInt8*>(nullable->get_nested_column_ptr().get())
->get_data()
Expand Down
15 changes: 7 additions & 8 deletions be/src/exec/common/variant_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#include "exec/common/variant_util.h"

#include <assert.h>
#include <fmt/format.h>
#include <gen_cpp/FrontendService.h>
#include <gen_cpp/FrontendService_types.h>
Expand Down Expand Up @@ -349,13 +348,13 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co
return Status::OK();
}
// set variant root column/type to from column/type
CHECK(arg.column->is_nullable());
CHECK(is_column_nullable(*arg.column));
auto to_type = remove_nullable(type);
const auto& data_type_object = assert_cast<const DataTypeVariant&>(*to_type);
auto variant = ColumnVariant::create(data_type_object.variant_max_subcolumns_count(),
data_type_object.enable_doc_mode());

variant->create_root(arg.type, std::move(*arg.column).mutate());
variant->create_root(arg.type, IColumn::mutate(arg.column));
ColumnPtr nullable = ColumnNullable::create(
variant->get_ptr(),
assert_cast<const ColumnNullable*>(arg.column.get())->get_null_map_column_ptr());
Expand Down Expand Up @@ -2152,14 +2151,14 @@ Status _parse_and_materialize_variant_columns(Block& block,
const std::vector<ParseConfig>& configs) {
for (size_t i = 0; i < variant_pos.size(); ++i) {
auto column_ref = block.get_by_position(variant_pos[i]).column;
bool is_nullable = column_ref->is_nullable();
MutableColumnPtr owner_column = std::move(*column_ref).mutate();
bool is_nullable = is_column_nullable(*column_ref);
MutableColumnPtr owner_column = IColumn::mutate(std::move(column_ref));
ColumnPtr nullable_null_map;
MutableColumnPtr var_column;
if (is_nullable) {
const auto& nullable = assert_cast<const ColumnNullable&>(*owner_column);
nullable_null_map = nullable.get_null_map_column_ptr();
var_column = std::move(*nullable.get_nested_column_ptr()).mutate();
var_column = IColumn::mutate(nullable.get_nested_column_ptr());
} else {
var_column = std::move(owner_column);
}
Expand All @@ -2182,14 +2181,14 @@ Status _parse_and_materialize_variant_columns(Block& block,
? make_nullable(std::make_shared<DataTypeString>())
: std::make_shared<DataTypeString>(),
&scalar_root_column));
if (scalar_root_column->is_nullable()) {
if (is_column_nullable(*scalar_root_column)) {
scalar_root_column = assert_cast<const ColumnNullable*>(scalar_root_column.get())
->get_nested_column_ptr();
}
} else {
const auto& root = *var.get_root();
scalar_root_column =
root.is_nullable()
is_column_nullable(root)
? assert_cast<const ColumnNullable&>(root).get_nested_column_ptr()
: var.get_root();
}
Expand Down
27 changes: 18 additions & 9 deletions be/src/exec/operator/hashjoin_build_sink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <variant>

#include "core/block/block.h"
#include "core/column/column_const.h"
#include "core/column/column_nullable.h"
#include "core/data_type/data_type_nullable.h"
#include "exec/common/template_helpers.hpp"
Expand Down Expand Up @@ -388,8 +389,8 @@ Status HashJoinBuildSinkLocalState::build_asof_index(Block& block) {
// Handle nullable: extract nested column for value access, keep nullable for null checks
const ColumnNullable* nullable_col = nullptr;
ColumnPtr build_col_nested = asof_build_col;
if (asof_build_col->is_nullable()) {
nullable_col = assert_cast<const ColumnNullable*>(asof_build_col.get());
if (const auto* nullable = check_and_get_column<ColumnNullable>(asof_build_col.get())) {
nullable_col = nullable;
build_col_nested = nullable_col->get_nested_column_ptr();
}

Expand Down Expand Up @@ -514,7 +515,9 @@ Status HashJoinBuildSinkLocalState::_do_evaluate(Block& block, VExprContextSPtrs
RETURN_IF_ERROR(exprs[i]->execute(&block, &result_col_id));
}

// TODO: opt the column is const
// _extract_join_column() handles physical ColumnNullable only, so build-key const
// columns, including Const(Nullable), must be materialized before they are merged.
// TODO: if const-key optimization is added, update _extract_join_column() together.
block.get_by_position(result_col_id).column =
block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
res_col_ids[i] = result_col_id;
Expand Down Expand Up @@ -544,15 +547,21 @@ Status HashJoinBuildSinkLocalState::_extract_join_column(Block& block,
DCHECK(_should_build_hash_table);
auto& shared_state = *_shared_state;
for (size_t i = 0; i < shared_state.build_exprs_size; ++i) {
const auto* column = block.get_by_position(res_col_ids[i]).column.get();
if (!column->is_nullable() &&
_parent->cast<HashJoinBuildSinkOperatorX>()._serialize_null_into_key[i]) {
const auto& column_ptr = block.get_by_position(res_col_ids[i]).column;
const auto* column = column_ptr.get();
const bool serialize_null_into_key =
_parent->cast<HashJoinBuildSinkOperatorX>()._serialize_null_into_key[i];
// _do_evaluate() must have materialized Const(Nullable) build keys. If this check fails,
// is_nullable() no longer implies a physical ColumnNullable for the logic below.
const auto* const_column = check_and_get_column<ColumnConst>(*column);
DORIS_CHECK(const_column == nullptr ||
!is_column_nullable(const_column->get_data_column()));
if (!column->is_nullable() && serialize_null_into_key) {
_key_columns_holder.emplace_back(
make_nullable(block.get_by_position(res_col_ids[i]).column));
raw_ptrs[i] = _key_columns_holder.back().get();
} else if (const auto* nullable = check_and_get_column<ColumnNullable>(*column);
!_parent->cast<HashJoinBuildSinkOperatorX>()._serialize_null_into_key[i] &&
nullable) {
!serialize_null_into_key && nullable) {
// update the null map and split the nested column out of ColumnNullable when serialize_null_into_key is false and the column is nullable
const auto& col_nested = nullable->get_nested_column();
const auto& col_nullmap = nullable->get_null_map_data();
Expand All @@ -574,7 +583,7 @@ Status HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, Blo
// 1. Dispose the overflow of ColumnString
// 2. Finalize the ColumnVariant to speed up
for (auto& data : block) {
data.column = std::move(*data.column).mutate()->convert_column_if_overflow();
data.column = IColumn::mutate(std::move(data.column))->convert_column_if_overflow();
if (p._need_finalize_variant_column) {
auto mutable_column = IColumn::mutate(std::move(data.column));
mutable_column->finalize();
Expand Down
22 changes: 16 additions & 6 deletions be/src/exec/operator/hashjoin_probe_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include "common/cast_set.h"
#include "common/logging.h"
#include "core/assert_cast.h"
#include "core/column/column_const.h"
#include "core/column/column_nullable.h"
#include "core/data_type/data_type_nullable.h"
#include "exec/operator/operator.h"
#include "runtime/descriptors.h"
Expand Down Expand Up @@ -349,15 +351,21 @@ Status HashJoinProbeLocalState::_extract_join_column(Block& block,

auto& shared_state = *_shared_state;
for (size_t i = 0; i < shared_state.build_exprs_size; ++i) {
const auto* column = block.get_by_position(res_col_ids[i]).column.get();
if (!column->is_nullable() &&
_parent->cast<HashJoinProbeOperatorX>()._serialize_null_into_key[i]) {
const auto& column_ptr = block.get_by_position(res_col_ids[i]).column;
const auto* column = column_ptr.get();
const bool serialize_null_into_key =
_parent->cast<HashJoinProbeOperatorX>()._serialize_null_into_key[i];
// _do_evaluate() must have materialized Const(Nullable) probe keys. If this check fails,
// is_nullable() no longer implies a physical ColumnNullable for the logic below.
const auto* const_column = check_and_get_column<ColumnConst>(*column);
DORIS_CHECK(const_column == nullptr ||
!is_column_nullable(const_column->get_data_column()));
if (!column->is_nullable() && serialize_null_into_key) {
_key_columns_holder.emplace_back(
make_nullable(block.get_by_position(res_col_ids[i]).column));
_probe_columns[i] = _key_columns_holder.back().get();
} else if (const auto* nullable = check_and_get_column<ColumnNullable>(*column);
nullable &&
!_parent->cast<HashJoinProbeOperatorX>()._serialize_null_into_key[i]) {
nullable && !serialize_null_into_key) {
// update the null map and split the nested column out of ColumnNullable when serialize_null_into_key is false and the column is nullable
const auto& col_nested = nullable->get_nested_column();
const auto& col_nullmap = nullable->get_null_map_data();
Expand Down Expand Up @@ -420,7 +428,9 @@ Status HashJoinProbeOperatorX::_do_evaluate(Block& block, VExprContextSPtrs& exp
RETURN_IF_ERROR(exprs[i]->execute(&block, &result_col_id));
}

// TODO: opt the column is const
// _extract_join_column() handles physical ColumnNullable only, so probe-key const
// columns, including Const(Nullable), must be materialized before probing.
// TODO: if const-key optimization is added, update _extract_join_column() together.
block.get_by_position(result_col_id).column =
block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
res_col_ids[i] = result_col_id;
Expand Down
4 changes: 2 additions & 2 deletions be/src/exec/operator/join/process_hash_table_probe_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ void ProcessHashTableProbe<JoinOpType>::build_side_output_column(MutableColumns&
for (int i = 0; i < _right_col_len; i++) {
const auto& column = *_build_block->safe_get_by_position(i).column;
_build_column_has_null[i] = false;
if (_right_output_slot_flags[i] && column.is_nullable()) {
if (_right_output_slot_flags[i] && is_column_nullable(column)) {
const auto& nullable = assert_cast<const ColumnNullable&>(column);
_build_column_has_null[i] = !simd::contain_one(
nullable.get_null_map_data().data() + 1, nullable.size() - 1);
Expand Down Expand Up @@ -264,7 +264,7 @@ uint32_t ProcessHashTableProbe<JoinOpType>::
// Remove nullable wrapper for comparison - keep original for null check
ColumnPtr probe_col_for_compare = probe_col_ptr;
const uint8_t* asof_probe_null_map = nullptr;
if (probe_col_ptr->is_nullable()) {
if (is_column_nullable(*probe_col_ptr)) {
const auto* nullable_probe_col = assert_cast<const ColumnNullable*>(probe_col_ptr.get());
asof_probe_null_map = nullable_probe_col->get_null_map_data().data();
probe_col_for_compare = nullable_probe_col->get_nested_column_ptr();
Expand Down
Loading
Loading