From 30242f7b49babe559663bf1823adf73e5d911a09 Mon Sep 17 00:00:00 2001 From: kould Date: Wed, 1 Apr 2026 19:45:25 +0800 Subject: [PATCH 01/10] refactor: split apply into scalar and mark variants --- src/binder/expr.rs | 22 +- src/binder/mod.rs | 2 +- src/binder/select.rs | 183 +++++----- src/execution/dql/filter.rs | 1 - src/execution/dql/mark_apply.rs | 312 ++++++++++++++++++ src/execution/dql/mod.rs | 2 + src/execution/dql/scalar_apply.rs | 239 ++++++++++++++ src/execution/mod.rs | 20 ++ src/optimizer/heuristic/optimizer.rs | 6 + .../rule/implementation/dql/mark_apply.rs | 37 +++ src/optimizer/rule/implementation/dql/mod.rs | 2 + .../rule/implementation/dql/scalar_apply.rs | 37 +++ src/optimizer/rule/implementation/mod.rs | 17 + .../rule/normalization/column_pruning.rs | 36 +- .../normalization/compilation_in_advance.rs | 12 +- src/optimizer/rule/normalization/mod.rs | 1 + src/planner/mod.rs | 22 ++ src/planner/operator/mark_apply.rs | 78 +++++ src/planner/operator/mod.rs | 28 +- src/planner/operator/scalar_apply.rs | 40 +++ src/storage/mod.rs | 4 +- tests/slt/subquery.slt | 20 ++ tpcc/src/main.rs | 3 +- 23 files changed, 1009 insertions(+), 115 deletions(-) create mode 100644 src/execution/dql/mark_apply.rs create mode 100644 src/execution/dql/scalar_apply.rs create mode 100644 src/optimizer/rule/implementation/dql/mark_apply.rs create mode 100644 src/optimizer/rule/implementation/dql/scalar_apply.rs create mode 100644 src/planner/operator/mark_apply.rs create mode 100644 src/planner/operator/scalar_apply.rs diff --git a/src/binder/expr.rs b/src/binder/expr.rs index 5e6ea8c1..fcc2d00c 100644 --- a/src/binder/expr.rs +++ b/src/binder/expr.rs @@ -244,18 +244,24 @@ impl<'a, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'a, '_, T }) } Expr::Exists { subquery, negated } => { - let (sub_query, column, correlated) = self.bind_subquery(None, subquery)?; - let (_, sub_query) = if !self.context.is_step(&QueryBindStep::Where) { - 
self.bind_temp_table(column, sub_query)? - } else { - (column, sub_query) - }; + let (sub_query, _column, correlated) = self.bind_subquery(None, subquery)?; + let (_, marker_ref) = self + .bind_temp_table_alias(ScalarExpression::Constant(DataValue::Boolean(true)), 0); self.context.sub_query(SubQueryType::ExistsSubQuery { - negated: *negated, plan: sub_query, correlated, + output_column: marker_ref.output_column(), }); - Ok(ScalarExpression::Constant(DataValue::Boolean(true))) + if *negated { + Ok(ScalarExpression::Unary { + op: expression::UnaryOperator::Not, + expr: Box::new(marker_ref), + evaluator: None, + ty: LogicalType::Boolean, + }) + } else { + Ok(marker_ref) + } } Expr::Subquery(subquery) => { let (sub_query, column, correlated) = self.bind_subquery(None, subquery)?; diff --git a/src/binder/mod.rs b/src/binder/mod.rs index 7f0dc79d..4df11212 100644 --- a/src/binder/mod.rs +++ b/src/binder/mod.rs @@ -152,9 +152,9 @@ pub enum SubQueryType { correlated: bool, }, ExistsSubQuery { - negated: bool, plan: LogicalPlan, correlated: bool, + output_column: ColumnRef, }, InSubQuery { negated: bool, diff --git a/src/binder/select.rs b/src/binder/select.rs index ef6080cb..e4d3be6d 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -17,7 +17,8 @@ use crate::{ planner::{ operator::{ filter::FilterOperator, join::JoinOperator as LJoinOperator, limit::LimitOperator, - project::ProjectOperator, Operator, + mark_apply::MarkApplyOperator, project::ProjectOperator, + scalar_apply::ScalarApplyOperator, Operator, }, operator::{join::JoinType, table_scan::TableScanOperator}, }, @@ -37,11 +38,9 @@ use crate::catalog::{ }; use crate::errors::DatabaseError; use crate::execution::dql::join::joins_nullable; -use crate::expression::agg::AggKind; use crate::expression::simplify::ConstantCalculator; use crate::expression::visitor_mut::{walk_mut_expr, PositionShift, VisitorMut}; use crate::expression::{AliasType, BinaryOperator}; -use 
crate::planner::operator::aggregate::AggregateOperator; use crate::planner::operator::except::ExceptOperator; use crate::planner::operator::function_scan::FunctionScanOperator; use crate::planner::operator::insert::InsertOperator; @@ -51,14 +50,12 @@ use crate::planner::operator::union::UnionOperator; use crate::planner::{Childrens, LogicalPlan, SchemaOutput}; use crate::storage::Transaction; use crate::types::tuple::{Schema, SchemaRef}; -use crate::types::value::Utf8Type; use crate::types::{ColumnId, LogicalType}; use itertools::Itertools; use sqlparser::ast::{ - CharLengthUnits, Distinct, Expr, GroupByExpr, Join, JoinConstraint, JoinOperator, LimitClause, - OrderByExpr, OrderByKind, Query, Select, SelectInto, SelectItem, - SelectItemQualifiedWildcardKind, SetExpr, SetOperator, SetQuantifier, TableAlias, - TableAliasColumnDef, TableFactor, TableWithJoins, + Distinct, Expr, GroupByExpr, Join, JoinConstraint, JoinOperator, LimitClause, OrderByExpr, + OrderByKind, Query, Select, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SetExpr, + SetOperator, SetQuantifier, TableAlias, TableAliasColumnDef, TableFactor, TableWithJoins, }; struct RightSidePositionGlobalizer<'a> { @@ -107,6 +104,24 @@ impl VisitorMut<'_> for SplitScopePositionRebinder<'_> { } } +struct MarkerPositionGlobalizer<'a> { + output_column: &'a ColumnRef, + left_len: usize, +} + +impl VisitorMut<'_> for MarkerPositionGlobalizer<'_> { + fn visit_column_ref( + &mut self, + column: &mut ColumnRef, + position: &mut usize, + ) -> Result<(), DatabaseError> { + if column.same_column(self.output_column) { + *position = self.left_len; + } + Ok(()) + } +} + struct ProjectionOutputBinder<'a> { project_exprs: &'a [ScalarExpression], } @@ -1233,9 +1248,71 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' ) -> Result { self.context.step(QueryBindStep::Where); - let predicate = self.bind_expr(predicate)?; + let mut predicate = self.bind_expr(predicate)?; if let 
Some(sub_queries) = self.context.sub_queries_at_now() { + if sub_queries + .iter() + .all(|sub_query| matches!(sub_query, SubQueryType::ExistsSubQuery { .. })) + { + let passthrough_exprs = children + .output_schema() + .iter() + .cloned() + .enumerate() + .map(|(position, column)| ScalarExpression::column_expr(column, position)) + .collect(); + for sub_query in sub_queries { + let SubQueryType::ExistsSubQuery { + plan, + correlated, + output_column, + } = sub_query + else { + unreachable!() + }; + let left_len = children.output_schema().len(); + MarkerPositionGlobalizer { + output_column: &output_column, + left_len, + } + .visit(&mut predicate)?; + let (mut plan, mut predicates) = if correlated { + Self::prepare_correlated_subquery_plan( + plan, + children.output_schema(), + false, + )? + } else { + (plan, Vec::new()) + }; + let right_schema = plan.output_schema(); + for expr in predicates.iter_mut() { + RightSidePositionGlobalizer { + right_schema: right_schema.as_ref(), + left_len, + } + .visit(expr)?; + } + children = + MarkApplyOperator::build_exists(children, plan, output_column, predicates); + } + let filter = FilterOperator::build(predicate, children, false); + return Ok(LogicalPlan::new( + Operator::Project(ProjectOperator { + exprs: passthrough_exprs, + }), + Childrens::Only(Box::new(filter)), + )); + } + if sub_queries + .iter() + .any(|sub_query| matches!(sub_query, SubQueryType::ExistsSubQuery { .. })) + { + return Err(DatabaseError::UnsupportedStmt( + "mixed EXISTS with other WHERE subqueries is not supported yet".to_string(), + )); + } for sub_query in sub_queries { let (plan, join_ty) = match sub_query { SubQueryType::SubQuery { plan, correlated } => { @@ -1247,18 +1324,7 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' } (plan, JoinType::Inner) } - SubQueryType::ExistsSubQuery { - negated, - plan, - correlated, - } => { - children = if correlated { - self.bind_correlated_exists(children, plan, negated)? 
- } else { - Self::bind_uncorrelated_exists(children, plan, negated) - }; - continue; - } + SubQueryType::ExistsSubQuery { .. } => unreachable!(), SubQueryType::InSubQuery { negated, plan, @@ -1295,78 +1361,6 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' Ok(FilterOperator::build(predicate, children, false)) } - fn bind_correlated_exists( - &self, - mut children: LogicalPlan, - plan: LogicalPlan, - negated: bool, - ) -> Result { - let join_ty = if negated { - JoinType::LeftAnti - } else { - JoinType::LeftSemi - }; - let (plan, correlated_filters) = - Self::prepare_correlated_subquery_plan(plan, children.output_schema(), false)?; - Self::build_join_from_split_scope_predicates( - children, - plan, - join_ty, - correlated_filters, - false, - ) - } - - fn bind_uncorrelated_exists( - children: LogicalPlan, - plan: LogicalPlan, - negated: bool, - ) -> LogicalPlan { - let limit = LimitOperator::build(None, Some(1), plan); - let mut agg = AggregateOperator::build( - limit, - vec![ScalarExpression::AggCall { - distinct: false, - kind: AggKind::Count, - args: vec![ScalarExpression::Constant(DataValue::Utf8 { - value: "*".to_string(), - ty: Utf8Type::Fixed(1), - unit: CharLengthUnits::Characters, - })], - ty: LogicalType::Integer, - }], - vec![], - false, - ); - let filter = FilterOperator::build( - ScalarExpression::Binary { - op: if negated { - BinaryOperator::NotEq - } else { - BinaryOperator::Eq - }, - left_expr: Box::new(ScalarExpression::column_expr( - agg.output_schema()[0].clone(), - 0, - )), - right_expr: Box::new(ScalarExpression::Constant(DataValue::Int32(1))), - evaluator: None, - ty: LogicalType::Boolean, - }, - agg, - false, - ); - let projection = ProjectOperator { - exprs: vec![ScalarExpression::Constant(DataValue::Int32(1))], - }; - let plan = LogicalPlan::new( - Operator::Project(projection), - Childrens::Only(Box::new(filter)), - ); - - LJoinOperator::build(children, plan, JoinCondition::None, JoinType::Cross) - } - fn 
bind_correlated_in_subquery( &self, mut children: LogicalPlan, @@ -1624,8 +1618,7 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' .visit(expr)?; } - children = - LJoinOperator::build(children, plan, JoinCondition::None, JoinType::Cross); + children = ScalarApplyOperator::build(children, plan); } } diff --git a/src/execution/dql/filter.rs b/src/execution/dql/filter.rs index d316c28a..70c5a94d 100644 --- a/src/execution/dql/filter.rs +++ b/src/execution/dql/filter.rs @@ -68,7 +68,6 @@ impl Filter { return Ok(()); }; let tuple = arena.result_tuple(); - if self .predicate .eval(Some((tuple, &self.input_schema)))? diff --git a/src/execution/dql/mark_apply.rs b/src/execution/dql/mark_apply.rs new file mode 100644 index 00000000..58cb8ed9 --- /dev/null +++ b/src/execution/dql/mark_apply.rs @@ -0,0 +1,312 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::errors::DatabaseError; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::planner::operator::mark_apply::MarkApplyOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::types::tuple::{Schema, SchemaRef, Tuple}; +use crate::types::value::DataValue; +use std::mem; +use std::sync::Arc; + +pub struct MarkApply { + op: MarkApplyOperator, + left_input_plan: Option, + right_input_plan: Option, + left_input: Option, + predicate_schema: SchemaRef, + left_tuple: Tuple, +} + +impl From<(MarkApplyOperator, LogicalPlan, LogicalPlan)> for MarkApply { + fn from( + (op, mut left_input, mut right_input): (MarkApplyOperator, LogicalPlan, LogicalPlan), + ) -> Self { + let predicate_schema = Arc::new( + left_input + .output_schema() + .iter() + .chain(right_input.output_schema().iter()) + .cloned() + .collect::(), + ); + Self { + op, + left_input_plan: Some(left_input), + right_input_plan: Some(right_input), + left_input: None, + predicate_schema, + left_tuple: Tuple::default(), + } + } +} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for MarkApply { + fn into_executor( + mut self, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { + self.left_input = Some(build_read( + arena, + self.left_input_plan + .take() + .expect("mark apply left input plan initialized"), + cache, + transaction, + )); + arena.push(ExecNode::MarkApply(self)) + } +} + +impl MarkApply { + fn build_right_input<'a, T: Transaction + 'a>(&self, arena: &mut ExecArena<'a, T>) -> ExecId { + let cache = (arena.table_cache(), arena.view_cache(), arena.meta_cache()); + let transaction = arena.transaction_mut() as *mut T; + build_read( + arena, + self.right_input_plan + .clone() + .expect("mark apply right input plan initialized"), + cache, + transaction, + ) + } + + fn predicate_matched( + &self, + left_tuple: &Tuple, + right_tuple: &Tuple, + ) -> Result { + // 
FIXME + let values = Vec::from_iter( + left_tuple + .values + .iter() + .chain(right_tuple.values.iter()) + .cloned(), + ); + + for predicate in self.op.predicates() { + match predicate.eval(Some((values.as_slice(), self.predicate_schema.as_ref())))? { + DataValue::Boolean(true) => {} + DataValue::Boolean(false) | DataValue::Null => return Ok(false), + _ => return Err(DatabaseError::InvalidType), + } + } + + Ok(true) + } + + pub(crate) fn next_tuple<'a, T: Transaction + 'a>( + &mut self, + arena: &mut ExecArena<'a, T>, + ) -> Result<(), DatabaseError> { + let left_input = self + .left_input + .expect("mark apply left input executor initialized"); + + if !arena.next_tuple(left_input)? { + arena.finish(); + return Ok(()); + } + + self.left_tuple = mem::take(arena.result_tuple_mut()); + let right_input = self.build_right_input(arena); + let mut matched = false; + + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + if self.predicate_matched(&self.left_tuple, right_tuple)? 
{ + matched = true; + break; + } + } + + arena.produce_tuple(mem::take(&mut self.left_tuple)); + arena.result_tuple_mut().values.push(DataValue::Boolean(matched)); + arena.resume(); + Ok(()) + } +} + +#[cfg(all(test, not(target_arch = "wasm32")))] +mod tests { + use super::*; + use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; + use crate::execution::{execute, try_collect}; + use crate::expression::{BinaryOperator, ScalarExpression}; + use crate::planner::operator::mark_apply::MarkApplyOperator; + use crate::planner::operator::values::ValuesOperator; + use crate::planner::operator::Operator; + use crate::planner::{Childrens, LogicalPlan}; + use crate::storage::rocksdb::RocksStorage; + use crate::storage::{StatisticsMetaCache, Storage, TableCache, ViewCache}; + use crate::types::evaluator::EvaluatorFactory; + use crate::types::value::DataValue; + use crate::types::LogicalType; + use crate::utils::lru::SharedLruCache; + use std::hash::RandomState; + use std::sync::Arc; + use tempfile::TempDir; + + fn build_values(name: &str, rows: Vec>) -> LogicalPlan { + let desc = ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(); + let schema_ref = Arc::new(vec![ColumnRef::from(ColumnCatalog::new( + name.to_string(), + true, + desc, + ))]); + + LogicalPlan::new( + Operator::Values(ValuesOperator { rows, schema_ref }), + Childrens::None, + ) + } + + fn build_test_storage() -> Result< + ( + Arc, + Arc, + Arc, + TempDir, + RocksStorage, + ), + DatabaseError, + > { + let meta_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); + let view_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); + let table_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); + + let temp_dir = TempDir::new().expect("unable to create temporary working directory"); + let storage = RocksStorage::new(temp_dir.path())?; + + Ok((table_cache, view_cache, meta_cache, temp_dir, storage)) + } + + fn build_marker_column() -> ColumnRef { + 
ColumnRef::from(ColumnCatalog::new( + "__exists".to_string(), + true, + ColumnDesc::new(LogicalType::Boolean, None, true, None).unwrap(), + )) + } + + #[test] + fn mark_exists_apply_appends_boolean_match_column() -> Result<(), DatabaseError> { + let mut left = build_values( + "left_c1", + vec![vec![DataValue::Int32(1)], vec![DataValue::Int32(2)]], + ); + let mut right = build_values( + "right_c1", + vec![vec![DataValue::Int32(2)], vec![DataValue::Int32(3)]], + ); + let left_column = left.output_schema()[0].clone(); + let right_column = right.output_schema()[0].clone(); + + let predicate = ScalarExpression::Binary { + op: BinaryOperator::Eq, + left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), + right_expr: Box::new(ScalarExpression::column_expr(right_column, 1)), + evaluator: Some(EvaluatorFactory::binary_create( + LogicalType::Integer, + BinaryOperator::Eq, + )?), + ty: LogicalType::Boolean, + }; + + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = storage.transaction()?; + let tuples = try_collect(execute( + MarkApply::from(( + MarkApplyOperator::new_exists(build_marker_column(), vec![predicate]), + left, + right, + )), + (&table_cache, &view_cache, &meta_cache), + &mut transaction, + ))?; + + assert_eq!( + tuples + .into_iter() + .flat_map(|tuple| tuple.values) + .collect::>(), + vec![ + DataValue::Int32(1), + DataValue::Boolean(false), + DataValue::Int32(2), + DataValue::Boolean(true), + ] + ); + + Ok(()) + } + + #[test] + fn mark_exists_apply_treats_null_predicate_as_not_matched() -> Result<(), DatabaseError> { + let mut left = build_values( + "left_c1", + vec![vec![DataValue::Int32(1)], vec![DataValue::Int32(2)]], + ); + let mut right = build_values( + "right_c1", + vec![vec![DataValue::Null], vec![DataValue::Int32(2)]], + ); + let left_column = left.output_schema()[0].clone(); + let right_column = right.output_schema()[0].clone(); + + let predicate = ScalarExpression::Binary { + 
op: BinaryOperator::Eq, + left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), + right_expr: Box::new(ScalarExpression::column_expr(right_column, 1)), + evaluator: Some(EvaluatorFactory::binary_create( + LogicalType::Integer, + BinaryOperator::Eq, + )?), + ty: LogicalType::Boolean, + }; + + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = storage.transaction()?; + let tuples = try_collect(execute( + MarkApply::from(( + MarkApplyOperator::new_exists(build_marker_column(), vec![predicate]), + left, + right, + )), + (&table_cache, &view_cache, &meta_cache), + &mut transaction, + ))?; + + assert_eq!( + tuples + .into_iter() + .flat_map(|tuple| tuple.values) + .collect::>(), + vec![ + DataValue::Int32(1), + DataValue::Boolean(false), + DataValue::Int32(2), + DataValue::Boolean(true), + ] + ); + + Ok(()) + } +} diff --git a/src/execution/dql/mod.rs b/src/execution/dql/mod.rs index 86595e43..6248c766 100644 --- a/src/execution/dql/mod.rs +++ b/src/execution/dql/mod.rs @@ -22,6 +22,8 @@ pub(crate) mod function_scan; pub(crate) mod index_scan; pub(crate) mod join; pub(crate) mod limit; +pub(crate) mod mark_apply; +pub(crate) mod scalar_apply; pub(crate) mod projection; pub(crate) mod scalar_subquery; pub(crate) mod seq_scan; diff --git a/src/execution/dql/scalar_apply.rs b/src/execution/dql/scalar_apply.rs new file mode 100644 index 00000000..6949a80b --- /dev/null +++ b/src/execution/dql/scalar_apply.rs @@ -0,0 +1,239 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::mem; + +use crate::errors::DatabaseError; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::planner::operator::scalar_apply::ScalarApplyOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::types::tuple::Tuple; + +pub struct ScalarApply { + left_input_plan: Option, + right_input_plan: Option, + left_input: Option, + right_input: Option, + cached_right: Option, +} + +impl From<(ScalarApplyOperator, LogicalPlan, LogicalPlan)> for ScalarApply { + fn from((_, left_input, right_input): (ScalarApplyOperator, LogicalPlan, LogicalPlan)) -> Self { + Self { + left_input_plan: Some(left_input), + right_input_plan: Some(right_input), + left_input: None, + right_input: None, + cached_right: None, + } + } +} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for ScalarApply { + fn into_executor( + mut self, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { + self.left_input = Some(build_read( + arena, + self.left_input_plan + .take() + .expect("scalar apply left input plan initialized"), + cache, + transaction, + )); + self.right_input = Some(build_read( + arena, + self.right_input_plan + .take() + .expect("scalar apply right input plan initialized"), + cache, + transaction, + )); + arena.push(ExecNode::ScalarApply(self)) + } +} + +impl ScalarApply { + fn load_right_once<'a, T: Transaction + 'a>( + cached_right: &mut Option, + right_input: Option, + arena: &mut ExecArena<'a, T>, + ) -> Result<(), 
DatabaseError> { + if cached_right.is_none() { + let right_input = right_input + .expect("scalar apply right input executor initialized"); + if !arena.next_tuple(right_input)? { + return Err(DatabaseError::InvalidValue( + "scalar apply right input returned no rows".to_string(), + )); + } + *cached_right = Some(mem::take(arena.result_tuple_mut())); + } + + Ok(()) + } + + pub(crate) fn next_tuple<'a, T: Transaction + 'a>( + &mut self, + arena: &mut ExecArena<'a, T>, + ) -> Result<(), DatabaseError> { + Self::load_right_once(&mut self.cached_right, self.right_input, arena)?; + + let right_tuple = self.cached_right + .as_ref() + .expect("scalar apply right tuple initialized"); + let left_input = self + .left_input + .expect("scalar apply left input executor initialized"); + + if !arena.next_tuple(left_input)? { + arena.finish(); + return Ok(()); + } + arena.result_tuple_mut().values.extend(right_tuple.values.iter().cloned()); + arena.resume(); + Ok(()) + } +} + +#[cfg(all(test, not(target_arch = "wasm32")))] +mod tests { + use super::*; + use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; + use crate::execution::{execute, try_collect}; + use crate::planner::operator::scalar_subquery::ScalarSubqueryOperator; + use crate::planner::operator::values::ValuesOperator; + use crate::planner::operator::Operator; + use crate::planner::{Childrens, LogicalPlan}; + use crate::storage::rocksdb::RocksStorage; + use crate::storage::{StatisticsMetaCache, Storage, TableCache, ViewCache}; + use crate::types::value::DataValue; + use crate::types::LogicalType; + use crate::utils::lru::SharedLruCache; + use std::hash::RandomState; + use std::sync::Arc; + use tempfile::TempDir; + + fn build_values(name: &str, rows: Vec>) -> LogicalPlan { + let desc = ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(); + let schema_ref = Arc::new(vec![ColumnRef::from(ColumnCatalog::new( + name.to_string(), + true, + desc, + ))]); + + LogicalPlan::new( + Operator::Values(ValuesOperator 
{ rows, schema_ref }), + Childrens::None, + ) + } + + fn build_test_storage() -> Result< + ( + Arc, + Arc, + Arc, + TempDir, + RocksStorage, + ), + DatabaseError, + > { + let meta_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); + let view_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); + let table_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); + + let temp_dir = TempDir::new().expect("unable to create temporary working directory"); + let storage = RocksStorage::new(temp_dir.path())?; + + Ok((table_cache, view_cache, meta_cache, temp_dir, storage)) + } + + #[test] + fn scalar_apply_repeats_scalar_result_for_each_left_row() -> Result<(), DatabaseError> { + let left = build_values( + "left_c1", + vec![ + vec![crate::types::value::DataValue::Int32(1)], + vec![crate::types::value::DataValue::Int32(2)], + ], + ); + let right = ScalarSubqueryOperator::build(build_values( + "right_c1", + vec![vec![crate::types::value::DataValue::Int32(7)]], + )); + + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = storage.transaction()?; + let tuples = try_collect(execute( + ScalarApply::from((ScalarApplyOperator, left, right)), + (&table_cache, &view_cache, &meta_cache), + &mut transaction, + ))?; + + let actual = tuples + .into_iter() + .flat_map(|tuple| tuple.values) + .collect::>(); + assert_eq!( + actual, + vec![ + DataValue::Int32(1), + DataValue::Int32(7), + DataValue::Int32(2), + DataValue::Int32(7), + ] + ); + + Ok(()) + } + + #[test] + fn scalar_apply_repeats_null_scalar_result_for_each_left_row() -> Result<(), DatabaseError> { + let left = build_values( + "left_c1", + vec![vec![DataValue::Int32(1)], vec![DataValue::Int32(2)]], + ); + let right = ScalarSubqueryOperator::build(build_values( + "right_c1", + vec![vec![DataValue::Null]], + )); + + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = 
storage.transaction()?; + let tuples = try_collect(execute( + ScalarApply::from((ScalarApplyOperator, left, right)), + (&table_cache, &view_cache, &meta_cache), + &mut transaction, + ))?; + + assert_eq!( + tuples + .into_iter() + .flat_map(|tuple| tuple.values) + .collect::>(), + vec![ + DataValue::Int32(1), + DataValue::Null, + DataValue::Int32(2), + DataValue::Null, + ] + ); + + Ok(()) + } +} diff --git a/src/execution/mod.rs b/src/execution/mod.rs index 6fb8f7ea..c57a85eb 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -19,6 +19,8 @@ pub(crate) mod dql; use self::ddl::add_column::AddColumn; use self::ddl::change_column::ChangeColumn; use self::dql::join::nested_loop_join::NestedLoopJoin; +use self::dql::mark_apply::MarkApply; +use self::dql::scalar_apply::ScalarApply; use crate::errors::DatabaseError; use crate::execution::ddl::create_index::CreateIndex; use crate::execution::ddl::create_table::CreateTable; @@ -133,8 +135,10 @@ pub(crate) enum ExecNode<'a, T: Transaction + 'a> { IndexScan(IndexScan<'a, T>), Insert(Insert), Limit(Limit), + MarkApply(MarkApply), NestedLoopJoin(NestedLoopJoin), Projection(Projection), + ScalarApply(ScalarApply), ScalarSubquery(ScalarSubquery), SeqScan(SeqScan<'a, T>), ShowTables(ShowTables), @@ -194,8 +198,10 @@ impl_exec_node_runner!( IndexScan<'a, T>, Insert, Limit, + MarkApply, NestedLoopJoin, Projection, + ScalarApply, ScalarSubquery, SeqScan<'a, T>, ShowTables, @@ -237,8 +243,10 @@ impl<'a, T: Transaction + 'a> ExecNodeRunner<'a, T> for ExecNode<'a, T> { ExecNode::IndexScan(exec) => ExecNodeRunner::next_tuple(exec, arena), ExecNode::Insert(exec) => ExecNodeRunner::next_tuple(exec, arena), ExecNode::Limit(exec) => ExecNodeRunner::next_tuple(exec, arena), + ExecNode::MarkApply(exec) => ExecNodeRunner::next_tuple(exec, arena), ExecNode::NestedLoopJoin(exec) => ExecNodeRunner::next_tuple(exec, arena), ExecNode::Projection(exec) => ExecNodeRunner::next_tuple(exec, arena), + ExecNode::ScalarApply(exec) => 
ExecNodeRunner::next_tuple(exec, arena), ExecNode::ScalarSubquery(exec) => ExecNodeRunner::next_tuple(exec, arena), ExecNode::SeqScan(exec) => ExecNodeRunner::next_tuple(exec, arena), ExecNode::ShowTables(exec) => ExecNodeRunner::next_tuple(exec, arena), @@ -413,6 +421,18 @@ pub(crate) fn build_read<'a, T: Transaction + 'a>( Filter::from((op, input)).into_executor(arena, cache, transaction) } + Operator::ScalarApply(op) => { + let (left_input, right_input) = childrens.pop_twins(); + + ScalarApply::from((op, left_input, right_input)) + .into_executor(arena, cache, transaction) + } + Operator::MarkApply(op) => { + let (left_input, right_input) = childrens.pop_twins(); + + MarkApply::from((op, left_input, right_input)) + .into_executor(arena, cache, transaction) + } Operator::Join(op) => { let (left_input, right_input) = childrens.pop_twins(); diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index 754abc9c..d7041969 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -454,9 +454,15 @@ impl ImplementationRuleIndex { Operator::Limit(_) if self.contains(ImplementationRuleImpl::Limit) => { Some(PhysicalOption::new(PlanImpl::Limit, SortOption::Follow)) } + Operator::MarkApply(_) if self.contains(ImplementationRuleImpl::MarkApply) => { + Some(PhysicalOption::new(PlanImpl::MarkApply, SortOption::Follow)) + } Operator::Project(_) if self.contains(ImplementationRuleImpl::Projection) => { Some(PhysicalOption::new(PlanImpl::Project, SortOption::Follow)) } + Operator::ScalarApply(_) if self.contains(ImplementationRuleImpl::ScalarApply) => { + Some(PhysicalOption::new(PlanImpl::ScalarApply, SortOption::Follow)) + } Operator::ScalarSubquery(_) if self.contains(ImplementationRuleImpl::ScalarSubquery) => { diff --git a/src/optimizer/rule/implementation/dql/mark_apply.rs b/src/optimizer/rule/implementation/dql/mark_apply.rs new file mode 100644 index 00000000..43f21691 --- /dev/null +++ 
b/src/optimizer/rule/implementation/dql/mark_apply.rs @@ -0,0 +1,37 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::errors::DatabaseError; +use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; +use crate::optimizer::core::rule::BestPhysicalOption; +use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; +use crate::single_mapping; +use crate::storage::Transaction; +use std::sync::LazyLock; + +static MARK_APPLY_PATTERN: LazyLock = LazyLock::new(|| Pattern { + predicate: |op| matches!(op, Operator::MarkApply(_)), + children: PatternChildrenPredicate::None, +}); + +#[derive(Clone)] +pub struct MarkApplyImplementation; + +single_mapping!( + MarkApplyImplementation, + MARK_APPLY_PATTERN, + PhysicalOption::new(PlanImpl::MarkApply, SortOption::Follow) +); diff --git a/src/optimizer/rule/implementation/dql/mod.rs b/src/optimizer/rule/implementation/dql/mod.rs index eb18286e..400de6cc 100644 --- a/src/optimizer/rule/implementation/dql/mod.rs +++ b/src/optimizer/rule/implementation/dql/mod.rs @@ -18,7 +18,9 @@ pub(crate) mod filter; pub(crate) mod function_scan; pub(crate) mod join; pub(crate) mod limit; +pub(crate) mod mark_apply; pub(crate) mod projection; +pub(crate) mod scalar_apply; pub(crate) mod scalar_subquery; pub(crate) mod 
sort; pub(crate) mod table_scan; diff --git a/src/optimizer/rule/implementation/dql/scalar_apply.rs b/src/optimizer/rule/implementation/dql/scalar_apply.rs new file mode 100644 index 00000000..8ac4f5b9 --- /dev/null +++ b/src/optimizer/rule/implementation/dql/scalar_apply.rs @@ -0,0 +1,37 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::errors::DatabaseError; +use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; +use crate::optimizer::core::rule::BestPhysicalOption; +use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; +use crate::single_mapping; +use crate::storage::Transaction; +use std::sync::LazyLock; + +static SCALAR_APPLY_PATTERN: LazyLock<Pattern> = LazyLock::new(|| Pattern { + predicate: |op| matches!(op, Operator::ScalarApply(_)), + children: PatternChildrenPredicate::None, +}); + +#[derive(Clone)] +pub struct ScalarApplyImplementation; + +single_mapping!( + ScalarApplyImplementation, + SCALAR_APPLY_PATTERN, + PhysicalOption::new(PlanImpl::ScalarApply, SortOption::Follow) +); diff --git a/src/optimizer/rule/implementation/mod.rs b/src/optimizer/rule/implementation/mod.rs index cebc4d7c..093bb1c2 100644 --- a/src/optimizer/rule/implementation/mod.rs +++ b/src/optimizer/rule/implementation/mod.rs @@ -41,7 +41,9 @@ use
crate::optimizer::rule::implementation::dql::filter::FilterImplementation; use crate::optimizer::rule::implementation::dql::function_scan::FunctionScanImplementation; use crate::optimizer::rule::implementation::dql::join::JoinImplementation; use crate::optimizer::rule::implementation::dql::limit::LimitImplementation; +use crate::optimizer::rule::implementation::dql::mark_apply::MarkApplyImplementation; use crate::optimizer::rule::implementation::dql::projection::ProjectionImplementation; +use crate::optimizer::rule::implementation::dql::scalar_apply::ScalarApplyImplementation; use crate::optimizer::rule::implementation::dql::scalar_subquery::ScalarSubqueryImplementation; use crate::optimizer::rule::implementation::dql::sort::SortImplementation; use crate::optimizer::rule::implementation::dql::table_scan::{ @@ -60,7 +62,9 @@ pub enum ImplementationRuleRootTag { Filter, Join, Limit, + MarkApply, Project, + ScalarApply, ScalarSubquery, TableScan, FunctionScan, @@ -91,7 +95,9 @@ impl ImplementationRuleRootTag { Operator::Filter(_) => Some(Self::Filter), Operator::Join(_) => Some(Self::Join), Operator::Limit(_) => Some(Self::Limit), + Operator::MarkApply(_) => Some(Self::MarkApply), Operator::Project(_) => Some(Self::Project), + Operator::ScalarApply(_) => Some(Self::ScalarApply), Operator::ScalarSubquery(_) => Some(Self::ScalarSubquery), Operator::TableScan(_) => Some(Self::TableScan), Operator::FunctionScan(_) => Some(Self::FunctionScan), @@ -133,7 +139,9 @@ pub enum ImplementationRuleImpl { Filter, HashJoin, Limit, + MarkApply, Projection, + ScalarApply, ScalarSubquery, SeqScan, FunctionScan, @@ -166,7 +174,9 @@ impl MatchPattern for ImplementationRuleImpl { ImplementationRuleImpl::Filter => FilterImplementation.pattern(), ImplementationRuleImpl::HashJoin => JoinImplementation.pattern(), ImplementationRuleImpl::Limit => LimitImplementation.pattern(), + ImplementationRuleImpl::MarkApply => MarkApplyImplementation.pattern(), ImplementationRuleImpl::Projection => 
ProjectionImplementation.pattern(), + ImplementationRuleImpl::ScalarApply => ScalarApplyImplementation.pattern(), ImplementationRuleImpl::ScalarSubquery => ScalarSubqueryImplementation.pattern(), ImplementationRuleImpl::SeqScan => SeqScanImplementation.pattern(), ImplementationRuleImpl::IndexScan => IndexScanImplementation.pattern(), @@ -200,7 +210,9 @@ impl ImplementationRuleImpl { ImplementationRuleImpl::Filter => ImplementationRuleRootTag::Filter, ImplementationRuleImpl::HashJoin => ImplementationRuleRootTag::Join, ImplementationRuleImpl::Limit => ImplementationRuleRootTag::Limit, + ImplementationRuleImpl::MarkApply => ImplementationRuleRootTag::MarkApply, ImplementationRuleImpl::Projection => ImplementationRuleRootTag::Project, + ImplementationRuleImpl::ScalarApply => ImplementationRuleRootTag::ScalarApply, ImplementationRuleImpl::ScalarSubquery => ImplementationRuleRootTag::ScalarSubquery, ImplementationRuleImpl::SeqScan | ImplementationRuleImpl::IndexScan => { ImplementationRuleRootTag::TableScan @@ -249,11 +261,16 @@ impl ImplementationRule for ImplementationRuleImpl { ImplementationRuleImpl::Limit => { LimitImplementation.update_best_option(operator, loader, best_physical_option)? } + ImplementationRuleImpl::MarkApply => { + MarkApplyImplementation.update_best_option(operator, loader, best_physical_option)? 
+ } ImplementationRuleImpl::Projection => ProjectionImplementation.update_best_option( operator, loader, best_physical_option, )?, + ImplementationRuleImpl::ScalarApply => ScalarApplyImplementation + .update_best_option(operator, loader, best_physical_option)?, ImplementationRuleImpl::ScalarSubquery => ScalarSubqueryImplementation .update_best_option(operator, loader, best_physical_option)?, ImplementationRuleImpl::SeqScan => { diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index 2420f100..8a4d452d 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -104,6 +104,10 @@ impl ColumnPruning { Operator::Project(op) => { Self::extend_expr_referenced_columns(op.exprs.iter(), referenced_columns); } + Operator::MarkApply(op) => { + Self::extend_expr_referenced_columns(op.predicates().iter(), referenced_columns); + referenced_columns.insert(op.output_column().summary()); + } Operator::TableScan(op) => { referenced_columns.extend(op.columns.values().map(|column| column.summary())); } @@ -149,6 +153,7 @@ impl ColumnPruning { } Operator::Dummy | Operator::Limit(_) + | Operator::ScalarApply(_) | Operator::ScalarSubquery(_) | Operator::Analyze(_) | Operator::ShowTable @@ -237,6 +242,10 @@ impl ColumnPruning { Operator::Project(op) => { remap_exprs_positions(op.exprs.iter_mut(), removed_positions)?; } + Operator::MarkApply(op) => { + Self::remap_exprs_after_child_change(op.predicates_mut().iter_mut(), removed_positions)?; + } + Operator::ScalarApply(_) => {} Operator::ScalarSubquery(_) => {} Operator::Sort(op) => { Self::remap_exprs_after_child_change( @@ -486,13 +495,18 @@ impl ColumnPruning { } Operator::Sort(_) | Operator::Limit(_) + | Operator::ScalarApply(_) + | Operator::MarkApply(_) | Operator::ScalarSubquery(_) | Operator::Join(_) | Operator::Filter(_) | Operator::Union(_) | Operator::Except(_) | Operator::TopK(_) => { - if 
matches!(operator, Operator::Join(_)) { + if matches!( + operator, + Operator::ScalarApply(_) | Operator::MarkApply(_) | Operator::Join(_) + ) { let (child_outcome, old_left_outputs_len) = { let mut child_required = required_columns.clone(); Self::extend_operator_referenced_columns(operator, &mut child_required); @@ -548,6 +562,26 @@ impl ColumnPruning { output_removed_positions = Self::copy_removed_positions(&left_removed_positions, arena); } + } else if let Operator::MarkApply(op) = operator { + let removed_positions = Self::merge_removed_positions( + &left_removed_positions, + &right_removed_positions, + old_left_outputs_len, + arena, + ); + Self::remap_exprs_after_child_change( + op.predicates_mut().iter_mut(), + &removed_positions, + )?; + output_removed_positions = + Self::copy_removed_positions(&left_removed_positions, arena); + } else { + output_removed_positions = Self::merge_removed_positions( + &left_removed_positions, + &right_removed_positions, + old_left_outputs_len, + arena, + ); } changed = true; } diff --git a/src/optimizer/rule/normalization/compilation_in_advance.rs b/src/optimizer/rule/normalization/compilation_in_advance.rs index 8a2b9361..cb6b0aa4 100644 --- a/src/optimizer/rule/normalization/compilation_in_advance.rs +++ b/src/optimizer/rule/normalization/compilation_in_advance.rs @@ -56,6 +56,12 @@ pub(crate) fn evaluator_bind_current(plan: &mut LogicalPlan) -> Result<(), Datab BindEvaluator.visit(expr)?; } } + Operator::MarkApply(op) => { + for predicate in op.predicates_mut().iter_mut() { + BindEvaluator.visit(predicate)?; + } + } + Operator::ScalarApply(_) => {} Operator::Sort(op) => { for sort_field in op.sort_fields.iter_mut() { BindEvaluator.visit(&mut sort_field.expr)?; @@ -115,7 +121,11 @@ impl EvaluatorBind { Self::_apply(left)?; if matches!( plan.operator, - Operator::Join(_) | Operator::Union(_) | Operator::Except(_) + Operator::ScalarApply(_) + | Operator::MarkApply(_) + | Operator::Join(_) + | Operator::Union(_) + | 
Operator::Except(_) ) { Self::_apply(right)?; } diff --git a/src/optimizer/rule/normalization/mod.rs b/src/optimizer/rule/normalization/mod.rs index 692c23d6..6f52d8ad 100644 --- a/src/optimizer/rule/normalization/mod.rs +++ b/src/optimizer/rule/normalization/mod.rs @@ -98,6 +98,7 @@ impl NormalizationRuleRootTag { pub fn from_operator(operator: &Operator) -> Option<Self> { match operator { Operator::Aggregate(_) => Some(Self::Aggregate), + Operator::ScalarApply(_) | Operator::MarkApply(_) => Some(Self::Any), Operator::Filter(_) => Some(Self::Filter), Operator::Join(_) => Some(Self::Join), Operator::Limit(_) => Some(Self::Limit), diff --git a/src/planner/mod.rs b/src/planner/mod.rs index 0c0ec3b0..ff89b9c5 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -166,6 +166,28 @@ impl LogicalPlan { | Operator::Limit(_) | Operator::TopK(_) | Operator::ScalarSubquery(_) => childrens_iter.next().unwrap().output_schema_direct(), + Operator::ScalarApply(_) => { + let mut columns = Vec::new(); + + for plan in childrens_iter { + for column in plan.output_schema_direct().columns() { + columns.push(column.clone()); + } + } + SchemaOutput::Schema(columns) + } + Operator::MarkApply(op) => { + let mut columns = Vec::new(); + + if let Some(left) = childrens_iter.next() { + for column in left.output_schema_direct().columns() { + columns.push(column.clone()); + } + } + columns.push(op.output_column().clone()); + + SchemaOutput::Schema(columns) + } Operator::Aggregate(op) => SchemaOutput::Schema( op.agg_calls .iter() diff --git a/src/planner/operator/mark_apply.rs b/src/planner/operator/mark_apply.rs new file mode 100644 index 00000000..9dbad0ac --- /dev/null +++ b/src/planner/operator/mark_apply.rs @@ -0,0 +1,78 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::Operator; +use crate::catalog::ColumnRef; +use crate::expression::ScalarExpression; +use crate::planner::{Childrens, LogicalPlan}; +use kite_sql_serde_macros::ReferenceSerialization; +use std::fmt; +use std::fmt::Formatter; + +#[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] +pub enum MarkApplyKind { + Exists, +} + +#[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] +pub struct MarkApplyOperator { + pub kind: MarkApplyKind, + predicates: Vec<ScalarExpression>, + output_column: ColumnRef, +} + +impl MarkApplyOperator { + pub fn new_exists(output_column: ColumnRef, predicates: Vec<ScalarExpression>) -> Self { + Self { + kind: MarkApplyKind::Exists, + predicates, + output_column, + } + } + + pub fn build_exists( + left: LogicalPlan, + right: LogicalPlan, + output_column: ColumnRef, + predicates: Vec<ScalarExpression>, + ) -> LogicalPlan { + LogicalPlan::new( + Operator::MarkApply(MarkApplyOperator::new_exists(output_column, predicates)), + Childrens::Twins { + left: Box::new(left), + right: Box::new(right), + }, + ) + } + + pub fn predicates(&self) -> &[ScalarExpression] { + &self.predicates + } + + pub fn predicates_mut(&mut self) -> &mut Vec<ScalarExpression> { + &mut self.predicates + } + + pub fn output_column(&self) -> &ColumnRef { + &self.output_column + } +} + +impl fmt::Display for MarkApplyOperator { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self.kind { + MarkApplyKind::Exists => write!(f, "MarkExistsApply"), + } + } +} diff --git a/src/planner/operator/mod.rs b/src/planner/operator/mod.rs index 081b2232..f903e3c4 100644 ---
a/src/planner/operator/mod.rs +++ b/src/planner/operator/mod.rs @@ -31,7 +31,9 @@ pub mod function_scan; pub mod insert; pub mod join; pub mod limit; +pub mod mark_apply; pub mod project; +pub mod scalar_apply; pub mod scalar_subquery; pub mod sort; pub mod table_scan; @@ -43,9 +45,10 @@ pub mod values; use self::{ aggregate::AggregateOperator, alter_table::add_column::AddColumnOperator, - alter_table::change_column::ChangeColumnOperator, filter::FilterOperator, join::JoinOperator, - limit::LimitOperator, project::ProjectOperator, scalar_subquery::ScalarSubqueryOperator, - sort::SortOperator, table_scan::TableScanOperator, + alter_table::change_column::ChangeColumnOperator, filter::FilterOperator, + join::JoinOperator, limit::LimitOperator, mark_apply::MarkApplyOperator, + project::ProjectOperator, scalar_apply::ScalarApplyOperator, + scalar_subquery::ScalarSubqueryOperator, sort::SortOperator, table_scan::TableScanOperator, }; use crate::catalog::ColumnRef; use crate::expression::ScalarExpression; @@ -81,6 +84,8 @@ pub enum Operator { // DQL Dummy, Aggregate(AggregateOperator), + ScalarApply(ScalarApplyOperator), + MarkApply(MarkApplyOperator), Filter(FilterOperator), Join(JoinOperator), Project(ProjectOperator), @@ -153,6 +158,8 @@ pub enum PlanImpl { SimpleAggregate, HashAggregate, StreamDistinct, + ScalarApply, + MarkApply, Filter, HashJoin, NestLoopJoin, @@ -189,7 +196,11 @@ impl Operator { output_exprs.extend(op.agg_calls.iter().chain(op.groupby_exprs.iter()).cloned()); true } - Operator::Filter(_) | Operator::Join(_) | Operator::ScalarSubquery(_) => false, + Operator::ScalarApply(_) + | Operator::MarkApply(_) + | Operator::Filter(_) + | Operator::Join(_) + | Operator::ScalarSubquery(_) => false, Operator::Project(op) => { output_exprs.clear(); output_exprs.extend(op.exprs.iter().cloned()); @@ -270,6 +281,11 @@ impl Operator { .iter() .chain(op.groupby_exprs.iter()) .all(|expr| expr.visit_referenced_columns(only_column_ref, f)), + Operator::ScalarApply(_) => 
true, + Operator::MarkApply(op) => op + .predicates() + .iter() + .all(|expr| expr.visit_referenced_columns(only_column_ref, f)), Operator::Filter(op) => op.predicate.visit_referenced_columns(only_column_ref, f), Operator::Join(op) => { if let JoinCondition::On { on, filter } = &op.on { @@ -377,6 +393,8 @@ impl fmt::Display for Operator { match self { Operator::Dummy => write!(f, "Dummy"), Operator::Aggregate(op) => write!(f, "{op}"), + Operator::ScalarApply(op) => write!(f, "{op}"), + Operator::MarkApply(op) => write!(f, "{op}"), Operator::Filter(op) => write!(f, "{op}"), Operator::Join(op) => write!(f, "{op}"), Operator::Project(op) => write!(f, "{op}"), @@ -449,6 +467,8 @@ impl fmt::Display for PlanImpl { PlanImpl::SimpleAggregate => write!(f, "SimpleAggregate"), PlanImpl::HashAggregate => write!(f, "HashAggregate"), PlanImpl::StreamDistinct => write!(f, "StreamDistinct"), + PlanImpl::ScalarApply => write!(f, "ScalarApply"), + PlanImpl::MarkApply => write!(f, "MarkApply"), PlanImpl::Filter => write!(f, "Filter"), PlanImpl::HashJoin => write!(f, "HashJoin"), PlanImpl::NestLoopJoin => write!(f, "NestLoopJoin"), diff --git a/src/planner/operator/scalar_apply.rs b/src/planner/operator/scalar_apply.rs new file mode 100644 index 00000000..99df8d84 --- /dev/null +++ b/src/planner/operator/scalar_apply.rs @@ -0,0 +1,40 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use super::Operator; +use crate::planner::{Childrens, LogicalPlan}; +use kite_sql_serde_macros::ReferenceSerialization; +use std::fmt; +use std::fmt::Formatter; + +#[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] +pub struct ScalarApplyOperator; + +impl ScalarApplyOperator { + pub fn build(left: LogicalPlan, right: LogicalPlan) -> LogicalPlan { + LogicalPlan::new( + Operator::ScalarApply(ScalarApplyOperator), + Childrens::Twins { + left: Box::new(left), + right: Box::new(right), + }, + ) + } +} + +impl fmt::Display for ScalarApplyOperator { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "ScalarApply") + } +} diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8850e390..4e0d2a3b 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -136,7 +136,7 @@ pub trait Transaction: Sized { let table = self .table(table_cache, table_name.clone())? .ok_or(DatabaseError::TableNotFound)?; - if columns.is_empty() { + if with_pk { for (i, column) in table.primary_keys() { columns.insert(*i, column.clone()); } @@ -178,7 +178,7 @@ pub trait Transaction: Sized { let table_name = table.name.as_ref(); let offset = offset_option.unwrap_or(0); - if columns.is_empty() || with_pk { + if with_pk { for (i, column) in table.primary_keys() { columns.insert(*i, column.clone()); } diff --git a/tests/slt/subquery.slt b/tests/slt/subquery.slt index 70387fec..f405b893 100644 --- a/tests/slt/subquery.slt +++ b/tests/slt/subquery.slt @@ -147,6 +147,26 @@ where not exists ( ---- 3 +query I rowsort +select id from users +where exists ( + select 1 from orders where orders.user_id = users.id +) or age = 40; +---- +1 +2 +3 + +query I rowsort +select id from users +where not ( + exists ( + select 1 from orders where orders.user_id = users.id + ) or age = 30 +); +---- +3 + query I rowsort select id from users where id in ( diff --git a/tpcc/src/main.rs b/tpcc/src/main.rs index 9e4fd8d6..1d3ea9c5 100644 --- a/tpcc/src/main.rs +++ b/tpcc/src/main.rs @@ -19,7 
+19,7 @@ use crate::backend::sqlite::{SqliteBackend, SqliteProfile}; use crate::backend::{ BackendControl, BackendTransaction, ColumnType, PreparedStatement, StatementSpec, }; -use crate::delivery::{Delivery, DeliveryArgs, DeliveryTest}; +use crate::delivery::DeliveryTest; use crate::load::Load; use crate::new_ord::NewOrdTest; use crate::order_stat::OrderStatTest; @@ -30,7 +30,6 @@ use crate::utils::SeqGen; use clap::{Parser, ValueEnum}; use indicatif::{ProgressBar, ProgressStyle}; use kite_sql::errors::DatabaseError; -use kite_sql::types::value::DataValue; #[cfg(all(unix, feature = "pprof"))] use pprof::ProfilerGuard; use rand::prelude::ThreadRng; From c574d1586be0a4c1ea5a7b3afb911c727baf23f6 Mon Sep 17 00:00:00 2001 From: kould Date: Wed, 1 Apr 2026 23:39:28 +0800 Subject: [PATCH 02/10] refactor(execution): revert mutable plan dispatch and remove Option --- src/execution/ddl/create_index.rs | 17 +- src/execution/dml/analyze.rs | 12 +- src/execution/dml/copy_to_file.rs | 18 +- src/execution/dml/delete.rs | 12 +- src/execution/dml/insert.rs | 12 +- src/execution/dml/update.rs | 12 +- src/execution/dql/aggregate/hash_agg.rs | 57 +- src/execution/dql/aggregate/simple_agg.rs | 54 +- .../dql/aggregate/stream_distinct.rs | 55 +- src/execution/dql/describe.rs | 19 +- src/execution/dql/dummy.rs | 19 +- src/execution/dql/except.rs | 47 +- src/execution/dql/explain.rs | 37 +- src/execution/dql/filter.rs | 42 +- src/execution/dql/function_scan.rs | 19 +- src/execution/dql/index_scan.rs | 25 +- src/execution/dql/join/hash_join.rs | 37 +- src/execution/dql/join/nested_loop_join.rs | 34 +- src/execution/dql/limit.rs | 44 +- src/execution/dql/mark_apply.rs | 132 ++-- src/execution/dql/mod.rs | 2 +- src/execution/dql/projection.rs | 45 +- src/execution/dql/scalar_apply.rs | 109 +-- src/execution/dql/scalar_subquery.rs | 52 +- src/execution/dql/seq_scan.rs | 19 +- src/execution/dql/sort.rs | 45 +- src/execution/dql/top_k.rs | 53 +- src/execution/dql/union.rs | 47 +- 
src/execution/dql/values.rs | 19 +- src/execution/mod.rs | 732 ++++++++++++------ src/optimizer/heuristic/optimizer.rs | 6 +- src/optimizer/rule/implementation/mod.rs | 15 +- .../rule/normalization/column_pruning.rs | 5 +- src/planner/operator/mod.rs | 8 +- 34 files changed, 1059 insertions(+), 802 deletions(-) diff --git a/src/execution/ddl/create_index.rs b/src/execution/ddl/create_index.rs index 8d8cc2e2..acfed681 100644 --- a/src/execution/ddl/create_index.rs +++ b/src/execution/ddl/create_index.rs @@ -14,7 +14,9 @@ use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor, +}; use crate::expression::ScalarExpression; use crate::planner::operator::create_index::CreateIndexOperator; use crate::planner::LogicalPlan; @@ -28,7 +30,7 @@ use crate::types::ColumnId; pub struct CreateIndex { op: Option, input_schema: SchemaRef, - input_plan: Option, + input_plan: LogicalPlan, input: ExecId, } @@ -37,7 +39,7 @@ impl From<(CreateIndexOperator, LogicalPlan)> for CreateIndex { Self { op: Some(op), input_schema: input.output_schema().clone(), - input_plan: Some(input), + input_plan: input, input: 0, } } @@ -50,14 +52,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for CreateIndex { cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = build_read( - arena, - self.input_plan - .take() - .expect("create index input plan initialized"), - cache, - transaction, - ); + self.input = build_read(arena, take_plan(&mut self.input_plan), cache, transaction); arena.push(ExecNode::CreateIndex(self)) } } diff --git a/src/execution/dml/analyze.rs b/src/execution/dml/analyze.rs index 2f55d67b..b96b78eb 100644 --- a/src/execution/dml/analyze.rs +++ b/src/execution/dml/analyze.rs @@ -15,7 +15,9 @@ use crate::catalog::TableName; use 
crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor, +}; use crate::expression::ScalarExpression; use crate::optimizer::core::histogram::HistogramBuilder; use crate::optimizer::core::statistics_meta::StatisticsMeta; @@ -35,7 +37,7 @@ const DEFAULT_NUM_OF_BUCKETS: usize = 100; pub struct Analyze { table_name: TableName, input_schema: SchemaRef, - input_plan: Option, + input_plan: LogicalPlan, input: Option, histogram_buckets: Option, } @@ -55,7 +57,7 @@ impl From<(AnalyzeOperator, LogicalPlan)> for Analyze { Analyze { table_name, input_schema: input.output_schema().clone(), - input_plan: Some(input), + input_plan: input, input: None, histogram_buckets, } @@ -71,9 +73,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Analyze { ) -> ExecId { self.input = Some(build_read( arena, - self.input_plan - .take() - .expect("analyze input plan initialized"), + take_plan(&mut self.input_plan), cache, transaction, )); diff --git a/src/execution/dml/copy_to_file.rs b/src/execution/dml/copy_to_file.rs index b1dfdbe4..a7c6cc48 100644 --- a/src/execution/dml/copy_to_file.rs +++ b/src/execution/dml/copy_to_file.rs @@ -14,7 +14,9 @@ use crate::binder::copy::FileFormat; use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor, +}; use crate::planner::operator::copy_to_file::CopyToFileOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; @@ -22,7 +24,7 @@ use crate::types::tuple_builder::TupleBuilder; pub struct CopyToFile { op: CopyToFileOperator, - input_plan: Option, + input_plan: LogicalPlan, input: Option, } @@ -30,7 +32,7 @@ impl 
From<(CopyToFileOperator, LogicalPlan)> for CopyToFile { fn from((op, input): (CopyToFileOperator, LogicalPlan)) -> Self { CopyToFile { op, - input_plan: Some(input), + input_plan: input, input: None, } } @@ -45,9 +47,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for CopyToFile { ) -> ExecId { self.input = Some(build_read( arena, - self.input_plan - .take() - .expect("copy to file input plan initialized"), + take_plan(&mut self.input_plan), cache, transaction, )); @@ -207,11 +207,7 @@ mod tests { let executor = CopyToFile { op: op.clone(), - input_plan: Some(TableScanOperator::build( - "t1".to_string().into(), - table, - true, - )?), + input_plan: TableScanOperator::build("t1".to_string().into(), table, true)?, input: None, }; let mut executor = crate::execution::execute( diff --git a/src/execution/dml/delete.rs b/src/execution/dml/delete.rs index f8da2325..56b4aeb5 100644 --- a/src/execution/dml/delete.rs +++ b/src/execution/dml/delete.rs @@ -15,7 +15,9 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor, +}; use crate::expression::ScalarExpression; use crate::planner::operator::delete::DeleteOperator; use crate::planner::LogicalPlan; @@ -29,7 +31,7 @@ use std::collections::HashMap; pub struct Delete { table_name: TableName, input_schema: SchemaRef, - input_plan: Option, + input_plan: LogicalPlan, input: Option, } @@ -38,7 +40,7 @@ impl From<(DeleteOperator, LogicalPlan)> for Delete { Delete { table_name, input_schema: input.output_schema().clone(), - input_plan: Some(input), + input_plan: input, input: None, } } @@ -53,9 +55,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Delete { ) -> ExecId { self.input = Some(build_read( arena, - self.input_plan - .take() - 
.expect("delete input plan initialized"), + take_plan(&mut self.input_plan), cache, transaction, )); diff --git a/src/execution/dml/insert.rs b/src/execution/dml/insert.rs index 3b7fb94b..a39b8bee 100644 --- a/src/execution/dml/insert.rs +++ b/src/execution/dml/insert.rs @@ -15,7 +15,9 @@ use crate::catalog::{ColumnCatalog, TableName}; use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor, +}; use crate::planner::operator::insert::InsertOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; @@ -31,7 +33,7 @@ use std::collections::HashMap; pub struct Insert { table_name: TableName, input_schema: SchemaRef, - input_plan: Option, + input_plan: LogicalPlan, input: Option, is_overwrite: bool, is_mapping_by_name: bool, @@ -51,7 +53,7 @@ impl From<(InsertOperator, LogicalPlan)> for Insert { Insert { table_name, input_schema: input.output_schema().clone(), - input_plan: Some(input), + input_plan: input, input: None, is_overwrite, is_mapping_by_name, @@ -84,9 +86,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Insert { ) -> ExecId { self.input = Some(build_read( arena, - self.input_plan - .take() - .expect("insert input plan initialized"), + take_plan(&mut self.input_plan), cache, transaction, )); diff --git a/src/execution/dml/update.rs b/src/execution/dml/update.rs index 626baf57..c31fa17b 100644 --- a/src/execution/dml/update.rs +++ b/src/execution/dml/update.rs @@ -15,7 +15,9 @@ use crate::catalog::{ColumnRef, TableName}; use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, 
WriteExecutor, +}; use crate::expression::ScalarExpression; use crate::planner::operator::update::UpdateOperator; use crate::planner::LogicalPlan; @@ -32,7 +34,7 @@ pub struct Update { table_name: TableName, value_exprs: Vec<(ColumnRef, ScalarExpression)>, input_schema: SchemaRef, - input_plan: Option, + input_plan: LogicalPlan, input: Option, } @@ -50,7 +52,7 @@ impl From<(UpdateOperator, LogicalPlan)> for Update { table_name, value_exprs, input_schema: input.output_schema().clone(), - input_plan: Some(input), + input_plan: input, input: None, } } @@ -65,9 +67,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Update { ) -> ExecId { self.input = Some(build_read( arena, - self.input_plan - .take() - .expect("update input plan initialized"), + take_plan(&mut self.input_plan), cache, transaction, )); diff --git a/src/execution/dql/aggregate/hash_agg.rs b/src/execution/dql/aggregate/hash_agg.rs index b7d67248..f59ea27b 100644 --- a/src/execution/dql/aggregate/hash_agg.rs +++ b/src/execution/dql/aggregate/hash_agg.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::execution::dql::aggregate::{create_accumulators, Accumulator}; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::expression::ScalarExpression; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; @@ -31,13 +31,14 @@ pub struct HashAggExecutor { agg_calls: Vec, groupby_exprs: Vec, input_schema: SchemaRef, - input_plan: Option, input: ExecId, output: Option, } -impl From<(AggregateOperator, LogicalPlan)> for HashAggExecutor { - fn from( +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for HashAggExecutor { + type Input = (AggregateOperator, LogicalPlan); + + fn into_executor( ( AggregateOperator { agg_calls, @@ -45,43 +46,23 @@ impl From<(AggregateOperator, LogicalPlan)> for HashAggExecutor { 
.. }, mut input, - ): (AggregateOperator, LogicalPlan), - ) -> Self { - HashAggExecutor { - agg_calls, - groupby_exprs, - input_schema: input.output_schema().clone(), - input_plan: Some(input), - input: 0, - output: None, - } - } -} - -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashAggExecutor { - fn into_executor( - mut self, + ): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = build_read( - arena, - self.input_plan - .take() - .expect("hash aggregate input plan initialized"), - cache, - transaction, - ); - arena.push(ExecNode::HashAgg(self)) + let input_schema = input.output_schema().clone(); + let input = build_read(arena, input, cache, transaction); + arena.push(ExecNode::HashAgg(HashAggExecutor { + agg_calls, + groupby_exprs, + input_schema, + input, + output: None, + })) } -} -impl HashAggExecutor { - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { if self.output.is_none() { let mut group_hash_accs: HashMap, Vec>> = HashMap::new(); @@ -143,7 +124,7 @@ mod test { use crate::errors::DatabaseError; use crate::execution::dql::aggregate::hash_agg::HashAggExecutor; use crate::execution::dql::test::build_integers; - use crate::execution::try_collect; + use crate::execution::{execute_input, try_collect}; use crate::expression::agg::AggKind; use crate::expression::ScalarExpression; use crate::optimizer::heuristic::batch::HepBatchStrategy; @@ -238,8 +219,8 @@ mod test { let Operator::Aggregate(op) = plan.operator else { unreachable!() }; - let tuples = try_collect(crate::execution::execute( - HashAggExecutor::from((op, plan.childrens.pop_only())), + let tuples = try_collect(execute_input::<_, HashAggExecutor>( + (op, plan.childrens.pop_only()), (&table_cache, &view_cache, &meta_cache), &mut transaction, ))?; diff --git 
a/src/execution/dql/aggregate/simple_agg.rs b/src/execution/dql/aggregate/simple_agg.rs index 5f880b67..185fa78d 100644 --- a/src/execution/dql/aggregate/simple_agg.rs +++ b/src/execution/dql/aggregate/simple_agg.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::execution::dql::aggregate::create_accumulators; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::expression::ScalarExpression; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; @@ -23,55 +23,38 @@ use crate::types::tuple::SchemaRef; pub struct SimpleAggExecutor { agg_calls: Vec, input_schema: SchemaRef, - input_plan: Option, - input: Option, + input: ExecId, + returned: bool, } -impl From<(AggregateOperator, LogicalPlan)> for SimpleAggExecutor { - fn from( - (AggregateOperator { agg_calls, .. }, mut input): (AggregateOperator, LogicalPlan), - ) -> Self { - SimpleAggExecutor { - agg_calls, - input_schema: input.output_schema().clone(), - input_plan: Some(input), - input: None, - } - } -} +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for SimpleAggExecutor { + type Input = (AggregateOperator, LogicalPlan); -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for SimpleAggExecutor { fn into_executor( - mut self, + (AggregateOperator { agg_calls, .. 
}, mut input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = Some(build_read( - arena, - self.input_plan - .take() - .expect("simple aggregate input plan initialized"), - cache, - transaction, - )); - arena.push(ExecNode::SimpleAgg(self)) + let input_schema = input.output_schema().clone(); + let input = build_read(arena, input, cache, transaction); + arena.push(ExecNode::SimpleAgg(SimpleAggExecutor { + agg_calls, + input_schema, + input, + returned: false, + })) } -} -impl SimpleAggExecutor { - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { - let Some(input) = self.input.take() else { + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + if self.returned { arena.finish(); return Ok(()); - }; + } let mut accs = create_accumulators(&self.agg_calls)?; - while arena.next_tuple(input)? { + while arena.next_tuple(self.input)? { let tuple = arena.result_tuple(); for (acc, expr) in accs.iter_mut().zip(self.agg_calls.iter()) { let ScalarExpression::AggCall { args, .. } = expr else { @@ -96,6 +79,7 @@ impl SimpleAggExecutor { for acc in accs { output.values.push(acc.evaluate()?); } + self.returned = true; arena.resume(); Ok(()) } diff --git a/src/execution/dql/aggregate/stream_distinct.rs b/src/execution/dql/aggregate/stream_distinct.rs index 2d3758a4..133e3826 100644 --- a/src/execution/dql/aggregate/stream_distinct.rs +++ b/src/execution/dql/aggregate/stream_distinct.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::expression::ScalarExpression; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; @@ -25,49 +25,32 @@ use itertools::Itertools; pub struct StreamDistinctExecutor { groupby_exprs: Vec, input_schema: SchemaRef, - input_plan: Option, input: ExecId, last_keys: Option>, scratch: Tuple, } -impl From<(AggregateOperator, LogicalPlan)> for StreamDistinctExecutor { - fn from((op, mut input): (AggregateOperator, LogicalPlan)) -> Self { - StreamDistinctExecutor { - groupby_exprs: op.groupby_exprs, - input_schema: input.output_schema().clone(), - input_plan: Some(input), - input: 0, - last_keys: None, - scratch: Tuple::default(), - } - } -} +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for StreamDistinctExecutor { + type Input = (AggregateOperator, LogicalPlan); -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for StreamDistinctExecutor { fn into_executor( - mut self, + (op, mut input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = build_read( - arena, - self.input_plan - .take() - .expect("stream distinct input plan initialized"), - cache, - transaction, - ); - arena.push(ExecNode::StreamDistinct(self)) + let input_schema = input.output_schema().clone(); + let input = build_read(arena, input, cache, transaction); + arena.push(ExecNode::StreamDistinct(StreamDistinctExecutor { + groupby_exprs: op.groupby_exprs, + input_schema, + input, + last_keys: None, + scratch: Tuple::default(), + })) } -} -impl StreamDistinctExecutor { - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), 
DatabaseError> { loop { if !arena.next_tuple(self.input)? { arena.finish(); @@ -98,7 +81,7 @@ mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; use crate::errors::DatabaseError; use crate::execution::dql::aggregate::stream_distinct::StreamDistinctExecutor; - use crate::execution::try_collect; + use crate::execution::{execute_input, try_collect}; use crate::expression::ScalarExpression; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; @@ -185,8 +168,8 @@ mod tests { let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; - let tuples = try_collect(crate::execution::execute( - StreamDistinctExecutor::from((agg, plan.childrens.pop_only())), + let tuples = try_collect(execute_input::<_, StreamDistinctExecutor>( + (agg, plan.childrens.pop_only()), (&table_cache, &view_cache, &meta_cache), &mut transaction, ))?; @@ -238,8 +221,8 @@ mod tests { let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; - let tuples = try_collect(crate::execution::execute( - StreamDistinctExecutor::from((agg, plan.childrens.pop_only())), + let tuples = try_collect(execute_input::<_, StreamDistinctExecutor>( + (agg, plan.childrens.pop_only()), (&table_cache, &view_cache, &meta_cache), &mut transaction, ))?; diff --git a/src/execution/dql/describe.rs b/src/execution/dql/describe.rs index e1011bcd..666c3721 100644 --- a/src/execution/dql/describe.rs +++ b/src/execution/dql/describe.rs @@ -14,7 +14,7 @@ use crate::catalog::{ColumnCatalog, ColumnRef, TableName}; use crate::errors::DatabaseError; -use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor}; use crate::planner::operator::describe::DescribeOperator; use 
crate::storage::Transaction; use crate::types::value::{DataValue, Utf8Type}; @@ -66,6 +66,23 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Describe { } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Describe { + type Input = DescribeOperator; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + _: ExecutionCaches<'a>, + _: *mut T, + ) -> ExecId { + arena.push(ExecNode::Describe(Describe::from(input))) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + Describe::next_tuple(self, arena) + } +} + impl Describe { pub(crate) fn next_tuple<'a, T: Transaction + 'a>( &mut self, diff --git a/src/execution/dql/dummy.rs b/src/execution/dql/dummy.rs index 6d7c26cd..22ec66ce 100644 --- a/src/execution/dql/dummy.rs +++ b/src/execution/dql/dummy.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor}; use crate::storage::Transaction; use crate::types::tuple::Tuple; @@ -40,6 +40,23 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Dummy { } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Dummy { + type Input = Self; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + _: ExecutionCaches<'a>, + _: *mut T, + ) -> ExecId { + arena.push(ExecNode::Dummy(input)) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + Dummy::next_tuple(self, arena) + } +} + impl Dummy { pub(crate) fn next_tuple<'a, T: Transaction + 'a>( &mut self, diff --git a/src/execution/dql/except.rs b/src/execution/dql/except.rs index 912eca2c..704fe703 100644 --- a/src/execution/dql/except.rs +++ b/src/execution/dql/except.rs @@ -13,14 +13,16 @@ // limitations under the License. 
use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, +}; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple::Tuple; use ahash::{HashMap, HashMapExt}; pub struct Except { - left_plan: Option, - right_plan: Option, + left_plan: LogicalPlan, + right_plan: LogicalPlan, left_input: ExecId, right_input: ExecId, except_col: HashMap, @@ -30,8 +32,8 @@ pub struct Except { impl From<(LogicalPlan, LogicalPlan)> for Except { fn from((left_input, right_input): (LogicalPlan, LogicalPlan)) -> Self { Except { - left_plan: Some(left_input), - right_plan: Some(right_input), + left_plan: left_input, + right_plan: right_input, left_input: 0, right_input: 0, except_col: HashMap::new(), @@ -47,26 +49,29 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Except { cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.left_input = build_read( - arena, - self.left_plan - .take() - .expect("except left input plan initialized"), - cache, - transaction, - ); - self.right_input = build_read( - arena, - self.right_plan - .take() - .expect("except right input plan initialized"), - cache, - transaction, - ); + self.left_input = build_read(arena, take_plan(&mut self.left_plan), cache, transaction); + self.right_input = build_read(arena, take_plan(&mut self.right_plan), cache, transaction); arena.push(ExecNode::Except(self)) } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Except { + type Input = (LogicalPlan, LogicalPlan); + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { + >::into_executor(Self::from(input), arena, cache, transaction) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + 
Except::next_tuple(self, arena) + } +} + impl Except { pub(crate) fn next_tuple<'a, T: Transaction + 'a>( &mut self, diff --git a/src/execution/dql/explain.rs b/src/execution/dql/explain.rs index d1e5f4df..c6277b2d 100644 --- a/src/execution/dql/explain.rs +++ b/src/execution/dql/explain.rs @@ -13,19 +13,23 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor}; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::value::{DataValue, Utf8Type}; use sqlparser::ast::CharLengthUnits; pub struct Explain { - plan: Option, + plan: LogicalPlan, + emitted: bool, } impl From for Explain { fn from(plan: LogicalPlan) -> Self { - Explain { plan: Some(plan) } + Explain { + plan, + emitted: false, + } } } @@ -40,25 +44,46 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Explain { } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Explain { + type Input = LogicalPlan; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + _: ExecutionCaches<'a>, + _: *mut T, + ) -> ExecId { + arena.push(ExecNode::Explain(Explain { + plan: input, + emitted: false, + })) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + Explain::next_tuple(self, arena) + } +} + impl Explain { pub(crate) fn next_tuple<'a, T: Transaction + 'a>( &mut self, arena: &mut ExecArena<'a, T>, ) -> Result<(), DatabaseError> { - let Some(plan) = self.plan.take() else { + if self.emitted { arena.finish(); return Ok(()); - }; + } let output = arena.result_tuple_mut(); output.pk = None; output.values.clear(); output.values.push(DataValue::Utf8 { - value: plan.explain(0), + value: self.plan.explain(0), ty: Utf8Type::Variable(None), unit: CharLengthUnits::Characters, }); + self.emitted = true; arena.resume(); Ok(()) } diff 
--git a/src/execution/dql/filter.rs b/src/execution/dql/filter.rs index 70c5a94d..2d809a04 100644 --- a/src/execution/dql/filter.rs +++ b/src/execution/dql/filter.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::expression::ScalarExpression; use crate::planner::operator::filter::FilterOperator; use crate::planner::LogicalPlan; @@ -22,46 +22,28 @@ use crate::types::tuple::SchemaRef; pub struct Filter { predicate: ScalarExpression, input_schema: SchemaRef, - input_plan: Option, input: ExecId, } -impl From<(FilterOperator, LogicalPlan)> for Filter { - fn from((FilterOperator { predicate, .. }, mut input): (FilterOperator, LogicalPlan)) -> Self { - let input_schema = input.output_schema().clone(); - Filter { - predicate, - input_schema, - input_plan: Some(input), - input: 0, - } - } -} +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Filter { + type Input = (FilterOperator, LogicalPlan); -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Filter { fn into_executor( - mut self, + (FilterOperator { predicate, .. 
}, mut input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = build_read( - arena, - self.input_plan - .take() - .expect("filter input plan initialized"), - cache, - transaction, - ); - arena.push(ExecNode::Filter(self)) + let input_schema = input.output_schema().clone(); + let input = build_read(arena, input, cache, transaction); + arena.push(ExecNode::Filter(Filter { + predicate, + input_schema, + input, + })) } -} -impl Filter { - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { loop { if !arena.next_tuple(self.input)? { arena.finish(); diff --git a/src/execution/dql/function_scan.rs b/src/execution/dql/function_scan.rs index e8c1f689..bcec714f 100644 --- a/src/execution/dql/function_scan.rs +++ b/src/execution/dql/function_scan.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use crate::errors::DatabaseError; -use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor}; use crate::expression::function::table::TableFunction; use crate::planner::operator::function_scan::FunctionScanOperator; use crate::storage::Transaction; @@ -44,6 +44,23 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for FunctionScan { } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for FunctionScan { + type Input = FunctionScanOperator; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + _: ExecutionCaches<'a>, + _: *mut T, + ) -> ExecId { + arena.push(ExecNode::FunctionScan(FunctionScan::from(input))) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + FunctionScan::next_tuple(self, arena) + } +} + impl FunctionScan { pub(crate) fn next_tuple<'a, T: Transaction + 'a>( &mut self, diff --git a/src/execution/dql/index_scan.rs b/src/execution/dql/index_scan.rs index b5181599..5b59acdf 100644 --- a/src/execution/dql/index_scan.rs +++ b/src/execution/dql/index_scan.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use crate::errors::DatabaseError; -use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor}; use crate::expression::range_detacher::Range; use crate::planner::operator::table_scan::TableScanOperator; use crate::storage::{IndexIter, Iter, Transaction}; @@ -74,6 +74,29 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for IndexScan<'a, T> { } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for IndexScan<'a, T> { + type Input = ( + TableScanOperator, + IndexMetaRef, + Range, + Option>, + Option>, + ); + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + _: ExecutionCaches<'a>, + _: *mut T, + ) -> ExecId { + arena.push(ExecNode::IndexScan(IndexScan::from(input))) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + IndexScan::next_tuple(self, arena) + } +} + impl<'a, T: Transaction + 'a> IndexScan<'a, T> { pub(crate) fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { if self.iter.is_none() { diff --git a/src/execution/dql/join/hash_join.rs b/src/execution/dql/join/hash_join.rs index 86d56555..5a2a6f29 100644 --- a/src/execution/dql/join/hash_join.rs +++ b/src/execution/dql/join/hash_join.rs @@ -25,7 +25,9 @@ use crate::execution::dql::join::hash::{ }; use crate::execution::dql::join::joins_nullable; use crate::execution::dql::sort::BumpVec; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, +}; use crate::expression::ScalarExpression; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; use crate::planner::LogicalPlan; @@ -47,8 +49,8 @@ pub struct HashJoin { filter: Option, left_schema_len: usize, right_schema_len: usize, - left_input_plan: 
Option, - right_input_plan: Option, + left_input_plan: LogicalPlan, + right_input_plan: LogicalPlan, left_input: ExecId, right_input: ExecId, bump: Box, @@ -120,8 +122,8 @@ impl From<(JoinOperator, LogicalPlan, LogicalPlan)> for HashJoin { }), left_schema_len, right_schema_len, - left_input_plan: Some(left_input), - right_input_plan: Some(right_input), + left_input_plan: left_input, + right_input_plan: right_input, left_input: 0, right_input: 0, bump: Box::::default(), @@ -275,17 +277,13 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashJoin { ) -> ExecId { self.left_input = build_read( arena, - self.left_input_plan - .take() - .expect("hash join left input plan initialized"), + take_plan(&mut self.left_input_plan), cache, transaction, ); self.right_input = build_read( arena, - self.right_input_plan - .take() - .expect("hash join right input plan initialized"), + take_plan(&mut self.right_input_plan), cache, transaction, ); @@ -293,6 +291,23 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashJoin { } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for HashJoin { + type Input = (JoinOperator, LogicalPlan, LogicalPlan); + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { + >::into_executor(Self::from(input), arena, cache, transaction) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + HashJoin::next_tuple(self, arena) + } +} + impl HashJoin { pub(crate) fn next_tuple<'a, T: Transaction + 'a>( &mut self, diff --git a/src/execution/dql/join/nested_loop_join.rs b/src/execution/dql/join/nested_loop_join.rs index 0438ad8a..7443ad53 100644 --- a/src/execution/dql/join/nested_loop_join.rs +++ b/src/execution/dql/join/nested_loop_join.rs @@ -19,7 +19,9 @@ use super::joins_nullable; use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use 
crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, +}; use crate::expression::ScalarExpression; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; use crate::planner::LogicalPlan; @@ -85,7 +87,7 @@ impl EqualCondition { /// |--------------------------------|----------------|----------------| /// | Full | left | right | pub struct NestedLoopJoin { - left_input_plan: Option, + left_input_plan: LogicalPlan, right_input_plan: LogicalPlan, output_schema_ref: SchemaRef, ty: JoinType, @@ -151,7 +153,7 @@ impl From<(JoinOperator, LogicalPlan, LogicalPlan)> for NestedLoopJoin { ); NestedLoopJoin { - left_input_plan: Some(left_input), + left_input_plan: left_input, right_input_plan: right_input, output_schema_ref, ty: join_type, @@ -172,9 +174,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { ) -> ExecId { self.left_input = build_read( arena, - self.left_input_plan - .take() - .expect("nested loop join left input plan initialized"), + take_plan(&mut self.left_input_plan), cache, transaction, ); @@ -182,8 +182,28 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for NestedLoopJoin { + type Input = (JoinOperator, LogicalPlan, LogicalPlan); + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { + >::into_executor(Self::from(input), arena, cache, transaction) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + NestedLoopJoin::next_tuple(self, arena) + } +} + impl NestedLoopJoin { - fn build_right_input<'a, T: Transaction + 'a>(&self, arena: &mut ExecArena<'a, T>) -> ExecId { + fn build_right_input<'a, T: Transaction + 'a>( + &mut self, + arena: &mut 
ExecArena<'a, T>, + ) -> ExecId { let cache = (arena.table_cache(), arena.view_cache(), arena.meta_cache()); let transaction = arena.transaction_mut() as *mut T; build_read(arena, self.right_input_plan.clone(), cache, transaction) diff --git a/src/execution/dql/limit.rs b/src/execution/dql/limit.rs index 9cd3e814..a7ca9336 100644 --- a/src/execution/dql/limit.rs +++ b/src/execution/dql/limit.rs @@ -13,56 +13,38 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::planner::operator::limit::LimitOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; pub struct Limit { offset: Option, limit: Option, - input_plan: Option, input: ExecId, skipped: usize, emitted: usize, } -impl From<(LimitOperator, LogicalPlan)> for Limit { - fn from((LimitOperator { offset, limit }, input): (LimitOperator, LogicalPlan)) -> Self { - Limit { - offset, - limit, - input_plan: Some(input), - input: 0, - skipped: 0, - emitted: 0, - } - } -} +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Limit { + type Input = (LimitOperator, LogicalPlan); -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Limit { fn into_executor( - mut self, + (LimitOperator { offset, limit }, input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = build_read( - arena, - self.input_plan - .take() - .expect("limit input plan initialized"), - cache, - transaction, - ); - arena.push(ExecNode::Limit(self)) + let input = build_read(arena, input, cache, transaction); + arena.push(ExecNode::Limit(Limit { + offset, + limit, + input, + skipped: 0, + emitted: 0, + })) } -} -impl Limit { - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), 
DatabaseError> { + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { let offset = self.offset.unwrap_or(0); let limit = self.limit.unwrap_or(usize::MAX); diff --git a/src/execution/dql/mark_apply.rs b/src/execution/dql/mark_apply.rs index 58cb8ed9..bbf0d659 100644 --- a/src/execution/dql/mark_apply.rs +++ b/src/execution/dql/mark_apply.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::planner::operator::mark_apply::MarkApplyOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; @@ -24,17 +24,21 @@ use std::sync::Arc; pub struct MarkApply { op: MarkApplyOperator, - left_input_plan: Option, - right_input_plan: Option, - left_input: Option, + right_input_plan: LogicalPlan, + left_input: ExecId, predicate_schema: SchemaRef, left_tuple: Tuple, } -impl From<(MarkApplyOperator, LogicalPlan, LogicalPlan)> for MarkApply { - fn from( - (op, mut left_input, mut right_input): (MarkApplyOperator, LogicalPlan, LogicalPlan), - ) -> Self { +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for MarkApply { + type Input = (MarkApplyOperator, LogicalPlan, LogicalPlan); + + fn into_executor( + (op, mut left_input, mut right_input): Self::Input, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { let predicate_schema = Arc::new( left_input .output_schema() @@ -43,48 +47,52 @@ impl From<(MarkApplyOperator, LogicalPlan, LogicalPlan)> for MarkApply { .cloned() .collect::(), ); - Self { + let left_input = build_read(arena, left_input, cache, transaction); + arena.push(ExecNode::MarkApply(Self { op, - left_input_plan: Some(left_input), - right_input_plan: Some(right_input), - left_input: None, + right_input_plan: right_input, + left_input, 
predicate_schema, left_tuple: Tuple::default(), - } + })) } -} -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for MarkApply { - fn into_executor( - mut self, - arena: &mut ExecArena<'a, T>, - cache: ExecutionCaches<'a>, - transaction: *mut T, - ) -> ExecId { - self.left_input = Some(build_read( - arena, - self.left_input_plan - .take() - .expect("mark apply left input plan initialized"), - cache, - transaction, - )); - arena.push(ExecNode::MarkApply(self)) + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + if !arena.next_tuple(self.left_input)? { + arena.finish(); + return Ok(()); + } + + self.left_tuple = mem::take(arena.result_tuple_mut()); + let right_input = self.build_right_input(arena); + let mut matched = false; + + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + if self.predicate_matched(&self.left_tuple, right_tuple)? { + matched = true; + break; + } + } + + arena.produce_tuple(mem::take(&mut self.left_tuple)); + arena + .result_tuple_mut() + .values + .push(DataValue::Boolean(matched)); + arena.resume(); + Ok(()) } } impl MarkApply { - fn build_right_input<'a, T: Transaction + 'a>(&self, arena: &mut ExecArena<'a, T>) -> ExecId { + fn build_right_input<'a, T: Transaction + 'a>( + &mut self, + arena: &mut ExecArena<'a, T>, + ) -> ExecId { let cache = (arena.table_cache(), arena.view_cache(), arena.meta_cache()); let transaction = arena.transaction_mut() as *mut T; - build_read( - arena, - self.right_input_plan - .clone() - .expect("mark apply right input plan initialized"), - cache, - transaction, - ) + build_read(arena, self.right_input_plan.clone(), cache, transaction) } fn predicate_matched( @@ -92,7 +100,6 @@ impl MarkApply { left_tuple: &Tuple, right_tuple: &Tuple, ) -> Result { - // FIXME let values = Vec::from_iter( left_tuple .values @@ -111,44 +118,13 @@ impl MarkApply { Ok(true) } - - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut 
ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { - let left_input = self - .left_input - .expect("mark apply left input executor initialized"); - - if !arena.next_tuple(left_input)? { - arena.finish(); - return Ok(()); - } - - self.left_tuple = mem::take(arena.result_tuple_mut()); - let right_input = self.build_right_input(arena); - let mut matched = false; - - while arena.next_tuple(right_input)? { - let right_tuple = arena.result_tuple(); - if self.predicate_matched(&self.left_tuple, right_tuple)? { - matched = true; - break; - } - } - - arena.produce_tuple(mem::take(&mut self.left_tuple)); - arena.result_tuple_mut().values.push(DataValue::Boolean(matched)); - arena.resume(); - Ok(()) - } } #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { use super::*; use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; - use crate::execution::{execute, try_collect}; + use crate::execution::{execute_input, try_collect}; use crate::expression::{BinaryOperator, ScalarExpression}; use crate::planner::operator::mark_apply::MarkApplyOperator; use crate::planner::operator::values::ValuesOperator; @@ -232,12 +208,12 @@ mod tests { let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; - let tuples = try_collect(execute( - MarkApply::from(( + let tuples = try_collect(execute_input::<_, MarkApply>( + ( MarkApplyOperator::new_exists(build_marker_column(), vec![predicate]), left, right, - )), + ), (&table_cache, &view_cache, &meta_cache), &mut transaction, ))?; @@ -284,12 +260,12 @@ mod tests { let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; - let tuples = try_collect(execute( - MarkApply::from(( + let tuples = try_collect(execute_input::<_, MarkApply>( + ( MarkApplyOperator::new_exists(build_marker_column(), vec![predicate]), left, right, - )), + ), (&table_cache, &view_cache, &meta_cache), &mut transaction, 
))?; diff --git a/src/execution/dql/mod.rs b/src/execution/dql/mod.rs index 6248c766..3e7ea856 100644 --- a/src/execution/dql/mod.rs +++ b/src/execution/dql/mod.rs @@ -23,8 +23,8 @@ pub(crate) mod index_scan; pub(crate) mod join; pub(crate) mod limit; pub(crate) mod mark_apply; -pub(crate) mod scalar_apply; pub(crate) mod projection; +pub(crate) mod scalar_apply; pub(crate) mod scalar_subquery; pub(crate) mod seq_scan; pub(crate) mod show_table; diff --git a/src/execution/dql/projection.rs b/src/execution/dql/projection.rs index eeca5b65..38540538 100644 --- a/src/execution/dql/projection.rs +++ b/src/execution/dql/projection.rs @@ -14,7 +14,7 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::expression::ScalarExpression; use crate::planner::operator::project::ProjectOperator; use crate::planner::LogicalPlan; @@ -25,47 +25,30 @@ use crate::types::value::DataValue; pub struct Projection { exprs: Vec, input_schema: SchemaRef, - input_plan: Option, input: ExecId, scratch: Tuple, } -impl From<(ProjectOperator, LogicalPlan)> for Projection { - fn from((ProjectOperator { exprs }, mut input): (ProjectOperator, LogicalPlan)) -> Self { - Projection { - exprs, - input_schema: input.output_schema().clone(), - input_plan: Some(input), - input: 0, - scratch: Tuple::default(), - } - } -} +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Projection { + type Input = (ProjectOperator, LogicalPlan); -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Projection { fn into_executor( - mut self, + (ProjectOperator { exprs }, mut input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = build_read( - arena, - self.input_plan - .take() - .expect("projection input plan initialized"), - cache, 
- transaction, - ); - arena.push(ExecNode::Projection(self)) + let input_schema = input.output_schema().clone(); + let input = build_read(arena, input, cache, transaction); + arena.push(ExecNode::Projection(Projection { + exprs, + input_schema, + input, + scratch: Tuple::default(), + })) } -} -impl Projection { - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { if !arena.next_tuple(self.input)? { arena.finish(); return Ok(()); @@ -85,7 +68,9 @@ impl Projection { arena.resume(); Ok(()) } +} +impl Projection { pub fn projection( tuple: &Tuple, exprs: &[ScalarExpression], diff --git a/src/execution/dql/scalar_apply.rs b/src/execution/dql/scalar_apply.rs index 6949a80b..d8f0ae77 100644 --- a/src/execution/dql/scalar_apply.rs +++ b/src/execution/dql/scalar_apply.rs @@ -15,68 +15,63 @@ use std::mem; use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::planner::operator::scalar_apply::ScalarApplyOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple::Tuple; pub struct ScalarApply { - left_input_plan: Option, - right_input_plan: Option, - left_input: Option, - right_input: Option, + left_input: ExecId, + right_input: ExecId, cached_right: Option, } -impl From<(ScalarApplyOperator, LogicalPlan, LogicalPlan)> for ScalarApply { - fn from((_, left_input, right_input): (ScalarApplyOperator, LogicalPlan, LogicalPlan)) -> Self { - Self { - left_input_plan: Some(left_input), - right_input_plan: Some(right_input), - left_input: None, - right_input: None, - cached_right: None, - } - } -} +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for ScalarApply { + type Input = 
(ScalarApplyOperator, LogicalPlan, LogicalPlan); -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for ScalarApply { fn into_executor( - mut self, + (_, left_input, right_input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.left_input = Some(build_read( - arena, - self.left_input_plan - .take() - .expect("scalar apply left input plan initialized"), - cache, - transaction, - )); - self.right_input = Some(build_read( - arena, - self.right_input_plan - .take() - .expect("scalar apply right input plan initialized"), - cache, - transaction, - )); - arena.push(ExecNode::ScalarApply(self)) + let left_input = build_read(arena, left_input, cache, transaction); + let right_input = build_read(arena, right_input, cache, transaction); + arena.push(ExecNode::ScalarApply(Self { + left_input, + right_input, + cached_right: None, + })) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + Self::load_right_once(&mut self.cached_right, self.right_input, arena)?; + + let right_tuple = self + .cached_right + .as_ref() + .expect("scalar apply right tuple initialized"); + if !arena.next_tuple(self.left_input)? { + arena.finish(); + return Ok(()); + } + arena + .result_tuple_mut() + .values + .extend(right_tuple.values.iter().cloned()); + arena.resume(); + Ok(()) } } impl ScalarApply { fn load_right_once<'a, T: Transaction + 'a>( cached_right: &mut Option, - right_input: Option, + right_input: ExecId, arena: &mut ExecArena<'a, T>, ) -> Result<(), DatabaseError> { if cached_right.is_none() { - let right_input = right_input - .expect("scalar apply right input executor initialized"); if !arena.next_tuple(right_input)? 
{ return Err(DatabaseError::InvalidValue( "scalar apply right input returned no rows".to_string(), @@ -87,35 +82,13 @@ impl ScalarApply { Ok(()) } - - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { - Self::load_right_once(&mut self.cached_right, self.right_input, arena)?; - - let right_tuple = self.cached_right - .as_ref() - .expect("scalar apply right tuple initialized"); - let left_input = self - .left_input - .expect("scalar apply left input executor initialized"); - - if !arena.next_tuple(left_input)? { - arena.finish(); - return Ok(()); - } - arena.result_tuple_mut().values.extend(right_tuple.values.iter().cloned()); - arena.resume(); - Ok(()) - } } #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { use super::*; use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; - use crate::execution::{execute, try_collect}; + use crate::execution::{execute_input, try_collect}; use crate::planner::operator::scalar_subquery::ScalarSubqueryOperator; use crate::planner::operator::values::ValuesOperator; use crate::planner::operator::Operator; @@ -179,8 +152,8 @@ mod tests { let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; - let tuples = try_collect(execute( - ScalarApply::from((ScalarApplyOperator, left, right)), + let tuples = try_collect(execute_input::<_, ScalarApply>( + (ScalarApplyOperator, left, right), (&table_cache, &view_cache, &meta_cache), &mut transaction, ))?; @@ -208,15 +181,13 @@ mod tests { "left_c1", vec![vec![DataValue::Int32(1)], vec![DataValue::Int32(2)]], ); - let right = ScalarSubqueryOperator::build(build_values( - "right_c1", - vec![vec![DataValue::Null]], - )); + let right = + ScalarSubqueryOperator::build(build_values("right_c1", vec![vec![DataValue::Null]])); let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = 
storage.transaction()?; - let tuples = try_collect(execute( - ScalarApply::from((ScalarApplyOperator, left, right)), + let tuples = try_collect(execute_input::<_, ScalarApply>( + (ScalarApplyOperator, left, right), (&table_cache, &view_cache, &meta_cache), &mut transaction, ))?; diff --git a/src/execution/dql/scalar_subquery.rs b/src/execution/dql/scalar_subquery.rs index 152a8436..0884e4de 100644 --- a/src/execution/dql/scalar_subquery.rs +++ b/src/execution/dql/scalar_subquery.rs @@ -13,58 +13,44 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::planner::operator::scalar_subquery::ScalarSubqueryOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::value::DataValue; pub struct ScalarSubquery { - input_plan: Option, - input: Option, + input: ExecId, value_count: usize, + returned: bool, } -impl From<(ScalarSubqueryOperator, LogicalPlan)> for ScalarSubquery { - fn from((_, mut input): (ScalarSubqueryOperator, LogicalPlan)) -> Self { - Self { - value_count: input.output_schema().len(), - input_plan: Some(input), - input: None, - } - } -} +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for ScalarSubquery { + type Input = (ScalarSubqueryOperator, LogicalPlan); -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for ScalarSubquery { fn into_executor( - mut self, + (_, mut input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = Some(build_read( - arena, - self.input_plan - .take() - .expect("scalar subquery input plan initialized"), - cache, - transaction, - )); - arena.push(ExecNode::ScalarSubquery(self)) + let value_count = input.output_schema().len(); + let input = build_read(arena, input, cache, transaction); + 
arena.push(ExecNode::ScalarSubquery(Self { + input, + value_count, + returned: false, + })) } -} -impl ScalarSubquery { - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { - let Some(input) = self.input.take() else { + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + if self.returned { arena.finish(); return Ok(()); - }; + } + self.returned = true; - let has_first = arena.next_tuple(input)?; + let has_first = arena.next_tuple(self.input)?; if !has_first { let output = arena.result_tuple_mut(); output.pk = None; @@ -76,7 +62,7 @@ impl ScalarSubquery { return Ok(()); } - if arena.next_tuple(input)? { + if arena.next_tuple(self.input)? { return Err(DatabaseError::InvalidValue( "scalar subquery returned more than one row".to_string(), )); diff --git a/src/execution/dql/seq_scan.rs b/src/execution/dql/seq_scan.rs index fe7b1631..8a214926 100644 --- a/src/execution/dql/seq_scan.rs +++ b/src/execution/dql/seq_scan.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use crate::errors::DatabaseError; -use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor}; use crate::planner::operator::table_scan::TableScanOperator; use crate::storage::{Iter, Transaction, TupleIter}; @@ -42,6 +42,23 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for SeqScan<'a, T> { } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for SeqScan<'a, T> { + type Input = TableScanOperator; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + _: ExecutionCaches<'a>, + _: *mut T, + ) -> ExecId { + arena.push(ExecNode::SeqScan(SeqScan::from(input))) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + SeqScan::next_tuple(self, arena) + } +} + impl<'a, T: Transaction + 'a> SeqScan<'a, T> { pub(crate) fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { if self.iter.is_none() { diff --git a/src/execution/dql/sort.rs b/src/execution/dql/sort.rs index 6de82cfe..ba2b0811 100644 --- a/src/execution/dql/sort.rs +++ b/src/execution/dql/sort.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::planner::operator::sort::{SortField, SortOperator}; use crate::planner::LogicalPlan; use crate::storage::table_codec::BumpBytes; @@ -281,45 +281,30 @@ pub struct Sort { limit: Option, input_schema: SchemaRef, input: ExecId, - input_plan: Option, } -impl From<(SortOperator, LogicalPlan)> for Sort { - fn from((SortOperator { sort_fields, limit }, mut input): (SortOperator, LogicalPlan)) -> Self { - Sort { - output: None, - arena: Box::::default(), - sort_fields, - limit, - input_schema: input.output_schema().clone(), - input: 0, - input_plan: Some(input), - } - } -} +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Sort { + type Input = (SortOperator, LogicalPlan); -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Sort { fn into_executor( - mut self, + (SortOperator { sort_fields, limit }, mut input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = build_read( - arena, - self.input_plan.take().expect("sort input plan initialized"), - cache, - transaction, - ); - arena.push(ExecNode::Sort(self)) + let input_schema = input.output_schema().clone(); + let input = build_read(arena, input, cache, transaction); + arena.push(ExecNode::Sort(Sort { + output: None, + arena: Box::::default(), + sort_fields, + limit, + input_schema, + input, + })) } -} -impl Sort { - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { if self.output.is_none() { let mut tuples = NullableVec::new(&self.arena); diff --git a/src/execution/dql/top_k.rs b/src/execution/dql/top_k.rs index 07cffa0e..849571eb 100644 --- 
a/src/execution/dql/top_k.rs +++ b/src/execution/dql/top_k.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::execution::dql::sort::BumpVec; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; use crate::planner::operator::sort::SortField; use crate::planner::operator::top_k::TopKOperator; use crate::planner::LogicalPlan; @@ -95,12 +95,13 @@ pub struct TopK { limit: usize, offset: Option, input_schema: SchemaRef, - input_plan: Option, input: ExecId, } -impl From<(TopKOperator, LogicalPlan)> for TopK { - fn from( +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for TopK { + type Input = (TopKOperator, LogicalPlan); + + fn into_executor( ( TopKOperator { sort_fields, @@ -108,46 +109,26 @@ impl From<(TopKOperator, LogicalPlan)> for TopK { offset, }, mut input, - ): (TopKOperator, LogicalPlan), - ) -> Self { - TopK { + ): Self::Input, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { + let input_schema = input.output_schema().clone(); + let input = build_read(arena, input, cache, transaction); + arena.push(ExecNode::TopK(TopK { output: None, arena: Box::::default(), sort_fields, limit, offset, - input_schema: input.output_schema().clone(), - input_plan: Some(input), - input: 0, - } + input_schema, + input, + })) } -} -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for TopK { - fn into_executor( - mut self, - arena: &mut ExecArena<'a, T>, - cache: ExecutionCaches<'a>, - transaction: *mut T, - ) -> ExecId { - self.input = build_read( - arena, - self.input_plan - .take() - .expect("top-k input plan initialized"), - cache, - transaction, - ); - arena.push(ExecNode::TopK(self)) - } -} - -impl TopK { #[allow(clippy::mutable_key_type)] - pub(crate) fn next_tuple<'a, T: Transaction + 'a>( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { 
+ fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { if self.output.is_none() { let keep_count = self.offset.unwrap_or(0) + self.limit; let mut set = BTreeSet::new(); diff --git a/src/execution/dql/union.rs b/src/execution/dql/union.rs index 78c72e38..7e8ba5e0 100644 --- a/src/execution/dql/union.rs +++ b/src/execution/dql/union.rs @@ -13,12 +13,14 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ + build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, +}; use crate::planner::LogicalPlan; use crate::storage::Transaction; pub struct Union { - left_plan: Option, - right_plan: Option, + left_plan: LogicalPlan, + right_plan: LogicalPlan, left_input: ExecId, right_input: ExecId, reading_left: bool, @@ -27,8 +29,8 @@ pub struct Union { impl From<(LogicalPlan, LogicalPlan)> for Union { fn from((left_input, right_input): (LogicalPlan, LogicalPlan)) -> Self { Union { - left_plan: Some(left_input), - right_plan: Some(right_input), + left_plan: left_input, + right_plan: right_input, left_input: 0, right_input: 0, reading_left: true, @@ -43,26 +45,29 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Union { cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.left_input = build_read( - arena, - self.left_plan - .take() - .expect("union left input plan initialized"), - cache, - transaction, - ); - self.right_input = build_read( - arena, - self.right_plan - .take() - .expect("union right input plan initialized"), - cache, - transaction, - ); + self.left_input = build_read(arena, take_plan(&mut self.left_plan), cache, transaction); + self.right_input = build_read(arena, take_plan(&mut self.right_plan), cache, transaction); arena.push(ExecNode::Union(self)) } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Union { + type Input = 
(LogicalPlan, LogicalPlan); + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { + >::into_executor(Self::from(input), arena, cache, transaction) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + Union::next_tuple(self, arena) + } +} + impl Union { pub(crate) fn next_tuple<'a, T: Transaction + 'a>( &mut self, diff --git a/src/execution/dql/values.rs b/src/execution/dql/values.rs index 9e462ff4..492b3a22 100644 --- a/src/execution/dql/values.rs +++ b/src/execution/dql/values.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; +use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor}; use crate::planner::operator::values::ValuesOperator; use crate::storage::Transaction; use crate::types::tuple::SchemaRef; @@ -45,6 +45,23 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Values { } } +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Values { + type Input = ValuesOperator; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + _: ExecutionCaches<'a>, + _: *mut T, + ) -> ExecId { + arena.push(ExecNode::Values(Values::from(input))) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + Values::next_tuple(self, arena) + } +} + impl Values { pub(crate) fn next_tuple<'a, T: Transaction + 'a>( &mut self, diff --git a/src/execution/mod.rs b/src/execution/mod.rs index c57a85eb..85e77dba 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -61,7 +61,6 @@ use crate::planner::operator::join::JoinCondition; use crate::planner::operator::{Operator, PhysicalOption, PlanImpl}; use crate::planner::LogicalPlan; use crate::storage::{StatisticsMetaCache, TableCache, Transaction, ViewCache}; -use 
crate::types::index::IndexInfo; use crate::types::tuple::Tuple; pub(crate) type ExecutionCaches<'a> = (&'a TableCache, &'a ViewCache, &'a StatisticsMetaCache); @@ -154,111 +153,108 @@ pub(crate) enum ExecNode<'a, T: Transaction + 'a> { Empty, } -pub(crate) trait ExecNodeRunner<'a, T: Transaction + 'a> { - fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError>; -} +pub(crate) trait ExecutorNode<'a, T: Transaction + 'a>: Sized { + type Input; -macro_rules! impl_exec_node_runner { - ($($ty:ty),* $(,)?) => { - $( - impl<'a, T: Transaction + 'a> ExecNodeRunner<'a, T> for $ty { - fn next_tuple( - &mut self, - arena: &mut ExecArena<'a, T>, - ) -> Result<(), DatabaseError> { - <$ty>::next_tuple(self, arena) - } - } - )* - }; -} + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId; -impl_exec_node_runner!( - AddColumn, - Analyze, - ChangeColumn, - CopyFromFile, - CopyToFile, - CreateIndex, - CreateTable, - CreateView, - Delete, - Describe, - DropColumn, - DropIndex, - DropTable, - DropView, - Dummy, - Except, - Explain, - Filter, - FunctionScan, - HashAggExecutor, - HashJoin, - IndexScan<'a, T>, - Insert, - Limit, - MarkApply, - NestedLoopJoin, - Projection, - ScalarApply, - ScalarSubquery, - SeqScan<'a, T>, - ShowTables, - ShowViews, - SimpleAggExecutor, - Sort, - StreamDistinctExecutor, - TopK, - Truncate, - Union, - Update, - Values, -); + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError>; +} -impl<'a, T: Transaction + 'a> ExecNodeRunner<'a, T> for ExecNode<'a, T> { +impl<'a, T: Transaction + 'a> ExecNode<'a, T> { fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { match self { - ExecNode::AddColumn(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Analyze(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::ChangeColumn(exec) => ExecNodeRunner::next_tuple(exec, 
arena), - ExecNode::CopyFromFile(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::CopyToFile(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::CreateIndex(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::CreateTable(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::CreateView(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Delete(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Describe(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::DropColumn(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::DropIndex(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::DropTable(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::DropView(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Dummy(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Except(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Explain(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Filter(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::FunctionScan(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::HashAgg(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::HashJoin(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::IndexScan(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Insert(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Limit(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::MarkApply(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::NestedLoopJoin(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Projection(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::ScalarApply(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::ScalarSubquery(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::SeqScan(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::ShowTables(exec) => 
ExecNodeRunner::next_tuple(exec, arena), - ExecNode::ShowViews(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::SimpleAgg(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Sort(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::StreamDistinct(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::TopK(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Truncate(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Union(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Update(exec) => ExecNodeRunner::next_tuple(exec, arena), - ExecNode::Values(exec) => ExecNodeRunner::next_tuple(exec, arena), + ExecNode::AddColumn(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::Analyze(exec) => >::next_tuple(exec, arena), + ExecNode::ChangeColumn(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::CopyFromFile(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::CopyToFile(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::CreateIndex(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::CreateTable(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::CreateView(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::Delete(exec) => >::next_tuple(exec, arena), + ExecNode::Describe(exec) => >::next_tuple(exec, arena), + ExecNode::DropColumn(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::DropIndex(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::DropTable(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::DropView(exec) => >::next_tuple(exec, arena), + ExecNode::Dummy(exec) => >::next_tuple(exec, arena), + ExecNode::Except(exec) => >::next_tuple(exec, arena), + ExecNode::Explain(exec) => >::next_tuple(exec, arena), + ExecNode::Filter(exec) => >::next_tuple(exec, arena), + ExecNode::FunctionScan(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::HashAgg(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::HashJoin(exec) => >::next_tuple(exec, arena), + 
ExecNode::IndexScan(exec) => { + as ExecutorNode<'a, T>>::next_tuple(exec, arena) + } + ExecNode::Insert(exec) => >::next_tuple(exec, arena), + ExecNode::Limit(exec) => >::next_tuple(exec, arena), + ExecNode::MarkApply(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::NestedLoopJoin(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::Projection(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::ScalarApply(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::ScalarSubquery(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::SeqScan(exec) => { + as ExecutorNode<'a, T>>::next_tuple(exec, arena) + } + ExecNode::ShowTables(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::ShowViews(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::SimpleAgg(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::Sort(exec) => >::next_tuple(exec, arena), + ExecNode::StreamDistinct(exec) => { + >::next_tuple(exec, arena) + } + ExecNode::TopK(exec) => >::next_tuple(exec, arena), + ExecNode::Truncate(exec) => >::next_tuple(exec, arena), + ExecNode::Union(exec) => >::next_tuple(exec, arena), + ExecNode::Update(exec) => >::next_tuple(exec, arena), + ExecNode::Values(exec) => >::next_tuple(exec, arena), ExecNode::Empty => unreachable!("executor node re-entered while active"), } } @@ -350,7 +346,7 @@ impl<'a, T: Transaction + 'a> ExecArena<'a, T> { pub(crate) fn next_tuple(&mut self, id: ExecId) -> Result { self.result.status = None; let mut node = std::mem::replace(&mut self.nodes[id], ExecNode::Empty); - let result = ExecNodeRunner::next_tuple(&mut node, self); + let result = node.next_tuple(self); self.nodes[id] = node; result?; @@ -379,6 +375,189 @@ pub(crate) trait WriteExecutor<'a, T: Transaction + 'a>: Sized { ) -> ExecId; } +macro_rules! 
impl_read_executor_node_via_from { + ($ty:ty, $input:ty) => { + impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for $ty + where + Self: ReadExecutor<'a, T> + From<$input>, + { + type Input = $input; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { + >::into_executor( + Self::from(input), + arena, + cache, + transaction, + ) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + <$ty>::next_tuple(self, arena) + } + } + }; +} + +macro_rules! impl_write_executor_node_via_from { + ($ty:ty, $input:ty) => { + impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for $ty + where + Self: WriteExecutor<'a, T> + From<$input>, + { + type Input = $input; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + cache: ExecutionCaches<'a>, + transaction: *mut T, + ) -> ExecId { + >::into_executor( + Self::from(input), + arena, + cache, + transaction, + ) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + <$ty>::next_tuple(self, arena) + } + } + }; +} + +impl_read_executor_node_via_from!( + CopyToFile, + ( + crate::planner::operator::copy_to_file::CopyToFileOperator, + LogicalPlan + ) +); + +impl_write_executor_node_via_from!( + AddColumn, + crate::planner::operator::alter_table::add_column::AddColumnOperator +); +impl_write_executor_node_via_from!( + Analyze, + ( + crate::planner::operator::analyze::AnalyzeOperator, + LogicalPlan + ) +); +impl_write_executor_node_via_from!( + ChangeColumn, + crate::planner::operator::alter_table::change_column::ChangeColumnOperator +); +impl_write_executor_node_via_from!( + CopyFromFile, + crate::planner::operator::copy_from_file::CopyFromFileOperator +); +impl_write_executor_node_via_from!( + CreateIndex, + ( + crate::planner::operator::create_index::CreateIndexOperator, + LogicalPlan + ) +); +impl_write_executor_node_via_from!( + CreateTable, + 
crate::planner::operator::create_table::CreateTableOperator +); +impl_write_executor_node_via_from!( + CreateView, + crate::planner::operator::create_view::CreateViewOperator +); +impl_write_executor_node_via_from!( + Delete, + ( + crate::planner::operator::delete::DeleteOperator, + LogicalPlan + ) +); +impl_write_executor_node_via_from!( + DropColumn, + crate::planner::operator::alter_table::drop_column::DropColumnOperator +); +impl_write_executor_node_via_from!( + DropIndex, + crate::planner::operator::drop_index::DropIndexOperator +); +impl_write_executor_node_via_from!( + DropTable, + crate::planner::operator::drop_table::DropTableOperator +); +impl_write_executor_node_via_from!( + DropView, + crate::planner::operator::drop_view::DropViewOperator +); +impl_write_executor_node_via_from!( + Insert, + ( + crate::planner::operator::insert::InsertOperator, + LogicalPlan + ) +); +impl_write_executor_node_via_from!( + Truncate, + crate::planner::operator::truncate::TruncateOperator +); +impl_write_executor_node_via_from!( + Update, + ( + crate::planner::operator::update::UpdateOperator, + LogicalPlan + ) +); + +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for ShowTables { + type Input = Self; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + _: ExecutionCaches<'a>, + _: *mut T, + ) -> ExecId { + arena.push(ExecNode::ShowTables(input)) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + ShowTables::next_tuple(self, arena) + } +} + +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for ShowViews { + type Input = Self; + + fn into_executor( + input: Self::Input, + arena: &mut ExecArena<'a, T>, + _: ExecutionCaches<'a>, + _: *mut T, + ) -> ExecId { + arena.push(ExecNode::ShowViews(input)) + } + + fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { + ShowViews::next_tuple(self, arena) + } +} + +pub(crate) fn take_plan(plan: &mut LogicalPlan) -> LogicalPlan { + 
std::mem::replace( + plan, + LogicalPlan::new(Operator::Dummy, crate::planner::Childrens::None), + ) +} + pub(crate) fn build_read<'a, T: Transaction + 'a>( arena: &mut ExecArena<'a, T>, plan: LogicalPlan, @@ -391,16 +570,26 @@ pub(crate) fn build_read<'a, T: Transaction + 'a>( operator, childrens, physical_option, - .. + _output_schema_ref, } = plan; match operator { - Operator::Dummy => Dummy::default().into_executor(arena, cache, transaction), + Operator::Dummy => >::into_executor( + Dummy::default(), + arena, + cache, + transaction, + ), Operator::Aggregate(op) => { let input = childrens.pop_only(); if op.groupby_exprs.is_empty() { - SimpleAggExecutor::from((op, input)).into_executor(arena, cache, transaction) + >::into_executor( + (op, input), + arena, + cache, + transaction, + ) } else if op.is_distinct && op.agg_calls.is_empty() && matches!( @@ -411,129 +600,167 @@ pub(crate) fn build_read<'a, T: Transaction + 'a>( }) ) { - StreamDistinctExecutor::from((op, input)).into_executor(arena, cache, transaction) + >::into_executor( + (op, input), + arena, + cache, + transaction, + ) } else { - HashAggExecutor::from((op, input)).into_executor(arena, cache, transaction) + >::into_executor( + (op, input), + arena, + cache, + transaction, + ) } } - Operator::Filter(op) => { - let input = childrens.pop_only(); - - Filter::from((op, input)).into_executor(arena, cache, transaction) - } + Operator::Filter(op) => >::into_executor( + (op, childrens.pop_only()), + arena, + cache, + transaction, + ), Operator::ScalarApply(op) => { - let (left_input, right_input) = childrens.pop_twins(); - - ScalarApply::from((op, left_input, right_input)) - .into_executor(arena, cache, transaction) + let (left, right) = childrens.pop_twins(); + >::into_executor( + (op, left, right), + arena, + cache, + transaction, + ) } Operator::MarkApply(op) => { - let (left_input, right_input) = childrens.pop_twins(); - - MarkApply::from((op, left_input, right_input)) - .into_executor(arena, cache, 
transaction) + let (left, right) = childrens.pop_twins(); + >::into_executor( + (op, left, right), + arena, + cache, + transaction, + ) } Operator::Join(op) => { - let (left_input, right_input) = childrens.pop_twins(); - - match &op.on { - JoinCondition::On { on, .. } - if !on.is_empty() - && matches!( - physical_option, - Some(PhysicalOption { - plan: PlanImpl::HashJoin, - .. - }) - ) => - { - HashJoin::from((op, left_input, right_input)).into_executor( - arena, - cache, - transaction, - ) - } - _ => NestedLoopJoin::from((op, left_input, right_input)).into_executor( + let use_hash_join = matches!( + &op.on, + JoinCondition::On { on, .. } if !on.is_empty() + ) && matches!( + physical_option, + Some(PhysicalOption { + plan: PlanImpl::HashJoin, + .. + }) + ); + let (left, right) = childrens.pop_twins(); + + if use_hash_join { + >::into_executor( + (op, left, right), + arena, + cache, + transaction, + ) + } else { + >::into_executor( + (op, left, right), arena, cache, transaction, - ), + ) } } - Operator::Project(op) => { - let input = childrens.pop_only(); - - Projection::from((op, input)).into_executor(arena, cache, transaction) - } - Operator::ScalarSubquery(op) => { - let input = childrens.pop_only(); - - ScalarSubquery::from((op, input)).into_executor(arena, cache, transaction) - } + Operator::Project(op) => >::into_executor( + (op, childrens.pop_only()), + arena, + cache, + transaction, + ), + Operator::ScalarSubquery(op) => >::into_executor( + (op, childrens.pop_only()), + arena, + cache, + transaction, + ), Operator::TableScan(op) => { if let Some(PhysicalOption { plan: PlanImpl::IndexScan(index_info), .. }) = physical_option { - let IndexInfo { - meta, - range, - covered_deserializers, - cover_mapping, - .. 
- } = *index_info; - if let Some(range) = range { - return IndexScan::from(( - op, - meta, - range, - covered_deserializers, - cover_mapping, - )) - .into_executor(arena, cache, transaction); + if let Some(range) = index_info.range.clone() { + return as ExecutorNode<'a, T>>::into_executor( + ( + op, + index_info.meta.clone(), + range, + index_info.covered_deserializers.clone(), + index_info.cover_mapping.clone(), + ), + arena, + cache, + transaction, + ); } } - SeqScan::from(op).into_executor(arena, cache, transaction) + as ExecutorNode<'a, T>>::into_executor(op, arena, cache, transaction) } Operator::FunctionScan(op) => { - FunctionScan::from(op).into_executor(arena, cache, transaction) + >::into_executor(op, arena, cache, transaction) } - Operator::Sort(op) => { - let input = childrens.pop_only(); - - Sort::from((op, input)).into_executor(arena, cache, transaction) - } - Operator::Limit(op) => { - let input = childrens.pop_only(); - - Limit::from((op, input)).into_executor(arena, cache, transaction) - } - Operator::TopK(op) => { - let input = childrens.pop_only(); - - TopK::from((op, input)).into_executor(arena, cache, transaction) - } - Operator::Values(op) => Values::from(op).into_executor(arena, cache, transaction), - Operator::ShowTable => arena.push(ExecNode::ShowTables(ShowTables { metas: None })), - Operator::ShowView => arena.push(ExecNode::ShowViews(ShowViews { metas: None })), - Operator::Explain => { - let input = childrens.pop_only(); - - Explain::from(input).into_executor(arena, cache, transaction) - } - Operator::Describe(op) => Describe::from(op).into_executor(arena, cache, transaction), - Operator::Union(_) => { - let (left_input, right_input) = childrens.pop_twins(); - - Union::from((left_input, right_input)).into_executor(arena, cache, transaction) + Operator::Sort(op) => >::into_executor( + (op, childrens.pop_only()), + arena, + cache, + transaction, + ), + Operator::Limit(op) => >::into_executor( + (op, childrens.pop_only()), + arena, + cache, 
+ transaction, + ), + Operator::TopK(op) => >::into_executor( + (op, childrens.pop_only()), + arena, + cache, + transaction, + ), + Operator::Values(op) => { + >::into_executor(op, arena, cache, transaction) } - Operator::Except(_) => { - let (left_input, right_input) = childrens.pop_twins(); - - Except::from((left_input, right_input)).into_executor(arena, cache, transaction) + Operator::ShowTable => >::into_executor( + ShowTables { metas: None }, + arena, + cache, + transaction, + ), + Operator::ShowView => >::into_executor( + ShowViews { metas: None }, + arena, + cache, + transaction, + ), + Operator::Explain => >::into_executor( + childrens.pop_only(), + arena, + cache, + transaction, + ), + Operator::Describe(op) => { + >::into_executor(op, arena, cache, transaction) } + Operator::Union(_) => >::into_executor( + childrens.pop_twins(), + arena, + cache, + transaction, + ), + Operator::Except(_) => >::into_executor( + childrens.pop_twins(), + arena, + cache, + transaction, + ), _ => unreachable!(), } } @@ -557,58 +784,82 @@ pub(crate) fn build_write<'a, T: Transaction + 'a>( Operator::Insert(op) => { let input = childrens.pop_only(); - Insert::from((op, input)).into_executor(arena, cache, transaction) + >::into_executor((op, input), arena, cache, transaction) } Operator::Update(op) => { let input = childrens.pop_only(); - Update::from((op, input)).into_executor(arena, cache, transaction) + >::into_executor((op, input), arena, cache, transaction) } Operator::Delete(op) => { let input = childrens.pop_only(); - Delete::from((op, input)).into_executor(arena, cache, transaction) + >::into_executor((op, input), arena, cache, transaction) + } + Operator::AddColumn(op) => { + >::into_executor(op, arena, cache, transaction) } - Operator::AddColumn(op) => AddColumn::from(op).into_executor(arena, cache, transaction), Operator::ChangeColumn(op) => { - ChangeColumn::from(op).into_executor(arena, cache, transaction) + >::into_executor(op, arena, cache, transaction) + } + 
Operator::DropColumn(op) => { + >::into_executor(op, arena, cache, transaction) + } + Operator::CreateTable(op) => { + >::into_executor(op, arena, cache, transaction) } - Operator::DropColumn(op) => DropColumn::from(op).into_executor(arena, cache, transaction), - Operator::CreateTable(op) => CreateTable::from(op).into_executor(arena, cache, transaction), Operator::CreateIndex(op) => { let input = childrens.pop_only(); - CreateIndex::from((op, input)).into_executor(arena, cache, transaction) + >::into_executor( + (op, input), + arena, + cache, + transaction, + ) + } + Operator::CreateView(op) => { + >::into_executor(op, arena, cache, transaction) + } + Operator::DropTable(op) => { + >::into_executor(op, arena, cache, transaction) + } + Operator::DropView(op) => { + >::into_executor(op, arena, cache, transaction) + } + Operator::DropIndex(op) => { + >::into_executor(op, arena, cache, transaction) + } + Operator::Truncate(op) => { + >::into_executor(op, arena, cache, transaction) } - Operator::CreateView(op) => CreateView::from(op).into_executor(arena, cache, transaction), - Operator::DropTable(op) => DropTable::from(op).into_executor(arena, cache, transaction), - Operator::DropView(op) => DropView::from(op).into_executor(arena, cache, transaction), - Operator::DropIndex(op) => DropIndex::from(op).into_executor(arena, cache, transaction), - Operator::Truncate(op) => Truncate::from(op).into_executor(arena, cache, transaction), Operator::CopyFromFile(op) => { - CopyFromFile::from(op).into_executor(arena, cache, transaction) + >::into_executor(op, arena, cache, transaction) } Operator::CopyToFile(op) => { let input = childrens.pop_only(); - CopyToFile::from((op, input)).into_executor(arena, cache, transaction) + >::into_executor( + (op, input), + arena, + cache, + transaction, + ) } Operator::Analyze(op) => { let input = childrens.pop_only(); - Analyze::from((op, input)).into_executor(arena, cache, transaction) + >::into_executor((op, input), arena, cache, transaction) } 
- operator => build_read( - arena, - LogicalPlan { + operator => { + let plan = LogicalPlan { operator, childrens, physical_option, _output_schema_ref, - }, - cache, - transaction, - ), + }; + build_read(arena, plan, cache, transaction) + } } } @@ -644,6 +895,39 @@ where Executor::new(arena, root) } +#[cfg(all(test, not(target_arch = "wasm32")))] +pub(crate) fn execute_input<'a, T, E>( + input: E::Input, + cache: ExecutionCaches<'a>, + transaction: *mut T, +) -> Executor<'a, T> +where + T: Transaction + 'a, + E: ExecutorNode<'a, T>, +{ + let mut arena = ExecArena::default(); + arena.init_context(cache, transaction); + let root = E::into_executor(input, &mut arena, cache, transaction); + Executor::new(arena, root) +} + +#[cfg(all(test, not(target_arch = "wasm32")))] +#[allow(dead_code)] +pub(crate) fn execute_input_mut<'a, T, E>( + input: E::Input, + cache: ExecutionCaches<'a>, + transaction: *mut T, +) -> Executor<'a, T> +where + T: Transaction + 'a, + E: ExecutorNode<'a, T>, +{ + let mut arena = ExecArena::default(); + arena.init_context(cache, transaction); + let root = E::into_executor(input, &mut arena, cache, transaction); + Executor::new(arena, root) +} + #[cfg(all(test, not(target_arch = "wasm32")))] pub fn try_collect(executor: Executor<'_, T>) -> Result, DatabaseError> { let mut executor = executor; diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index d7041969..ea7f19e2 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -460,9 +460,9 @@ impl ImplementationRuleIndex { Operator::Project(_) if self.contains(ImplementationRuleImpl::Projection) => { Some(PhysicalOption::new(PlanImpl::Project, SortOption::Follow)) } - Operator::ScalarApply(_) if self.contains(ImplementationRuleImpl::ScalarApply) => { - Some(PhysicalOption::new(PlanImpl::ScalarApply, SortOption::Follow)) - } + Operator::ScalarApply(_) if self.contains(ImplementationRuleImpl::ScalarApply) => Some( + 
PhysicalOption::new(PlanImpl::ScalarApply, SortOption::Follow), + ), Operator::ScalarSubquery(_) if self.contains(ImplementationRuleImpl::ScalarSubquery) => { diff --git a/src/optimizer/rule/implementation/mod.rs b/src/optimizer/rule/implementation/mod.rs index 093bb1c2..d3bdb3ee 100644 --- a/src/optimizer/rule/implementation/mod.rs +++ b/src/optimizer/rule/implementation/mod.rs @@ -261,16 +261,21 @@ impl ImplementationRule for ImplementationRuleImpl { ImplementationRuleImpl::Limit => { LimitImplementation.update_best_option(operator, loader, best_physical_option)? } - ImplementationRuleImpl::MarkApply => { - MarkApplyImplementation.update_best_option(operator, loader, best_physical_option)? - } + ImplementationRuleImpl::MarkApply => MarkApplyImplementation.update_best_option( + operator, + loader, + best_physical_option, + )?, ImplementationRuleImpl::Projection => ProjectionImplementation.update_best_option( operator, loader, best_physical_option, )?, - ImplementationRuleImpl::ScalarApply => ScalarApplyImplementation - .update_best_option(operator, loader, best_physical_option)?, + ImplementationRuleImpl::ScalarApply => ScalarApplyImplementation.update_best_option( + operator, + loader, + best_physical_option, + )?, ImplementationRuleImpl::ScalarSubquery => ScalarSubqueryImplementation .update_best_option(operator, loader, best_physical_option)?, ImplementationRuleImpl::SeqScan => { diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index 8a4d452d..6ae67b59 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -243,7 +243,10 @@ impl ColumnPruning { remap_exprs_positions(op.exprs.iter_mut(), removed_positions)?; } Operator::MarkApply(op) => { - Self::remap_exprs_after_child_change(op.predicates_mut().iter_mut(), removed_positions)?; + Self::remap_exprs_after_child_change( + op.predicates_mut().iter_mut(), + removed_positions, + )?; 
} Operator::ScalarApply(_) => {} Operator::ScalarSubquery(_) => {} diff --git a/src/planner/operator/mod.rs b/src/planner/operator/mod.rs index f903e3c4..7b45b6c3 100644 --- a/src/planner/operator/mod.rs +++ b/src/planner/operator/mod.rs @@ -45,10 +45,10 @@ pub mod values; use self::{ aggregate::AggregateOperator, alter_table::add_column::AddColumnOperator, - alter_table::change_column::ChangeColumnOperator, filter::FilterOperator, - join::JoinOperator, limit::LimitOperator, mark_apply::MarkApplyOperator, - project::ProjectOperator, scalar_apply::ScalarApplyOperator, - scalar_subquery::ScalarSubqueryOperator, sort::SortOperator, table_scan::TableScanOperator, + alter_table::change_column::ChangeColumnOperator, filter::FilterOperator, join::JoinOperator, + limit::LimitOperator, mark_apply::MarkApplyOperator, project::ProjectOperator, + scalar_apply::ScalarApplyOperator, scalar_subquery::ScalarSubqueryOperator, sort::SortOperator, + table_scan::TableScanOperator, }; use crate::catalog::ColumnRef; use crate::expression::ScalarExpression; From a56a9d04f2711672187a13a7f284a0aea1681dda Mon Sep 17 00:00:00 2001 From: kould Date: Thu, 2 Apr 2026 00:22:02 +0800 Subject: [PATCH 03/10] perf(execution): avoid materializing join tuples for filter evaluation --- src/execution/dql/join/hash/full_join.rs | 16 ++- src/execution/dql/join/hash/inner_join.rs | 16 ++- src/execution/dql/join/hash/left_anti_join.rs | 2 +- src/execution/dql/join/hash/left_join.rs | 16 ++- src/execution/dql/join/hash/left_semi_join.rs | 10 +- src/execution/dql/join/hash/mod.rs | 9 +- src/execution/dql/join/hash/right_join.rs | 16 ++- src/execution/dql/join/nested_loop_join.rs | 43 ++---- src/execution/dql/mark_apply.rs | 13 +- src/expression/evaluator.rs | 17 ++- src/expression/function/scala.rs | 3 +- src/function/char_length.rs | 3 +- src/function/current_date.rs | 3 +- src/function/current_timestamp.rs | 3 +- src/function/lower.rs | 3 +- src/function/octet_length.rs | 3 +- src/function/upper.rs | 3 +- 
src/macros/mod.rs | 2 +- src/types/tuple.rs | 132 ++++++++++++++++++ 19 files changed, 217 insertions(+), 96 deletions(-) diff --git a/src/execution/dql/join/hash/full_join.rs b/src/execution/dql/join/hash/full_join.rs index fdaa97c4..93c057e3 100644 --- a/src/execution/dql/join/hash/full_join.rs +++ b/src/execution/dql/join/hash/full_join.rs @@ -17,7 +17,7 @@ use crate::execution::dql::join::hash::{ filter, FilterArgs, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, }; use crate::execution::dql::join::hash_join::BuildState; -use crate::types::tuple::Tuple; +use crate::types::tuple::{SplitTupleRef, Tuple}; use crate::types::value::DataValue; use fixedbitset::FixedBitSet; @@ -63,14 +63,10 @@ impl JoinProbeState for FullJoinState { if probe_state.index < build_state.tuples.len() { let (i, Tuple { values, pk }) = &build_state.tuples[probe_state.index]; probe_state.index += 1; - let full_values = Vec::from_iter( - values - .iter() - .chain(probe_state.probe_tuple.values.iter()) - .cloned(), - ); if let Some(filter_args) = filter_args { + let full_values = + SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); if !filter(&full_values, filter_args)? 
{ probe_state.has_filtered = true; self.bits.set(*i, true); @@ -80,6 +76,12 @@ impl JoinProbeState for FullJoinState { ))); } } + let full_values = Vec::from_iter( + values + .iter() + .chain(probe_state.probe_tuple.values.iter()) + .cloned(), + ); build_state.is_used = true; build_state.has_filted = probe_state.has_filtered; return Ok(Some(Tuple::new(pk.clone(), full_values))); diff --git a/src/execution/dql/join/hash/inner_join.rs b/src/execution/dql/join/hash/inner_join.rs index d9413fea..e14aadb3 100644 --- a/src/execution/dql/join/hash/inner_join.rs +++ b/src/execution/dql/join/hash/inner_join.rs @@ -15,7 +15,7 @@ use crate::errors::DatabaseError; use crate::execution::dql::join::hash::{filter, FilterArgs, JoinProbeState, ProbeState}; use crate::execution::dql::join::hash_join::BuildState; -use crate::types::tuple::Tuple; +use crate::types::tuple::{SplitTupleRef, Tuple}; pub(crate) struct InnerJoinState; @@ -40,18 +40,20 @@ impl JoinProbeState for InnerJoinState { while probe_state.index < build_state.tuples.len() { let (_, Tuple { values, pk }) = &build_state.tuples[probe_state.index]; probe_state.index += 1; - let full_values = Vec::from_iter( - values - .iter() - .chain(probe_state.probe_tuple.values.iter()) - .cloned(), - ); if let Some(filter_args) = filter_args { + let full_values = + SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); if !filter(&full_values, filter_args)? 
{ continue; } } + let full_values = Vec::from_iter( + values + .iter() + .chain(probe_state.probe_tuple.values.iter()) + .cloned(), + ); return Ok(Some(Tuple::new(pk.clone(), full_values))); } diff --git a/src/execution/dql/join/hash/left_anti_join.rs b/src/execution/dql/join/hash/left_anti_join.rs index f2291c30..6d14d025 100644 --- a/src/execution/dql/join/hash/left_anti_join.rs +++ b/src/execution/dql/join/hash/left_anti_join.rs @@ -61,7 +61,7 @@ impl JoinProbeState for LeftAntiJoinState { .cloned() .chain((0..right_schema_len).map(|_| DataValue::Null)), ); - if !filter(&full_values, filter_args)? { + if !filter(&full_values.as_slice(), filter_args)? { continue; } } diff --git a/src/execution/dql/join/hash/left_join.rs b/src/execution/dql/join/hash/left_join.rs index 5fb657f6..99594d59 100644 --- a/src/execution/dql/join/hash/left_join.rs +++ b/src/execution/dql/join/hash/left_join.rs @@ -17,7 +17,7 @@ use crate::execution::dql::join::hash::{ filter, FilterArgs, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, }; use crate::execution::dql::join::hash_join::BuildState; -use crate::types::tuple::Tuple; +use crate::types::tuple::{SplitTupleRef, Tuple}; use crate::types::value::DataValue; use fixedbitset::FixedBitSet; @@ -47,20 +47,22 @@ impl JoinProbeState for LeftJoinState { while probe_state.index < build_state.tuples.len() { let (i, Tuple { values, pk }) = &build_state.tuples[probe_state.index]; probe_state.index += 1; - let full_values = Vec::from_iter( - values - .iter() - .chain(probe_state.probe_tuple.values.iter()) - .cloned(), - ); if let Some(filter_args) = filter_args { + let full_values = + SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); if !filter(&full_values, filter_args)? 
{ probe_state.has_filtered = true; self.bits.set(*i, true); continue; } } + let full_values = Vec::from_iter( + values + .iter() + .chain(probe_state.probe_tuple.values.iter()) + .cloned(), + ); build_state.is_used = true; return Ok(Some(Tuple::new(pk.clone(), full_values))); } diff --git a/src/execution/dql/join/hash/left_semi_join.rs b/src/execution/dql/join/hash/left_semi_join.rs index 43e1e066..4a0432c3 100644 --- a/src/execution/dql/join/hash/left_semi_join.rs +++ b/src/execution/dql/join/hash/left_semi_join.rs @@ -17,7 +17,7 @@ use crate::execution::dql::join::hash::{ filter, FilterArgs, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, }; use crate::execution::dql::join::hash_join::BuildState; -use crate::types::tuple::Tuple; +use crate::types::tuple::{SplitTupleRef, Tuple}; use fixedbitset::FixedBitSet; pub(crate) struct LeftSemiJoinState { @@ -44,14 +44,10 @@ impl JoinProbeState for LeftSemiJoinState { while probe_state.index < build_state.tuples.len() { let (i, Tuple { values, .. }) = &build_state.tuples[probe_state.index]; probe_state.index += 1; - let full_values = Vec::from_iter( - values - .iter() - .chain(probe_state.probe_tuple.values.iter()) - .cloned(), - ); if let Some(filter_args) = filter_args { + let full_values = + SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); if !filter(&full_values, filter_args)? 
{ probe_state.has_filtered = true; self.bits.set(*i, true); diff --git a/src/execution/dql/join/hash/mod.rs b/src/execution/dql/join/hash/mod.rs index fc23dd62..644ac7e8 100644 --- a/src/execution/dql/join/hash/mod.rs +++ b/src/execution/dql/join/hash/mod.rs @@ -29,7 +29,7 @@ use crate::execution::dql::join::hash::right_join::RightJoinState; use crate::execution::dql::join::hash_join::BuildState; use crate::execution::dql::sort::BumpVec; use crate::expression::ScalarExpression; -use crate::types::tuple::{SchemaRef, Tuple}; +use crate::types::tuple::{SchemaRef, Tuple, TupleLike}; use crate::types::value::DataValue; use std::collections::hash_map::IntoIter as HashMapIntoIter; @@ -133,14 +133,17 @@ impl JoinProbeState for JoinProbeStateImpl { } } -pub(crate) fn filter(values: &[DataValue], filter_arg: &FilterArgs) -> Result { +pub(crate) fn filter( + values: &T, + filter_arg: &FilterArgs, +) -> Result { let FilterArgs { full_schema, filter_expr, .. } = filter_arg; - match &filter_expr.eval(Some((values, full_schema)))? { + match &filter_expr.eval(Some((values as &dyn TupleLike, full_schema)))? 
{ DataValue::Boolean(false) | DataValue::Null => Ok(false), DataValue::Boolean(true) => Ok(true), _ => Err(DatabaseError::InvalidType), diff --git a/src/execution/dql/join/hash/right_join.rs b/src/execution/dql/join/hash/right_join.rs index da07be48..03a7ec2e 100644 --- a/src/execution/dql/join/hash/right_join.rs +++ b/src/execution/dql/join/hash/right_join.rs @@ -16,7 +16,7 @@ use crate::errors::DatabaseError; use crate::execution::dql::join::hash::full_join::FullJoinState; use crate::execution::dql::join::hash::{filter, FilterArgs, JoinProbeState, ProbeState}; use crate::execution::dql::join::hash_join::BuildState; -use crate::types::tuple::Tuple; +use crate::types::tuple::{SplitTupleRef, Tuple}; pub(crate) struct RightJoinState { pub(crate) left_schema_len: usize, @@ -58,19 +58,21 @@ impl JoinProbeState for RightJoinState { while probe_state.index < build_state.tuples.len() { let (_, Tuple { values, pk }) = &build_state.tuples[probe_state.index]; probe_state.index += 1; - let full_values = Vec::from_iter( - values - .iter() - .chain(probe_state.probe_tuple.values.iter()) - .cloned(), - ); if let Some(filter_args) = filter_args { + let full_values = + SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); if !filter(&full_values, filter_args)? 
{ probe_state.has_filtered = true; continue; } } + let full_values = Vec::from_iter( + values + .iter() + .chain(probe_state.probe_tuple.values.iter()) + .cloned(), + ); probe_state.produced = true; build_state.is_used = true; build_state.has_filted = probe_state.has_filtered; diff --git a/src/execution/dql/join/nested_loop_join.rs b/src/execution/dql/join/nested_loop_join.rs index 7443ad53..cdde266a 100644 --- a/src/execution/dql/join/nested_loop_join.rs +++ b/src/execution/dql/join/nested_loop_join.rs @@ -26,7 +26,7 @@ use crate::expression::ScalarExpression; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; use crate::planner::LogicalPlan; use crate::storage::Transaction; -use crate::types::tuple::{Schema, SchemaRef, Tuple}; +use crate::types::tuple::{Schema, SchemaRef, SplitTupleRef, Tuple}; use crate::types::value::DataValue; use fixedbitset::FixedBitSet; use itertools::Itertools; @@ -206,6 +206,7 @@ impl NestedLoopJoin { ) -> ExecId { let cache = (arena.table_cache(), arena.view_cache(), arena.meta_cache()); let transaction = arena.transaction_mut() as *mut T; + // Fixme: Executor reset build_read(arena, self.right_input_plan.clone(), cache, transaction) } @@ -276,13 +277,12 @@ impl NestedLoopJoin { ) } (Some(filter), true) => { - let new_tuple = Self::merge_tuple( - &active_left.left_tuple, - &right_tuple, - &self.ty, - ); - let value = - filter.eval(Some((&new_tuple, &self.output_schema_ref)))?; + let values = if matches!(self.ty, JoinType::RightOuter) { + SplitTupleRef::new(&right_tuple, &active_left.left_tuple) + } else { + SplitTupleRef::new(&active_left.left_tuple, &right_tuple) + }; + let value = filter.eval(Some((values, &self.output_schema_ref)))?; match &value { DataValue::Boolean(true) => { let tuple = match self.ty { @@ -478,33 +478,6 @@ impl NestedLoopJoin { )) } - /// Merge the two tuples. 
- /// `left_tuple` must be from the `NestedLoopJoin.left_input` - /// `right_tuple` must be from the `NestedLoopJoin.right_input` - fn merge_tuple(left_tuple: &Tuple, right_tuple: &Tuple, ty: &JoinType) -> Tuple { - let pk = left_tuple.pk.as_ref().or(right_tuple.pk.as_ref()).cloned(); - match ty { - JoinType::RightOuter => Tuple::new( - pk, - right_tuple - .values - .iter() - .chain(left_tuple.values.iter()) - .cloned() - .collect_vec(), - ), - _ => Tuple::new( - pk, - left_tuple - .values - .iter() - .chain(right_tuple.values.iter()) - .cloned() - .collect_vec(), - ), - } - } - fn merge_schema( left_schema: &[ColumnRef], right_schema: &[ColumnRef], diff --git a/src/execution/dql/mark_apply.rs b/src/execution/dql/mark_apply.rs index bbf0d659..c9f83506 100644 --- a/src/execution/dql/mark_apply.rs +++ b/src/execution/dql/mark_apply.rs @@ -17,7 +17,7 @@ use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, use crate::planner::operator::mark_apply::MarkApplyOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; -use crate::types::tuple::{Schema, SchemaRef, Tuple}; +use crate::types::tuple::{Schema, SchemaRef, SplitTupleRef, Tuple}; use crate::types::value::DataValue; use std::mem; use std::sync::Arc; @@ -92,6 +92,7 @@ impl MarkApply { ) -> ExecId { let cache = (arena.table_cache(), arena.view_cache(), arena.meta_cache()); let transaction = arena.transaction_mut() as *mut T; + // Fixme: Executor reset build_read(arena, self.right_input_plan.clone(), cache, transaction) } @@ -100,16 +101,10 @@ impl MarkApply { left_tuple: &Tuple, right_tuple: &Tuple, ) -> Result { - let values = Vec::from_iter( - left_tuple - .values - .iter() - .chain(right_tuple.values.iter()) - .cloned(), - ); + let values = SplitTupleRef::new(left_tuple, right_tuple); for predicate in self.op.predicates() { - match predicate.eval(Some((values.as_slice(), self.predicate_schema.as_ref())))? 
{ + match predicate.eval(Some((values, self.predicate_schema.as_ref())))? { DataValue::Boolean(true) => {} DataValue::Boolean(false) | DataValue::Null => return Ok(false), _ => return Err(DatabaseError::InvalidType), diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index cdf30624..4acc7ce0 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -17,6 +17,7 @@ use crate::errors::DatabaseError; use crate::expression::function::scala::ScalarFunction; use crate::expression::{AliasType, BinaryOperator, ScalarExpression}; use crate::types::evaluator::EvaluatorFactory; +use crate::types::tuple::TupleLike; use crate::types::value::{DataValue, Utf8Type}; use crate::types::LogicalType; use regex::Regex; @@ -35,7 +36,7 @@ macro_rules! eval_to_num { } impl ScalarExpression { - pub fn eval<'a, T: Into<&'a [DataValue]> + Copy>( + pub fn eval( &self, tuple: Option<(T, &[ColumnRef])>, ) -> Result { @@ -52,7 +53,7 @@ impl ScalarExpression { let Some((tuple, _)) = tuple else { return Ok(DataValue::Null); }; - Ok(tuple.into()[*position].clone()) + Ok(tuple.value_at(*position).clone()) } ScalarExpression::Alias { expr, alias } => { let Some((tuple, schema)) = tuple else { @@ -260,9 +261,15 @@ impl ScalarExpression { } Ok(DataValue::Tuple(values, false)) } - ScalarExpression::ScalaFunction(ScalarFunction { inner, args, .. }) => inner - .eval(args, tuple.map(|(a, b)| (a.into(), b)))? - .cast(inner.return_type()), + ScalarExpression::ScalaFunction(ScalarFunction { inner, args, .. 
}) => { + let value = inner.eval( + args, + tuple + .as_ref() + .map(|(tuple, schema)| (tuple as &dyn TupleLike, *schema)), + )?; + value.cast(inner.return_type()) + } ScalarExpression::Empty => unreachable!(), ScalarExpression::If { condition, diff --git a/src/expression/function/scala.rs b/src/expression/function/scala.rs index d7261d09..589c4c40 100644 --- a/src/expression/function/scala.rs +++ b/src/expression/function/scala.rs @@ -16,6 +16,7 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; +use crate::types::tuple::TupleLike; use crate::types::value::DataValue; use crate::types::LogicalType; use kite_sql_serde_macros::ReferenceSerialization; @@ -67,7 +68,7 @@ pub trait ScalarFunctionImpl: Debug + Send + Sync { fn eval( &self, args: &[ScalarExpression], - tuple: Option<(&[DataValue], &[ColumnRef])>, + tuple: Option<(&dyn TupleLike, &[ColumnRef])>, ) -> Result; // TODO: Exploiting monotonicity when optimizing `ScalarFunctionImpl::monotonicity()` diff --git a/src/function/char_length.rs b/src/function/char_length.rs index 1b65e9da..99bc7c46 100644 --- a/src/function/char_length.rs +++ b/src/function/char_length.rs @@ -18,6 +18,7 @@ use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; +use crate::types::tuple::TupleLike; use crate::types::value::DataValue; use crate::types::LogicalType; use serde::Deserialize; @@ -48,7 +49,7 @@ impl ScalarFunctionImpl for CharLength { fn eval( &self, exprs: &[ScalarExpression], - tuples: Option<(&[DataValue], &[ColumnRef])>, + tuples: Option<(&dyn TupleLike, &[ColumnRef])>, ) -> Result { let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { diff --git a/src/function/current_date.rs b/src/function/current_date.rs index 
98e57a9d..8790d09d 100644 --- a/src/function/current_date.rs +++ b/src/function/current_date.rs @@ -18,6 +18,7 @@ use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; +use crate::types::tuple::TupleLike; use crate::types::value::DataValue; use crate::types::LogicalType; use chrono::{Datelike, Local}; @@ -50,7 +51,7 @@ impl ScalarFunctionImpl for CurrentDate { fn eval( &self, _: &[ScalarExpression], - _: Option<(&[DataValue], &[ColumnRef])>, + _: Option<(&dyn TupleLike, &[ColumnRef])>, ) -> Result { Ok(DataValue::Date32(Local::now().num_days_from_ce())) } diff --git a/src/function/current_timestamp.rs b/src/function/current_timestamp.rs index cd1d1369..15ddf6a2 100644 --- a/src/function/current_timestamp.rs +++ b/src/function/current_timestamp.rs @@ -18,6 +18,7 @@ use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; +use crate::types::tuple::TupleLike; use crate::types::value::DataValue; use crate::types::LogicalType; use chrono::Utc; @@ -50,7 +51,7 @@ impl ScalarFunctionImpl for CurrentTimeStamp { fn eval( &self, _: &[ScalarExpression], - _: Option<(&[DataValue], &[ColumnRef])>, + _: Option<(&dyn TupleLike, &[ColumnRef])>, ) -> Result { Ok(DataValue::Time64(Utc::now().timestamp(), 0, false)) } diff --git a/src/function/lower.rs b/src/function/lower.rs index ffe7e5dc..72eea7db 100644 --- a/src/function/lower.rs +++ b/src/function/lower.rs @@ -18,6 +18,7 @@ use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; +use crate::types::tuple::TupleLike; use crate::types::value::DataValue; use 
crate::types::LogicalType; use serde::Deserialize; @@ -50,7 +51,7 @@ impl ScalarFunctionImpl for Lower { fn eval( &self, exprs: &[ScalarExpression], - tuples: Option<(&[DataValue], &[ColumnRef])>, + tuples: Option<(&dyn TupleLike, &[ColumnRef])>, ) -> Result { let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { diff --git a/src/function/octet_length.rs b/src/function/octet_length.rs index 355e4cd3..e317fe44 100644 --- a/src/function/octet_length.rs +++ b/src/function/octet_length.rs @@ -18,6 +18,7 @@ use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; +use crate::types::tuple::TupleLike; use crate::types::value::DataValue; use crate::types::LogicalType; use serde::Deserialize; @@ -49,7 +50,7 @@ impl ScalarFunctionImpl for OctetLength { fn eval( &self, exprs: &[ScalarExpression], - tuples: Option<(&[DataValue], &[ColumnRef])>, + tuples: Option<(&dyn TupleLike, &[ColumnRef])>, ) -> Result { let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { diff --git a/src/function/upper.rs b/src/function/upper.rs index 26986081..f308f24d 100644 --- a/src/function/upper.rs +++ b/src/function/upper.rs @@ -18,6 +18,7 @@ use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; +use crate::types::tuple::TupleLike; use crate::types::value::DataValue; use crate::types::LogicalType; use serde::Deserialize; @@ -50,7 +51,7 @@ impl ScalarFunctionImpl for Upper { fn eval( &self, exprs: &[ScalarExpression], - tuples: Option<(&[DataValue], &[ColumnRef])>, + tuples: Option<(&dyn TupleLike, &[ColumnRef])>, ) -> Result { let mut value = exprs[0].eval(tuples)?; if 
!matches!(value.logical_type(), LogicalType::Varchar(_, _)) { diff --git a/src/macros/mod.rs b/src/macros/mod.rs index 48d7c19d..94a737f8 100644 --- a/src/macros/mod.rs +++ b/src/macros/mod.rs @@ -107,7 +107,7 @@ macro_rules! scala_function { #[typetag::serde] impl ::kite_sql::expression::function::scala::ScalarFunctionImpl for $struct_name { #[allow(unused_variables, clippy::redundant_closure_call)] - fn eval(&self, args: &[::kite_sql::expression::ScalarExpression], tuple: Option<(&[::kite_sql::types::value::DataValue], &[::kite_sql::catalog::column::ColumnRef])>) -> Result<::kite_sql::types::value::DataValue, ::kite_sql::errors::DatabaseError> { + fn eval(&self, args: &[::kite_sql::expression::ScalarExpression], tuple: Option<(&dyn ::kite_sql::types::tuple::TupleLike, &[::kite_sql::catalog::column::ColumnRef])>) -> Result<::kite_sql::types::value::DataValue, ::kite_sql::errors::DatabaseError> { let mut _index = 0; $closure($({ diff --git a/src/types/tuple.rs b/src/types/tuple.rs index 42087e4a..284edc07 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -30,12 +30,144 @@ pub type TupleId = DataValue; pub type Schema = Vec; pub type SchemaRef = Arc; +pub trait TupleLike { + fn len(&self) -> usize; + fn value_at(&self, index: usize) -> &DataValue; + + #[inline] + fn as_slice(&self) -> Option<&[DataValue]> { + None + } +} + +#[derive(Clone, Copy)] +pub struct SplitTupleRef<'a> { + left: &'a [DataValue], + right: &'a [DataValue], + left_len: usize, +} + +impl<'a> SplitTupleRef<'a> { + pub fn new(left: &'a Tuple, right: &'a Tuple) -> Self { + Self::from_slices(left.values.as_slice(), right.values.as_slice()) + } + + pub fn from_slices(left: &'a [DataValue], right: &'a [DataValue]) -> Self { + SplitTupleRef { + left, + right, + left_len: left.len(), + } + } +} + #[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] pub struct Tuple { pub pk: Option, pub values: Vec, } +impl TupleLike for Tuple { + #[inline] + fn len(&self) -> usize { + self.values.len() + } + 
+ #[inline] + fn value_at(&self, index: usize) -> &DataValue { + &self.values[index] + } + + #[inline] + fn as_slice(&self) -> Option<&[DataValue]> { + Some(self.values.as_slice()) + } +} + +impl TupleLike for [DataValue] { + #[inline] + fn len(&self) -> usize { + <[DataValue]>::len(self) + } + + #[inline] + fn value_at(&self, index: usize) -> &DataValue { + &self[index] + } + + #[inline] + fn as_slice(&self) -> Option<&[DataValue]> { + Some(self) + } +} + +impl TupleLike for &Tuple { + #[inline] + fn len(&self) -> usize { + self.values.len() + } + + #[inline] + fn value_at(&self, index: usize) -> &DataValue { + &self.values[index] + } + + #[inline] + fn as_slice(&self) -> Option<&[DataValue]> { + Some(self.values.as_slice()) + } +} + +impl TupleLike for &[DataValue] { + #[inline] + fn len(&self) -> usize { + <[DataValue]>::len(self) + } + + #[inline] + fn value_at(&self, index: usize) -> &DataValue { + &self[index] + } + + #[inline] + fn as_slice(&self) -> Option<&[DataValue]> { + Some(self) + } +} + +impl TupleLike for &dyn TupleLike { + #[inline] + fn len(&self) -> usize { + (*self).len() + } + + #[inline] + fn value_at(&self, index: usize) -> &DataValue { + (*self).value_at(index) + } + + #[inline] + fn as_slice(&self) -> Option<&[DataValue]> { + (*self).as_slice() + } +} + +impl TupleLike for SplitTupleRef<'_> { + #[inline] + fn len(&self) -> usize { + self.left_len + self.right.len() + } + + #[inline] + fn value_at(&self, index: usize) -> &DataValue { + if index < self.left_len { + &self.left[index] + } else { + &self.right[index - self.left_len] + } + } +} + impl<'a> From<&'a Tuple> for &'a [DataValue] { fn from(val: &'a Tuple) -> Self { val.values.as_slice() From cb09bad87d0df09379cf37d82b254aee534cf7eb Mon Sep 17 00:00:00 2001 From: kould Date: Thu, 2 Apr 2026 02:11:25 +0800 Subject: [PATCH 04/10] refactor: localize appended mark-apply right outputs precisely --- src/binder/expr.rs | 30 ++- src/binder/mod.rs | 2 + src/binder/select.rs | 332 
++++++++++++++++++++++------- src/execution/dql/mark_apply.rs | 283 ++++++++++++++++++++++-- src/expression/evaluator.rs | 68 +++++- src/planner/operator/mark_apply.rs | 25 +++ tests/slt/filter.slt | 9 + tests/slt/subquery.slt | 60 ++++++ 8 files changed, 694 insertions(+), 115 deletions(-) diff --git a/src/binder/expr.rs b/src/binder/expr.rs index fcc2d00c..4d442a31 100644 --- a/src/binder/expr.rs +++ b/src/binder/expr.rs @@ -282,7 +282,7 @@ impl<'a, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'a, '_, T subquery, negated, } => { - let left_expr = Box::new(self.bind_expr(expr)?); + let left_expr = self.bind_expr(expr)?; let (sub_query, column, correlated) = self.bind_subquery(Some(left_expr.return_type()), subquery)?; @@ -293,19 +293,33 @@ impl<'a, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'a, '_, T } let (alias_expr, sub_query) = self.bind_temp_table(column, sub_query)?; + let predicate = ScalarExpression::Binary { + op: expression::BinaryOperator::Eq, + left_expr: Box::new(left_expr), + right_expr: Box::new(alias_expr), + evaluator: None, + ty: LogicalType::Boolean, + }; + let (_, marker_ref) = self + .bind_temp_table_alias(ScalarExpression::Constant(DataValue::Boolean(true)), 0); self.context.sub_query(SubQueryType::InSubQuery { negated: *negated, plan: sub_query, correlated, + output_column: marker_ref.output_column(), + predicate, }); - Ok(ScalarExpression::Binary { - op: expression::BinaryOperator::Eq, - left_expr, - right_expr: Box::new(alias_expr), - evaluator: None, - ty: LogicalType::Boolean, - }) + if *negated { + Ok(ScalarExpression::Unary { + op: expression::UnaryOperator::Not, + expr: Box::new(marker_ref), + evaluator: None, + ty: LogicalType::Boolean, + }) + } else { + Ok(marker_ref) + } } Expr::Tuple(exprs) => { let mut bond_exprs = Vec::with_capacity(exprs.len()); diff --git a/src/binder/mod.rs b/src/binder/mod.rs index 4df11212..df04b1ea 100644 --- a/src/binder/mod.rs +++ b/src/binder/mod.rs @@ -160,6 +160,8 
@@ pub enum SubQueryType { negated: bool, plan: LogicalPlan, correlated: bool, + output_column: ColumnRef, + predicate: ScalarExpression, }, } diff --git a/src/binder/select.rs b/src/binder/select.rs index e4d3be6d..61007bd7 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -76,6 +76,12 @@ impl<'a> VisitorMut<'a> for RightSidePositionGlobalizer<'_> { } } +struct AppendedRightOutput { + column: ColumnRef, + child_position: usize, + output_position: usize, +} + struct SplitScopePositionRebinder<'a> { left_schema: &'a Schema, right_schema: &'a Schema, @@ -262,6 +268,65 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' Ok(()) } + fn globalize_mark_predicate( + predicate: &mut ScalarExpression, + output_column: &ColumnRef, + left_len: usize, + ) -> Result<(), DatabaseError> { + MarkerPositionGlobalizer { + output_column, + left_len, + } + .visit(predicate) + } + + fn globalize_right_side_exprs<'expr>( + exprs: impl Iterator, + left_len: usize, + right_schema: &Schema, + ) -> Result<(), DatabaseError> { + for expr in exprs { + RightSidePositionGlobalizer { + right_schema, + left_len, + } + .visit(expr)?; + } + + Ok(()) + } + + fn localize_appended_right_outputs<'expr>( + exprs: impl Iterator, + appended_outputs: &[AppendedRightOutput], + ) -> Result<(), DatabaseError> { + struct AppendedRightOutputBinder<'a> { + appended_outputs: &'a [AppendedRightOutput], + } + + impl VisitorMut<'_> for AppendedRightOutputBinder<'_> { + fn visit_column_ref( + &mut self, + column: &mut ColumnRef, + position: &mut usize, + ) -> Result<(), DatabaseError> { + if let Some(output) = self.appended_outputs.iter().find(|output| { + *position == output.child_position && column.same_column(&output.column) + }) { + *position = output.output_position; + } + Ok(()) + } + } + + let mut binder = AppendedRightOutputBinder { appended_outputs }; + for expr in exprs { + binder.visit(expr)?; + } + + Ok(()) + } + fn rebind_split_scope_positions( expr: &mut 
ScalarExpression, left_schema: &Schema, @@ -1251,10 +1316,12 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' let mut predicate = self.bind_expr(predicate)?; if let Some(sub_queries) = self.context.sub_queries_at_now() { - if sub_queries - .iter() - .all(|sub_query| matches!(sub_query, SubQueryType::ExistsSubQuery { .. })) - { + if sub_queries.iter().all(|sub_query| { + matches!( + sub_query, + SubQueryType::ExistsSubQuery { .. } | SubQueryType::InSubQuery { .. } + ) + }) { let passthrough_exprs = children .output_schema() .iter() @@ -1263,39 +1330,56 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' .map(|(position, column)| ScalarExpression::column_expr(column, position)) .collect(); for sub_query in sub_queries { - let SubQueryType::ExistsSubQuery { - plan, - correlated, - output_column, - } = sub_query - else { - unreachable!() - }; - let left_len = children.output_schema().len(); - MarkerPositionGlobalizer { - output_column: &output_column, - left_len, - } - .visit(&mut predicate)?; - let (mut plan, mut predicates) = if correlated { - Self::prepare_correlated_subquery_plan( + match sub_query { + SubQueryType::ExistsSubQuery { plan, - children.output_schema(), - false, - )? - } else { - (plan, Vec::new()) - }; - let right_schema = plan.output_schema(); - for expr in predicates.iter_mut() { - RightSidePositionGlobalizer { - right_schema: right_schema.as_ref(), - left_len, + correlated, + output_column, + } => { + let (plan, predicates) = Self::prepare_mark_apply( + &mut predicate, + &output_column, + children.output_schema(), + plan, + correlated, + false, + Vec::new(), + )?; + children = MarkApplyOperator::build_exists( + children, + plan, + output_column, + predicates, + ); + } + SubQueryType::InSubQuery { + plan, + correlated, + output_column, + predicate: mut in_predicate, + .. 
+ } => { + if correlated { + in_predicate = Self::rewrite_correlated_in_predicate(in_predicate); + } + let (plan, predicates) = Self::prepare_mark_apply( + &mut predicate, + &output_column, + children.output_schema(), + plan, + correlated, + true, + vec![in_predicate], + )?; + children = MarkApplyOperator::build_in( + children, + plan, + output_column, + predicates, + ); } - .visit(expr)?; + SubQueryType::SubQuery { .. } => unreachable!(), } - children = - MarkApplyOperator::build_exists(children, plan, output_column, predicates); } let filter = FilterOperator::build(predicate, children, false); return Ok(LogicalPlan::new( @@ -1305,12 +1389,14 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' Childrens::Only(Box::new(filter)), )); } - if sub_queries - .iter() - .any(|sub_query| matches!(sub_query, SubQueryType::ExistsSubQuery { .. })) - { + if sub_queries.iter().any(|sub_query| { + matches!( + sub_query, + SubQueryType::ExistsSubQuery { .. } | SubQueryType::InSubQuery { .. } + ) + }) { return Err(DatabaseError::UnsupportedStmt( - "mixed EXISTS with other WHERE subqueries is not supported yet".to_string(), + "mixed EXISTS/IN with other WHERE subqueries is not supported yet".to_string(), )); } for sub_query in sub_queries { @@ -1324,27 +1410,8 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' } (plan, JoinType::Inner) } - SubQueryType::ExistsSubQuery { .. } => unreachable!(), - SubQueryType::InSubQuery { - negated, - plan, - correlated, - } => { - if correlated { - children = self.bind_correlated_in_subquery( - children, - plan, - negated, - predicate.clone(), - )?; - continue; - } - let join_ty = if negated { - JoinType::LeftAnti - } else { - JoinType::LeftSemi - }; - (plan, join_ty) + SubQueryType::ExistsSubQuery { .. } | SubQueryType::InSubQuery { .. 
} => { + unreachable!() } }; @@ -1361,28 +1428,91 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' Ok(FilterOperator::build(predicate, children, false)) } - fn bind_correlated_in_subquery( - &self, - mut children: LogicalPlan, + fn ensure_mark_apply_right_outputs( + plan: &mut LogicalPlan, + predicates: &[ScalarExpression], + ) -> Vec { + let output_schema = plan.output_schema().clone(); + let output_len = output_schema.len(); + if let LogicalPlan { + operator: Operator::Project(op), + childrens, + .. + } = plan + { + let Childrens::Only(child) = childrens.as_mut() else { + return Vec::new(); + }; + let mut appended_outputs = Vec::new(); + op.exprs.extend( + child + .output_schema() + .iter() + .enumerate() + .filter(|(_, column)| { + !output_schema.contains(column) + && predicates.iter().any(|expr| { + expr.any_referenced_column(true, |candidate| { + candidate.same_column(column) + }) + }) + }) + .map(|(position, column)| { + appended_outputs.push(AppendedRightOutput { + column: column.clone(), + child_position: position, + output_position: output_len + appended_outputs.len(), + }); + ScalarExpression::column_expr(column.clone(), position) + }), + ); + if !appended_outputs.is_empty() { + plan.reset_output_schema_cache(); + } + return appended_outputs; + } + + Vec::new() + } + + fn prepare_mark_apply( + predicate: &mut ScalarExpression, + output_column: &ColumnRef, + left_schema: &Schema, plan: LogicalPlan, - negated: bool, - predicate: ScalarExpression, - ) -> Result { - let join_ty = if negated { - JoinType::LeftAnti + correlated: bool, + preserve_projection: bool, + mut apply_predicates: Vec, + ) -> Result<(LogicalPlan, Vec), DatabaseError> { + let left_len = left_schema.len(); + Self::globalize_mark_predicate(predicate, output_column, left_len)?; + + let (plan, correlated_filters) = if correlated { + Self::prepare_correlated_subquery_plan(plan, left_schema, preserve_projection)? 
} else { - JoinType::LeftSemi + (plan, Vec::new()) }; - let (plan, correlated_filters) = - Self::prepare_correlated_subquery_plan(plan, children.output_schema(), true)?; - let predicate = Self::rewrite_correlated_in_predicate(predicate); - Self::build_join_from_split_scope_predicates( - children, - plan, - join_ty, - std::iter::once(predicate).chain(correlated_filters), - false, - ) + apply_predicates.extend(correlated_filters); + + let mut plan = plan; + if correlated { + let appended_right_outputs = + Self::ensure_mark_apply_right_outputs(&mut plan, &apply_predicates); + if !appended_right_outputs.is_empty() { + Self::localize_appended_right_outputs( + apply_predicates.iter_mut(), + &appended_right_outputs, + )?; + } + } + let right_schema = plan.output_schema().clone(); + Self::globalize_right_side_exprs( + apply_predicates.iter_mut(), + left_len, + right_schema.as_ref(), + )?; + + Ok((plan, apply_predicates)) } fn rewrite_correlated_in_predicate(predicate: ScalarExpression) -> ScalarExpression { @@ -2020,6 +2150,7 @@ mod tests { use crate::expression::visitor_mut::VisitorMut; use crate::expression::{AliasType, ScalarExpression}; use crate::planner::operator::join::{JoinCondition, JoinType}; + use crate::planner::operator::mark_apply::{MarkApplyKind, MarkApplyOperator}; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::types::LogicalType; @@ -2152,6 +2283,20 @@ mod tests { } } + fn find_mark_apply(plan: &LogicalPlan) -> Option<&MarkApplyOperator> { + if let Operator::MarkApply(op) = &plan.operator { + return Some(op); + } + + match plan.childrens.as_ref() { + Childrens::Only(child) => find_mark_apply(child), + Childrens::Twins { left, right } => { + find_mark_apply(left).or_else(|| find_mark_apply(right)) + } + Childrens::None => None, + } + } + #[test] fn test_scalar_subquery_in_where_binds_as_inner_join() -> Result<(), DatabaseError> { let table_states = build_t1_table()?; @@ -2166,6 +2311,35 @@ mod tests { Ok(()) 
} + #[test] + fn test_in_subquery_in_where_binds_as_mark_apply() -> Result<(), DatabaseError> { + let table_states = build_t1_table()?; + let plan = table_states.plan("select * from t1 where c1 in (select c3 from t2)")?; + let Some(mark_apply) = find_mark_apply(&plan) else { + panic!("expected IN subquery to introduce a mark apply") + }; + + assert_eq!(mark_apply.kind, MarkApplyKind::In); + assert_eq!(mark_apply.predicates().len(), 1); + + Ok(()) + } + + #[test] + fn test_correlated_in_subquery_in_where_binds_as_mark_apply() -> Result<(), DatabaseError> { + let table_states = build_t1_table()?; + let plan = + table_states.plan("select * from t1 where c1 in (select c3 from t2 where c4 = c2)")?; + let Some(mark_apply) = find_mark_apply(&plan) else { + panic!("expected correlated IN subquery to introduce a mark apply") + }; + + assert_eq!(mark_apply.kind, MarkApplyKind::In); + assert_eq!(mark_apply.predicates().len(), 2); + + Ok(()) + } + fn find_top_join(plan: &LogicalPlan) -> Option<&LogicalPlan> { if matches!(plan.operator, Operator::Join(_)) { return Some(plan); diff --git a/src/execution/dql/mark_apply.rs b/src/execution/dql/mark_apply.rs index c9f83506..23838dc6 100644 --- a/src/execution/dql/mark_apply.rs +++ b/src/execution/dql/mark_apply.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode}; -use crate::planner::operator::mark_apply::MarkApplyOperator; +use crate::planner::operator::mark_apply::{MarkApplyKind, MarkApplyOperator}; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple::{Schema, SchemaRef, SplitTupleRef, Tuple}; @@ -65,21 +65,10 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for MarkApply { self.left_tuple = mem::take(arena.result_tuple_mut()); let right_input = self.build_right_input(arena); - let mut matched = false; - - while arena.next_tuple(right_input)? 
{ - let right_tuple = arena.result_tuple(); - if self.predicate_matched(&self.left_tuple, right_tuple)? { - matched = true; - break; - } - } + let marker = self.mark_value(arena, right_input)?; arena.produce_tuple(mem::take(&mut self.left_tuple)); - arena - .result_tuple_mut() - .values - .push(DataValue::Boolean(matched)); + arena.result_tuple_mut().values.push(marker); arena.resume(); Ok(()) } @@ -96,7 +85,45 @@ impl MarkApply { build_read(arena, self.right_input_plan.clone(), cache, transaction) } - fn predicate_matched( + fn mark_value<'a, T: Transaction + 'a>( + &self, + arena: &mut ExecArena<'a, T>, + right_input: ExecId, + ) -> Result { + match self.op.kind { + MarkApplyKind::Exists => { + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + if self.exists_predicate_matched(&self.left_tuple, right_tuple)? { + return Ok(DataValue::Boolean(true)); + } + } + + Ok(DataValue::Boolean(false)) + } + MarkApplyKind::In => { + let mut saw_null = false; + + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + match self.in_predicate_value(&self.left_tuple, right_tuple)? 
{ + Some(DataValue::Boolean(true)) => return Ok(DataValue::Boolean(true)), + Some(DataValue::Boolean(false)) | None => {} + Some(DataValue::Null) => saw_null = true, + Some(_) => return Err(DatabaseError::InvalidType), + } + } + + if saw_null { + Ok(DataValue::Null) + } else { + Ok(DataValue::Boolean(false)) + } + } + } + } + + fn exists_predicate_matched( &self, left_tuple: &Tuple, right_tuple: &Tuple, @@ -113,6 +140,32 @@ impl MarkApply { Ok(true) } + + fn in_predicate_value( + &self, + left_tuple: &Tuple, + right_tuple: &Tuple, + ) -> Result, DatabaseError> { + let values = SplitTupleRef::new(left_tuple, right_tuple); + // probe_predicate is in predicate, always first + let (probe_predicate, correlated_predicates) = self + .op + .predicates() + .split_first() + .ok_or(DatabaseError::InvalidType)?; + + for predicate in correlated_predicates { + match predicate.eval(Some((values, self.predicate_schema.as_ref())))? { + DataValue::Boolean(true) => {} + DataValue::Boolean(false) | DataValue::Null => return Ok(None), + _ => return Err(DatabaseError::InvalidType), + } + } + + Ok(Some( + probe_predicate.eval(Some((values, self.predicate_schema.as_ref())))?, + )) + } } #[cfg(all(test, not(target_arch = "wasm32")))] @@ -135,13 +188,22 @@ mod tests { use std::sync::Arc; use tempfile::TempDir; - fn build_values(name: &str, rows: Vec>) -> LogicalPlan { - let desc = ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(); - let schema_ref = Arc::new(vec![ColumnRef::from(ColumnCatalog::new( - name.to_string(), - true, - desc, - ))]); + fn build_values_with_schema( + columns: Vec<(&str, LogicalType)>, + rows: Vec>, + ) -> LogicalPlan { + let schema_ref = Arc::new( + columns + .into_iter() + .map(|(name, ty)| { + ColumnRef::from(ColumnCatalog::new( + name.to_string(), + true, + ColumnDesc::new(ty, None, true, None).unwrap(), + )) + }) + .collect(), + ); LogicalPlan::new( Operator::Values(ValuesOperator { rows, schema_ref }), @@ -149,6 +211,10 @@ mod tests { ) } + fn 
build_values(name: &str, rows: Vec>) -> LogicalPlan { + build_values_with_schema(vec![(name, LogicalType::Integer)], rows) + } + fn build_test_storage() -> Result< ( Arc, @@ -280,4 +346,177 @@ mod tests { Ok(()) } + + #[test] + fn mark_in_apply_appends_boolean_match_column() -> Result<(), DatabaseError> { + let mut left = build_values( + "left_c1", + vec![vec![DataValue::Int32(1)], vec![DataValue::Int32(2)]], + ); + let mut right = build_values( + "right_c1", + vec![vec![DataValue::Int32(2)], vec![DataValue::Int32(3)]], + ); + let left_column = left.output_schema()[0].clone(); + let right_column = right.output_schema()[0].clone(); + + let predicate = ScalarExpression::Binary { + op: BinaryOperator::Eq, + left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), + right_expr: Box::new(ScalarExpression::column_expr(right_column, 1)), + evaluator: Some(EvaluatorFactory::binary_create( + LogicalType::Integer, + BinaryOperator::Eq, + )?), + ty: LogicalType::Boolean, + }; + + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = storage.transaction()?; + let tuples = try_collect(execute_input::<_, MarkApply>( + ( + MarkApplyOperator::new_in(build_marker_column(), vec![predicate]), + left, + right, + ), + (&table_cache, &view_cache, &meta_cache), + &mut transaction, + ))?; + + assert_eq!( + tuples + .into_iter() + .flat_map(|tuple| tuple.values) + .collect::>(), + vec![ + DataValue::Int32(1), + DataValue::Boolean(false), + DataValue::Int32(2), + DataValue::Boolean(true), + ] + ); + + Ok(()) + } + + #[test] + fn mark_in_apply_treats_null_predicate_as_not_matched() -> Result<(), DatabaseError> { + let mut left = build_values( + "left_c1", + vec![vec![DataValue::Int32(1)], vec![DataValue::Int32(2)]], + ); + let mut right = build_values( + "right_c1", + vec![vec![DataValue::Null], vec![DataValue::Int32(2)]], + ); + let left_column = left.output_schema()[0].clone(); + let right_column = 
right.output_schema()[0].clone(); + + let predicate = ScalarExpression::Binary { + op: BinaryOperator::Eq, + left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), + right_expr: Box::new(ScalarExpression::column_expr(right_column, 1)), + evaluator: Some(EvaluatorFactory::binary_create( + LogicalType::Integer, + BinaryOperator::Eq, + )?), + ty: LogicalType::Boolean, + }; + + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = storage.transaction()?; + let tuples = try_collect(execute_input::<_, MarkApply>( + ( + MarkApplyOperator::new_in(build_marker_column(), vec![predicate]), + left, + right, + ), + (&table_cache, &view_cache, &meta_cache), + &mut transaction, + ))?; + + assert_eq!( + tuples + .into_iter() + .flat_map(|tuple| tuple.values) + .collect::>(), + vec![ + DataValue::Int32(1), + DataValue::Null, + DataValue::Int32(2), + DataValue::Boolean(true), + ] + ); + + Ok(()) + } + + #[test] + fn mark_in_apply_ignores_null_correlated_predicate_rows() -> Result<(), DatabaseError> { + let mut left = build_values( + "left_c1", + vec![vec![DataValue::Int32(1)], vec![DataValue::Int32(2)]], + ); + let mut right = build_values_with_schema( + vec![ + ("right_c1", LogicalType::Integer), + ("right_flag", LogicalType::Integer), + ], + vec![vec![DataValue::Int32(1), DataValue::Null]], + ); + let left_column = left.output_schema()[0].clone(); + let right_value_column = right.output_schema()[0].clone(); + let right_flag_column = right.output_schema()[1].clone(); + + let probe_predicate = ScalarExpression::Binary { + op: BinaryOperator::Eq, + left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), + right_expr: Box::new(ScalarExpression::column_expr(right_value_column, 1)), + evaluator: Some(EvaluatorFactory::binary_create( + LogicalType::Integer, + BinaryOperator::Eq, + )?), + ty: LogicalType::Boolean, + }; + let correlated_predicate = ScalarExpression::Binary { + op: BinaryOperator::Eq, + 
left_expr: Box::new(ScalarExpression::column_expr(right_flag_column, 2)), + right_expr: Box::new(ScalarExpression::Constant(DataValue::Int32(1))), + evaluator: Some(EvaluatorFactory::binary_create( + LogicalType::Integer, + BinaryOperator::Eq, + )?), + ty: LogicalType::Boolean, + }; + + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = storage.transaction()?; + let tuples = try_collect(execute_input::<_, MarkApply>( + ( + MarkApplyOperator::new_in( + build_marker_column(), + vec![probe_predicate, correlated_predicate], + ), + left, + right, + ), + (&table_cache, &view_cache, &meta_cache), + &mut transaction, + ))?; + + assert_eq!( + tuples + .into_iter() + .flat_map(|tuple| tuple.values) + .collect::>(), + vec![ + DataValue::Int32(1), + DataValue::Boolean(false), + DataValue::Int32(2), + DataValue::Boolean(false), + ] + ); + + Ok(()) + } } diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index 4acc7ce0..5f7b8f71 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -102,22 +102,29 @@ impl ScalarExpression { if value.is_null() { return Ok(DataValue::Null); } - let mut is_in = false; + + let mut matched = false; + let mut saw_null = false; for arg in args { let arg_value = arg.eval(tuple)?; if arg_value.is_null() { - return Ok(DataValue::Null); + saw_null = true; + continue; } if arg_value == value { - is_in = true; + matched = true; break; } } - if *negated { - is_in = !is_in; + + if matched { + Ok(DataValue::Boolean(!negated)) + } else if saw_null { + Ok(DataValue::Null) + } else { + Ok(DataValue::Boolean(*negated)) } - Ok(DataValue::Boolean(is_in)) } ScalarExpression::Unary { expr, evaluator, .. 
@@ -365,3 +372,52 @@ impl ScalarExpression { } } } + +#[cfg(test)] +mod tests { + use super::*; + + fn const_in(expr: DataValue, args: Vec, negated: bool) -> ScalarExpression { + ScalarExpression::In { + negated, + expr: Box::new(ScalarExpression::Constant(expr)), + args: args.into_iter().map(ScalarExpression::Constant).collect(), + } + } + + #[test] + fn in_eval_matches_even_if_null_appears_first() -> Result<(), DatabaseError> { + let expr = const_in( + DataValue::Int32(1), + vec![DataValue::Null, DataValue::Int32(1)], + false, + ); + + assert_eq!(expr.eval::<&[DataValue]>(None)?, DataValue::Boolean(true)); + Ok(()) + } + + #[test] + fn in_eval_returns_null_when_only_null_blocks_non_match() -> Result<(), DatabaseError> { + let expr = const_in( + DataValue::Int32(2), + vec![DataValue::Null, DataValue::Int32(1)], + false, + ); + + assert_eq!(expr.eval::<&[DataValue]>(None)?, DataValue::Null); + Ok(()) + } + + #[test] + fn not_in_eval_matches_even_if_null_appears_first() -> Result<(), DatabaseError> { + let expr = const_in( + DataValue::Int32(1), + vec![DataValue::Null, DataValue::Int32(1)], + true, + ); + + assert_eq!(expr.eval::<&[DataValue]>(None)?, DataValue::Boolean(false)); + Ok(()) + } +} diff --git a/src/planner/operator/mark_apply.rs b/src/planner/operator/mark_apply.rs index 9dbad0ac..0b60452b 100644 --- a/src/planner/operator/mark_apply.rs +++ b/src/planner/operator/mark_apply.rs @@ -23,6 +23,7 @@ use std::fmt::Formatter; #[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] pub enum MarkApplyKind { Exists, + In, } #[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] @@ -56,6 +57,29 @@ impl MarkApplyOperator { ) } + pub fn new_in(output_column: ColumnRef, predicates: Vec) -> Self { + Self { + kind: MarkApplyKind::In, + predicates, + output_column, + } + } + + pub fn build_in( + left: LogicalPlan, + right: LogicalPlan, + output_column: ColumnRef, + predicates: Vec, + ) -> LogicalPlan { + LogicalPlan::new( + 
Operator::MarkApply(MarkApplyOperator::new_in(output_column, predicates)), + Childrens::Twins { + left: Box::new(left), + right: Box::new(right), + }, + ) + } + pub fn predicates(&self) -> &[ScalarExpression] { &self.predicates } @@ -73,6 +97,7 @@ impl fmt::Display for MarkApplyOperator { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self.kind { MarkApplyKind::Exists => write!(f, "MarkExistsApply"), + MarkApplyKind::In => write!(f, "MarkInApply"), } } } diff --git a/tests/slt/filter.slt b/tests/slt/filter.slt index 9e41adc3..142037d0 100644 --- a/tests/slt/filter.slt +++ b/tests/slt/filter.slt @@ -148,6 +148,11 @@ select * from t1 where id in (1, null) ---- 1 KipDB +query IT +select * from t1 where id in (null, 1) +---- +1 KipDB + query IT select * from t1 where null in (1, 2) ---- @@ -160,6 +165,10 @@ query IT select * from t1 where id not in (1, null) ---- +query IT +select * from t1 where id not in (null, 1) +---- + query IT select * from t1 where null not in (1, 2) ---- diff --git a/tests/slt/subquery.slt b/tests/slt/subquery.slt index f405b893..40d8057b 100644 --- a/tests/slt/subquery.slt +++ b/tests/slt/subquery.slt @@ -58,6 +58,12 @@ select * from t1 where a in (select 1) and b = 4 ---- 3 1 4 +query I rowsort +select id from t1 where id in (select id from t1 where id in (1, 3)); +---- +1 +3 + query III select * from t1 where a not in (select 1) ---- @@ -72,6 +78,60 @@ select * from t1 where a not in (select 1) and b = 3 statement ok drop table t1; +statement ok +create table in_null_outer(id int primary key, a int); + +statement ok +create table in_null_inner(id int primary key, v int); + +statement ok +insert into in_null_outer values (0, 1), (1, 2); + +statement ok +insert into in_null_inner values (0, 1), (1, null); + +query I rowsort +select id from in_null_outer where a in (select v from in_null_inner); +---- +0 + +query I rowsort +select id from in_null_outer where a not in (select v from in_null_inner); +---- + +statement ok +drop table 
in_null_outer; + +statement ok +drop table in_null_inner; + +statement ok +create table in_corr_null_outer(id int primary key, a int, b int); + +statement ok +create table in_corr_null_inner(id int primary key, v int, flag int); + +statement ok +insert into in_corr_null_outer values (0, 1, 1), (1, 2, 1); + +statement ok +insert into in_corr_null_inner values (0, 1, null); + +query I rowsort +select id from in_corr_null_outer +where a not in ( + select v from in_corr_null_inner where flag = b +); +---- +0 +1 + +statement ok +drop table in_corr_null_outer; + +statement ok +drop table in_corr_null_inner; + # https://github.com/KipData/KiteSQL/issues/169 statement ok create table t2(id int primary key, a int not null, b int not null); From 098070fc0ce3b8aca9fa021f45573b37e1e9baa5 Mon Sep 17 00:00:00 2001 From: kould Date: Fri, 3 Apr 2026 00:20:20 +0800 Subject: [PATCH 05/10] refactor: remove semi/anti join support in favor of apply-based subqueries --- src/binder/select.rs | 12 +- src/execution/dql/join/hash/left_anti_join.rs | 86 ------------ src/execution/dql/join/hash/left_semi_join.rs | 96 ------------- src/execution/dql/join/hash/mod.rs | 18 --- src/execution/dql/join/hash_join.rs | 54 -------- src/execution/dql/join/mod.rs | 2 +- src/execution/dql/join/nested_loop_join.rs | 124 +---------------- .../rule/normalization/column_pruning.rs | 18 +-- .../rule/normalization/pushdown_limit.rs | 2 +- .../rule/normalization/pushdown_predicates.rs | 131 +----------------- src/planner/mod.rs | 16 +-- src/planner/operator/join.rs | 4 - 12 files changed, 24 insertions(+), 539 deletions(-) delete mode 100644 src/execution/dql/join/hash/left_anti_join.rs delete mode 100644 src/execution/dql/join/hash/left_semi_join.rs diff --git a/src/binder/select.rs b/src/binder/select.rs index 61007bd7..56c150ac 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -1253,14 +1253,12 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' 
(JoinType::RightOuter, Some(constraint)) } JoinOperator::FullOuter(constraint) => (JoinType::Full, Some(constraint)), - JoinOperator::Semi(constraint) | JoinOperator::LeftSemi(constraint) => { - (JoinType::LeftSemi, Some(constraint)) - } - JoinOperator::Anti(constraint) | JoinOperator::LeftAnti(constraint) => { - (JoinType::LeftAnti, Some(constraint)) - } JoinOperator::CrossJoin(constraint) => (JoinType::Cross, Some(constraint)), - JoinOperator::RightSemi(_) + JoinOperator::Semi(_) + | JoinOperator::LeftSemi(_) + | JoinOperator::Anti(_) + | JoinOperator::LeftAnti(_) + | JoinOperator::RightSemi(_) | JoinOperator::RightAnti(_) | JoinOperator::CrossApply | JoinOperator::OuterApply diff --git a/src/execution/dql/join/hash/left_anti_join.rs b/src/execution/dql/join/hash/left_anti_join.rs deleted file mode 100644 index 6d14d025..00000000 --- a/src/execution/dql/join/hash/left_anti_join.rs +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2024 KipData/KiteSQL -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use crate::errors::DatabaseError; -use crate::execution::dql::join::hash::left_semi_join::LeftSemiJoinState; -use crate::execution::dql::join::hash::{ - filter, FilterArgs, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, -}; -use crate::types::tuple::Tuple; -use crate::types::value::DataValue; -use fixedbitset::FixedBitSet; - -pub(crate) struct LeftAntiJoinState { - pub(crate) right_schema_len: usize, - pub(crate) inner: LeftSemiJoinState, -} - -impl JoinProbeState for LeftAntiJoinState { - fn probe_next( - &mut self, - probe_state: &mut ProbeState, - build_state: Option<&mut crate::execution::dql::join::hash_join::BuildState>, - filter_args: Option<&FilterArgs>, - ) -> Result, DatabaseError> { - self.inner.probe_next(probe_state, build_state, filter_args) - } - - fn left_drop_next( - &mut self, - left_drop_state: &mut LeftDropState, - filter_args: Option<&FilterArgs>, - ) -> Result, DatabaseError> { - let bits: &FixedBitSet = &self.inner.bits; - let right_schema_len = self.right_schema_len; - - loop { - if let Some(LeftDropTuples { - tuples, has_filted, .. - }) = left_drop_state.current.as_mut() - { - for (i, tuple) in tuples.by_ref() { - if bits.contains(i) && *has_filted { - continue; - } - if let Some(filter_args) = filter_args { - let full_values = Vec::from_iter( - tuple - .values - .iter() - .cloned() - .chain((0..right_schema_len).map(|_| DataValue::Null)), - ); - if !filter(&full_values.as_slice(), filter_args)? 
{ - continue; - } - } - return Ok(Some(tuple)); - } - left_drop_state.current = None; - } - - let Some((_, state)) = left_drop_state.states.next() else { - return Ok(None); - }; - - if state.is_used { - continue; - } - left_drop_state.current = Some(LeftDropTuples { - tuples: state.tuples.into_iter(), - has_filted: state.has_filted, - }); - } - } -} diff --git a/src/execution/dql/join/hash/left_semi_join.rs b/src/execution/dql/join/hash/left_semi_join.rs deleted file mode 100644 index 4a0432c3..00000000 --- a/src/execution/dql/join/hash/left_semi_join.rs +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2024 KipData/KiteSQL -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use crate::errors::DatabaseError; -use crate::execution::dql::join::hash::{ - filter, FilterArgs, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, -}; -use crate::execution::dql::join::hash_join::BuildState; -use crate::types::tuple::{SplitTupleRef, Tuple}; -use fixedbitset::FixedBitSet; - -pub(crate) struct LeftSemiJoinState { - pub(crate) bits: FixedBitSet, -} - -impl JoinProbeState for LeftSemiJoinState { - fn probe_next( - &mut self, - probe_state: &mut ProbeState, - build_state: Option<&mut BuildState>, - filter_args: Option<&FilterArgs>, - ) -> Result<Option<Tuple>, DatabaseError> { - if probe_state.is_keys_has_null { - probe_state.finished = true; - return Ok(None); - } - - let Some(build_state) = build_state else { - probe_state.finished = true; - return Ok(None); - }; - - while probe_state.index < build_state.tuples.len() { - let (i, Tuple { values, .. }) = &build_state.tuples[probe_state.index]; - probe_state.index += 1; - - if let Some(filter_args) = filter_args { - let full_values = - SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); - if !filter(&full_values, filter_args)? { - probe_state.has_filtered = true; - self.bits.set(*i, true); - } - } - } - build_state.is_used = true; - build_state.has_filted = probe_state.has_filtered; - probe_state.finished = true; - - Ok(None) - } - - fn left_drop_next( - &mut self, - left_drop_state: &mut LeftDropState, - _filter_args: Option<&FilterArgs>, - ) -> Result<Option<Tuple>, DatabaseError> { - loop { - if let Some(LeftDropTuples { - tuples, has_filted, ..
- }) = left_drop_state.current.as_mut() - { - for (i, tuple) in tuples.by_ref() { - if self.bits.contains(i) && *has_filted { - continue; - } - return Ok(Some(tuple)); - } - left_drop_state.current = None; - } - - let Some((_, state)) = left_drop_state.states.next() else { - return Ok(None); - }; - - if !state.is_used { - continue; - } - left_drop_state.current = Some(LeftDropTuples { - tuples: state.tuples.into_iter(), - has_filted: state.has_filted, - }); - } - } -} diff --git a/src/execution/dql/join/hash/mod.rs b/src/execution/dql/join/hash/mod.rs index 644ac7e8..f139ad64 100644 --- a/src/execution/dql/join/hash/mod.rs +++ b/src/execution/dql/join/hash/mod.rs @@ -14,17 +14,13 @@ pub(crate) mod full_join; pub(crate) mod inner_join; -pub(crate) mod left_anti_join; pub(crate) mod left_join; -pub(crate) mod left_semi_join; pub(crate) mod right_join; use crate::errors::DatabaseError; use crate::execution::dql::join::hash::full_join::FullJoinState; use crate::execution::dql::join::hash::inner_join::InnerJoinState; -use crate::execution::dql::join::hash::left_anti_join::LeftAntiJoinState; use crate::execution::dql::join::hash::left_join::LeftJoinState; -use crate::execution::dql::join::hash::left_semi_join::LeftSemiJoinState; use crate::execution::dql::join::hash::right_join::RightJoinState; use crate::execution::dql::join::hash_join::BuildState; use crate::execution::dql::sort::BumpVec; @@ -80,8 +76,6 @@ pub(crate) enum JoinProbeStateImpl { Left(LeftJoinState), Right(RightJoinState), Full(FullJoinState), - LeftSemi(LeftSemiJoinState), - LeftAnti(LeftAntiJoinState), } impl JoinProbeState for JoinProbeStateImpl { @@ -104,12 +98,6 @@ impl JoinProbeState for JoinProbeStateImpl { JoinProbeStateImpl::Full(state) => { state.probe_next(probe_state, build_state, filter_args) } - JoinProbeStateImpl::LeftSemi(state) => { - state.probe_next(probe_state, build_state, filter_args) - } - JoinProbeStateImpl::LeftAnti(state) => { - state.probe_next(probe_state, build_state, 
filter_args) - } } } @@ -123,12 +111,6 @@ impl JoinProbeState for JoinProbeStateImpl { JoinProbeStateImpl::Left(state) => state.left_drop_next(left_drop_state, filter_args), JoinProbeStateImpl::Right(state) => state.left_drop_next(left_drop_state, filter_args), JoinProbeStateImpl::Full(state) => state.left_drop_next(left_drop_state, filter_args), - JoinProbeStateImpl::LeftSemi(state) => { - state.left_drop_next(left_drop_state, filter_args) - } - JoinProbeStateImpl::LeftAnti(state) => { - state.left_drop_next(left_drop_state, filter_args) - } } } } diff --git a/src/execution/dql/join/hash_join.rs b/src/execution/dql/join/hash_join.rs index 5a2a6f29..ab0a43db 100644 --- a/src/execution/dql/join/hash_join.rs +++ b/src/execution/dql/join/hash_join.rs @@ -16,9 +16,7 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::execution::dql::join::hash::full_join::FullJoinState; use crate::execution::dql::join::hash::inner_join::InnerJoinState; -use crate::execution::dql::join::hash::left_anti_join::LeftAntiJoinState; use crate::execution::dql::join::hash::left_join::LeftJoinState; -use crate::execution::dql::join::hash::left_semi_join::LeftSemiJoinState; use crate::execution::dql::join::hash::right_join::RightJoinState; use crate::execution::dql::join::hash::{ FilterArgs, JoinProbeState, JoinProbeStateImpl, LeftDropState, ProbeState, @@ -241,15 +239,6 @@ impl HashJoin { right_schema_len, bits: FixedBitSet::with_capacity(build_count), }), - JoinType::LeftSemi => JoinProbeStateImpl::LeftSemi(LeftSemiJoinState { - bits: FixedBitSet::with_capacity(build_count), - }), - JoinType::LeftAnti => JoinProbeStateImpl::LeftAnti(LeftAntiJoinState { - right_schema_len, - inner: LeftSemiJoinState { - bits: FixedBitSet::with_capacity(build_count), - }, - }), JoinType::RightOuter => JoinProbeStateImpl::Right(RightJoinState { left_schema_len }), JoinType::Full => JoinProbeStateImpl::Full(FullJoinState { left_schema_len, @@ -433,12 +422,10 @@ mod test { use 
crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::rocksdb::{RocksStorage, RocksTransaction}; - use crate::storage::table_codec::BumpBytes; use crate::storage::Storage; use crate::types::value::DataValue; use crate::types::LogicalType; use crate::utils::lru::SharedLruCache; - use bumpalo::Bump; use std::hash::RandomState; use std::sync::Arc; use tempfile::TempDir; @@ -649,47 +636,6 @@ mod test { build_integers(vec![Some(3), Some(5), Some(7), None, None, None]) ); } - { - let mut executor = HashJoin::from((op.clone(), left.clone(), right.clone())); - executor.ty = JoinType::LeftSemi; - let mut tuples = try_collect(crate::execution::execute( - executor, - (&table_cache, &view_cache, &meta_cache), - &mut transaction, - ))?; - - let arena = Bump::new(); - assert_eq!(tuples.len(), 2); - tuples.sort_by_key(|tuple| { - let mut bytes = BumpBytes::new_in(&arena); - tuple.values[0].memcomparable_encode(&mut bytes).unwrap(); - bytes - }); - - assert_eq!( - tuples[0].values, - build_integers(vec![Some(0), Some(2), Some(4)]) - ); - assert_eq!( - tuples[1].values, - build_integers(vec![Some(1), Some(3), Some(5)]) - ); - } - { - let mut executor = HashJoin::from((op, left, right)); - executor.ty = JoinType::LeftAnti; - let tuples = try_collect(crate::execution::execute( - executor, - (&table_cache, &view_cache, &meta_cache), - &mut transaction, - ))?; - - assert_eq!(tuples.len(), 1); - assert_eq!( - tuples[0].values, - build_integers(vec![Some(3), Some(5), Some(7)]) - ); - } Ok(()) } diff --git a/src/execution/dql/join/mod.rs b/src/execution/dql/join/mod.rs index 2ae7ffaa..d15176f2 100644 --- a/src/execution/dql/join/mod.rs +++ b/src/execution/dql/join/mod.rs @@ -21,7 +21,7 @@ pub(crate) mod nested_loop_join; pub fn joins_nullable(join_type: &JoinType) -> (bool, bool) { match join_type { JoinType::Inner => (false, false), - JoinType::LeftOuter | JoinType::LeftSemi | JoinType::LeftAnti => (false, true), + JoinType::LeftOuter => 
(false, true), JoinType::RightOuter => (true, false), JoinType::Full => (true, true), JoinType::Cross => (true, true), diff --git a/src/execution/dql/join/nested_loop_join.rs b/src/execution/dql/join/nested_loop_join.rs index cdde266a..6aaa34f0 100644 --- a/src/execution/dql/join/nested_loop_join.rs +++ b/src/execution/dql/join/nested_loop_join.rs @@ -13,7 +13,7 @@ // limitations under the License. //! Defines the nested loop join executor, it supports [`JoinType::Inner`], [`JoinType::LeftOuter`], -//! [`JoinType::LeftSemi`], [`JoinType::LeftAnti`], [`JoinType::RightOuter`], [`JoinType::Cross`], [`JoinType::Full`]. +//! [`JoinType::RightOuter`], [`JoinType::Cross`], [`JoinType::Full`]. use super::joins_nullable; use crate::catalog::ColumnRef; @@ -81,7 +81,7 @@ impl EqualCondition { /// One input will be selected to be the inner table and the other will be the outer /// | JoinType | Inner-table | Outer-table | /// |--------------------------------|----------------|----------------| -/// | Inner/Left/LeftSemi/LeftAnti | right | left | +/// | Inner/Left | right | left | /// |--------------------------------|----------------|----------------| /// | Right/RightSemi/RightAnti/Full | left | right | /// |--------------------------------|----------------|----------------| @@ -286,8 +286,6 @@ impl NestedLoopJoin { match &value { DataValue::Boolean(true) => { let tuple = match self.ty { - JoinType::LeftAnti => None, - JoinType::LeftSemi if active_left.has_matched => None, JoinType::RightOuter => Self::emit_tuple( &right_tuple, &active_left.left_tuple, @@ -320,21 +318,13 @@ impl NestedLoopJoin { } } - self.state = if matches!(self.ty, JoinType::LeftSemi) { - NestedLoopJoinState::PullLeft { right_bitmap } - } else { - NestedLoopJoinState::ScanRight { - active_left, - right_bitmap, - } + self.state = NestedLoopJoinState::ScanRight { + active_left, + right_bitmap, }; arena.produce_tuple(tuple); return Ok(()); } - - if matches!(self.ty, JoinType::LeftAnti) && 
active_left.has_matched { - break; - } } if matches!(self.ty, JoinType::Full) { @@ -352,13 +342,7 @@ impl NestedLoopJoin { } let right_schema_len = self.eq_cond.right_schema.len(); let tuple = match self.ty { - JoinType::LeftAnti if !active_left.has_matched => { - Some(active_left.left_tuple) - } - JoinType::LeftOuter - | JoinType::LeftSemi - | JoinType::RightOuter - | JoinType::Full + JoinType::LeftOuter | JoinType::RightOuter | JoinType::Full if !active_left.has_matched => { let right_tuple = @@ -445,7 +429,7 @@ impl NestedLoopJoin { .chain(right_tuple.values.clone()) .collect_vec(); match ty { - JoinType::Inner | JoinType::Cross | JoinType::LeftSemi if !is_matched => values.clear(), + JoinType::Inner | JoinType::Cross if !is_matched => values.clear(), JoinType::LeftOuter | JoinType::Full if !is_matched => { values .iter_mut() @@ -457,14 +441,6 @@ impl NestedLoopJoin { values[i] = DataValue::Null; }); } - JoinType::LeftSemi => values.truncate(left_len), - JoinType::LeftAnti => { - if is_matched { - values.clear(); - } else { - values.truncate(left_len); - } - } _ => (), }; @@ -961,92 +937,6 @@ mod test { Ok(()) } - #[test] - fn test_nested_left_semi_join() -> Result<(), DatabaseError> { - let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = RocksStorage::new(temp_dir.path())?; - let mut transaction = storage.transaction()?; - let meta_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); - let view_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); - let table_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); - let (keys, left, right, filter) = build_join_values(true); - let plan = LogicalPlan::new( - Operator::Join(JoinOperator { - on: JoinCondition::On { - on: keys, - filter: Some(filter), - }, - join_type: JoinType::LeftSemi, - }), - Childrens::Twins { - left: Box::new(left), - right: Box::new(right), - }, - ); - let plan = optimize_exprs(plan)?; - let 
Operator::Join(op) = plan.operator else { - unreachable!() - }; - let (left, right) = plan.childrens.pop_twins(); - let executor = crate::execution::execute( - NestedLoopJoin::from((op, left, right)), - (&table_cache, &view_cache, &meta_cache), - &mut transaction, - ); - let tuples = try_collect(executor)?; - - let mut expected_set = HashSet::with_capacity(1); - expected_set.insert(build_integers(vec![Some(1), Some(2), Some(5)])); - - valid_result(&mut expected_set, &tuples); - - Ok(()) - } - - #[test] - fn test_nested_left_anti_join() -> Result<(), DatabaseError> { - let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = RocksStorage::new(temp_dir.path())?; - let mut transaction = storage.transaction()?; - let meta_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); - let view_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); - let table_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); - let (keys, left, right, filter) = build_join_values(true); - let plan = LogicalPlan::new( - Operator::Join(JoinOperator { - on: JoinCondition::On { - on: keys, - filter: Some(filter), - }, - join_type: JoinType::LeftAnti, - }), - Childrens::Twins { - left: Box::new(left), - right: Box::new(right), - }, - ); - let plan = optimize_exprs(plan)?; - let Operator::Join(op) = plan.operator else { - unreachable!() - }; - let (left, right) = plan.childrens.pop_twins(); - let executor = crate::execution::execute( - NestedLoopJoin::from((op, left, right)), - (&table_cache, &view_cache, &meta_cache), - &mut transaction, - ); - let tuples = try_collect(executor)?; - - let mut expected_set = HashSet::with_capacity(3); - expected_set.insert(build_integers(vec![Some(0), Some(2), Some(4)])); - expected_set.insert(build_integers(vec![Some(1), Some(3), Some(5)])); - expected_set.insert(build_integers(vec![Some(3), Some(5), Some(7)])); - - valid_result(&mut expected_set, &tuples); - - Ok(()) - } - #[test] fn 
test_nested_right_out_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index 6ae67b59..186eee86 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -20,7 +20,6 @@ use crate::expression::{HasCountStar, ScalarExpression}; use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::rule::normalization::{remap_expr_positions, remap_exprs_positions}; use crate::planner::operator::join::JoinCondition; -use crate::planner::operator::join::JoinType; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::types::value::{DataValue, Utf8Type}; @@ -554,17 +553,12 @@ impl ColumnPruning { } JoinCondition::None => {} } - if !matches!(op.join_type, JoinType::LeftSemi | JoinType::LeftAnti) { - output_removed_positions = Self::merge_removed_positions( - &left_removed_positions, - &right_removed_positions, - old_left_outputs_len, - arena, - ); - } else { - output_removed_positions = - Self::copy_removed_positions(&left_removed_positions, arena); - } + output_removed_positions = Self::merge_removed_positions( + &left_removed_positions, + &right_removed_positions, + old_left_outputs_len, + arena, + ); } else if let Operator::MarkApply(op) = operator { let removed_positions = Self::merge_removed_positions( &left_removed_positions, diff --git a/src/optimizer/rule/normalization/pushdown_limit.rs b/src/optimizer/rule/normalization/pushdown_limit.rs index 7b5507b1..bbdb0c8d 100644 --- a/src/optimizer/rule/normalization/pushdown_limit.rs +++ b/src/optimizer/rule/normalization/pushdown_limit.rs @@ -73,7 +73,7 @@ impl NormalizationRule for PushLimitThroughJoin { if let Operator::Join(join_op) = &child.operator { let mut applied = false; match join_op.join_type { - 
JoinType::LeftOuter | JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::LeftOuter => { applied |= wrap_child_with(child, 0, Operator::Limit(limit_op.clone())); } JoinType::RightOuter => { diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index aa9e7b09..2d552794 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -110,11 +110,7 @@ impl NormalizationRule for PushPredicateThroughJoin { if !matches!( join_op.join_type, - JoinType::Inner - | JoinType::LeftOuter - | JoinType::LeftSemi - | JoinType::LeftAnti - | JoinType::RightOuter + JoinType::Inner | JoinType::LeftOuter | JoinType::RightOuter ) { return Ok(false); } @@ -148,7 +144,7 @@ impl NormalizationRule for PushPredicateThroughJoin { common_filters } - JoinType::LeftOuter | JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::LeftOuter => { if let Some(left_filter_op) = reduce_filters(left_filters, filter_op.having) { new_ops.0 = Some(Operator::Filter(left_filter_op)); } @@ -362,11 +358,7 @@ impl NormalizationRule for PushJoinPredicateIntoScan { }; if !matches!( join_op.join_type, - JoinType::Inner - | JoinType::LeftOuter - | JoinType::LeftSemi - | JoinType::LeftAnti - | JoinType::RightOuter + JoinType::Inner | JoinType::LeftOuter | JoinType::RightOuter ) { return Ok(false); } @@ -399,8 +391,6 @@ impl NormalizationRule for PushJoinPredicateIntoScan { JoinType::Inner => (true, true), JoinType::LeftOuter => (false, true), JoinType::RightOuter => (true, false), - JoinType::LeftSemi => (true, false), - JoinType::LeftAnti => (false, false), _ => (false, false), }; @@ -503,27 +493,6 @@ mod tests { .find_best::(None) } - fn with_join_type(mut plan: LogicalPlan, join_type: JoinType) -> LogicalPlan { - fn visit(plan: &mut LogicalPlan, join_type: JoinType) -> bool { - if let Operator::Join(join_op) = &mut plan.operator { - join_op.join_type = join_type; - 
return true; - } - match plan.childrens.as_mut() { - Childrens::Only(child) => visit(child, join_type), - Childrens::Twins { left, right } => { - visit(left, join_type) || visit(right, join_type) - } - Childrens::None => false, - } - } - assert!( - visit(&mut plan, join_type), - "expected plan to contain a join" - ); - plan - } - #[test] fn test_push_predicate_into_scan() -> Result<(), DatabaseError> { let table_state = build_t1_table()?; @@ -1052,98 +1021,4 @@ mod tests { Ok(()) } - - #[test] - fn test_push_join_predicate_left_semi_keeps_right_filter() -> Result<(), DatabaseError> { - let table_state = build_t1_table()?; - let plan = - table_state.plan("select * from t1 inner join t2 on t1.c1 = t2.c3 and t2.c3 < 2")?; - let plan = with_join_type(plan, JoinType::LeftSemi); - - let mut best_plan = apply_pipeline( - plan, - HepOptimizerPipeline::builder().before_batch( - "push_join_predicate_into_scan".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::PushJoinPredicateIntoScan], - ), - )?; - - if matches!(best_plan.operator, Operator::Project(_)) { - best_plan = best_plan.childrens.pop_only(); - } - - let join_plan = best_plan; - { - let join_op = match &join_plan.operator { - Operator::Join(op) => op, - _ => unreachable!("expected join root"), - }; - - assert!(matches!(join_op.join_type, JoinType::LeftSemi)); - match &join_op.on { - JoinCondition::On { filter, .. 
} => assert!( - filter.is_some(), - "semi join should keep right-side predicates in the join filter" - ), - JoinCondition::None => unreachable!("expected join condition"), - } - } - let (_left_child, right_child) = join_plan.childrens.pop_twins(); - assert!( - !matches!(right_child.operator, Operator::Filter(_)), - "right child should not get a pushed-down filter for semi join" - ); - - Ok(()) - } - - #[test] - fn test_push_join_predicate_left_anti_keeps_filters() -> Result<(), DatabaseError> { - let table_state = build_t1_table()?; - let plan = table_state - .plan("select * from t1 inner join t2 on t1.c1 = t2.c3 and t1.c1 > 1 and t2.c3 < 2")?; - let plan = with_join_type(plan, JoinType::LeftAnti); - - let mut best_plan = apply_pipeline( - plan, - HepOptimizerPipeline::builder().before_batch( - "push_join_predicate_into_scan".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::PushJoinPredicateIntoScan], - ), - )?; - - if matches!(best_plan.operator, Operator::Project(_)) { - best_plan = best_plan.childrens.pop_only(); - } - - let join_plan = best_plan; - { - let join_op = match &join_plan.operator { - Operator::Join(op) => op, - _ => unreachable!("expected join root"), - }; - assert!(matches!(join_op.join_type, JoinType::LeftAnti)); - - match &join_op.on { - JoinCondition::On { filter, .. 
} => { - assert!(filter.is_some(), "left anti join should keep ON predicates") - } - JoinCondition::None => unreachable!("expected join condition"), - } - } - - let (left_child, right_child) = join_plan.childrens.pop_twins(); - assert!( - !matches!(left_child.operator, Operator::Filter(_)), - "left anti join should not push predicates to the left child" - ); - assert!( - !matches!(right_child.operator, Operator::Filter(_)), - "left anti join should not push predicates to the right child" - ); - - Ok(()) - } } diff --git a/src/planner/mod.rs b/src/planner/mod.rs index ff89b9c5..e12cdaa9 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -16,7 +16,6 @@ pub mod operator; use crate::catalog::{ColumnCatalog, ColumnRef, TableName}; use crate::planner::operator::except::ExceptOperator; -use crate::planner::operator::join::JoinType; use crate::planner::operator::union::UnionOperator; use crate::planner::operator::values::ValuesOperator; use crate::planner::operator::{Operator, PhysicalOption}; @@ -166,7 +165,7 @@ impl LogicalPlan { | Operator::Limit(_) | Operator::TopK(_) | Operator::ScalarSubquery(_) => childrens_iter.next().unwrap().output_schema_direct(), - Operator::ScalarApply(_) => { + Operator::ScalarApply(_) | Operator::Join(_) => { let mut columns = Vec::new(); for plan in childrens_iter { @@ -195,19 +194,6 @@ impl LogicalPlan { .map(|expr| expr.output_column()) .collect_vec(), ), - Operator::Join(op) => { - if matches!(op.join_type, JoinType::LeftSemi | JoinType::LeftAnti) { - return childrens_iter.next().unwrap().output_schema_direct(); - } - let mut columns = Vec::new(); - - for plan in childrens_iter { - for column in plan.output_schema_direct().columns() { - columns.push(column.clone()); - } - } - SchemaOutput::Schema(columns) - } Operator::Project(op) => SchemaOutput::Schema( op.exprs .iter() diff --git a/src/planner/operator/join.rs b/src/planner/operator/join.rs index 176d4133..93cb3346 100644 --- a/src/planner/operator/join.rs +++ 
b/src/planner/operator/join.rs @@ -24,8 +24,6 @@ use std::fmt::Formatter; pub enum JoinType { Inner, LeftOuter, - LeftSemi, - LeftAnti, RightOuter, Full, Cross, @@ -76,8 +74,6 @@ impl fmt::Display for JoinType { match self { JoinType::Inner => write!(f, "Inner")?, JoinType::LeftOuter => write!(f, "LeftOuter")?, - JoinType::LeftSemi => write!(f, "LeftSemi")?, - JoinType::LeftAnti => write!(f, "LeftAnti")?, JoinType::RightOuter => write!(f, "RightOuter")?, JoinType::Full => write!(f, "Full")?, JoinType::Cross => write!(f, "Cross")?, From 54068c81ca1224b570d4bffca6a5fa90ee75e8f2 Mon Sep 17 00:00:00 2001 From: kould Date: Fri, 3 Apr 2026 19:13:07 +0800 Subject: [PATCH 06/10] feat: parameterize EXISTS/IN subqueries with runtime index probes --- src/db.rs | 354 +++++++++++++++--- src/execution/dql/index_scan.rs | 66 +++- src/execution/dql/mark_apply.rs | 352 +++++++++++++---- src/execution/mod.rs | 27 +- src/optimizer/core/rule.rs | 34 +- src/optimizer/heuristic/optimizer.rs | 85 +++-- .../rule/implementation/dql/table_scan.rs | 24 +- .../rule/normalization/agg_elimination.rs | 49 ++- .../rule/normalization/column_pruning.rs | 8 +- .../rule/normalization/combine_operators.rs | 26 +- .../normalization/compilation_in_advance.rs | 8 +- .../rule/normalization/min_max_top_k.rs | 14 +- src/optimizer/rule/normalization/mod.rs | 49 ++- .../rule/normalization/parameterized_index.rs | 193 ++++++++++ .../rule/normalization/pushdown_limit.rs | 20 +- .../rule/normalization/pushdown_predicates.rs | 43 ++- .../rule/normalization/simplification.rs | 14 +- src/optimizer/rule/normalization/top_k.rs | 8 +- src/planner/operator/mark_apply.rs | 32 ++ src/planner/operator/table_scan.rs | 2 +- src/storage/mod.rs | 20 +- src/storage/rocksdb.rs | 2 +- src/types/index.rs | 26 +- tests/slt/subquery.slt | 135 +++++++ 24 files changed, 1328 insertions(+), 263 deletions(-) create mode 100644 src/optimizer/rule/normalization/parameterized_index.rs diff --git a/src/db.rs b/src/db.rs index 
8ca92275..fa7bdfb0 100644 --- a/src/db.rs +++ b/src/db.rs @@ -391,6 +391,11 @@ fn default_optimizer_pipeline() -> HepOptimizerPipeline { NormalizationRuleImpl::CombineFilter, ], ) + .after_batch( + "Parameterize Mark Apply".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::ParameterizeMarkApply], + ) .after_batch( "Expression Remapper".to_string(), HepBatchStrategy::once_topdown(), @@ -458,25 +463,19 @@ impl State { &self.view_cache } - #[allow(clippy::too_many_arguments)] - pub(crate) fn build_plan<A: AsRef<[(&'static str, DataValue)]>>( + fn build_plan_and_runtime_params<A: AsRef<[(&'static str, DataValue)]>>( &self, stmt: &Statement, params: A, - table_cache: &TableCache, - view_cache: &ViewCache, - meta_cache: &StatisticsMetaCache, transaction: &<S as Storage>::TransactionType<'_>, - scala_functions: &ScalaFunctions, - table_functions: &TableFunctions, - ) -> Result<LogicalPlan, DatabaseError> { + ) -> Result<(LogicalPlan, usize), DatabaseError> { let mut binder = Binder::new( BinderContext::new( - table_cache, - view_cache, + self.table_cache(), + self.view_cache(), transaction, - scala_functions, - table_functions, + self.scala_functions(), + self.table_functions(), Arc::new(AtomicUsize::new(0)), ), &params, @@ -490,10 +489,10 @@ impl State { /// Limit(1) /// Project(a,b) let source_plan = binder.bind(stmt)?; - let mut best_plan = self - .optimizer_pipeline - .instantiate(source_plan) - .find_best(Some(&transaction.meta_loader(meta_cache)))?; + let mut optimizer = self.optimizer_pipeline.instantiate(source_plan); + optimizer.optimize(Some(&transaction.meta_loader(self.meta_cache())))?; + let runtime_param_count = optimizer.runtime_param_count(); + let mut best_plan = optimizer.into_plan(); if let Operator::Analyze(op) = &mut best_plan.operator { if op.histogram_buckets.is_none() { @@ -501,7 +500,7 @@ impl State { } } - Ok(best_plan) + Ok((best_plan, runtime_param_count)) } fn execute<'a, 'txn, A: AsRef<[(&'static str, DataValue)]>>( @@ -513,18 +512,11 @@ impl State { where S: 'txn, { - let mut plan = self.build_plan( - stmt, - params, -
self.table_cache(), - self.view_cache(), - self.meta_cache(), - transaction, - self.scala_functions(), - self.table_functions(), - )?; + let (mut plan, runtime_param_count) = + self.build_plan_and_runtime_params(stmt, params, transaction)?; let schema = plan.output_schema().clone(); let mut arena = ExecArena::default(); + arena.init_runtime_params(runtime_param_count); let root = build_write( &mut arena, plan, @@ -1173,16 +1165,10 @@ pub(crate) mod test { None, ); let source_plan = binder.bind(&stmt)?; - let best_plan = kite_sql.state.build_plan( - &stmt, - [], - kite_sql.state.table_cache(), - kite_sql.state.view_cache(), - kite_sql.state.meta_cache(), - &transaction, - kite_sql.state.scala_functions(), - kite_sql.state.table_functions(), - )?; + let (best_plan, _) = + kite_sql + .state + .build_plan_and_runtime_params(&stmt, [], &transaction)?; let join_plan = match source_plan.operator { Operator::Project(_) => source_plan.childrens.pop_only(), @@ -1266,16 +1252,10 @@ pub(crate) mod test { None, ); let source_plan = binder.bind(&stmt)?; - let best_plan = kite_sql.state.build_plan( - &stmt, - [], - kite_sql.state.table_cache(), - kite_sql.state.view_cache(), - kite_sql.state.meta_cache(), - &transaction, - kite_sql.state.scala_functions(), - kite_sql.state.table_functions(), - )?; + let (best_plan, _) = + kite_sql + .state + .build_plan_and_runtime_params(&stmt, [], &transaction)?; let join_plan = match source_plan.operator { Operator::Project(_) => source_plan.childrens.pop_only(), @@ -1373,16 +1353,10 @@ pub(crate) mod test { "SELECT o.x, t.y FROM onecolumn o INNER JOIN twocolumn t ON (o.x=t.x AND t.y=53)", )?; let transaction = kite_sql.storage.transaction()?; - let best_plan = kite_sql.state.build_plan( - &stmt, - [], - kite_sql.state.table_cache(), - kite_sql.state.view_cache(), - kite_sql.state.meta_cache(), - &transaction, - kite_sql.state.scala_functions(), - kite_sql.state.table_functions(), - )?; + let (best_plan, _) = + kite_sql + .state + 
.build_plan_and_runtime_params(&stmt, [], &transaction)?; let join_plan = match best_plan.operator { Operator::Project(_) => best_plan.childrens.pop_only(), Operator::Join(_) => best_plan, @@ -1516,6 +1490,272 @@ pub(crate) mod test { Ok(()) } + // FIXME: keep this as a unit test instead of SLT for now. The current + // sqllogictest runner does not reliably match the pretty-printed multi-line + // EXPLAIN output produced by correlated IN, even though the plan itself is stable. + #[test] + fn test_subquery_explain_uses_parameterized_index_for_in() -> Result<(), DatabaseError> { + let temp_dir = TempDir::new().expect("unable to create temporary working directory"); + let kite_sql = DataBaseBuilder::path(temp_dir.path()).build_rocksdb()?; + + kite_sql + .run("create table in_outer(id int primary key, a int)")? + .done()?; + kite_sql + .run("create table in_inner(id int primary key, v int)")? + .done()?; + kite_sql + .run("create table in_inner_nn(id int primary key, v int)")? + .done()?; + kite_sql + .run("create index in_inner_v_index on in_inner(v)")? + .done()?; + kite_sql + .run("create index in_inner_nn_v_index on in_inner_nn(v)")? + .done()?; + + kite_sql + .run("insert into in_outer values (0, null), (1, 1), (2, 2), (3, 3)")? + .done()?; + kite_sql + .run("insert into in_inner values (0, 2), (1, null)")? + .done()?; + kite_sql + .run("insert into in_inner_nn values (0, 2)")? + .done()?; + + let collect_plan = |sql: &str| -> Result<String, DatabaseError> { + let mut iter = kite_sql.run(sql)?; + let rows = iter.by_ref().collect::<Result<Vec<_>, _>>()?; + iter.done()?; + Ok(rows + .iter() + .filter_map(|row| match row.values.first() { + Some(DataValue::Utf8 { value, ..
}) => Some(value.as_str()), + _ => None, + }) + .collect::<Vec<_>>() + .join("\n")) + }; + let collect_ids = |sql: &str| -> Result<Vec<i32>, DatabaseError> { + let mut iter = kite_sql.run(sql)?; + let mut ids = Vec::new(); + while let Some(row) = iter.next() { + let row = row?; + ids.push(row.values[0].i32().unwrap()); + } + iter.done()?; + Ok(ids) + }; + + let assert_mark_in_uses_parameterized_index = + |sql: &str, index_name: &str| -> Result<(), DatabaseError> { + let explain_plan = collect_plan(sql)?; + assert!( + explain_plan.contains("MarkInApply"), + "unexpected explain plan: {explain_plan}" + ); + assert!( + explain_plan.contains(&format!("IndexScan By {index_name} => Probe $0")), + "unexpected explain plan: {explain_plan}" + ); + Ok(()) + }; + + assert_mark_in_uses_parameterized_index( + "explain select id from in_outer where a in (select v from in_inner where in_inner.v = in_outer.a)", + "in_inner_v_index", + )?; + assert_mark_in_uses_parameterized_index( + "explain select id from in_outer where a not in (select v from in_inner where in_inner.v = in_outer.a)", + "in_inner_v_index", + )?; + assert_mark_in_uses_parameterized_index( + "explain select id from in_outer where a in (select v from in_inner_nn where in_inner_nn.v = in_outer.a)", + "in_inner_nn_v_index", + )?; + assert_mark_in_uses_parameterized_index( + "explain select id from in_outer where a not in (select v from in_inner_nn where in_inner_nn.v = in_outer.a)", + "in_inner_nn_v_index", + )?; + + assert_eq!( + collect_ids( + "select id from in_outer where a in (select v from in_inner where in_inner.v = in_outer.a) order by id", + )?, + vec![2] + ); + assert_eq!( + collect_ids( + "select id from in_outer where a not in (select v from in_inner where in_inner.v = in_outer.a) order by id", + )?, + vec![0, 1, 3] + ); + assert_eq!( + collect_ids( + "select id from in_outer where a in (select v from in_inner_nn where in_inner_nn.v = in_outer.a) order by id", + )?, + vec![2] + ); + assert_eq!( + collect_ids( + "select id
from in_outer where a not in (select v from in_inner_nn where in_inner_nn.v = in_outer.a) order by id", + )?, + vec![0, 1, 3] + ); + + kite_sql + .run("create table in_outer_flag(id int primary key, a int, b int)")? + .done()?; + kite_sql + .run("create table in_inner_flag(id int primary key, v int, flag int)")? + .done()?; + kite_sql + .run("create table in_inner_flag_nn(id int primary key, v int, flag int)")? + .done()?; + kite_sql + .run("create index in_inner_flag_v_index on in_inner_flag(v)")? + .done()?; + kite_sql + .run("create index in_inner_flag_nn_v_index on in_inner_flag_nn(v)")? + .done()?; + + kite_sql + .run("insert into in_outer_flag values (0, null, 1), (1, 1, 1), (2, 2, 1), (3, 3, 1)")? + .done()?; + kite_sql + .run("insert into in_inner_flag values (0, 2, 1), (1, null, 1)")? + .done()?; + kite_sql + .run("insert into in_inner_flag_nn values (0, 2, 1)")? + .done()?; + + assert_mark_in_uses_parameterized_index( + "explain select id from in_outer_flag where a in (select v from in_inner_flag where in_inner_flag.flag = in_outer_flag.b)", + "in_inner_flag_v_index", + )?; + assert_mark_in_uses_parameterized_index( + "explain select id from in_outer_flag where a not in (select v from in_inner_flag where in_inner_flag.flag = in_outer_flag.b)", + "in_inner_flag_v_index", + )?; + assert_mark_in_uses_parameterized_index( + "explain select id from in_outer_flag where a in (select v from in_inner_flag_nn where in_inner_flag_nn.flag = in_outer_flag.b)", + "in_inner_flag_nn_v_index", + )?; + assert_mark_in_uses_parameterized_index( + "explain select id from in_outer_flag where a not in (select v from in_inner_flag_nn where in_inner_flag_nn.flag = in_outer_flag.b)", + "in_inner_flag_nn_v_index", + )?; + + assert_eq!( + collect_ids( + "select id from in_outer_flag where a in (select v from in_inner_flag where in_inner_flag.flag = in_outer_flag.b) order by id", + )?, + vec![2] + ); + assert_eq!( + collect_ids( + "select id from in_outer_flag where a not in (select 
v from in_inner_flag where in_inner_flag.flag = in_outer_flag.b) order by id", + )?, + Vec::::new() + ); + assert_eq!( + collect_ids( + "select id from in_outer_flag where a in (select v from in_inner_flag_nn where in_inner_flag_nn.flag = in_outer_flag.b) order by id", + )?, + vec![2] + ); + assert_eq!( + collect_ids( + "select id from in_outer_flag where a not in (select v from in_inner_flag_nn where in_inner_flag_nn.flag = in_outer_flag.b) order by id", + )?, + vec![1, 3] + ); + + Ok(()) + } + + #[test] + fn test_subquery_explain_uses_parameterized_index_for_exists() -> Result<(), DatabaseError> { + let temp_dir = TempDir::new().expect("unable to create temporary working directory"); + let kite_sql = DataBaseBuilder::path(temp_dir.path()).build_rocksdb()?; + + kite_sql + .run("create table exists_outer(id int primary key, a int, b int)")? + .done()?; + kite_sql + .run("create table exists_inner(id int primary key, v int, flag int)")? + .done()?; + kite_sql + .run("create index exists_inner_v_index on exists_inner(v)")? + .done()?; + + kite_sql + .run("insert into exists_outer values (0, 1, 1), (1, 1, 2), (2, 2, null), (3, 3, 1)")? + .done()?; + kite_sql + .run("insert into exists_inner values (0, 1, 1), (1, 1, null), (2, 2, 1)")? + .done()?; + + let collect_plan = |sql: &str| -> Result { + let mut iter = kite_sql.run(sql)?; + let rows = iter.by_ref().collect::, _>>()?; + iter.done()?; + Ok(rows + .iter() + .filter_map(|row| match row.values.first() { + Some(DataValue::Utf8 { value, .. 
}) => Some(value.as_str()), + _ => None, + }) + .collect::>() + .join("\n")) + }; + let collect_ids = |sql: &str| -> Result, DatabaseError> { + let mut iter = kite_sql.run(sql)?; + let mut ids = Vec::new(); + while let Some(row) = iter.next() { + let row = row?; + ids.push(row.values[0].i32().unwrap()); + } + iter.done()?; + Ok(ids) + }; + let assert_mark_exists_uses_parameterized_index = |sql: &str| -> Result<(), DatabaseError> { + let explain_plan = collect_plan(sql)?; + assert!( + explain_plan.contains("MarkExistsApply"), + "unexpected explain plan: {explain_plan}" + ); + assert!( + explain_plan.contains("IndexScan By exists_inner_v_index => Probe $0"), + "unexpected explain plan: {explain_plan}" + ); + Ok(()) + }; + + assert_mark_exists_uses_parameterized_index( + "explain select id from exists_outer where exists (select 1 from exists_inner where exists_inner.v = exists_outer.a and exists_inner.flag = exists_outer.b)", + )?; + assert_mark_exists_uses_parameterized_index( + "explain select id from exists_outer where not exists (select 1 from exists_inner where exists_inner.v = exists_outer.a and exists_inner.flag = exists_outer.b)", + )?; + + assert_eq!( + collect_ids( + "select id from exists_outer where exists (select 1 from exists_inner where exists_inner.v = exists_outer.a and exists_inner.flag = exists_outer.b) order by id", + )?, + vec![0] + ); + assert_eq!( + collect_ids( + "select id from exists_outer where not exists (select 1 from exists_inner where exists_inner.v = exists_outer.a and exists_inner.flag = exists_outer.b) order by id", + )?, + vec![1, 2, 3] + ); + + Ok(()) + } + #[test] fn test_run_multi_statement() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); diff --git a/src/execution/dql/index_scan.rs b/src/execution/dql/index_scan.rs index 5b59acdf..5955be17 100644 --- a/src/execution/dql/index_scan.rs +++ b/src/execution/dql/index_scan.rs @@ -17,45 +17,58 @@ use 
crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNod use crate::expression::range_detacher::Range; use crate::planner::operator::table_scan::TableScanOperator; use crate::storage::{IndexIter, Iter, Transaction}; -use crate::types::index::IndexMetaRef; +use crate::types::index::{IndexLookup, IndexMetaRef, RuntimeIndexProbe}; use crate::types::serialize::TupleValueSerializableImpl; +use std::array; +use std::vec; + +enum IndexLookupRanges { + One(array::IntoIter), + Many(vec::IntoIter), +} + +impl Iterator for IndexLookupRanges { + type Item = Range; + + fn next(&mut self) -> Option { + match self { + IndexLookupRanges::One(iter) => iter.next(), + IndexLookupRanges::Many(iter) => iter.next(), + } + } +} pub(crate) struct IndexScan<'a, T: Transaction + 'a> { op: Option, index_by: IndexMetaRef, - ranges: Vec, + lookup: Option, covered_deserializers: Option>, cover_mapping: Option>, - iter: Option>, + iter: Option>, } impl<'a, T: Transaction + 'a> From<( TableScanOperator, IndexMetaRef, - Range, + IndexLookup, Option>, Option>, )> for IndexScan<'a, T> { fn from( - (op, index_by, range, covered_deserializers, cover_mapping): ( + (op, index_by, lookup, covered_deserializers, cover_mapping): ( TableScanOperator, IndexMetaRef, - Range, + IndexLookup, Option>, Option>, ), ) -> Self { - let ranges = match range { - Range::SortedRanges(ranges) => ranges, - range => vec![range], - }; - IndexScan { op: Some(op), index_by, - ranges, + lookup: Some(lookup), covered_deserializers, cover_mapping, iter: None, @@ -78,7 +91,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for IndexScan<'a, T> { type Input = ( TableScanOperator, IndexMetaRef, - Range, + IndexLookup, Option>, Option>, ); @@ -98,6 +111,27 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for IndexScan<'a, T> { } impl<'a, T: Transaction + 'a> IndexScan<'a, T> { + fn ranges_from_lookup(lookup: IndexLookup, arena: &ExecArena<'a, T>) -> IndexLookupRanges { + match lookup { + 
IndexLookup::Static(Range::SortedRanges(ranges)) => { + IndexLookupRanges::Many(ranges.into_iter()) + } + IndexLookup::Static(range) => IndexLookupRanges::One([range].into_iter()), + IndexLookup::Probe(param) => match arena.runtime_param(param) { + RuntimeIndexProbe::Eq(value) => { + IndexLookupRanges::One([Range::Eq(value.clone())].into_iter()) + } + RuntimeIndexProbe::Scope { min, max } => IndexLookupRanges::One( + [Range::Scope { + min: min.clone(), + max: max.clone(), + }] + .into_iter(), + ), + }, + } + } + pub(crate) fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { if self.iter.is_none() { let Some(TableScanOperator { @@ -111,13 +145,17 @@ impl<'a, T: Transaction + 'a> IndexScan<'a, T> { arena.finish(); return Ok(()); }; + let ranges = Self::ranges_from_lookup( + self.lookup.take().expect("index scan lookup initialized"), + arena, + ); self.iter = Some(arena.transaction().read_by_index( arena.table_cache(), table_name, limit, columns, self.index_by.clone(), - std::mem::take(&mut self.ranges), + ranges, with_pk, self.covered_deserializers.take(), self.cover_mapping.take(), diff --git a/src/execution/dql/mark_apply.rs b/src/execution/dql/mark_apply.rs index 23838dc6..28e1f4d9 100644 --- a/src/execution/dql/mark_apply.rs +++ b/src/execution/dql/mark_apply.rs @@ -17,15 +17,24 @@ use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, use crate::planner::operator::mark_apply::{MarkApplyKind, MarkApplyOperator}; use crate::planner::LogicalPlan; use crate::storage::Transaction; +use crate::types::index::RuntimeIndexProbe; use crate::types::tuple::{Schema, SchemaRef, SplitTupleRef, Tuple}; use crate::types::value::DataValue; use std::mem; use std::sync::Arc; +#[derive(PartialEq, Eq)] +enum InPredicateOutcome { + Match, + Null, + Continue, +} + pub struct MarkApply { op: MarkApplyOperator, right_input_plan: LogicalPlan, left_input: ExecId, + left_schema: SchemaRef, predicate_schema: SchemaRef, left_tuple: 
Tuple, } @@ -39,9 +48,9 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for MarkApply { cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { + let left_schema = left_input.output_schema().clone(); let predicate_schema = Arc::new( - left_input - .output_schema() + left_schema .iter() .chain(right_input.output_schema().iter()) .cloned() @@ -52,6 +61,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for MarkApply { op, right_input_plan: right_input, left_input, + left_schema, predicate_schema, left_tuple: Tuple::default(), })) @@ -64,8 +74,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for MarkApply { } self.left_tuple = mem::take(arena.result_tuple_mut()); - let right_input = self.build_right_input(arena); - let marker = self.mark_value(arena, right_input)?; + let marker = self.mark_value(arena)?; arena.produce_tuple(mem::take(&mut self.left_tuple)); arena.result_tuple_mut().values.push(marker); @@ -76,22 +85,51 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for MarkApply { impl MarkApply { fn build_right_input<'a, T: Transaction + 'a>( - &mut self, + &self, arena: &mut ExecArena<'a, T>, + param_value: Option, ) -> ExecId { + if let Some(probe) = self.op.parameterized_probe() { + let runtime_probe = match param_value { + Some(value) => Some(RuntimeIndexProbe::Eq(value)), + None if matches!(self.op.kind, MarkApplyKind::In) => { + Some(RuntimeIndexProbe::Scope { + min: std::collections::Bound::Unbounded, + max: std::collections::Bound::Unbounded, + }) + } + None => None, + }; + if let Some(runtime_probe) = runtime_probe { + arena.set_runtime_param(probe.param(), runtime_probe); + } + } + let cache = (arena.table_cache(), arena.view_cache(), arena.meta_cache()); let transaction = arena.transaction_mut() as *mut T; // Fixme: Executor reset build_read(arena, self.right_input_plan.clone(), cache, transaction) } + fn parameterized_probe_value(&self) -> Result, DatabaseError> { + self.op + .parameterized_probe() + .map(|probe| { + probe + 
.left_expr() + .eval(Some((&self.left_tuple, self.left_schema.as_ref()))) + }) + .transpose() + } + fn mark_value<'a, T: Transaction + 'a>( - &self, + &mut self, arena: &mut ExecArena<'a, T>, - right_input: ExecId, ) -> Result { match self.op.kind { MarkApplyKind::Exists => { + let right_input = self.build_right_input(arena, self.parameterized_probe_value()?); + while arena.next_tuple(right_input)? { let right_tuple = arena.result_tuple(); if self.exists_predicate_matched(&self.left_tuple, right_tuple)? { @@ -102,15 +140,41 @@ impl MarkApply { Ok(DataValue::Boolean(false)) } MarkApplyKind::In => { + if let Some(probe_value) = self.parameterized_probe_value()? { + if !probe_value.is_null() { + let right_input = self.build_right_input(arena, Some(probe_value)); + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + if self.in_predicate_outcome(&self.left_tuple, right_tuple)? + == InPredicateOutcome::Match + { + return Ok(DataValue::Boolean(true)); + } + } + + let right_input = self.build_right_input(arena, Some(DataValue::Null)); + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + if self.in_predicate_outcome(&self.left_tuple, right_tuple)? + == InPredicateOutcome::Null + { + return Ok(DataValue::Null); + } + } + + return Ok(DataValue::Boolean(false)); + } + } + + let right_input = self.build_right_input(arena, None); let mut saw_null = false; while arena.next_tuple(right_input)? { let right_tuple = arena.result_tuple(); - match self.in_predicate_value(&self.left_tuple, right_tuple)? { - Some(DataValue::Boolean(true)) => return Ok(DataValue::Boolean(true)), - Some(DataValue::Boolean(false)) | None => {} - Some(DataValue::Null) => saw_null = true, - Some(_) => return Err(DatabaseError::InvalidType), + match self.in_predicate_outcome(&self.left_tuple, right_tuple)? 
{ + InPredicateOutcome::Match => return Ok(DataValue::Boolean(true)), + InPredicateOutcome::Null => saw_null = true, + InPredicateOutcome::Continue => {} } } @@ -141,6 +205,19 @@ impl MarkApply { Ok(true) } + fn in_predicate_outcome( + &self, + left_tuple: &Tuple, + right_tuple: &Tuple, + ) -> Result { + match self.in_predicate_value(left_tuple, right_tuple)? { + Some(DataValue::Boolean(true)) => Ok(InPredicateOutcome::Match), + Some(DataValue::Null) => Ok(InPredicateOutcome::Null), + Some(DataValue::Boolean(false)) | None => Ok(InPredicateOutcome::Continue), + Some(_) => Err(DatabaseError::InvalidType), + } + } + fn in_predicate_value( &self, left_tuple: &Tuple, @@ -172,16 +249,17 @@ impl MarkApply { mod tests { use super::*; use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; - use crate::execution::{execute_input, try_collect}; + use crate::execution::{execute_input, try_collect, ExecArena}; use crate::expression::{BinaryOperator, ScalarExpression}; - use crate::planner::operator::mark_apply::MarkApplyOperator; + use crate::planner::operator::mark_apply::ParameterizedMarkProbe; use crate::planner::operator::values::ValuesOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::rocksdb::RocksStorage; use crate::storage::{StatisticsMetaCache, Storage, TableCache, ViewCache}; use crate::types::evaluator::EvaluatorFactory; - use crate::types::value::DataValue; + use crate::types::index::RuntimeIndexProbe; + use crate::types::tuple::Tuple; use crate::types::LogicalType; use crate::utils::lru::SharedLruCache; use std::hash::RandomState; @@ -243,6 +321,24 @@ mod tests { )) } + fn build_equality_predicate( + left_column: ColumnRef, + left_position: usize, + right_column: ColumnRef, + right_position: usize, + ) -> Result { + Ok(ScalarExpression::Binary { + op: BinaryOperator::Eq, + left_expr: Box::new(ScalarExpression::column_expr(left_column, left_position)), + right_expr: 
Box::new(ScalarExpression::column_expr(right_column, right_position)), + evaluator: Some(EvaluatorFactory::binary_create( + LogicalType::Integer, + BinaryOperator::Eq, + )?), + ty: LogicalType::Boolean, + }) + } + #[test] fn mark_exists_apply_appends_boolean_match_column() -> Result<(), DatabaseError> { let mut left = build_values( @@ -256,16 +352,7 @@ mod tests { let left_column = left.output_schema()[0].clone(); let right_column = right.output_schema()[0].clone(); - let predicate = ScalarExpression::Binary { - op: BinaryOperator::Eq, - left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), - right_expr: Box::new(ScalarExpression::column_expr(right_column, 1)), - evaluator: Some(EvaluatorFactory::binary_create( - LogicalType::Integer, - BinaryOperator::Eq, - )?), - ty: LogicalType::Boolean, - }; + let predicate = build_equality_predicate(left_column, 0, right_column, 1)?; let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; @@ -308,16 +395,7 @@ mod tests { let left_column = left.output_schema()[0].clone(); let right_column = right.output_schema()[0].clone(); - let predicate = ScalarExpression::Binary { - op: BinaryOperator::Eq, - left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), - right_expr: Box::new(ScalarExpression::column_expr(right_column, 1)), - evaluator: Some(EvaluatorFactory::binary_create( - LogicalType::Integer, - BinaryOperator::Eq, - )?), - ty: LogicalType::Boolean, - }; + let predicate = build_equality_predicate(left_column, 0, right_column, 1)?; let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; @@ -347,6 +425,177 @@ mod tests { Ok(()) } + #[test] + fn mark_exists_apply_sets_runtime_probe_before_residual_predicates() -> Result<(), DatabaseError> + { + let mut left = build_values_with_schema( + vec![ + ("left_c1", LogicalType::Integer), + ("left_flag", 
LogicalType::Integer), + ], + vec![], + ); + let mut right = build_values_with_schema( + vec![ + ("right_c1", LogicalType::Integer), + ("right_flag", LogicalType::Integer), + ], + vec![ + vec![DataValue::Int32(2), DataValue::Int32(1)], + vec![DataValue::Int32(2), DataValue::Null], + ], + ); + let left_value_column = left.output_schema()[0].clone(); + let left_flag_column = left.output_schema()[1].clone(); + let right_value_column = right.output_schema()[0].clone(); + let right_flag_column = right.output_schema()[1].clone(); + + let probe_predicate = + build_equality_predicate(left_value_column.clone(), 0, right_value_column, 2)?; + let flag_predicate = + build_equality_predicate(left_flag_column.clone(), 1, right_flag_column, 3)?; + let mut op = MarkApplyOperator::new_exists( + build_marker_column(), + vec![probe_predicate, flag_predicate], + ); + op.set_parameterized_probe(Some(ParameterizedMarkProbe::new( + 0, + ScalarExpression::column_expr(left_value_column, 0), + ))); + + let left_schema = left.output_schema().clone(); + let predicate_schema = Arc::new( + left_schema + .iter() + .chain(right.output_schema().iter()) + .cloned() + .collect::(), + ); + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = storage.transaction()?; + let mut arena = ExecArena::default(); + arena.init_context((&table_cache, &view_cache, &meta_cache), &mut transaction); + arena.init_runtime_params(1); + + let mut exec = MarkApply { + op, + right_input_plan: right, + left_input: 0, + left_schema, + predicate_schema, + left_tuple: Tuple::new(None, vec![DataValue::Int32(2), DataValue::Int32(1)]), + }; + + assert_eq!(exec.mark_value(&mut arena)?, DataValue::Boolean(true)); + assert_eq!( + arena.runtime_param(0), + &RuntimeIndexProbe::Eq(DataValue::Int32(2)) + ); + + Ok(()) + } + + #[test] + fn mark_in_apply_sets_eq_runtime_probe_for_non_null_value() -> Result<(), DatabaseError> { + let mut left = 
build_values_with_schema(vec![("left_c1", LogicalType::Integer)], vec![]); + let mut right = build_values_with_schema( + vec![("right_c1", LogicalType::Integer)], + vec![vec![DataValue::Int32(2)]], + ); + let left_value_column = left.output_schema()[0].clone(); + let right_value_column = right.output_schema()[0].clone(); + let predicate = + build_equality_predicate(left_value_column.clone(), 0, right_value_column, 1)?; + let mut op = MarkApplyOperator::new_in(build_marker_column(), vec![predicate]); + op.set_parameterized_probe(Some(ParameterizedMarkProbe::new( + 0, + ScalarExpression::column_expr(left_value_column, 0), + ))); + + let left_schema = left.output_schema().clone(); + let predicate_schema = Arc::new( + left_schema + .iter() + .chain(right.output_schema().iter()) + .cloned() + .collect::(), + ); + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = storage.transaction()?; + let mut arena = ExecArena::default(); + arena.init_context((&table_cache, &view_cache, &meta_cache), &mut transaction); + arena.init_runtime_params(1); + + let mut exec = MarkApply { + op, + right_input_plan: right, + left_input: 0, + left_schema, + predicate_schema, + left_tuple: Tuple::new(None, vec![DataValue::Int32(2)]), + }; + + assert_eq!(exec.mark_value(&mut arena)?, DataValue::Boolean(true)); + assert_eq!( + arena.runtime_param(0), + &RuntimeIndexProbe::Eq(DataValue::Int32(2)) + ); + + Ok(()) + } + + #[test] + fn mark_in_apply_sets_scope_runtime_probe_for_null_value() -> Result<(), DatabaseError> { + let mut left = build_values_with_schema(vec![("left_c1", LogicalType::Integer)], vec![]); + let mut right = build_values_with_schema( + vec![("right_c1", LogicalType::Integer)], + vec![vec![DataValue::Null], vec![DataValue::Int32(2)]], + ); + let left_value_column = left.output_schema()[0].clone(); + let right_value_column = right.output_schema()[0].clone(); + let predicate = + 
build_equality_predicate(left_value_column.clone(), 0, right_value_column, 1)?; + let mut op = MarkApplyOperator::new_in(build_marker_column(), vec![predicate]); + op.set_parameterized_probe(Some(ParameterizedMarkProbe::new( + 0, + ScalarExpression::column_expr(left_value_column, 0), + ))); + + let left_schema = left.output_schema().clone(); + let predicate_schema = Arc::new( + left_schema + .iter() + .chain(right.output_schema().iter()) + .cloned() + .collect::(), + ); + let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; + let mut transaction = storage.transaction()?; + let mut arena = ExecArena::default(); + arena.init_context((&table_cache, &view_cache, &meta_cache), &mut transaction); + arena.init_runtime_params(1); + + let mut exec = MarkApply { + op, + right_input_plan: right, + left_input: 0, + left_schema, + predicate_schema, + left_tuple: Tuple::new(None, vec![DataValue::Null]), + }; + + assert_eq!(exec.mark_value(&mut arena)?, DataValue::Null); + assert_eq!( + arena.runtime_param(0), + &RuntimeIndexProbe::Scope { + min: std::collections::Bound::Unbounded, + max: std::collections::Bound::Unbounded, + } + ); + + Ok(()) + } + #[test] fn mark_in_apply_appends_boolean_match_column() -> Result<(), DatabaseError> { let mut left = build_values( @@ -360,16 +609,7 @@ mod tests { let left_column = left.output_schema()[0].clone(); let right_column = right.output_schema()[0].clone(); - let predicate = ScalarExpression::Binary { - op: BinaryOperator::Eq, - left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), - right_expr: Box::new(ScalarExpression::column_expr(right_column, 1)), - evaluator: Some(EvaluatorFactory::binary_create( - LogicalType::Integer, - BinaryOperator::Eq, - )?), - ty: LogicalType::Boolean, - }; + let predicate = build_equality_predicate(left_column, 0, right_column, 1)?; let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = 
storage.transaction()?; @@ -412,16 +652,7 @@ mod tests { let left_column = left.output_schema()[0].clone(); let right_column = right.output_schema()[0].clone(); - let predicate = ScalarExpression::Binary { - op: BinaryOperator::Eq, - left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), - right_expr: Box::new(ScalarExpression::column_expr(right_column, 1)), - evaluator: Some(EvaluatorFactory::binary_create( - LogicalType::Integer, - BinaryOperator::Eq, - )?), - ty: LogicalType::Boolean, - }; + let predicate = build_equality_predicate(left_column, 0, right_column, 1)?; let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; @@ -468,16 +699,7 @@ mod tests { let right_value_column = right.output_schema()[0].clone(); let right_flag_column = right.output_schema()[1].clone(); - let probe_predicate = ScalarExpression::Binary { - op: BinaryOperator::Eq, - left_expr: Box::new(ScalarExpression::column_expr(left_column, 0)), - right_expr: Box::new(ScalarExpression::column_expr(right_value_column, 1)), - evaluator: Some(EvaluatorFactory::binary_create( - LogicalType::Integer, - BinaryOperator::Eq, - )?), - ty: LogicalType::Boolean, - }; + let probe_predicate = build_equality_predicate(left_column, 0, right_value_column, 1)?; let correlated_predicate = ScalarExpression::Binary { op: BinaryOperator::Eq, left_expr: Box::new(ScalarExpression::column_expr(right_flag_column, 2)), diff --git a/src/execution/mod.rs b/src/execution/mod.rs index 85e77dba..b0669661 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -61,7 +61,9 @@ use crate::planner::operator::join::JoinCondition; use crate::planner::operator::{Operator, PhysicalOption, PlanImpl}; use crate::planner::LogicalPlan; use crate::storage::{StatisticsMetaCache, TableCache, Transaction, ViewCache}; +use crate::types::index::RuntimeIndexProbe; use crate::types::tuple::Tuple; +use crate::types::value::DataValue; pub(crate) type 
ExecutionCaches<'a> = (&'a TableCache, &'a ViewCache, &'a StatisticsMetaCache); pub(crate) type ExecId = usize; @@ -265,6 +267,7 @@ pub(crate) struct ExecArena<'a, T: Transaction + 'a> { result: ExecResult, cache: Option>, transaction: *mut T, + runtime_params: Vec, } impl<'a, T: Transaction + 'a> Default for ExecArena<'a, T> { @@ -274,6 +277,7 @@ impl<'a, T: Transaction + 'a> Default for ExecArena<'a, T> { result: ExecResult::default(), cache: None, transaction: std::ptr::null_mut(), + runtime_params: Vec::new(), } } } @@ -291,6 +295,11 @@ impl<'a, T: Transaction + 'a> ExecArena<'a, T> { } } + pub(crate) fn init_runtime_params(&mut self, count: usize) { + debug_assert!(self.runtime_params.is_empty() || self.runtime_params.len() == count); + self.runtime_params = vec![RuntimeIndexProbe::Eq(DataValue::Null); count]; + } + pub(crate) fn push(&mut self, node: ExecNode<'a, T>) -> ExecId { let id = self.nodes.len(); self.nodes.push(node); @@ -317,6 +326,20 @@ impl<'a, T: Transaction + 'a> ExecArena<'a, T> { unsafe { &mut *self.transaction } } + pub(crate) fn set_runtime_param(&mut self, param: usize, value: RuntimeIndexProbe) { + debug_assert!(param < self.runtime_params.len()); + *self + .runtime_params + .get_mut(param) + .expect("runtime parameter slot initialized") = value; + } + + pub(crate) fn runtime_param(&self, param: usize) -> &RuntimeIndexProbe { + self.runtime_params + .get(param) + .expect("runtime parameter initialized") + } + #[inline] pub(crate) fn result_tuple(&self) -> &Tuple { &self.result.tuple @@ -686,12 +709,12 @@ pub(crate) fn build_read<'a, T: Transaction + 'a>( .. 
}) = physical_option { - if let Some(range) = index_info.range.clone() { + if let Some(lookup) = index_info.lookup.clone() { return as ExecutorNode<'a, T>>::into_executor( ( op, index_info.meta.clone(), - range, + lookup, index_info.covered_deserializers.clone(), index_info.cover_mapping.clone(), ), diff --git a/src/optimizer/core/rule.rs b/src/optimizer/core/rule.rs index 303cb578..3f75b708 100644 --- a/src/optimizer/core/rule.rs +++ b/src/optimizer/core/rule.rs @@ -27,9 +27,41 @@ pub trait MatchPattern { fn pattern(&self) -> &Pattern; } +pub struct NormalizationContext { + runtime_param_count: usize, +} + +impl NormalizationContext { + pub fn new() -> Self { + Self { + runtime_param_count: 0, + } + } + + pub fn alloc_runtime_param(&mut self) -> usize { + let param = self.runtime_param_count; + self.runtime_param_count += 1; + param + } + + pub fn runtime_param_count(&self) -> usize { + self.runtime_param_count + } +} + +impl Default for NormalizationContext { + fn default() -> Self { + Self::new() + } +} + pub trait NormalizationRule { /// Returns true when the plan tree is modified. 
- fn apply(&self, plan: &mut LogicalPlan) -> Result; + fn apply( + &self, + plan: &mut LogicalPlan, + ctx: &mut NormalizationContext, + ) -> Result; } fn compare_costs(candidate_cost: Option, best_cost: Option) -> Ordering { diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index ea7f19e2..825b22ed 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::optimizer::core::rule::{ - BestPhysicalOption, ImplementationRule, MatchPattern, NormalizationRule, + BestPhysicalOption, ImplementationRule, MatchPattern, NormalizationContext, NormalizationRule, }; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::optimizer::heuristic::batch::{ @@ -40,6 +40,7 @@ pub struct HepOptimizer<'a> { after_batches: &'a [HepBatch], implementation_index: &'a ImplementationRuleIndex, plan: LogicalPlan, + runtime_param_count: usize, } impl<'a> HepOptimizer<'a> { @@ -54,14 +55,17 @@ impl<'a> HepOptimizer<'a> { after_batches, implementation_index, plan, + runtime_param_count: 0, } } - pub fn find_best( - mut self, + pub fn optimize( + &mut self, loader: Option<&StatisticMetaLoader<'_, T>>, - ) -> Result { - Self::apply_batches(&mut self.plan, self.before_batches)?; + ) -> Result<(), DatabaseError> { + self.runtime_param_count = 0; + let mut ctx = NormalizationContext::new(); + Self::apply_batches(&mut self.plan, self.before_batches, &mut ctx)?; if let Some(loader) = loader { if self.implementation_index.is_empty().not() { @@ -73,44 +77,71 @@ impl<'a> HepOptimizer<'a> { self.implementation_index, &apply_no_sort_hints, &apply_no_stream_distinct_hints, + &mut ctx, )?; } } - Self::apply_batches(&mut self.plan, self.after_batches)?; + Self::apply_batches(&mut self.plan, self.after_batches, &mut ctx)?; + self.runtime_param_count = ctx.runtime_param_count(); + + Ok(()) + } + pub fn runtime_param_count(&self) -> usize { + 
self.runtime_param_count + } + + pub fn into_plan(self) -> LogicalPlan { + self.plan + } + + #[allow(dead_code)] + pub fn find_best( + mut self, + loader: Option<&StatisticMetaLoader<'_, T>>, + ) -> Result { + self.optimize(loader)?; Ok(self.plan) } #[inline] - fn apply_batches(plan: &mut LogicalPlan, batches: &[HepBatch]) -> Result<(), DatabaseError> { + fn apply_batches( + plan: &mut LogicalPlan, + batches: &[HepBatch], + ctx: &mut NormalizationContext, + ) -> Result<(), DatabaseError> { for batch in batches { match batch.strategy { HepBatchStrategy::MaxTimes(max_iteration) => { for _ in 0..max_iteration { - if !Self::apply_batch(plan, batch)? { + if !Self::apply_batch(plan, batch, ctx)? { break; } } } - HepBatchStrategy::LoopIfApplied => while Self::apply_batch(plan, batch)? {}, + HepBatchStrategy::LoopIfApplied => while Self::apply_batch(plan, batch, ctx)? {}, } } Ok(()) } #[inline] - fn apply_batch(plan: &mut LogicalPlan, batch: &HepBatch) -> Result { + fn apply_batch( + plan: &mut LogicalPlan, + batch: &HepBatch, + ctx: &mut NormalizationContext, + ) -> Result { let mut applied = false; for step in &batch.steps { match step { HepBatchStep::WholeTree(pass) => { - if Self::apply_whole_tree_pass(plan, pass)? { + if Self::apply_whole_tree_pass(plan, pass, ctx)? { plan.reset_output_schema_cache_recursive(); applied = true; } } HepBatchStep::LocalRewrite(rules) => { - if Self::apply_local_rules(plan, rules)? { + if Self::apply_local_rules(plan, rules, ctx)? 
{ applied = true; } } @@ -122,12 +153,13 @@ impl<'a> HepOptimizer<'a> { fn apply_whole_tree_pass( plan: &mut LogicalPlan, pass: &HepWholeTreePass, + ctx: &mut NormalizationContext, ) -> Result { match pass.kind { WholeTreePassKind::ColumnPruning => { let mut applied = false; for rule in &pass.rules { - applied |= rule.apply(plan)?; + applied |= rule.apply(plan, ctx)?; } Ok(applied) } @@ -207,6 +239,7 @@ impl<'a> HepOptimizer<'a> { implementation_index: &ImplementationRuleIndex, inherited_sort_hints: &'plan ScanHintApplier<'plan>, inherited_stream_distinct_hints: &'plan ScanHintApplier<'plan>, + ctx: &mut NormalizationContext, ) -> Result<(), DatabaseError> { if let Operator::TableScan(scan_op) = &mut plan.operator { inherited_sort_hints(scan_op); @@ -249,6 +282,7 @@ impl<'a> HepOptimizer<'a> { implementation_index, child_sort_hints, child_stream_distinct_hints, + ctx, ), Childrens::Twins { left, right } => { Self::annotate_hints_and_physical_options( @@ -257,6 +291,7 @@ impl<'a> HepOptimizer<'a> { implementation_index, child_sort_hints, child_stream_distinct_hints, + ctx, )?; Self::annotate_hints_and_physical_options( right, @@ -264,6 +299,7 @@ impl<'a> HepOptimizer<'a> { implementation_index, child_sort_hints, child_stream_distinct_hints, + ctx, ) } Childrens::None => Ok(()), @@ -272,7 +308,7 @@ impl<'a> HepOptimizer<'a> { })?; } - apply_annotated_post_rules(plan)?; + apply_annotated_post_rules(plan, ctx)?; Ok(()) } @@ -349,15 +385,17 @@ impl<'a> HepOptimizer<'a> { fn apply_local_rules( plan: &mut LogicalPlan, rules: &HepLocalRewriteBatch, + ctx: &mut NormalizationContext, ) -> Result { let mut applied_rules = vec![false; rules.len()]; - Self::apply_local_rules_inner(plan, rules, &mut applied_rules) + Self::apply_local_rules_inner(plan, rules, &mut applied_rules, ctx) } fn apply_local_rules_inner( plan: &mut LogicalPlan, rules: &HepLocalRewriteBatch, applied_rules: &mut [bool], + ctx: &mut NormalizationContext, ) -> Result { let mut applied = false; let mut 
next_rule_idx = 0; @@ -368,7 +406,8 @@ impl<'a> HepOptimizer<'a> { if applied_rules[idx] { continue; } - if rule.apply(plan)? { + let applied_rule = rule.apply(plan, ctx)?; + if applied_rule { plan.reset_output_schema_cache_recursive(); applied_rules[idx] = true; applied = true; @@ -377,12 +416,14 @@ impl<'a> HepOptimizer<'a> { match plan.childrens.as_mut() { Childrens::Only(child) => { - let child_applied = Self::apply_local_rules_inner(child, rules, applied_rules)?; + let child_applied = + Self::apply_local_rules_inner(child, rules, applied_rules, ctx)?; applied |= child_applied; } Childrens::Twins { left, right } => { - let left_applied = Self::apply_local_rules_inner(left, rules, applied_rules)?; - let right_applied = Self::apply_local_rules_inner(right, rules, applied_rules)?; + let left_applied = Self::apply_local_rules_inner(left, rules, applied_rules, ctx)?; + let right_applied = + Self::apply_local_rules_inner(right, rules, applied_rules, ctx)?; applied |= left_applied || right_applied; } Childrens::None => {} @@ -569,7 +610,7 @@ mod tests { use crate::planner::operator::sort::SortField; use crate::planner::operator::{PhysicalOption, PlanImpl, SortOption}; use crate::storage::{Storage, Transaction}; - use crate::types::index::{IndexInfo, IndexMeta, IndexType}; + use crate::types::index::{IndexInfo, IndexLookup, IndexMeta, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; use std::ops::Bound; @@ -677,13 +718,13 @@ mod tests { fields: sort_fields.clone(), ignore_prefix_len: 0, }, - range: Some(Range::SortedRanges(vec![ + lookup: Some(IndexLookup::Static(Range::SortedRanges(vec![ Range::Eq(DataValue::Int32(2)), Range::Scope { min: Bound::Excluded(DataValue::Int32(40)), max: Bound::Unbounded, } - ])), + ]))), covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, diff --git a/src/optimizer/rule/implementation/dql/table_scan.rs b/src/optimizer/rule/implementation/dql/table_scan.rs index e15e488a..bfb0f75c 
100644 --- a/src/optimizer/rule/implementation/dql/table_scan.rs +++ b/src/optimizer/rule/implementation/dql/table_scan.rs @@ -18,7 +18,7 @@ use crate::optimizer::core::rule::{BestPhysicalOption, ImplementationRule, Match use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::storage::Transaction; -use crate::types::index::IndexType; +use crate::types::index::{IndexLookup, IndexType}; use std::sync::LazyLock; static TABLE_SCAN_PATTERN: LazyLock = LazyLock::new(|| Pattern { @@ -81,23 +81,21 @@ impl ImplementationRule for IndexScanImplementation { ) -> Result<(), DatabaseError> { if let Operator::TableScan(scan_op) = op { for index_info in scan_op.index_infos.iter() { - if index_info.range.is_none() { + let Some(IndexLookup::Static(range)) = &index_info.lookup else { continue; - } + }; let mut cost = None; - if let Some(range) = &index_info.range { - if let Some(mut row_count) = - loader.collect_count(&scan_op.table_name, index_info.meta.id, range)? + if let Some(mut row_count) = + loader.collect_count(&scan_op.table_name, index_info.meta.id, range)? + { + if index_info.covered_deserializers.is_none() + && !matches!(index_info.meta.ty, IndexType::PrimaryKey { .. }) { - if index_info.covered_deserializers.is_none() - && !matches!(index_info.meta.ty, IndexType::PrimaryKey { .. 
}) - { - // need to return table query(non-covering index) - row_count *= 2; - } - cost = Some(row_count); + // need to return table query(non-covering index) + row_count *= 2; } + cost = Some(row_count); } if let Some(row_count) = cost { diff --git a/src/optimizer/rule/normalization/agg_elimination.rs b/src/optimizer/rule/normalization/agg_elimination.rs index b6354420..d9c297f5 100644 --- a/src/optimizer/rule/normalization/agg_elimination.rs +++ b/src/optimizer/rule/normalization/agg_elimination.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::expression::ScalarExpression; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::optimizer::plan_utils::{only_child_mut, replace_with_only_child}; use crate::planner::operator::limit::LimitOperator; use crate::planner::operator::sort::SortField; @@ -25,7 +25,11 @@ use crate::planner::{Childrens, LogicalPlan}; pub struct EliminateRedundantSort; impl NormalizationRule for EliminateRedundantSort { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let (sort_fields, topk_limit) = match &plan.operator { Operator::Sort(sort_op) => (sort_op.sort_fields.clone(), None), Operator::TopK(topk_op) => ( @@ -185,7 +189,11 @@ pub(crate) fn distinct_sort_fields(groupby_exprs: &[ScalarExpression]) -> Vec Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let Operator::Aggregate(op) = &plan.operator else { return Ok(false); }; @@ -219,14 +227,17 @@ impl NormalizationRule for UseStreamDistinct { } } -pub(crate) fn apply_annotated_post_rules(plan: &mut LogicalPlan) -> Result { +pub(crate) fn apply_annotated_post_rules( + plan: &mut LogicalPlan, + ctx: &mut NormalizationContext, +) -> Result { let mut changed = false; - if EliminateRedundantSort.apply(plan)? 
{ + if EliminateRedundantSort.apply(plan, ctx)? { plan.reset_output_schema_cache_recursive(); changed = true; } - if UseStreamDistinct.apply(plan)? { + if UseStreamDistinct.apply(plan, ctx)? { changed = true; } @@ -340,7 +351,7 @@ mod tests { use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; use crate::expression::ScalarExpression; - use crate::optimizer::core::rule::NormalizationRule; + use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::sort::{SortField, SortOperator}; @@ -348,7 +359,7 @@ mod tests { use crate::planner::operator::top_k::TopKOperator; use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::planner::{Childrens, LogicalPlan}; - use crate::types::index::{IndexInfo, IndexMeta, IndexType}; + use crate::types::index::{IndexInfo, IndexLookup, IndexMeta, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; use std::collections::BTreeMap; @@ -421,7 +432,7 @@ mod tests { let index_info = IndexInfo { meta, sort_option: sort_option.clone(), - range: None, + lookup: None, covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, @@ -457,7 +468,7 @@ mod tests { ty: IndexType::PrimaryKey { is_multiple: false }, }), sort_option: sort_option.clone(), - range: None, + lookup: None, covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, @@ -494,7 +505,7 @@ mod tests { let mut plan = build_plan(vec![sort_field.clone()], vec![sort_field], 0); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan)?); + assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); assert!(matches!(plan.operator, Operator::Filter(_))); Ok(()) } @@ -510,7 +521,7 @@ mod tests { }); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan)?); + 
assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); match plan.operator { Operator::Limit(limit_op) => { assert_eq!(limit_op.limit, Some(10)); @@ -529,7 +540,7 @@ mod tests { super::mark_sort_preserving_indexes(&mut plan, &[c2]); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan)?); + assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); Ok(()) } @@ -553,7 +564,7 @@ mod tests { }); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan)?); + assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); assert!(matches!(plan.operator, Operator::Limit(_))); Ok(()) } @@ -650,7 +661,7 @@ mod tests { )); let rule = UseStreamDistinct; - assert!(rule.apply(&mut plan)?); + assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); assert!(matches!( plan.physical_option, Some(PhysicalOption { @@ -669,7 +680,7 @@ mod tests { super::mark_sort_preserving_indexes(&mut plan, &[c2]); let rule = EliminateRedundantSort; - assert!(!rule.apply(&mut plan)?); + assert!(!rule.apply(&mut plan, &mut NormalizationContext::new())?); assert!(matches!(plan.operator, Operator::Sort(_))); Ok(()) } @@ -683,10 +694,10 @@ mod tests { false, ); let (mut index_info, _) = build_index_info(vec![sort_field.clone()], 0); - index_info.range = Some(Range::Scope { + index_info.lookup = Some(IndexLookup::Static(Range::Scope { min: Bound::Unbounded, max: Bound::Unbounded, - }); + })); let mut columns = BTreeMap::new(); columns.insert(0, column); @@ -734,7 +745,7 @@ mod tests { }; super::mark_sort_preserving_indexes(&mut plan, &sort_fields); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan)?); + assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); assert!(matches!(plan.operator, Operator::Filter(_))); let table_plan = plan.childrens.pop_only(); diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index 186eee86..d61365c3 100644 --- 
a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -17,7 +17,7 @@ use crate::errors::DatabaseError; use crate::expression::agg::AggKind; use crate::expression::visitor::Visitor; use crate::expression::{HasCountStar, ScalarExpression}; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::optimizer::rule::normalization::{remap_expr_positions, remap_exprs_positions}; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; @@ -658,7 +658,11 @@ impl ColumnPruning { } impl NormalizationRule for ColumnPruning { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let arena = Bump::new(); let outcome = Self::_apply(HashSet::<&ColumnSummary>::new(), true, plan, &arena)?; Ok(outcome.changed) diff --git a/src/optimizer/rule/normalization/combine_operators.rs b/src/optimizer/rule/normalization/combine_operators.rs index a548145e..d4a179ba 100644 --- a/src/optimizer/rule/normalization/combine_operators.rs +++ b/src/optimizer/rule/normalization/combine_operators.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::expression::{AliasType, BinaryOperator, ScalarExpression}; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::optimizer::plan_utils::{only_child_mut, replace_with_only_child}; use crate::optimizer::rule::normalization::{is_subset_exprs, strip_alias}; use crate::planner::operator::filter::FilterOperator; @@ -92,7 +92,11 @@ fn groupby_exprs_match( pub struct CollapseProject; impl NormalizationRule for CollapseProject { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let 
Operator::Project(parent_op) = &mut plan.operator else { return Ok(false); }; @@ -125,7 +129,11 @@ impl NormalizationRule for CollapseProject { pub struct CombineFilter; impl NormalizationRule for CombineFilter { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let parent_filter = match mem::replace(&mut plan.operator, Operator::Dummy) { Operator::Filter(op) => op, operator => { @@ -185,7 +193,11 @@ impl NormalizationRule for CombineFilter { pub struct CollapseGroupByAgg; impl NormalizationRule for CollapseGroupByAgg { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let can_collapse = { let LogicalPlan { operator, @@ -222,7 +234,7 @@ mod tests { use crate::catalog::{ColumnCatalog, ColumnRef}; use crate::errors::DatabaseError; use crate::expression::{BinaryOperator, ScalarExpression}; - use crate::optimizer::core::rule::NormalizationRule; + use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::combine_operators::{ @@ -328,7 +340,7 @@ mod tests { Childrens::Only(Box::new(child)), ); - assert!(CollapseProject.apply(&mut plan)?); + assert!(CollapseProject.apply(&mut plan, &mut NormalizationContext::new())?); let Operator::Project(op) = &plan.operator else { unreachable!("expected project"); @@ -414,7 +426,7 @@ mod tests { ); let mut plan = AggregateOperator::build(child, vec![], vec![column_expr("c2", 0)], true); - assert!(CollapseGroupByAgg.apply(&mut plan)?); + assert!(CollapseGroupByAgg.apply(&mut plan, &mut NormalizationContext::new())?); let Operator::Aggregate(op) = &plan.operator else { unreachable!("expected aggregate"); }; diff --git 
a/src/optimizer/rule/normalization/compilation_in_advance.rs b/src/optimizer/rule/normalization/compilation_in_advance.rs index cb6b0aa4..eb1d9547 100644 --- a/src/optimizer/rule/normalization/compilation_in_advance.rs +++ b/src/optimizer/rule/normalization/compilation_in_advance.rs @@ -15,7 +15,7 @@ use crate::errors::DatabaseError; use crate::expression::visitor_mut::VisitorMut; use crate::expression::BindEvaluator; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; @@ -138,7 +138,11 @@ impl EvaluatorBind { } impl NormalizationRule for EvaluatorBind { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { Self::_apply(plan)?; Ok(true) } diff --git a/src/optimizer/rule/normalization/min_max_top_k.rs b/src/optimizer/rule/normalization/min_max_top_k.rs index 048a39cd..418866ee 100644 --- a/src/optimizer/rule/normalization/min_max_top_k.rs +++ b/src/optimizer/rule/normalization/min_max_top_k.rs @@ -15,7 +15,7 @@ use crate::errors::DatabaseError; use crate::expression::agg::AggKind; use crate::expression::ScalarExpression; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::optimizer::plan_utils::{only_child, wrap_child_with}; use crate::planner::operator::sort::SortField; use crate::planner::operator::top_k::TopKOperator; @@ -25,7 +25,11 @@ use crate::planner::LogicalPlan; pub struct MinMaxToTopK; impl NormalizationRule for MinMaxToTopK { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let Operator::Aggregate(op) = &plan.operator else { return Ok(false); }; @@ -81,7 
+85,7 @@ mod tests { use super::MinMaxToTopK; use crate::binder::test::build_t1_table; use crate::errors::DatabaseError; - use crate::optimizer::core::rule::NormalizationRule; + use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::planner::operator::Operator; use crate::planner::Childrens; @@ -113,7 +117,7 @@ mod tests { let mut plan = table_state.plan("select min(c1) from t1")?; let agg_plan = find_aggregate_mut(&mut plan); - assert!(MinMaxToTopK.apply(agg_plan)?); + assert!(MinMaxToTopK.apply(agg_plan, &mut NormalizationContext::new())?); let agg_plan = find_aggregate(&plan); let Operator::Aggregate(op) = &agg_plan.operator else { @@ -150,7 +154,7 @@ mod tests { let mut plan = table_state.plan("select max(c2) from t1")?; let agg_plan = find_aggregate_mut(&mut plan); - assert!(MinMaxToTopK.apply(agg_plan)?); + assert!(MinMaxToTopK.apply(agg_plan, &mut NormalizationContext::new())?); let agg_plan = find_aggregate(&plan); let child = match agg_plan.childrens.as_ref() { diff --git a/src/optimizer/rule/normalization/mod.rs b/src/optimizer/rule/normalization/mod.rs index 6f52d8ad..60c1063d 100644 --- a/src/optimizer/rule/normalization/mod.rs +++ b/src/optimizer/rule/normalization/mod.rs @@ -15,7 +15,7 @@ use crate::errors::DatabaseError; use crate::expression::visitor_mut::{walk_mut_expr, VisitorMut}; use crate::expression::{AliasType, ScalarExpression}; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::optimizer::rule::normalization::column_pruning::ColumnPruning; use crate::optimizer::rule::normalization::combine_operators::{ CollapseGroupByAgg, CollapseProject, CombineFilter, @@ -39,6 +39,7 @@ mod column_pruning; mod combine_operators; mod compilation_in_advance; mod min_max_top_k; +mod parameterized_index; mod pushdown_limit; mod pushdown_predicates; mod simplification; @@ -47,6 +48,7 @@ pub(crate) use agg_elimination::{ 
apply_annotated_post_rules, apply_scan_order_hint, OrderHintKind, ScanOrderHint, }; pub(crate) use compilation_in_advance::evaluator_bind_current; +pub(crate) use parameterized_index::ParameterizeMarkApply; pub(crate) use simplification::constant_calculation_current; #[derive(Debug, Copy, Clone)] @@ -72,6 +74,7 @@ pub enum NormalizationRuleImpl { EvaluatorBind, MinMaxToTopK, TopK, + ParameterizeMarkApply, } #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -88,6 +91,7 @@ pub enum NormalizationRuleRootTag { Filter, Join, Limit, + MarkApply, Project, SortLike, } @@ -98,7 +102,8 @@ impl NormalizationRuleRootTag { pub fn from_operator(operator: &Operator) -> Option { match operator { Operator::Aggregate(_) => Some(Self::Aggregate), - Operator::ScalarApply(_) | Operator::MarkApply(_) => Some(Self::Any), + Operator::MarkApply(_) => Some(Self::MarkApply), + Operator::ScalarApply(_) => Some(Self::Any), Operator::Filter(_) => Some(Self::Filter), Operator::Join(_) => Some(Self::Join), Operator::Limit(_) => Some(Self::Limit), @@ -171,30 +176,38 @@ impl NormalizationRuleImpl { NormalizationRuleImpl::ConstantCalculation => NormalizationRuleRootTag::Any, NormalizationRuleImpl::EvaluatorBind => NormalizationRuleRootTag::Any, NormalizationRuleImpl::MinMaxToTopK => NormalizationRuleRootTag::Aggregate, + NormalizationRuleImpl::ParameterizeMarkApply => NormalizationRuleRootTag::MarkApply, } } } impl NormalizationRule for NormalizationRuleImpl { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + ctx: &mut NormalizationContext, + ) -> Result { match self { - NormalizationRuleImpl::ColumnPruning => ColumnPruning.apply(plan), - NormalizationRuleImpl::CollapseProject => CollapseProject.apply(plan), - NormalizationRuleImpl::CollapseGroupByAgg => CollapseGroupByAgg.apply(plan), - NormalizationRuleImpl::CombineFilter => CombineFilter.apply(plan), - NormalizationRuleImpl::LimitProjectTranspose => LimitProjectTranspose.apply(plan), - 
NormalizationRuleImpl::PushLimitThroughJoin => PushLimitThroughJoin.apply(plan), - NormalizationRuleImpl::PushLimitIntoTableScan => PushLimitIntoScan.apply(plan), - NormalizationRuleImpl::PushPredicateThroughJoin => PushPredicateThroughJoin.apply(plan), + NormalizationRuleImpl::ColumnPruning => ColumnPruning.apply(plan, ctx), + NormalizationRuleImpl::CollapseProject => CollapseProject.apply(plan, ctx), + NormalizationRuleImpl::CollapseGroupByAgg => CollapseGroupByAgg.apply(plan, ctx), + NormalizationRuleImpl::CombineFilter => CombineFilter.apply(plan, ctx), + NormalizationRuleImpl::LimitProjectTranspose => LimitProjectTranspose.apply(plan, ctx), + NormalizationRuleImpl::PushLimitThroughJoin => PushLimitThroughJoin.apply(plan, ctx), + NormalizationRuleImpl::PushLimitIntoTableScan => PushLimitIntoScan.apply(plan, ctx), + NormalizationRuleImpl::PushPredicateThroughJoin => { + PushPredicateThroughJoin.apply(plan, ctx) + } NormalizationRuleImpl::PushJoinPredicateIntoScan => { - PushJoinPredicateIntoScan.apply(plan) + PushJoinPredicateIntoScan.apply(plan, ctx) } - NormalizationRuleImpl::SimplifyFilter => SimplifyFilter.apply(plan), - NormalizationRuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.apply(plan), - NormalizationRuleImpl::ConstantCalculation => ConstantCalculation.apply(plan), - NormalizationRuleImpl::EvaluatorBind => EvaluatorBind.apply(plan), - NormalizationRuleImpl::MinMaxToTopK => MinMaxToTopK.apply(plan), - NormalizationRuleImpl::TopK => TopK.apply(plan), + NormalizationRuleImpl::SimplifyFilter => SimplifyFilter.apply(plan, ctx), + NormalizationRuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.apply(plan, ctx), + NormalizationRuleImpl::ConstantCalculation => ConstantCalculation.apply(plan, ctx), + NormalizationRuleImpl::EvaluatorBind => EvaluatorBind.apply(plan, ctx), + NormalizationRuleImpl::MinMaxToTopK => MinMaxToTopK.apply(plan, ctx), + NormalizationRuleImpl::TopK => TopK.apply(plan, ctx), + NormalizationRuleImpl::ParameterizeMarkApply 
=> ParameterizeMarkApply.apply(plan, ctx), } } } diff --git a/src/optimizer/rule/normalization/parameterized_index.rs b/src/optimizer/rule/normalization/parameterized_index.rs new file mode 100644 index 00000000..03cef4ea --- /dev/null +++ b/src/optimizer/rule/normalization/parameterized_index.rs @@ -0,0 +1,193 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::catalog::ColumnRef; +use crate::errors::DatabaseError; +use crate::expression::{BinaryOperator, ScalarExpression}; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use crate::planner::operator::mark_apply::{MarkApplyKind, ParameterizedMarkProbe}; +use crate::planner::operator::table_scan::TableScanOperator; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl}; +use crate::planner::{Childrens, LogicalPlan}; +use crate::types::index::{IndexLookup, IndexType}; +use crate::types::tuple::Schema; + +pub(crate) struct ParameterizeMarkApply; + +impl NormalizationRule for ParameterizeMarkApply { + fn apply( + &self, + plan: &mut LogicalPlan, + ctx: &mut NormalizationContext, + ) -> Result { + let (op, new_probe) = match (&mut plan.operator, plan.childrens.as_mut()) { + (Operator::MarkApply(op), Childrens::Twins { left, right }) => { + let new_probe = find_parameterized_probe( + op.kind.clone(), + op.predicates(), + left.output_schema().as_ref(), + right.output_schema().as_ref(), + ) + .and_then(|(right_column, 
left_expr)| { + parameterize_right_subtree(right, &right_column, ctx) + .map(|param| ParameterizedMarkProbe::new(param, left_expr)) + }); + (op, new_probe) + } + _ => return Ok(false), + }; + + let changed = op.parameterized_probe().cloned() != new_probe; + op.set_parameterized_probe(new_probe); + Ok(changed) + } +} + +fn find_parameterized_probe( + kind: MarkApplyKind, + predicates: &[ScalarExpression], + left_schema: &Schema, + right_schema: &Schema, +) -> Option<(ColumnRef, ScalarExpression)> { + match kind { + MarkApplyKind::Exists => predicates.iter().find_map(|predicate| { + extract_parameterized_probe(predicate, left_schema, right_schema) + }), + MarkApplyKind::In => predicates.first().and_then(|predicate| { + extract_parameterized_probe(predicate, left_schema, right_schema) + }), + } +} + +fn extract_parameterized_probe( + predicate: &ScalarExpression, + left_schema: &Schema, + right_schema: &Schema, +) -> Option<(ColumnRef, ScalarExpression)> { + match predicate.unpack_alias_ref() { + ScalarExpression::Binary { + op: BinaryOperator::Eq, + left_expr, + right_expr, + .. 
+ } => extract_parameterized_probe_side(left_expr, right_expr, left_schema, right_schema) + .or_else(|| { + extract_parameterized_probe_side(right_expr, left_expr, left_schema, right_schema) + }), + _ => None, + } +} + +fn extract_parameterized_probe_side( + right_expr: &ScalarExpression, + left_expr: &ScalarExpression, + left_schema: &Schema, + right_schema: &Schema, +) -> Option<(ColumnRef, ScalarExpression)> { + let (right_column, _) = right_expr.unpack_alias_ref().unpack_bound_col(false)?; + + if !schema_contains_column(right_schema, &right_column) { + return None; + } + if !left_expr.all_referenced_columns(true, |candidate| { + schema_contains_column(left_schema, candidate) + }) { + return None; + } + if left_expr.any_referenced_column(true, |candidate| { + schema_contains_column(right_schema, candidate) + }) { + return None; + } + + Some((right_column, left_expr.clone())) +} + +fn parameterize_right_subtree( + plan: &mut LogicalPlan, + right_column: &ColumnRef, + ctx: &mut NormalizationContext, +) -> Option { + if matches!(plan.operator, Operator::TableScan(_)) { + let (param, index_info) = { + let Operator::TableScan(scan_op) = &mut plan.operator else { + unreachable!(); + }; + let target_index = pick_parameterized_index_position(scan_op, right_column)?; + let param = ctx.alloc_runtime_param(); + scan_op.index_infos[target_index].lookup = Some(IndexLookup::Probe(param)); + (param, scan_op.index_infos[target_index].clone()) + }; + let sort_option = index_info.sort_option.clone(); + plan.physical_option = Some(PhysicalOption::new( + PlanImpl::IndexScan(Box::new(index_info)), + sort_option, + )); + return Some(param); + } + + let passthrough = matches!( + plan.operator, + Operator::Filter(_) + | Operator::Project(_) + | Operator::Limit(_) + | Operator::Sort(_) + | Operator::TopK(_) + ); + + if !passthrough { + return None; + } + + match plan.childrens.as_mut() { + Childrens::Only(child) => parameterize_right_subtree(child, right_column, ctx), + _ => None, + } 
+} + +fn pick_parameterized_index_position( + scan_op: &TableScanOperator, + right_column: &ColumnRef, +) -> Option { + let column_id = right_column.id()?; + let table_name = right_column.table_name()?; + + if &scan_op.table_name != table_name { + return None; + } + + scan_op + .index_infos + .iter() + .enumerate() + .filter(|(_, index_info)| { + index_info.meta.table_name == *table_name + && index_info.meta.column_ids.first().copied() == Some(column_id) + }) + .min_by_key(|(_, index_info)| index_priority(index_info.meta.ty)) + .map(|(position, _)| position) +} + +fn index_priority(index_type: IndexType) -> usize { + match index_type { + IndexType::PrimaryKey { .. } => 0, + IndexType::Unique => 1, + IndexType::Composite => 2, + IndexType::Normal => 3, + } +} + +fn schema_contains_column(schema: &Schema, column: &ColumnRef) -> bool { + schema.iter().any(|candidate| candidate.same_column(column)) +} diff --git a/src/optimizer/rule/normalization/pushdown_limit.rs b/src/optimizer/rule/normalization/pushdown_limit.rs index bbdb0c8d..16af362c 100644 --- a/src/optimizer/rule/normalization/pushdown_limit.rs +++ b/src/optimizer/rule/normalization/pushdown_limit.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use crate::errors::DatabaseError; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::optimizer::plan_utils::{only_child_mut, replace_with_only_child, wrap_child_with}; use crate::planner::operator::join::JoinType; use crate::planner::operator::Operator; @@ -22,7 +22,11 @@ use crate::planner::LogicalPlan; pub struct LimitProjectTranspose; impl NormalizationRule for LimitProjectTranspose { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let operator = std::mem::replace(&mut plan.operator, Operator::Dummy); let limit_op = match operator { @@ -63,7 +67,11 @@ impl NormalizationRule for LimitProjectTranspose { pub struct PushLimitThroughJoin; impl NormalizationRule for PushLimitThroughJoin { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let limit_op = match &plan.operator { Operator::Limit(op) => op.clone(), _ => return Ok(false), @@ -93,7 +101,11 @@ impl NormalizationRule for PushLimitThroughJoin { pub struct PushLimitIntoScan; impl NormalizationRule for PushLimitIntoScan { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let (offset, limit) = match &plan.operator { Operator::Limit(limit_op) => (limit_op.offset, limit_op.limit), _ => return Ok(false), diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index 2d552794..cdde59e3 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -17,7 +17,7 @@ use crate::errors::DatabaseError; use crate::expression::range_detacher::{Range, RangeDetacher}; use 
crate::expression::visitor_mut::{PositionShift, VisitorMut}; use crate::expression::{BinaryOperator, ScalarExpression}; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::optimizer::plan_utils::{ left_child, replace_with_only_child, right_child, wrap_child_with, }; @@ -25,7 +25,7 @@ use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::join::{JoinCondition, JoinType}; use crate::planner::operator::{Operator, SortOption}; use crate::planner::{Childrens, LogicalPlan, SchemaOutput}; -use crate::types::index::{IndexInfo, IndexMetaRef, IndexType}; +use crate::types::index::{IndexInfo, IndexLookup, IndexMetaRef, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; use itertools::Itertools; @@ -84,7 +84,11 @@ fn plan_output_columns(plan: &LogicalPlan) -> Vec { pub struct PushPredicateThroughJoin; impl NormalizationRule for PushPredicateThroughJoin { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let mut applied = false; let parent_replacement = { @@ -197,7 +201,11 @@ impl NormalizationRule for PushPredicateThroughJoin { pub struct PushPredicateIntoScan; impl NormalizationRule for PushPredicateIntoScan { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let LogicalPlan { operator, childrens, @@ -218,7 +226,7 @@ impl NormalizationRule for PushPredicateIntoScan { let mut changed = false; for IndexInfo { meta, - range, + lookup, covered_deserializers, cover_mapping, sort_option, @@ -226,7 +234,7 @@ impl NormalizationRule for PushPredicateIntoScan { stream_distinct_hint: _, } in &mut scan_op.index_infos { - if range.is_some() { + if lookup.is_some() { continue; } let SortOption::OrderBy { @@ -235,18 +243,20 @@ impl 
NormalizationRule for PushPredicateIntoScan { else { return Err(DatabaseError::InvalidIndex); }; - *range = match meta.ty { + *lookup = match meta.ty { IndexType::PrimaryKey { is_multiple: false } | IndexType::Unique | IndexType::Normal => { RangeDetacher::new(meta.table_name.as_ref(), &meta.column_ids[0]) .detach(&filter_op.predicate)? + .map(IndexLookup::Static) } IndexType::PrimaryKey { is_multiple: true } | IndexType::Composite => { Self::composite_range(filter_op, meta, ignore_prefix_len)? + .map(IndexLookup::Static) } }; - if range.is_none() { + if lookup.is_none() { continue; } changed = true; @@ -351,7 +361,11 @@ impl PushPredicateIntoScan { pub struct PushJoinPredicateIntoScan; impl NormalizationRule for PushJoinPredicateIntoScan { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let (join_type, filter_expr) = { let Operator::Join(join_op) = &mut plan.operator else { return Ok(false); @@ -476,7 +490,7 @@ mod tests { use crate::planner::operator::{Operator, SortOption}; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::rocksdb::RocksTransaction; - use crate::types::index::{IndexInfo, IndexMeta, IndexType}; + use crate::types::index::{IndexInfo, IndexLookup, IndexMeta, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; use std::collections::{BTreeMap, Bound}; @@ -521,7 +535,10 @@ mod tests { max: Bound::Unbounded, }; - assert_eq!(op.index_infos[0].range, Some(mock_range)); + assert_eq!( + op.index_infos[0].lookup, + Some(IndexLookup::Static(mock_range)) + ); } else { unreachable!("Should be a filter operator") } @@ -599,7 +616,7 @@ mod tests { fields: vec![], ignore_prefix_len: 0, }, - range: None, + lookup: None, covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, @@ -611,7 +628,7 @@ mod tests { fields: vec![], ignore_prefix_len: 0, }, - range: None, + lookup: None, 
covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, diff --git a/src/optimizer/rule/normalization/simplification.rs b/src/optimizer/rule/normalization/simplification.rs index fe8ad75e..1a36b9ae 100644 --- a/src/optimizer/rule/normalization/simplification.rs +++ b/src/optimizer/rule/normalization/simplification.rs @@ -15,7 +15,7 @@ use crate::errors::DatabaseError; use crate::expression::simplify::{ConstantCalculator, Simplify}; use crate::expression::visitor_mut::VisitorMut; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; @@ -79,7 +79,11 @@ impl ConstantCalculation { } impl NormalizationRule for ConstantCalculation { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { Self::_apply(plan)?; Ok(true) } @@ -103,7 +107,11 @@ fn has_aggregate_descendant(plan: &LogicalPlan) -> bool { } impl NormalizationRule for SimplifyFilter { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { if let Operator::Filter(filter_op) = &mut plan.operator { if filter_op.is_optimized { return Ok(false); diff --git a/src/optimizer/rule/normalization/top_k.rs b/src/optimizer/rule/normalization/top_k.rs index 193ec959..b0d16d77 100644 --- a/src/optimizer/rule/normalization/top_k.rs +++ b/src/optimizer/rule/normalization/top_k.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use crate::errors::DatabaseError; -use crate::optimizer::core::rule::NormalizationRule; +use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; use crate::optimizer::plan_utils::{only_child_mut, replace_with_only_child}; use crate::planner::operator::top_k::TopKOperator; use crate::planner::operator::Operator; @@ -22,7 +22,11 @@ use crate::planner::LogicalPlan; pub struct TopK; impl NormalizationRule for TopK { - fn apply(&self, plan: &mut LogicalPlan) -> Result { + fn apply( + &self, + plan: &mut LogicalPlan, + _ctx: &mut NormalizationContext, + ) -> Result { let (offset, limit) = match &plan.operator { Operator::Limit(op) => match op.limit { Some(limit) => (op.offset, limit), diff --git a/src/planner/operator/mark_apply.rs b/src/planner/operator/mark_apply.rs index 0b60452b..661a57b0 100644 --- a/src/planner/operator/mark_apply.rs +++ b/src/planner/operator/mark_apply.rs @@ -16,6 +16,7 @@ use super::Operator; use crate::catalog::ColumnRef; use crate::expression::ScalarExpression; use crate::planner::{Childrens, LogicalPlan}; +use crate::types::index::RuntimeParam; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; @@ -26,11 +27,32 @@ pub enum MarkApplyKind { In, } +#[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] +pub struct ParameterizedMarkProbe { + param: RuntimeParam, + left_expr: ScalarExpression, +} + +impl ParameterizedMarkProbe { + pub fn new(param: RuntimeParam, left_expr: ScalarExpression) -> Self { + Self { param, left_expr } + } + + pub fn param(&self) -> RuntimeParam { + self.param + } + + pub fn left_expr(&self) -> &ScalarExpression { + &self.left_expr + } +} + #[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] pub struct MarkApplyOperator { pub kind: MarkApplyKind, predicates: Vec, output_column: ColumnRef, + parameterized_probe: Option, } impl MarkApplyOperator { @@ -39,6 +61,7 @@ impl MarkApplyOperator { kind: MarkApplyKind::Exists, predicates, 
output_column, + parameterized_probe: None, } } @@ -62,6 +85,7 @@ impl MarkApplyOperator { kind: MarkApplyKind::In, predicates, output_column, + parameterized_probe: None, } } @@ -91,6 +115,14 @@ impl MarkApplyOperator { pub fn output_column(&self) -> &ColumnRef { &self.output_column } + + pub fn parameterized_probe(&self) -> Option<&ParameterizedMarkProbe> { + self.parameterized_probe.as_ref() + } + + pub fn set_parameterized_probe(&mut self, probe: Option) { + self.parameterized_probe = probe; + } } impl fmt::Display for MarkApplyOperator { diff --git a/src/planner/operator/table_scan.rs b/src/planner/operator/table_scan.rs index 2536421f..c31709a4 100644 --- a/src/planner/operator/table_scan.rs +++ b/src/planner/operator/table_scan.rs @@ -80,7 +80,7 @@ impl TableScanOperator { fields: sort_fields, ignore_prefix_len: 0, }, - range: None, + lookup: None, covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 4e0d2a3b..b3c3736e 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -49,7 +49,6 @@ use std::path::Path; use std::sync::Arc; pub type KeyValueRef<'a> = (&'a [u8], &'a [u8]); -use std::vec::IntoIter; use ulid::Generator; pub(crate) type StatisticsMetaCache = SharedLruCache<(TableName, IndexId), Option>; @@ -159,18 +158,21 @@ pub trait Transaction: Sized { } #[allow(clippy::too_many_arguments)] - fn read_by_index<'a>( + fn read_by_index<'a, I>( &'a self, table_cache: &'a TableCache, table_name: TableName, (offset_option, limit_option): Bounds, mut columns: BTreeMap, index_meta: IndexMetaRef, - ranges: Vec, + ranges: I, with_pk: bool, covered_deserializers: Option>, cover_mapping_indices: Option>, - ) -> Result, DatabaseError> { + ) -> Result, DatabaseError> + where + I: IntoIterator, + { debug_assert!(columns.keys().all_unique()); let table = self .table(table_cache, table_name.clone())? 
@@ -1661,13 +1663,13 @@ impl<'a, T: Transaction + 'a> Iter for TupleIter<'a, T> { } } -pub struct IndexIter<'a, T: Transaction> { +pub struct IndexIter<'a, T: Transaction, I: IntoIterator> { offset: usize, limit: Option, params: IndexImplParams<'a, T>, inner: IndexImplEnum, // for buffering data - ranges: IntoIter, + ranges: I::IntoIter, state: IndexIterState<'a, T>, encode_min_buffer: Bytes, encode_max_buffer: Bytes, @@ -1679,7 +1681,7 @@ pub enum IndexIterState<'a, T: Transaction + 'a> { Over, } -impl<'a, T: Transaction + 'a> IndexIter<'a, T> { +impl<'a, T: Transaction + 'a, I: IntoIterator> IndexIter<'a, T, I> { fn offset_move(offset: &mut usize) -> bool { if *offset > 0 { offset.sub_assign(1); @@ -1698,7 +1700,7 @@ impl<'a, T: Transaction + 'a> IndexIter<'a, T> { } /// expression -> index value -> tuple -impl Iter for IndexIter<'_, T> { +impl> Iter for IndexIter<'_, T, I> { fn next_tuple_into(&mut self, tuple: &mut Tuple) -> Result { if matches!(self.limit, Some(0)) { self.state = IndexIterState::Over; @@ -2215,7 +2217,7 @@ mod test { transaction: &'a RocksTransaction<'a>, table_cache: &'a Arc, index_column_id: ColumnId, - ) -> Result>, DatabaseError> { + ) -> Result, Vec>, DatabaseError> { transaction.read_by_index( table_cache, "t1".to_string().into(), diff --git a/src/storage/rocksdb.rs b/src/storage/rocksdb.rs index 9d5f9fb4..a5809367 100644 --- a/src/storage/rocksdb.rs +++ b/src/storage/rocksdb.rs @@ -868,7 +868,7 @@ mod test { .columns() .map(|column| column.datatype().serializable()) .collect_vec(); - let mut iter = IndexIter { + let mut iter: IndexIter<'_, _, Vec> = IndexIter { offset: 0, limit: None, params: IndexImplParams { diff --git a/src/types/index.rs b/src/types/index.rs index cc27f33e..bd6eca53 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -21,12 +21,14 @@ use crate::types::serialize::TupleValueSerializableImpl; use crate::types::value::DataValue; use crate::types::{ColumnId, LogicalType}; use 
kite_sql_serde_macros::ReferenceSerialization; +use std::collections::Bound; use std::fmt; use std::fmt::Formatter; use std::sync::Arc; pub type IndexId = u32; pub type IndexMetaRef = Arc; +pub type RuntimeParam = usize; pub const INDEX_ID_LEN: usize = 4; @@ -38,11 +40,26 @@ pub enum IndexType { Composite, } +#[derive(Debug, Clone, Eq, PartialEq, Hash, ReferenceSerialization)] +pub enum RuntimeIndexProbe { + Eq(DataValue), + Scope { + min: Bound, + max: Bound, + }, +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash, ReferenceSerialization)] +pub enum IndexLookup { + Static(Range), + Probe(RuntimeParam), +} + #[derive(Debug, Clone, Eq, PartialEq, Hash, ReferenceSerialization)] pub struct IndexInfo { pub(crate) meta: IndexMetaRef, pub(crate) sort_option: SortOption, - pub(crate) range: Option, + pub(crate) lookup: Option, pub(crate) covered_deserializers: Option>, pub(crate) cover_mapping: Option>, pub(crate) sort_elimination_hint: Option, @@ -100,8 +117,11 @@ impl fmt::Display for IndexInfo { write!(f, "{}", self.meta)?; write!(f, " => ")?; - if let Some(range) = &self.range { - write!(f, "{range}")?; + if let Some(lookup) = &self.lookup { + match lookup { + IndexLookup::Static(range) => write!(f, "{range}")?, + IndexLookup::Probe(param) => write!(f, "Probe ${param}")?, + } } else { write!(f, "EMPTY")?; } diff --git a/tests/slt/subquery.slt b/tests/slt/subquery.slt index 40d8057b..f73e8953 100644 --- a/tests/slt/subquery.slt +++ b/tests/slt/subquery.slt @@ -75,6 +75,18 @@ select * from t1 where a not in (select 1) and b = 3 ---- 2 3 3 +query I rowsort +select id from t1 where a in (select 1) or b = 3; +---- +0 +2 +3 + +query I rowsort +select id from t1 where not (a in (select 1) or b = 3); +---- +1 + statement ok drop table t1; @@ -132,6 +144,75 @@ drop table in_corr_null_outer; statement ok drop table in_corr_null_inner; +statement ok +create table in_param_null_outer(id int primary key, a int); + +statement ok +create table in_param_null_inner(id int primary key, v 
int); + +statement ok +create index in_param_null_inner_v_index on in_param_null_inner(v); + +statement ok +insert into in_param_null_outer values (0, null), (1, 1), (2, 2); + +statement ok +insert into in_param_null_inner values (0, 2), (1, null); + +query I rowsort +select id from in_param_null_outer where a in (select v from in_param_null_inner); +---- +2 + +query I rowsort +select id from in_param_null_outer where a not in (select v from in_param_null_inner); +---- + +statement ok +drop table in_param_null_outer; + +statement ok +drop table in_param_null_inner; + +statement ok +create table exists_param_outer(id int primary key, a int); + +statement ok +create table exists_param_inner(id int primary key, v int); + +statement ok +create index exists_param_inner_v_index on exists_param_inner(v); + +statement ok +insert into exists_param_outer values (0, null), (1, 1), (2, 2), (3, 3); + +statement ok +insert into exists_param_inner values (0, 2), (1, null); + +query I rowsort +select id from exists_param_outer +where exists ( + select 1 from exists_param_inner where exists_param_inner.v = exists_param_outer.a +); +---- +2 + +query I rowsort +select id from exists_param_outer +where not exists ( + select 1 from exists_param_inner where exists_param_inner.v = exists_param_outer.a +); +---- +0 +1 +3 + +statement ok +drop table exists_param_outer; + +statement ok +drop table exists_param_inner; + # https://github.com/KipData/KiteSQL/issues/169 statement ok create table t2(id int primary key, a int not null, b int not null); @@ -190,6 +271,17 @@ insert into users values (1, 18), (2, 30), (3, 40); statement ok insert into orders values (1, 1, 100), (2, 1, 200), (3, 2, 300); +statement ok +create index orders_user_id_index on orders(user_id); + +query T +explain select id from users +where exists ( + select 1 from orders where orders.user_id = users.id +); +---- +Projection [users.id] [Project => (Sort Option: Follow)] Filter _temp_table_0_.true, Is Having: false [Filter 
=> (Sort Option: Follow)] MarkExistsApply TableScan users -> [id] [SeqScan => (Sort Option: None)] TableScan orders -> [user_id] [IndexScan By orders_user_id_index => Probe $0 => (Sort Option: OrderBy: (orders.user_id Asc Nulls Last) ignore_prefix_len: 0)] + query I rowsort select id from users where exists ( @@ -341,6 +433,49 @@ drop table scalar_outer_err; statement ok drop table scalar_inner_err; +statement ok +create table exists_extra_outer(id int primary key, a int, b int); + +statement ok +create table exists_extra_inner(id int primary key, v int, flag int); + +statement ok +create index exists_extra_inner_v_index on exists_extra_inner(v); + +statement ok +insert into exists_extra_outer values (0, 1, 1), (1, 1, 2), (2, 2, null), (3, 3, 1); + +statement ok +insert into exists_extra_inner values (0, 1, 1), (1, 1, null), (2, 2, 1); + +query I rowsort +select id from exists_extra_outer +where exists ( + select 1 from exists_extra_inner + where exists_extra_inner.v = exists_extra_outer.a + and exists_extra_inner.flag = exists_extra_outer.b +); +---- +0 + +query I rowsort +select id from exists_extra_outer +where not exists ( + select 1 from exists_extra_inner + where exists_extra_inner.v = exists_extra_outer.a + and exists_extra_inner.flag = exists_extra_outer.b +); +---- +1 +2 +3 + +statement ok +drop table exists_extra_outer; + +statement ok +drop table exists_extra_inner; + statement error select count(*) from users where exists ( From bacb318a892f7fc8ad078d5c423bf9b1694d42d2 Mon Sep 17 00:00:00 2001 From: kould Date: Sat, 4 Apr 2026 22:31:01 +0800 Subject: [PATCH 07/10] refactor: simplify parameterized index scan runtime probe flow --- src/db.rs | 30 +-- src/execution/dql/index_scan.rs | 44 +--- src/execution/dql/mark_apply.rs | 193 ++++++++++-------- src/execution/mod.rs | 30 ++- src/optimizer/core/rule.rs | 34 +-- src/optimizer/heuristic/optimizer.rs | 59 ++---- .../rule/normalization/agg_elimination.rs | 39 ++-- .../rule/normalization/column_pruning.rs | 8 
+- .../rule/normalization/combine_operators.rs | 26 +-- .../normalization/compilation_in_advance.rs | 8 +- .../rule/normalization/min_max_top_k.rs | 14 +- src/optimizer/rule/normalization/mod.rs | 42 ++-- .../rule/normalization/parameterized_index.rs | 39 ++-- .../rule/normalization/pushdown_limit.rs | 20 +- .../rule/normalization/pushdown_predicates.rs | 20 +- .../rule/normalization/simplification.rs | 14 +- src/optimizer/rule/normalization/top_k.rs | 8 +- src/planner/operator/mark_apply.rs | 27 +-- src/storage/mod.rs | 193 ++++++++++++------ src/storage/rocksdb.rs | 11 +- src/types/index.rs | 5 +- 21 files changed, 369 insertions(+), 495 deletions(-) diff --git a/src/db.rs b/src/db.rs index fa7bdfb0..778f7d78 100644 --- a/src/db.rs +++ b/src/db.rs @@ -463,12 +463,12 @@ impl State { &self.view_cache } - fn build_plan_and_runtime_params>( + fn build_plan>( &self, stmt: &Statement, params: A, transaction: &::TransactionType<'_>, - ) -> Result<(LogicalPlan, usize), DatabaseError> { + ) -> Result { let mut binder = Binder::new( BinderContext::new( self.table_cache(), @@ -491,7 +491,6 @@ impl State { let source_plan = binder.bind(stmt)?; let mut optimizer = self.optimizer_pipeline.instantiate(source_plan); optimizer.optimize(Some(&transaction.meta_loader(self.meta_cache())))?; - let runtime_param_count = optimizer.runtime_param_count(); let mut best_plan = optimizer.into_plan(); if let Operator::Analyze(op) = &mut best_plan.operator { @@ -500,7 +499,7 @@ impl State { } } - Ok((best_plan, runtime_param_count)) + Ok(best_plan) } fn execute<'a, 'txn, A: AsRef<[(&'static str, DataValue)]>>( @@ -512,11 +511,9 @@ impl State { where S: 'txn, { - let (mut plan, runtime_param_count) = - self.build_plan_and_runtime_params(stmt, params, transaction)?; + let mut plan = self.build_plan(stmt, params, transaction)?; let schema = plan.output_schema().clone(); let mut arena = ExecArena::default(); - arena.init_runtime_params(runtime_param_count); let root = build_write( &mut arena, 
plan, @@ -1165,10 +1162,7 @@ pub(crate) mod test { None, ); let source_plan = binder.bind(&stmt)?; - let (best_plan, _) = - kite_sql - .state - .build_plan_and_runtime_params(&stmt, [], &transaction)?; + let best_plan = kite_sql.state.build_plan(&stmt, [], &transaction)?; let join_plan = match source_plan.operator { Operator::Project(_) => source_plan.childrens.pop_only(), @@ -1252,10 +1246,7 @@ pub(crate) mod test { None, ); let source_plan = binder.bind(&stmt)?; - let (best_plan, _) = - kite_sql - .state - .build_plan_and_runtime_params(&stmt, [], &transaction)?; + let best_plan = kite_sql.state.build_plan(&stmt, [], &transaction)?; let join_plan = match source_plan.operator { Operator::Project(_) => source_plan.childrens.pop_only(), @@ -1353,10 +1344,7 @@ pub(crate) mod test { "SELECT o.x, t.y FROM onecolumn o INNER JOIN twocolumn t ON (o.x=t.x AND t.y=53)", )?; let transaction = kite_sql.storage.transaction()?; - let (best_plan, _) = - kite_sql - .state - .build_plan_and_runtime_params(&stmt, [], &transaction)?; + let best_plan = kite_sql.state.build_plan(&stmt, [], &transaction)?; let join_plan = match best_plan.operator { Operator::Project(_) => best_plan.childrens.pop_only(), Operator::Join(_) => best_plan, @@ -1556,7 +1544,7 @@ pub(crate) mod test { "unexpected explain plan: {explain_plan}" ); assert!( - explain_plan.contains(&format!("IndexScan By {index_name} => Probe $0")), + explain_plan.contains(&format!("IndexScan By {index_name} => Probe")), "unexpected explain plan: {explain_plan}" ); Ok(()) @@ -1727,7 +1715,7 @@ pub(crate) mod test { "unexpected explain plan: {explain_plan}" ); assert!( - explain_plan.contains("IndexScan By exists_inner_v_index => Probe $0"), + explain_plan.contains("IndexScan By exists_inner_v_index => Probe"), "unexpected explain plan: {explain_plan}" ); Ok(()) diff --git a/src/execution/dql/index_scan.rs b/src/execution/dql/index_scan.rs index 5955be17..87adc236 100644 --- a/src/execution/dql/index_scan.rs +++ 
b/src/execution/dql/index_scan.rs @@ -16,27 +16,9 @@ use crate::errors::DatabaseError; use crate::execution::{ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor}; use crate::expression::range_detacher::Range; use crate::planner::operator::table_scan::TableScanOperator; -use crate::storage::{IndexIter, Iter, Transaction}; +use crate::storage::{IndexIter, IndexRanges, Iter, Transaction}; use crate::types::index::{IndexLookup, IndexMetaRef, RuntimeIndexProbe}; use crate::types::serialize::TupleValueSerializableImpl; -use std::array; -use std::vec; - -enum IndexLookupRanges { - One(array::IntoIter), - Many(vec::IntoIter), -} - -impl Iterator for IndexLookupRanges { - type Item = Range; - - fn next(&mut self) -> Option { - match self { - IndexLookupRanges::One(iter) => iter.next(), - IndexLookupRanges::Many(iter) => iter.next(), - } - } -} pub(crate) struct IndexScan<'a, T: Transaction + 'a> { op: Option, @@ -44,7 +26,7 @@ pub(crate) struct IndexScan<'a, T: Transaction + 'a> { lookup: Option, covered_deserializers: Option>, cover_mapping: Option>, - iter: Option>, + iter: Option>, } impl<'a, T: Transaction + 'a> @@ -111,23 +93,13 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for IndexScan<'a, T> { } impl<'a, T: Transaction + 'a> IndexScan<'a, T> { - fn ranges_from_lookup(lookup: IndexLookup, arena: &ExecArena<'a, T>) -> IndexLookupRanges { + fn ranges_from_lookup(lookup: IndexLookup, arena: &mut ExecArena<'a, T>) -> IndexRanges { match lookup { - IndexLookup::Static(Range::SortedRanges(ranges)) => { - IndexLookupRanges::Many(ranges.into_iter()) - } - IndexLookup::Static(range) => IndexLookupRanges::One([range].into_iter()), - IndexLookup::Probe(param) => match arena.runtime_param(param) { - RuntimeIndexProbe::Eq(value) => { - IndexLookupRanges::One([Range::Eq(value.clone())].into_iter()) - } - RuntimeIndexProbe::Scope { min, max } => IndexLookupRanges::One( - [Range::Scope { - min: min.clone(), - max: max.clone(), - }] - .into_iter(), - ), + 
IndexLookup::Static(Range::SortedRanges(ranges)) => ranges.into(), + IndexLookup::Static(range) => range.into(), + IndexLookup::Probe => match arena.pop_runtime_probe() { + RuntimeIndexProbe::Eq(value) => Range::Eq(value).into(), + RuntimeIndexProbe::Scope { min, max } => Range::Scope { min, max }.into(), }, } } diff --git a/src/execution/dql/mark_apply.rs b/src/execution/dql/mark_apply.rs index 28e1f4d9..2e127938 100644 --- a/src/execution/dql/mark_apply.rs +++ b/src/execution/dql/mark_apply.rs @@ -84,41 +84,54 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for MarkApply { } impl MarkApply { - fn build_right_input<'a, T: Transaction + 'a>( + fn runtime_probe_for(&self, param_value: Option) -> Option { + self.op.parameterized_probe()?; + + match param_value { + Some(value) => Some(RuntimeIndexProbe::Eq(value)), + None if matches!(self.op.kind, MarkApplyKind::In) => Some(RuntimeIndexProbe::Scope { + min: std::collections::Bound::Unbounded, + max: std::collections::Bound::Unbounded, + }), + None => None, + } + } + + fn with_right_input<'a, T: Transaction + 'a, R>( &self, arena: &mut ExecArena<'a, T>, param_value: Option, - ) -> ExecId { - if let Some(probe) = self.op.parameterized_probe() { - let runtime_probe = match param_value { - Some(value) => Some(RuntimeIndexProbe::Eq(value)), - None if matches!(self.op.kind, MarkApplyKind::In) => { - Some(RuntimeIndexProbe::Scope { - min: std::collections::Bound::Unbounded, - max: std::collections::Bound::Unbounded, - }) - } - None => None, - }; - if let Some(runtime_probe) = runtime_probe { - arena.set_runtime_param(probe.param(), runtime_probe); - } + f: impl FnOnce(&mut ExecArena<'a, T>, ExecId) -> Result, + ) -> Result { + let runtime_probe = self.runtime_probe_for(param_value); + let depth_before = arena.runtime_probe_depth(); + if let Some(runtime_probe) = runtime_probe { + arena.push_runtime_probe(runtime_probe); } let cache = (arena.table_cache(), arena.view_cache(), arena.meta_cache()); let transaction = 
arena.transaction_mut() as *mut T; - // Fixme: Executor reset - build_read(arena, self.right_input_plan.clone(), cache, transaction) + let result = { + let right_input = build_read(arena, self.right_input_plan.clone(), cache, transaction); + f(arena, right_input) + }; + + let depth_after = arena.runtime_probe_depth(); + debug_assert!( + depth_after == depth_before || depth_after == depth_before + 1, + "parameterized right input should consume at most one runtime probe" + ); + if depth_after > depth_before { + let _ = arena.pop_runtime_probe(); + } + + result } fn parameterized_probe_value(&self) -> Result, DatabaseError> { self.op .parameterized_probe() - .map(|probe| { - probe - .left_expr() - .eval(Some((&self.left_tuple, self.left_schema.as_ref()))) - }) + .map(|probe| probe.eval(Some((&self.left_tuple, self.left_schema.as_ref())))) .transpose() } @@ -127,62 +140,83 @@ impl MarkApply { arena: &mut ExecArena<'a, T>, ) -> Result { match self.op.kind { - MarkApplyKind::Exists => { - let right_input = self.build_right_input(arena, self.parameterized_probe_value()?); - - while arena.next_tuple(right_input)? { - let right_tuple = arena.result_tuple(); - if self.exists_predicate_matched(&self.left_tuple, right_tuple)? { - return Ok(DataValue::Boolean(true)); + MarkApplyKind::Exists => self.with_right_input( + arena, + self.parameterized_probe_value()?, + |arena, right_input| { + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + if self.exists_predicate_matched(&self.left_tuple, right_tuple)? { + return Ok(DataValue::Boolean(true)); + } } - } - Ok(DataValue::Boolean(false)) - } + Ok(DataValue::Boolean(false)) + }, + ), MarkApplyKind::In => { if let Some(probe_value) = self.parameterized_probe_value()? { if !probe_value.is_null() { - let right_input = self.build_right_input(arena, Some(probe_value)); - while arena.next_tuple(right_input)? 
{ - let right_tuple = arena.result_tuple(); - if self.in_predicate_outcome(&self.left_tuple, right_tuple)? - == InPredicateOutcome::Match - { - return Ok(DataValue::Boolean(true)); - } + if self.with_right_input( + arena, + Some(probe_value), + |arena, right_input| { + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + if self.in_predicate_outcome(&self.left_tuple, right_tuple)? + == InPredicateOutcome::Match + { + return Ok(true); + } + } + + Ok(false) + }, + )? { + return Ok(DataValue::Boolean(true)); } - let right_input = self.build_right_input(arena, Some(DataValue::Null)); - while arena.next_tuple(right_input)? { - let right_tuple = arena.result_tuple(); - if self.in_predicate_outcome(&self.left_tuple, right_tuple)? - == InPredicateOutcome::Null - { - return Ok(DataValue::Null); - } + if self.with_right_input( + arena, + Some(DataValue::Null), + |arena, right_input| { + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + if self.in_predicate_outcome(&self.left_tuple, right_tuple)? + == InPredicateOutcome::Null + { + return Ok(true); + } + } + + Ok(false) + }, + )? { + return Ok(DataValue::Null); } return Ok(DataValue::Boolean(false)); } } - let right_input = self.build_right_input(arena, None); - let mut saw_null = false; + self.with_right_input(arena, None, |arena, right_input| { + let mut saw_null = false; - while arena.next_tuple(right_input)? { - let right_tuple = arena.result_tuple(); - match self.in_predicate_outcome(&self.left_tuple, right_tuple)? { - InPredicateOutcome::Match => return Ok(DataValue::Boolean(true)), - InPredicateOutcome::Null => saw_null = true, - InPredicateOutcome::Continue => {} + while arena.next_tuple(right_input)? { + let right_tuple = arena.result_tuple(); + match self.in_predicate_outcome(&self.left_tuple, right_tuple)? 
{ + InPredicateOutcome::Match => return Ok(DataValue::Boolean(true)), + InPredicateOutcome::Null => saw_null = true, + InPredicateOutcome::Continue => {} + } } - } - if saw_null { - Ok(DataValue::Null) - } else { - Ok(DataValue::Boolean(false)) - } + if saw_null { + Ok(DataValue::Null) + } else { + Ok(DataValue::Boolean(false)) + } + }) } } } @@ -251,7 +285,6 @@ mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; use crate::execution::{execute_input, try_collect, ExecArena}; use crate::expression::{BinaryOperator, ScalarExpression}; - use crate::planner::operator::mark_apply::ParameterizedMarkProbe; use crate::planner::operator::values::ValuesOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; @@ -458,10 +491,7 @@ mod tests { build_marker_column(), vec![probe_predicate, flag_predicate], ); - op.set_parameterized_probe(Some(ParameterizedMarkProbe::new( - 0, - ScalarExpression::column_expr(left_value_column, 0), - ))); + op.set_parameterized_probe(Some(ScalarExpression::column_expr(left_value_column, 0))); let left_schema = left.output_schema().clone(); let predicate_schema = Arc::new( @@ -475,7 +505,6 @@ mod tests { let mut transaction = storage.transaction()?; let mut arena = ExecArena::default(); arena.init_context((&table_cache, &view_cache, &meta_cache), &mut transaction); - arena.init_runtime_params(1); let mut exec = MarkApply { op, @@ -488,8 +517,8 @@ mod tests { assert_eq!(exec.mark_value(&mut arena)?, DataValue::Boolean(true)); assert_eq!( - arena.runtime_param(0), - &RuntimeIndexProbe::Eq(DataValue::Int32(2)) + exec.runtime_probe_for(Some(DataValue::Int32(2))), + Some(RuntimeIndexProbe::Eq(DataValue::Int32(2))) ); Ok(()) @@ -507,10 +536,7 @@ mod tests { let predicate = build_equality_predicate(left_value_column.clone(), 0, right_value_column, 1)?; let mut op = MarkApplyOperator::new_in(build_marker_column(), vec![predicate]); - op.set_parameterized_probe(Some(ParameterizedMarkProbe::new( - 0, 
- ScalarExpression::column_expr(left_value_column, 0), - ))); + op.set_parameterized_probe(Some(ScalarExpression::column_expr(left_value_column, 0))); let left_schema = left.output_schema().clone(); let predicate_schema = Arc::new( @@ -524,7 +550,6 @@ mod tests { let mut transaction = storage.transaction()?; let mut arena = ExecArena::default(); arena.init_context((&table_cache, &view_cache, &meta_cache), &mut transaction); - arena.init_runtime_params(1); let mut exec = MarkApply { op, @@ -537,8 +562,8 @@ mod tests { assert_eq!(exec.mark_value(&mut arena)?, DataValue::Boolean(true)); assert_eq!( - arena.runtime_param(0), - &RuntimeIndexProbe::Eq(DataValue::Int32(2)) + exec.runtime_probe_for(Some(DataValue::Int32(2))), + Some(RuntimeIndexProbe::Eq(DataValue::Int32(2))) ); Ok(()) @@ -556,10 +581,7 @@ mod tests { let predicate = build_equality_predicate(left_value_column.clone(), 0, right_value_column, 1)?; let mut op = MarkApplyOperator::new_in(build_marker_column(), vec![predicate]); - op.set_parameterized_probe(Some(ParameterizedMarkProbe::new( - 0, - ScalarExpression::column_expr(left_value_column, 0), - ))); + op.set_parameterized_probe(Some(ScalarExpression::column_expr(left_value_column, 0))); let left_schema = left.output_schema().clone(); let predicate_schema = Arc::new( @@ -573,7 +595,6 @@ mod tests { let mut transaction = storage.transaction()?; let mut arena = ExecArena::default(); arena.init_context((&table_cache, &view_cache, &meta_cache), &mut transaction); - arena.init_runtime_params(1); let mut exec = MarkApply { op, @@ -586,11 +607,11 @@ mod tests { assert_eq!(exec.mark_value(&mut arena)?, DataValue::Null); assert_eq!( - arena.runtime_param(0), - &RuntimeIndexProbe::Scope { + exec.runtime_probe_for(None), + Some(RuntimeIndexProbe::Scope { min: std::collections::Bound::Unbounded, max: std::collections::Bound::Unbounded, - } + }) ); Ok(()) diff --git a/src/execution/mod.rs b/src/execution/mod.rs index b0669661..ff08ec31 100644 --- 
a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -63,7 +63,6 @@ use crate::planner::LogicalPlan; use crate::storage::{StatisticsMetaCache, TableCache, Transaction, ViewCache}; use crate::types::index::RuntimeIndexProbe; use crate::types::tuple::Tuple; -use crate::types::value::DataValue; pub(crate) type ExecutionCaches<'a> = (&'a TableCache, &'a ViewCache, &'a StatisticsMetaCache); pub(crate) type ExecId = usize; @@ -267,7 +266,7 @@ pub(crate) struct ExecArena<'a, T: Transaction + 'a> { result: ExecResult, cache: Option>, transaction: *mut T, - runtime_params: Vec, + runtime_probe_stack: Vec, } impl<'a, T: Transaction + 'a> Default for ExecArena<'a, T> { @@ -277,7 +276,7 @@ impl<'a, T: Transaction + 'a> Default for ExecArena<'a, T> { result: ExecResult::default(), cache: None, transaction: std::ptr::null_mut(), - runtime_params: Vec::new(), + runtime_probe_stack: Vec::new(), } } } @@ -295,11 +294,6 @@ impl<'a, T: Transaction + 'a> ExecArena<'a, T> { } } - pub(crate) fn init_runtime_params(&mut self, count: usize) { - debug_assert!(self.runtime_params.is_empty() || self.runtime_params.len() == count); - self.runtime_params = vec![RuntimeIndexProbe::Eq(DataValue::Null); count]; - } - pub(crate) fn push(&mut self, node: ExecNode<'a, T>) -> ExecId { let id = self.nodes.len(); self.nodes.push(node); @@ -326,18 +320,18 @@ impl<'a, T: Transaction + 'a> ExecArena<'a, T> { unsafe { &mut *self.transaction } } - pub(crate) fn set_runtime_param(&mut self, param: usize, value: RuntimeIndexProbe) { - debug_assert!(param < self.runtime_params.len()); - *self - .runtime_params - .get_mut(param) - .expect("runtime parameter slot initialized") = value; + pub(crate) fn push_runtime_probe(&mut self, value: RuntimeIndexProbe) { + self.runtime_probe_stack.push(value); + } + + pub(crate) fn pop_runtime_probe(&mut self) -> RuntimeIndexProbe { + self.runtime_probe_stack + .pop() + .expect("runtime probe scope initialized") } - pub(crate) fn runtime_param(&self, param: usize) -> 
&RuntimeIndexProbe { - self.runtime_params - .get(param) - .expect("runtime parameter initialized") + pub(crate) fn runtime_probe_depth(&self) -> usize { + self.runtime_probe_stack.len() } #[inline] diff --git a/src/optimizer/core/rule.rs b/src/optimizer/core/rule.rs index 3f75b708..303cb578 100644 --- a/src/optimizer/core/rule.rs +++ b/src/optimizer/core/rule.rs @@ -27,41 +27,9 @@ pub trait MatchPattern { fn pattern(&self) -> &Pattern; } -pub struct NormalizationContext { - runtime_param_count: usize, -} - -impl NormalizationContext { - pub fn new() -> Self { - Self { - runtime_param_count: 0, - } - } - - pub fn alloc_runtime_param(&mut self) -> usize { - let param = self.runtime_param_count; - self.runtime_param_count += 1; - param - } - - pub fn runtime_param_count(&self) -> usize { - self.runtime_param_count - } -} - -impl Default for NormalizationContext { - fn default() -> Self { - Self::new() - } -} - pub trait NormalizationRule { /// Returns true when the plan tree is modified. - fn apply( - &self, - plan: &mut LogicalPlan, - ctx: &mut NormalizationContext, - ) -> Result; + fn apply(&self, plan: &mut LogicalPlan) -> Result; } fn compare_costs(candidate_cost: Option, best_cost: Option) -> Ordering { diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index 825b22ed..14a8d1d9 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::optimizer::core::rule::{ - BestPhysicalOption, ImplementationRule, MatchPattern, NormalizationContext, NormalizationRule, + BestPhysicalOption, ImplementationRule, MatchPattern, NormalizationRule, }; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::optimizer::heuristic::batch::{ @@ -40,7 +40,6 @@ pub struct HepOptimizer<'a> { after_batches: &'a [HepBatch], implementation_index: &'a ImplementationRuleIndex, plan: LogicalPlan, - runtime_param_count: usize, } impl<'a> 
HepOptimizer<'a> { @@ -55,7 +54,6 @@ impl<'a> HepOptimizer<'a> { after_batches, implementation_index, plan, - runtime_param_count: 0, } } @@ -63,9 +61,7 @@ impl<'a> HepOptimizer<'a> { &mut self, loader: Option<&StatisticMetaLoader<'_, T>>, ) -> Result<(), DatabaseError> { - self.runtime_param_count = 0; - let mut ctx = NormalizationContext::new(); - Self::apply_batches(&mut self.plan, self.before_batches, &mut ctx)?; + Self::apply_batches(&mut self.plan, self.before_batches)?; if let Some(loader) = loader { if self.implementation_index.is_empty().not() { @@ -77,20 +73,14 @@ impl<'a> HepOptimizer<'a> { self.implementation_index, &apply_no_sort_hints, &apply_no_stream_distinct_hints, - &mut ctx, )?; } } - Self::apply_batches(&mut self.plan, self.after_batches, &mut ctx)?; - self.runtime_param_count = ctx.runtime_param_count(); + Self::apply_batches(&mut self.plan, self.after_batches)?; Ok(()) } - pub fn runtime_param_count(&self) -> usize { - self.runtime_param_count - } - pub fn into_plan(self) -> LogicalPlan { self.plan } @@ -105,43 +95,35 @@ impl<'a> HepOptimizer<'a> { } #[inline] - fn apply_batches( - plan: &mut LogicalPlan, - batches: &[HepBatch], - ctx: &mut NormalizationContext, - ) -> Result<(), DatabaseError> { + fn apply_batches(plan: &mut LogicalPlan, batches: &[HepBatch]) -> Result<(), DatabaseError> { for batch in batches { match batch.strategy { HepBatchStrategy::MaxTimes(max_iteration) => { for _ in 0..max_iteration { - if !Self::apply_batch(plan, batch, ctx)? { + if !Self::apply_batch(plan, batch)? { break; } } } - HepBatchStrategy::LoopIfApplied => while Self::apply_batch(plan, batch, ctx)? {}, + HepBatchStrategy::LoopIfApplied => while Self::apply_batch(plan, batch)? 
{}, } } Ok(()) } #[inline] - fn apply_batch( - plan: &mut LogicalPlan, - batch: &HepBatch, - ctx: &mut NormalizationContext, - ) -> Result { + fn apply_batch(plan: &mut LogicalPlan, batch: &HepBatch) -> Result { let mut applied = false; for step in &batch.steps { match step { HepBatchStep::WholeTree(pass) => { - if Self::apply_whole_tree_pass(plan, pass, ctx)? { + if Self::apply_whole_tree_pass(plan, pass)? { plan.reset_output_schema_cache_recursive(); applied = true; } } HepBatchStep::LocalRewrite(rules) => { - if Self::apply_local_rules(plan, rules, ctx)? { + if Self::apply_local_rules(plan, rules)? { applied = true; } } @@ -153,13 +135,12 @@ impl<'a> HepOptimizer<'a> { fn apply_whole_tree_pass( plan: &mut LogicalPlan, pass: &HepWholeTreePass, - ctx: &mut NormalizationContext, ) -> Result { match pass.kind { WholeTreePassKind::ColumnPruning => { let mut applied = false; for rule in &pass.rules { - applied |= rule.apply(plan, ctx)?; + applied |= rule.apply(plan)?; } Ok(applied) } @@ -239,7 +220,6 @@ impl<'a> HepOptimizer<'a> { implementation_index: &ImplementationRuleIndex, inherited_sort_hints: &'plan ScanHintApplier<'plan>, inherited_stream_distinct_hints: &'plan ScanHintApplier<'plan>, - ctx: &mut NormalizationContext, ) -> Result<(), DatabaseError> { if let Operator::TableScan(scan_op) = &mut plan.operator { inherited_sort_hints(scan_op); @@ -282,7 +262,6 @@ impl<'a> HepOptimizer<'a> { implementation_index, child_sort_hints, child_stream_distinct_hints, - ctx, ), Childrens::Twins { left, right } => { Self::annotate_hints_and_physical_options( @@ -291,7 +270,6 @@ impl<'a> HepOptimizer<'a> { implementation_index, child_sort_hints, child_stream_distinct_hints, - ctx, )?; Self::annotate_hints_and_physical_options( right, @@ -299,7 +277,6 @@ impl<'a> HepOptimizer<'a> { implementation_index, child_sort_hints, child_stream_distinct_hints, - ctx, ) } Childrens::None => Ok(()), @@ -308,7 +285,7 @@ impl<'a> HepOptimizer<'a> { })?; } - apply_annotated_post_rules(plan, 
ctx)?; + apply_annotated_post_rules(plan)?; Ok(()) } @@ -385,17 +362,15 @@ impl<'a> HepOptimizer<'a> { fn apply_local_rules( plan: &mut LogicalPlan, rules: &HepLocalRewriteBatch, - ctx: &mut NormalizationContext, ) -> Result { let mut applied_rules = vec![false; rules.len()]; - Self::apply_local_rules_inner(plan, rules, &mut applied_rules, ctx) + Self::apply_local_rules_inner(plan, rules, &mut applied_rules) } fn apply_local_rules_inner( plan: &mut LogicalPlan, rules: &HepLocalRewriteBatch, applied_rules: &mut [bool], - ctx: &mut NormalizationContext, ) -> Result { let mut applied = false; let mut next_rule_idx = 0; @@ -406,7 +381,7 @@ impl<'a> HepOptimizer<'a> { if applied_rules[idx] { continue; } - let applied_rule = rule.apply(plan, ctx)?; + let applied_rule = rule.apply(plan)?; if applied_rule { plan.reset_output_schema_cache_recursive(); applied_rules[idx] = true; @@ -416,14 +391,12 @@ impl<'a> HepOptimizer<'a> { match plan.childrens.as_mut() { Childrens::Only(child) => { - let child_applied = - Self::apply_local_rules_inner(child, rules, applied_rules, ctx)?; + let child_applied = Self::apply_local_rules_inner(child, rules, applied_rules)?; applied |= child_applied; } Childrens::Twins { left, right } => { - let left_applied = Self::apply_local_rules_inner(left, rules, applied_rules, ctx)?; - let right_applied = - Self::apply_local_rules_inner(right, rules, applied_rules, ctx)?; + let left_applied = Self::apply_local_rules_inner(left, rules, applied_rules)?; + let right_applied = Self::apply_local_rules_inner(right, rules, applied_rules)?; applied |= left_applied || right_applied; } Childrens::None => {} diff --git a/src/optimizer/rule/normalization/agg_elimination.rs b/src/optimizer/rule/normalization/agg_elimination.rs index d9c297f5..6c08ac05 100644 --- a/src/optimizer/rule/normalization/agg_elimination.rs +++ b/src/optimizer/rule/normalization/agg_elimination.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::expression::ScalarExpression; 
-use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::plan_utils::{only_child_mut, replace_with_only_child}; use crate::planner::operator::limit::LimitOperator; use crate::planner::operator::sort::SortField; @@ -25,11 +25,7 @@ use crate::planner::{Childrens, LogicalPlan}; pub struct EliminateRedundantSort; impl NormalizationRule for EliminateRedundantSort { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let (sort_fields, topk_limit) = match &plan.operator { Operator::Sort(sort_op) => (sort_op.sort_fields.clone(), None), Operator::TopK(topk_op) => ( @@ -189,11 +185,7 @@ pub(crate) fn distinct_sort_fields(groupby_exprs: &[ScalarExpression]) -> Vec Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let Operator::Aggregate(op) = &plan.operator else { return Ok(false); }; @@ -227,17 +219,14 @@ impl NormalizationRule for UseStreamDistinct { } } -pub(crate) fn apply_annotated_post_rules( - plan: &mut LogicalPlan, - ctx: &mut NormalizationContext, -) -> Result { +pub(crate) fn apply_annotated_post_rules(plan: &mut LogicalPlan) -> Result { let mut changed = false; - if EliminateRedundantSort.apply(plan, ctx)? { + if EliminateRedundantSort.apply(plan)? { plan.reset_output_schema_cache_recursive(); changed = true; } - if UseStreamDistinct.apply(plan, ctx)? { + if UseStreamDistinct.apply(plan)? 
{ changed = true; } @@ -351,7 +340,7 @@ mod tests { use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; use crate::expression::ScalarExpression; - use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; + use crate::optimizer::core::rule::NormalizationRule; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::sort::{SortField, SortOperator}; @@ -505,7 +494,7 @@ mod tests { let mut plan = build_plan(vec![sort_field.clone()], vec![sort_field], 0); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); + assert!(rule.apply(&mut plan)?); assert!(matches!(plan.operator, Operator::Filter(_))); Ok(()) } @@ -521,7 +510,7 @@ mod tests { }); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); + assert!(rule.apply(&mut plan)?); match plan.operator { Operator::Limit(limit_op) => { assert_eq!(limit_op.limit, Some(10)); @@ -540,7 +529,7 @@ mod tests { super::mark_sort_preserving_indexes(&mut plan, &[c2]); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); + assert!(rule.apply(&mut plan)?); Ok(()) } @@ -564,7 +553,7 @@ mod tests { }); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); + assert!(rule.apply(&mut plan)?); assert!(matches!(plan.operator, Operator::Limit(_))); Ok(()) } @@ -661,7 +650,7 @@ mod tests { )); let rule = UseStreamDistinct; - assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); + assert!(rule.apply(&mut plan)?); assert!(matches!( plan.physical_option, Some(PhysicalOption { @@ -680,7 +669,7 @@ mod tests { super::mark_sort_preserving_indexes(&mut plan, &[c2]); let rule = EliminateRedundantSort; - assert!(!rule.apply(&mut plan, &mut NormalizationContext::new())?); + assert!(!rule.apply(&mut plan)?); 
assert!(matches!(plan.operator, Operator::Sort(_))); Ok(()) } @@ -745,7 +734,7 @@ mod tests { }; super::mark_sort_preserving_indexes(&mut plan, &sort_fields); let rule = EliminateRedundantSort; - assert!(rule.apply(&mut plan, &mut NormalizationContext::new())?); + assert!(rule.apply(&mut plan)?); assert!(matches!(plan.operator, Operator::Filter(_))); let table_plan = plan.childrens.pop_only(); diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index d61365c3..186eee86 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -17,7 +17,7 @@ use crate::errors::DatabaseError; use crate::expression::agg::AggKind; use crate::expression::visitor::Visitor; use crate::expression::{HasCountStar, ScalarExpression}; -use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::rule::normalization::{remap_expr_positions, remap_exprs_positions}; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; @@ -658,11 +658,7 @@ impl ColumnPruning { } impl NormalizationRule for ColumnPruning { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let arena = Bump::new(); let outcome = Self::_apply(HashSet::<&ColumnSummary>::new(), true, plan, &arena)?; Ok(outcome.changed) diff --git a/src/optimizer/rule/normalization/combine_operators.rs b/src/optimizer/rule/normalization/combine_operators.rs index d4a179ba..a548145e 100644 --- a/src/optimizer/rule/normalization/combine_operators.rs +++ b/src/optimizer/rule/normalization/combine_operators.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::expression::{AliasType, BinaryOperator, ScalarExpression}; -use crate::optimizer::core::rule::{NormalizationContext, 
NormalizationRule}; +use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::plan_utils::{only_child_mut, replace_with_only_child}; use crate::optimizer::rule::normalization::{is_subset_exprs, strip_alias}; use crate::planner::operator::filter::FilterOperator; @@ -92,11 +92,7 @@ fn groupby_exprs_match( pub struct CollapseProject; impl NormalizationRule for CollapseProject { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let Operator::Project(parent_op) = &mut plan.operator else { return Ok(false); }; @@ -129,11 +125,7 @@ impl NormalizationRule for CollapseProject { pub struct CombineFilter; impl NormalizationRule for CombineFilter { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let parent_filter = match mem::replace(&mut plan.operator, Operator::Dummy) { Operator::Filter(op) => op, operator => { @@ -193,11 +185,7 @@ impl NormalizationRule for CombineFilter { pub struct CollapseGroupByAgg; impl NormalizationRule for CollapseGroupByAgg { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let can_collapse = { let LogicalPlan { operator, @@ -234,7 +222,7 @@ mod tests { use crate::catalog::{ColumnCatalog, ColumnRef}; use crate::errors::DatabaseError; use crate::expression::{BinaryOperator, ScalarExpression}; - use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; + use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::combine_operators::{ @@ -340,7 +328,7 @@ mod tests { Childrens::Only(Box::new(child)), ); - assert!(CollapseProject.apply(&mut plan, &mut 
NormalizationContext::new())?); + assert!(CollapseProject.apply(&mut plan)?); let Operator::Project(op) = &plan.operator else { unreachable!("expected project"); @@ -426,7 +414,7 @@ mod tests { ); let mut plan = AggregateOperator::build(child, vec![], vec![column_expr("c2", 0)], true); - assert!(CollapseGroupByAgg.apply(&mut plan, &mut NormalizationContext::new())?); + assert!(CollapseGroupByAgg.apply(&mut plan)?); let Operator::Aggregate(op) = &plan.operator else { unreachable!("expected aggregate"); }; diff --git a/src/optimizer/rule/normalization/compilation_in_advance.rs b/src/optimizer/rule/normalization/compilation_in_advance.rs index eb1d9547..cb6b0aa4 100644 --- a/src/optimizer/rule/normalization/compilation_in_advance.rs +++ b/src/optimizer/rule/normalization/compilation_in_advance.rs @@ -15,7 +15,7 @@ use crate::errors::DatabaseError; use crate::expression::visitor_mut::VisitorMut; use crate::expression::BindEvaluator; -use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use crate::optimizer::core::rule::NormalizationRule; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; @@ -138,11 +138,7 @@ impl EvaluatorBind { } impl NormalizationRule for EvaluatorBind { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { Self::_apply(plan)?; Ok(true) } diff --git a/src/optimizer/rule/normalization/min_max_top_k.rs b/src/optimizer/rule/normalization/min_max_top_k.rs index 418866ee..048a39cd 100644 --- a/src/optimizer/rule/normalization/min_max_top_k.rs +++ b/src/optimizer/rule/normalization/min_max_top_k.rs @@ -15,7 +15,7 @@ use crate::errors::DatabaseError; use crate::expression::agg::AggKind; use crate::expression::ScalarExpression; -use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use 
crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::plan_utils::{only_child, wrap_child_with}; use crate::planner::operator::sort::SortField; use crate::planner::operator::top_k::TopKOperator; @@ -25,11 +25,7 @@ use crate::planner::LogicalPlan; pub struct MinMaxToTopK; impl NormalizationRule for MinMaxToTopK { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let Operator::Aggregate(op) = &plan.operator else { return Ok(false); }; @@ -85,7 +81,7 @@ mod tests { use super::MinMaxToTopK; use crate::binder::test::build_t1_table; use crate::errors::DatabaseError; - use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; + use crate::optimizer::core::rule::NormalizationRule; use crate::planner::operator::Operator; use crate::planner::Childrens; @@ -117,7 +113,7 @@ mod tests { let mut plan = table_state.plan("select min(c1) from t1")?; let agg_plan = find_aggregate_mut(&mut plan); - assert!(MinMaxToTopK.apply(agg_plan, &mut NormalizationContext::new())?); + assert!(MinMaxToTopK.apply(agg_plan)?); let agg_plan = find_aggregate(&plan); let Operator::Aggregate(op) = &agg_plan.operator else { @@ -154,7 +150,7 @@ mod tests { let mut plan = table_state.plan("select max(c2) from t1")?; let agg_plan = find_aggregate_mut(&mut plan); - assert!(MinMaxToTopK.apply(agg_plan, &mut NormalizationContext::new())?); + assert!(MinMaxToTopK.apply(agg_plan)?); let agg_plan = find_aggregate(&plan); let child = match agg_plan.childrens.as_ref() { diff --git a/src/optimizer/rule/normalization/mod.rs b/src/optimizer/rule/normalization/mod.rs index 60c1063d..4cabfde4 100644 --- a/src/optimizer/rule/normalization/mod.rs +++ b/src/optimizer/rule/normalization/mod.rs @@ -15,7 +15,7 @@ use crate::errors::DatabaseError; use crate::expression::visitor_mut::{walk_mut_expr, VisitorMut}; use crate::expression::{AliasType, ScalarExpression}; -use 
crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::rule::normalization::column_pruning::ColumnPruning; use crate::optimizer::rule::normalization::combine_operators::{ CollapseGroupByAgg, CollapseProject, CombineFilter, @@ -182,32 +182,26 @@ impl NormalizationRuleImpl { } impl NormalizationRule for NormalizationRuleImpl { - fn apply( - &self, - plan: &mut LogicalPlan, - ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { match self { - NormalizationRuleImpl::ColumnPruning => ColumnPruning.apply(plan, ctx), - NormalizationRuleImpl::CollapseProject => CollapseProject.apply(plan, ctx), - NormalizationRuleImpl::CollapseGroupByAgg => CollapseGroupByAgg.apply(plan, ctx), - NormalizationRuleImpl::CombineFilter => CombineFilter.apply(plan, ctx), - NormalizationRuleImpl::LimitProjectTranspose => LimitProjectTranspose.apply(plan, ctx), - NormalizationRuleImpl::PushLimitThroughJoin => PushLimitThroughJoin.apply(plan, ctx), - NormalizationRuleImpl::PushLimitIntoTableScan => PushLimitIntoScan.apply(plan, ctx), - NormalizationRuleImpl::PushPredicateThroughJoin => { - PushPredicateThroughJoin.apply(plan, ctx) - } + NormalizationRuleImpl::ColumnPruning => ColumnPruning.apply(plan), + NormalizationRuleImpl::CollapseProject => CollapseProject.apply(plan), + NormalizationRuleImpl::CollapseGroupByAgg => CollapseGroupByAgg.apply(plan), + NormalizationRuleImpl::CombineFilter => CombineFilter.apply(plan), + NormalizationRuleImpl::LimitProjectTranspose => LimitProjectTranspose.apply(plan), + NormalizationRuleImpl::PushLimitThroughJoin => PushLimitThroughJoin.apply(plan), + NormalizationRuleImpl::PushLimitIntoTableScan => PushLimitIntoScan.apply(plan), + NormalizationRuleImpl::PushPredicateThroughJoin => PushPredicateThroughJoin.apply(plan), NormalizationRuleImpl::PushJoinPredicateIntoScan => { - PushJoinPredicateIntoScan.apply(plan, ctx) + 
PushJoinPredicateIntoScan.apply(plan) } - NormalizationRuleImpl::SimplifyFilter => SimplifyFilter.apply(plan, ctx), - NormalizationRuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.apply(plan, ctx), - NormalizationRuleImpl::ConstantCalculation => ConstantCalculation.apply(plan, ctx), - NormalizationRuleImpl::EvaluatorBind => EvaluatorBind.apply(plan, ctx), - NormalizationRuleImpl::MinMaxToTopK => MinMaxToTopK.apply(plan, ctx), - NormalizationRuleImpl::TopK => TopK.apply(plan, ctx), - NormalizationRuleImpl::ParameterizeMarkApply => ParameterizeMarkApply.apply(plan, ctx), + NormalizationRuleImpl::SimplifyFilter => SimplifyFilter.apply(plan), + NormalizationRuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.apply(plan), + NormalizationRuleImpl::ConstantCalculation => ConstantCalculation.apply(plan), + NormalizationRuleImpl::EvaluatorBind => EvaluatorBind.apply(plan), + NormalizationRuleImpl::MinMaxToTopK => MinMaxToTopK.apply(plan), + NormalizationRuleImpl::TopK => TopK.apply(plan), + NormalizationRuleImpl::ParameterizeMarkApply => ParameterizeMarkApply.apply(plan), } } } diff --git a/src/optimizer/rule/normalization/parameterized_index.rs b/src/optimizer/rule/normalization/parameterized_index.rs index 03cef4ea..0aa114bf 100644 --- a/src/optimizer/rule/normalization/parameterized_index.rs +++ b/src/optimizer/rule/normalization/parameterized_index.rs @@ -15,8 +15,8 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::{BinaryOperator, ScalarExpression}; -use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; -use crate::planner::operator::mark_apply::{MarkApplyKind, ParameterizedMarkProbe}; +use crate::optimizer::core::rule::NormalizationRule; +use crate::planner::operator::mark_apply::MarkApplyKind; use crate::planner::operator::table_scan::TableScanOperator; use crate::planner::operator::{Operator, PhysicalOption, PlanImpl}; use crate::planner::{Childrens, LogicalPlan}; @@ -26,11 +26,7 @@ use 
crate::types::tuple::Schema; pub(crate) struct ParameterizeMarkApply; impl NormalizationRule for ParameterizeMarkApply { - fn apply( - &self, - plan: &mut LogicalPlan, - ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let (op, new_probe) = match (&mut plan.operator, plan.childrens.as_mut()) { (Operator::MarkApply(op), Childrens::Twins { left, right }) => { let new_probe = find_parameterized_probe( @@ -40,8 +36,7 @@ impl NormalizationRule for ParameterizeMarkApply { right.output_schema().as_ref(), ) .and_then(|(right_column, left_expr)| { - parameterize_right_subtree(right, &right_column, ctx) - .map(|param| ParameterizedMarkProbe::new(param, left_expr)) + parameterize_right_subtree(right, &right_column).then_some(left_expr) }); (op, new_probe) } @@ -114,27 +109,25 @@ fn extract_parameterized_probe_side( Some((right_column, left_expr.clone())) } -fn parameterize_right_subtree( - plan: &mut LogicalPlan, - right_column: &ColumnRef, - ctx: &mut NormalizationContext, -) -> Option { +fn parameterize_right_subtree(plan: &mut LogicalPlan, right_column: &ColumnRef) -> bool { if matches!(plan.operator, Operator::TableScan(_)) { - let (param, index_info) = { + let index_info = { let Operator::TableScan(scan_op) = &mut plan.operator else { unreachable!(); }; - let target_index = pick_parameterized_index_position(scan_op, right_column)?; - let param = ctx.alloc_runtime_param(); - scan_op.index_infos[target_index].lookup = Some(IndexLookup::Probe(param)); - (param, scan_op.index_infos[target_index].clone()) + let Some(target_index) = pick_parameterized_index_position(scan_op, right_column) + else { + return false; + }; + scan_op.index_infos[target_index].lookup = Some(IndexLookup::Probe); + scan_op.index_infos[target_index].clone() }; let sort_option = index_info.sort_option.clone(); plan.physical_option = Some(PhysicalOption::new( PlanImpl::IndexScan(Box::new(index_info)), sort_option, )); - return Some(param); + return 
true; } let passthrough = matches!( @@ -147,12 +140,12 @@ fn parameterize_right_subtree( ); if !passthrough { - return None; + return false; } match plan.childrens.as_mut() { - Childrens::Only(child) => parameterize_right_subtree(child, right_column, ctx), - _ => None, + Childrens::Only(child) => parameterize_right_subtree(child, right_column), + _ => false, } } diff --git a/src/optimizer/rule/normalization/pushdown_limit.rs b/src/optimizer/rule/normalization/pushdown_limit.rs index 16af362c..bbdb0c8d 100644 --- a/src/optimizer/rule/normalization/pushdown_limit.rs +++ b/src/optimizer/rule/normalization/pushdown_limit.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::plan_utils::{only_child_mut, replace_with_only_child, wrap_child_with}; use crate::planner::operator::join::JoinType; use crate::planner::operator::Operator; @@ -22,11 +22,7 @@ use crate::planner::LogicalPlan; pub struct LimitProjectTranspose; impl NormalizationRule for LimitProjectTranspose { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let operator = std::mem::replace(&mut plan.operator, Operator::Dummy); let limit_op = match operator { @@ -67,11 +63,7 @@ impl NormalizationRule for LimitProjectTranspose { pub struct PushLimitThroughJoin; impl NormalizationRule for PushLimitThroughJoin { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let limit_op = match &plan.operator { Operator::Limit(op) => op.clone(), _ => return Ok(false), @@ -101,11 +93,7 @@ impl NormalizationRule for PushLimitThroughJoin { pub struct PushLimitIntoScan; impl NormalizationRule for PushLimitIntoScan { - fn apply( - &self, - plan: &mut 
LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let (offset, limit) = match &plan.operator { Operator::Limit(limit_op) => (limit_op.offset, limit_op.limit), _ => return Ok(false), diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index cdde59e3..95cc4bb6 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -17,7 +17,7 @@ use crate::errors::DatabaseError; use crate::expression::range_detacher::{Range, RangeDetacher}; use crate::expression::visitor_mut::{PositionShift, VisitorMut}; use crate::expression::{BinaryOperator, ScalarExpression}; -use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::plan_utils::{ left_child, replace_with_only_child, right_child, wrap_child_with, }; @@ -84,11 +84,7 @@ fn plan_output_columns(plan: &LogicalPlan) -> Vec { pub struct PushPredicateThroughJoin; impl NormalizationRule for PushPredicateThroughJoin { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let mut applied = false; let parent_replacement = { @@ -201,11 +197,7 @@ impl NormalizationRule for PushPredicateThroughJoin { pub struct PushPredicateIntoScan; impl NormalizationRule for PushPredicateIntoScan { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let LogicalPlan { operator, childrens, @@ -361,11 +353,7 @@ impl PushPredicateIntoScan { pub struct PushJoinPredicateIntoScan; impl NormalizationRule for PushJoinPredicateIntoScan { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut 
LogicalPlan) -> Result { let (join_type, filter_expr) = { let Operator::Join(join_op) = &mut plan.operator else { return Ok(false); diff --git a/src/optimizer/rule/normalization/simplification.rs b/src/optimizer/rule/normalization/simplification.rs index 1a36b9ae..fe8ad75e 100644 --- a/src/optimizer/rule/normalization/simplification.rs +++ b/src/optimizer/rule/normalization/simplification.rs @@ -15,7 +15,7 @@ use crate::errors::DatabaseError; use crate::expression::simplify::{ConstantCalculator, Simplify}; use crate::expression::visitor_mut::VisitorMut; -use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use crate::optimizer::core::rule::NormalizationRule; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; @@ -79,11 +79,7 @@ impl ConstantCalculation { } impl NormalizationRule for ConstantCalculation { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { Self::_apply(plan)?; Ok(true) } @@ -107,11 +103,7 @@ fn has_aggregate_descendant(plan: &LogicalPlan) -> bool { } impl NormalizationRule for SimplifyFilter { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { if let Operator::Filter(filter_op) = &mut plan.operator { if filter_op.is_optimized { return Ok(false); diff --git a/src/optimizer/rule/normalization/top_k.rs b/src/optimizer/rule/normalization/top_k.rs index b0d16d77..193ec959 100644 --- a/src/optimizer/rule/normalization/top_k.rs +++ b/src/optimizer/rule/normalization/top_k.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use crate::errors::DatabaseError; -use crate::optimizer::core::rule::{NormalizationContext, NormalizationRule}; +use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::plan_utils::{only_child_mut, replace_with_only_child}; use crate::planner::operator::top_k::TopKOperator; use crate::planner::operator::Operator; @@ -22,11 +22,7 @@ use crate::planner::LogicalPlan; pub struct TopK; impl NormalizationRule for TopK { - fn apply( - &self, - plan: &mut LogicalPlan, - _ctx: &mut NormalizationContext, - ) -> Result { + fn apply(&self, plan: &mut LogicalPlan) -> Result { let (offset, limit) = match &plan.operator { Operator::Limit(op) => match op.limit { Some(limit) => (op.offset, limit), diff --git a/src/planner/operator/mark_apply.rs b/src/planner/operator/mark_apply.rs index 661a57b0..ed379cbf 100644 --- a/src/planner/operator/mark_apply.rs +++ b/src/planner/operator/mark_apply.rs @@ -16,7 +16,6 @@ use super::Operator; use crate::catalog::ColumnRef; use crate::expression::ScalarExpression; use crate::planner::{Childrens, LogicalPlan}; -use crate::types::index::RuntimeParam; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; @@ -27,32 +26,12 @@ pub enum MarkApplyKind { In, } -#[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] -pub struct ParameterizedMarkProbe { - param: RuntimeParam, - left_expr: ScalarExpression, -} - -impl ParameterizedMarkProbe { - pub fn new(param: RuntimeParam, left_expr: ScalarExpression) -> Self { - Self { param, left_expr } - } - - pub fn param(&self) -> RuntimeParam { - self.param - } - - pub fn left_expr(&self) -> &ScalarExpression { - &self.left_expr - } -} - #[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] pub struct MarkApplyOperator { pub kind: MarkApplyKind, predicates: Vec, output_column: ColumnRef, - parameterized_probe: Option, + parameterized_probe: Option, } impl MarkApplyOperator { @@ -116,11 +95,11 @@ impl MarkApplyOperator { 
&self.output_column } - pub fn parameterized_probe(&self) -> Option<&ParameterizedMarkProbe> { + pub fn parameterized_probe(&self) -> Option<&ScalarExpression> { self.parameterized_probe.as_ref() } - pub fn set_parameterized_probe(&mut self, probe: Option) { + pub fn set_parameterized_probe(&mut self, probe: Option) { self.parameterized_probe = probe; } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index b3c3736e..c54a8896 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -142,13 +142,13 @@ pub trait Transaction: Sized { } let deserializers = Self::create_deserializers(&columns, table); let pk_ty = with_pk.then(|| table.primary_keys_type().clone()); + let offset = bounds.0.unwrap_or(0); unsafe { &*self.table_codec() }.with_tuple_bound(&table_name, |min, max| { let iter = self.range(Bound::Included(min), Bound::Included(max))?; Ok(TupleIter { - offset: bounds.0.unwrap_or(0), - limit: bounds.1, + bounds: IterBounds::new(offset, bounds.1), pk_ty, deserializers, total_len: table.columns_len(), @@ -158,20 +158,20 @@ pub trait Transaction: Sized { } #[allow(clippy::too_many_arguments)] - fn read_by_index<'a, I>( + fn read_by_index<'a, R>( &'a self, table_cache: &'a TableCache, table_name: TableName, (offset_option, limit_option): Bounds, mut columns: BTreeMap, index_meta: IndexMetaRef, - ranges: I, + ranges: R, with_pk: bool, covered_deserializers: Option>, cover_mapping_indices: Option>, - ) -> Result, DatabaseError> + ) -> Result, DatabaseError> where - I: IntoIterator, + R: Into, { debug_assert!(columns.keys().all_unique()); let table = self @@ -211,8 +211,7 @@ pub trait Transaction: Sized { }; Ok(IndexIter { - offset, - limit: limit_option, + bounds: IterBounds::new(offset, limit_option), params: IndexImplParams { index_meta, table_name, @@ -223,7 +222,7 @@ pub trait Transaction: Sized { with_pk, }, inner, - ranges: ranges.into_iter(), + ranges: ranges.into(), state: IndexIterState::Init, encode_min_buffer: Bytes::new(), encode_max_buffer: 
Bytes::new(), @@ -1118,20 +1117,18 @@ fn encode_bound_key(buffer: &mut Bytes, key: &[u8], is_upper: bool) { #[inline] fn encode_bound<'a>( - bound: Bound, + bound: &Bound, is_upper: bool, buffer: &'a mut Bytes, params: &IndexImplParams<'_, impl Transaction>, inner: &IndexImplEnum, ) -> Result, DatabaseError> { match bound { - Bound::Included(mut val) => { - val = params.try_cast(val)?; + Bound::Included(val) => { inner.bound_key(params, &val, is_upper, buffer)?; Ok(Bound::Included(buffer.as_slice())) } - Bound::Excluded(mut val) => { - val = params.try_cast(val)?; + Bound::Excluded(val) => { inner.bound_key(params, &val, is_upper, buffer)?; Ok(Bound::Excluded(buffer.as_slice())) } @@ -1235,15 +1232,6 @@ impl IndexImplParams<'_, T> { self.tx.table_codec() } - pub(crate) fn try_cast(&self, mut val: DataValue) -> Result { - let value_ty = self.value_ty(); - - if &val.logical_type() != value_ty { - val = val.cast(value_ty)?; - } - Ok(val) - } - fn get_tuple_by_id_into( &self, tuple_id: &TupleId, @@ -1618,8 +1606,7 @@ fn eq_to_res_scope<'a, T: Transaction + 'a>( } pub struct TupleIter<'a, T: Transaction + 'a> { - offset: usize, - limit: Option, + bounds: IterBounds, pk_ty: Option, deserializers: Vec, total_len: usize, @@ -1628,21 +1615,18 @@ pub struct TupleIter<'a, T: Transaction + 'a> { impl<'a, T: Transaction + 'a> Iter for TupleIter<'a, T> { fn next_tuple_into(&mut self, tuple: &mut Tuple) -> Result { - while self.offset > 0 { + while self.bounds.consume_offset() { if self.iter.try_next()?.is_none() { return Ok(false); } - self.offset -= 1; } #[allow(clippy::never_loop)] while let Some((key, value)) = self.iter.try_next()? { - if let Some(limit) = self.limit.as_mut() { - if *limit == 0 { - return Ok(false); - } - *limit -= 1; + if self.bounds.limit_reached() { + return Ok(false); } + self.bounds.consume_limit(); let tuple_id = if let Some(pk_ty) = &self.pk_ty { Some(TableCodec::decode_tuple_key(key, pk_ty)?) 
} else { @@ -1663,46 +1647,103 @@ impl<'a, T: Transaction + 'a> Iter for TupleIter<'a, T> { } } -pub struct IndexIter<'a, T: Transaction, I: IntoIterator> { - offset: usize, - limit: Option, - params: IndexImplParams<'a, T>, - inner: IndexImplEnum, - // for buffering data - ranges: I::IntoIter, - state: IndexIterState<'a, T>, - encode_min_buffer: Bytes, - encode_max_buffer: Bytes, +enum IndexRangesInner { + One(Range), + Many(Vec), } -pub enum IndexIterState<'a, T: Transaction + 'a> { - Init, - Range(T::IterType<'a>), - Over, +pub struct IndexRanges { + inner: IndexRangesInner, + next_idx: usize, +} + +impl IndexRanges { + fn next(&mut self) -> Option<&Range> { + let range = match &self.inner { + IndexRangesInner::One(range) => (self.next_idx == 0).then_some(range), + IndexRangesInner::Many(ranges) => ranges.get(self.next_idx), + }; + + if range.is_some() { + self.next_idx += 1; + } + + range + } + + pub(crate) fn reset(&mut self) { + self.next_idx = 0; + } +} + +impl From> for IndexRanges { + fn from(value: Vec) -> Self { + Self { + inner: IndexRangesInner::Many(value), + next_idx: 0, + } + } +} + +impl From for IndexRanges { + fn from(value: Range) -> Self { + Self { + inner: IndexRangesInner::One(value), + next_idx: 0, + } + } +} + +#[derive(Debug, Clone, Copy)] +struct IterBounds { + offset: usize, + limit: Option, } -impl<'a, T: Transaction + 'a, I: IntoIterator> IndexIter<'a, T, I> { - fn offset_move(offset: &mut usize) -> bool { - if *offset > 0 { - offset.sub_assign(1); +impl IterBounds { + fn new(offset: usize, limit: Option) -> Self { + Self { offset, limit } + } + fn consume_offset(&mut self) -> bool { + if self.offset > 0 { + self.offset.sub_assign(1); true } else { false } } - fn limit_sub(limit: &mut Option) { - if let Some(num) = limit.as_mut() { + fn limit_reached(&self) -> bool { + matches!(self.limit, Some(0)) + } + + fn consume_limit(&mut self) { + if let Some(num) = self.limit.as_mut() { num.sub_assign(1); } } } -/// expression -> index value -> 
tuple -impl> Iter for IndexIter<'_, T, I> { +pub struct IndexIter<'a, T: Transaction> { + bounds: IterBounds, + params: IndexImplParams<'a, T>, + inner: IndexImplEnum, + ranges: IndexRanges, + state: IndexIterState<'a, T>, + encode_min_buffer: Bytes, + encode_max_buffer: Bytes, +} + +pub enum IndexIterState<'a, T: Transaction + 'a> { + Init, + Range(T::IterType<'a>), + Over, +} + +impl Iter for IndexIter<'_, T> { fn next_tuple_into(&mut self, tuple: &mut Tuple) -> Result { - if matches!(self.limit, Some(0)) { + if self.bounds.limit_reached() { self.state = IndexIterState::Over; return Ok(false); @@ -1753,21 +1794,19 @@ impl> Iter for IndexIter<'_, T, I> }; self.state = IndexIterState::Range(iter); } - Range::Eq(mut val) => { - val = self.params.try_cast(val)?; - + Range::Eq(val) => { match self.inner.eq_to_res( tuple, - &val, + val, &self.params, &mut self.encode_min_buffer, &mut self.encode_max_buffer, )? { IndexResult::Hit => { - if Self::offset_move(&mut self.offset) { + if self.bounds.consume_offset() { continue; } - Self::limit_sub(&mut self.limit); + self.bounds.consume_limit(); return Ok(true); } IndexResult::Miss => return Ok(false), @@ -1781,10 +1820,10 @@ impl> Iter for IndexIter<'_, T, I> } IndexIterState::Range(iter) => { while let Some((key, value)) = iter.try_next()? 
{ - if Self::offset_move(&mut self.offset) { + if self.bounds.consume_offset() { continue; } - Self::limit_sub(&mut self.limit); + self.bounds.consume_limit(); self.inner .index_lookup_into(tuple, key, value, &self.params)?; @@ -1870,6 +1909,32 @@ mod test { ); columns } + + #[test] + fn test_index_ranges_next_and_reset() { + let mut ranges = super::IndexRanges::from(vec![ + Range::Eq(DataValue::Int32(1)), + Range::Eq(DataValue::Int32(2)), + ]); + + assert!(matches!( + ranges.next(), + Some(range) if *range == Range::Eq(DataValue::Int32(1)) + )); + assert!(matches!( + ranges.next(), + Some(range) if *range == Range::Eq(DataValue::Int32(2)) + )); + assert!(ranges.next().is_none()); + + ranges.reset(); + + assert!(matches!( + ranges.next(), + Some(range) if *range == Range::Eq(DataValue::Int32(1)) + )); + } + fn build_tuples() -> Vec { vec![ Tuple::new( @@ -2217,7 +2282,7 @@ mod test { transaction: &'a RocksTransaction<'a>, table_cache: &'a Arc, index_column_id: ColumnId, - ) -> Result, Vec>, DatabaseError> { + ) -> Result>, DatabaseError> { transaction.read_by_index( table_cache, "t1".to_string().into(), diff --git a/src/storage/rocksdb.rs b/src/storage/rocksdb.rs index a5809367..b8377c62 100644 --- a/src/storage/rocksdb.rs +++ b/src/storage/rocksdb.rs @@ -715,8 +715,8 @@ mod test { use crate::expression::range_detacher::Range; use crate::storage::rocksdb::RocksStorage; use crate::storage::{ - IndexImplEnum, IndexImplParams, IndexIter, IndexIterState, PrimaryKeyIndexImpl, Storage, - Transaction, + IndexImplEnum, IndexImplParams, IndexIter, IndexIterState, IterBounds, PrimaryKeyIndexImpl, + Storage, Transaction, }; use crate::types::index::{IndexMeta, IndexType}; use crate::types::tuple::Tuple; @@ -868,9 +868,8 @@ mod test { .columns() .map(|column| column.datatype().serializable()) .collect_vec(); - let mut iter: IndexIter<'_, _, Vec> = IndexIter { - offset: 0, - limit: None, + let mut iter: IndexIter<'_, _> = IndexIter { + bounds: IterBounds::new(0, None), params: 
IndexImplParams { index_meta: Arc::new(IndexMeta { id: 0, @@ -895,7 +894,7 @@ mod test { max: Bound::Included(DataValue::Int32(4)), }, ] - .into_iter(), + .into(), state: IndexIterState::Init, inner: IndexImplEnum::PrimaryKey(PrimaryKeyIndexImpl), encode_min_buffer: Vec::new(), diff --git a/src/types/index.rs b/src/types/index.rs index bd6eca53..1da63242 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -28,7 +28,6 @@ use std::sync::Arc; pub type IndexId = u32; pub type IndexMetaRef = Arc; -pub type RuntimeParam = usize; pub const INDEX_ID_LEN: usize = 4; @@ -52,7 +51,7 @@ pub enum RuntimeIndexProbe { #[derive(Debug, Clone, Eq, PartialEq, Hash, ReferenceSerialization)] pub enum IndexLookup { Static(Range), - Probe(RuntimeParam), + Probe, } #[derive(Debug, Clone, Eq, PartialEq, Hash, ReferenceSerialization)] @@ -120,7 +119,7 @@ impl fmt::Display for IndexInfo { if let Some(lookup) = &self.lookup { match lookup { IndexLookup::Static(range) => write!(f, "{range}")?, - IndexLookup::Probe(param) => write!(f, "Probe ${param}")?, + IndexLookup::Probe => write!(f, "Probe ?")?, } } else { write!(f, "EMPTY")?; From a34775a62e84915a9800375b358e960e2a4157ad Mon Sep 17 00:00:00 2001 From: kould Date: Sun, 5 Apr 2026 01:19:05 +0800 Subject: [PATCH 08/10] test: update SLT explain output for runtime probes --- tests/slt/subquery.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/slt/subquery.slt b/tests/slt/subquery.slt index f73e8953..2974cdb4 100644 --- a/tests/slt/subquery.slt +++ b/tests/slt/subquery.slt @@ -280,7 +280,7 @@ where exists ( select 1 from orders where orders.user_id = users.id ); ---- -Projection [users.id] [Project => (Sort Option: Follow)] Filter _temp_table_0_.true, Is Having: false [Filter => (Sort Option: Follow)] MarkExistsApply TableScan users -> [id] [SeqScan => (Sort Option: None)] TableScan orders -> [user_id] [IndexScan By orders_user_id_index => Probe $0 => (Sort Option: OrderBy: (orders.user_id Asc Nulls Last) 
ignore_prefix_len: 0)] +Projection [users.id] [Project => (Sort Option: Follow)] Filter _temp_table_0_.true, Is Having: false [Filter => (Sort Option: Follow)] MarkExistsApply TableScan users -> [id] [SeqScan => (Sort Option: None)] TableScan orders -> [user_id] [IndexScan By orders_user_id_index => Probe ? => (Sort Option: OrderBy: (orders.user_id Asc Nulls Last) ignore_prefix_len: 0)] query I rowsort select id from users From 7a7538e201f576d20517b905f2af9fd178bf9352 Mon Sep 17 00:00:00 2001 From: kould Date: Sun, 5 Apr 2026 01:24:31 +0800 Subject: [PATCH 09/10] refactor: remove unused index range reset helper --- src/storage/mod.rs | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c54a8896..95c2cbd2 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1670,10 +1670,6 @@ impl IndexRanges { range } - - pub(crate) fn reset(&mut self) { - self.next_idx = 0; - } } impl From> for IndexRanges { @@ -1910,31 +1906,6 @@ mod test { columns } - #[test] - fn test_index_ranges_next_and_reset() { - let mut ranges = super::IndexRanges::from(vec![ - Range::Eq(DataValue::Int32(1)), - Range::Eq(DataValue::Int32(2)), - ]); - - assert!(matches!( - ranges.next(), - Some(range) if *range == Range::Eq(DataValue::Int32(1)) - )); - assert!(matches!( - ranges.next(), - Some(range) if *range == Range::Eq(DataValue::Int32(2)) - )); - assert!(ranges.next().is_none()); - - ranges.reset(); - - assert!(matches!( - ranges.next(), - Some(range) if *range == Range::Eq(DataValue::Int32(1)) - )); - } - fn build_tuples() -> Vec { vec![ Tuple::new( From 6b289e4ef0b1315a950f49963813e74012b5a688 Mon Sep 17 00:00:00 2001 From: kould Date: Sun, 5 Apr 2026 02:12:03 +0800 Subject: [PATCH 10/10] chore: codefmt --- src/binder/select.rs | 222 ++++++++---------- src/db.rs | 7 +- src/execution/ddl/create_index.rs | 6 +- src/execution/dml/analyze.rs | 6 +- src/execution/dml/copy_to_file.rs | 6 +- src/execution/dml/delete.rs | 6 +- 
src/execution/dml/insert.rs | 6 +- src/execution/dml/update.rs | 8 +- src/execution/dql/aggregate/hash_agg.rs | 10 +- src/execution/dql/aggregate/simple_agg.rs | 8 +- .../dql/aggregate/stream_distinct.rs | 9 +- src/execution/dql/except.rs | 6 +- src/execution/dql/filter.rs | 17 +- src/execution/dql/join/hash/full_join.rs | 11 +- src/execution/dql/join/hash/inner_join.rs | 9 +- src/execution/dql/join/hash/left_join.rs | 11 +- src/execution/dql/join/hash/mod.rs | 41 ++-- src/execution/dql/join/hash/right_join.rs | 9 +- src/execution/dql/join/hash_join.rs | 47 +--- src/execution/dql/join/nested_loop_join.rs | 43 +--- src/execution/dql/mark_apply.rs | 57 +---- src/execution/dql/projection.rs | 14 +- src/execution/dql/sort.rs | 39 +-- src/execution/dql/top_k.rs | 65 +---- src/execution/dql/union.rs | 6 +- src/execution/mod.rs | 7 - src/expression/evaluator.rs | 28 +-- src/expression/function/scala.rs | 3 +- src/function/char_length.rs | 3 +- src/function/current_date.rs | 3 +- src/function/current_timestamp.rs | 3 +- src/function/lower.rs | 3 +- src/function/octet_length.rs | 3 +- src/function/upper.rs | 3 +- src/macros/mod.rs | 2 +- src/optimizer/heuristic/optimizer.rs | 19 +- src/planner/mod.rs | 4 + 37 files changed, 226 insertions(+), 524 deletions(-) diff --git a/src/binder/select.rs b/src/binder/select.rs index 56c150ac..e378861e 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -268,34 +268,6 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' Ok(()) } - fn globalize_mark_predicate( - predicate: &mut ScalarExpression, - output_column: &ColumnRef, - left_len: usize, - ) -> Result<(), DatabaseError> { - MarkerPositionGlobalizer { - output_column, - left_len, - } - .visit(predicate) - } - - fn globalize_right_side_exprs<'expr>( - exprs: impl Iterator, - left_len: usize, - right_schema: &Schema, - ) -> Result<(), DatabaseError> { - for expr in exprs { - RightSidePositionGlobalizer { - right_schema, - left_len, - } - 
.visit(expr)?; - } - - Ok(()) - } - fn localize_appended_right_outputs<'expr>( exprs: impl Iterator, appended_outputs: &[AppendedRightOutput], @@ -1314,71 +1286,98 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' let mut predicate = self.bind_expr(predicate)?; if let Some(sub_queries) = self.context.sub_queries_at_now() { - if sub_queries.iter().all(|sub_query| { - matches!( - sub_query, - SubQueryType::ExistsSubQuery { .. } | SubQueryType::InSubQuery { .. } - ) - }) { - let passthrough_exprs = children - .output_schema() - .iter() - .cloned() - .enumerate() - .map(|(position, column)| ScalarExpression::column_expr(column, position)) - .collect(); - for sub_query in sub_queries { - match sub_query { - SubQueryType::ExistsSubQuery { + let mut uses_mark_apply = None; + for sub_query in sub_queries { + match sub_query { + SubQueryType::ExistsSubQuery { + plan, + correlated, + output_column, + } => { + if matches!(uses_mark_apply, Some(false)) { + return Err(DatabaseError::UnsupportedStmt( + "mixed EXISTS/IN with other WHERE subqueries is not supported yet" + .to_string(), + )); + } + uses_mark_apply = Some(true); + let (plan, predicates) = Self::prepare_mark_apply( + &mut predicate, + &output_column, + children.output_schema(), plan, correlated, + false, + Vec::new(), + )?; + children = MarkApplyOperator::build_exists( + children, + plan, output_column, - } => { - let (plan, predicates) = Self::prepare_mark_apply( - &mut predicate, - &output_column, - children.output_schema(), - plan, - correlated, - false, - Vec::new(), - )?; - children = MarkApplyOperator::build_exists( - children, - plan, - output_column, - predicates, - ); + predicates, + ); + } + SubQueryType::InSubQuery { + plan, + correlated, + output_column, + predicate: mut in_predicate, + .. 
+ } => { + if matches!(uses_mark_apply, Some(false)) { + return Err(DatabaseError::UnsupportedStmt( + "mixed EXISTS/IN with other WHERE subqueries is not supported yet" + .to_string(), + )); + } + uses_mark_apply = Some(true); + if correlated { + in_predicate = Self::rewrite_correlated_in_predicate(in_predicate); } - SubQueryType::InSubQuery { + let (plan, predicates) = Self::prepare_mark_apply( + &mut predicate, + &output_column, + children.output_schema(), plan, correlated, - output_column, - predicate: mut in_predicate, - .. - } => { - if correlated { - in_predicate = Self::rewrite_correlated_in_predicate(in_predicate); - } - let (plan, predicates) = Self::prepare_mark_apply( - &mut predicate, - &output_column, - children.output_schema(), - plan, - correlated, - true, - vec![in_predicate], - )?; - children = MarkApplyOperator::build_in( - children, - plan, - output_column, - predicates, - ); + true, + vec![in_predicate], + )?; + children = + MarkApplyOperator::build_in(children, plan, output_column, predicates); + } + SubQueryType::SubQuery { plan, correlated } => { + if matches!(uses_mark_apply, Some(true)) { + return Err(DatabaseError::UnsupportedStmt( + "mixed EXISTS/IN with other WHERE subqueries is not supported yet" + .to_string(), + )); + } + uses_mark_apply = Some(false); + if correlated { + return Err(DatabaseError::UnsupportedStmt( + "correlated scalar subqueries in WHERE are not supported" + .to_string(), + )); } - SubQueryType::SubQuery { .. 
} => unreachable!(), + children = Self::build_join_from_split_scope_predicates( + children, + plan, + JoinType::Inner, + std::iter::once(predicate.clone()), + true, + )?; } } + } + if matches!(uses_mark_apply, Some(true)) { + let passthrough_exprs = children + .output_schema() + .iter() + .cloned() + .enumerate() + .map(|(position, column)| ScalarExpression::column_expr(column, position)) + .collect(); let filter = FilterOperator::build(predicate, children, false); return Ok(LogicalPlan::new( Operator::Project(ProjectOperator { @@ -1387,40 +1386,6 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' Childrens::Only(Box::new(filter)), )); } - if sub_queries.iter().any(|sub_query| { - matches!( - sub_query, - SubQueryType::ExistsSubQuery { .. } | SubQueryType::InSubQuery { .. } - ) - }) { - return Err(DatabaseError::UnsupportedStmt( - "mixed EXISTS/IN with other WHERE subqueries is not supported yet".to_string(), - )); - } - for sub_query in sub_queries { - let (plan, join_ty) = match sub_query { - SubQueryType::SubQuery { plan, correlated } => { - if correlated { - return Err(DatabaseError::UnsupportedStmt( - "correlated scalar subqueries in WHERE are not supported" - .to_string(), - )); - } - (plan, JoinType::Inner) - } - SubQueryType::ExistsSubQuery { .. } | SubQueryType::InSubQuery { .. 
} => { - unreachable!() - } - }; - - children = Self::build_join_from_split_scope_predicates( - children, - plan, - join_ty, - std::iter::once(predicate.clone()), - true, - )?; - } return Ok(children); } Ok(FilterOperator::build(predicate, children, false)) @@ -1483,16 +1448,19 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' mut apply_predicates: Vec, ) -> Result<(LogicalPlan, Vec), DatabaseError> { let left_len = left_schema.len(); - Self::globalize_mark_predicate(predicate, output_column, left_len)?; + MarkerPositionGlobalizer { + output_column, + left_len, + } + .visit(predicate)?; - let (plan, correlated_filters) = if correlated { + let (mut plan, correlated_filters) = if correlated { Self::prepare_correlated_subquery_plan(plan, left_schema, preserve_projection)? } else { (plan, Vec::new()) }; apply_predicates.extend(correlated_filters); - let mut plan = plan; if correlated { let appended_right_outputs = Self::ensure_mark_apply_right_outputs(&mut plan, &apply_predicates); @@ -1504,11 +1472,13 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' } } let right_schema = plan.output_schema().clone(); - Self::globalize_right_side_exprs( - apply_predicates.iter_mut(), - left_len, - right_schema.as_ref(), - )?; + for expr in apply_predicates.iter_mut() { + RightSidePositionGlobalizer { + right_schema: right_schema.as_ref(), + left_len, + } + .visit(expr)?; + } Ok((plan, apply_predicates)) } diff --git a/src/db.rs b/src/db.rs index 778f7d78..66b02649 100644 --- a/src/db.rs +++ b/src/db.rs @@ -489,9 +489,10 @@ impl State { /// Limit(1) /// Project(a,b) let source_plan = binder.bind(stmt)?; - let mut optimizer = self.optimizer_pipeline.instantiate(source_plan); - optimizer.optimize(Some(&transaction.meta_loader(self.meta_cache())))?; - let mut best_plan = optimizer.into_plan(); + let mut best_plan = self + .optimizer_pipeline + .instantiate(source_plan) + 
.find_best(Some(&transaction.meta_loader(self.meta_cache())))?; if let Operator::Analyze(op) = &mut best_plan.operator { if op.histogram_buckets.is_none() { diff --git a/src/execution/ddl/create_index.rs b/src/execution/ddl/create_index.rs index acfed681..004de6f0 100644 --- a/src/execution/ddl/create_index.rs +++ b/src/execution/ddl/create_index.rs @@ -14,9 +14,7 @@ use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor, -}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; use crate::expression::ScalarExpression; use crate::planner::operator::create_index::CreateIndexOperator; use crate::planner::LogicalPlan; @@ -52,7 +50,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for CreateIndex { cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.input = build_read(arena, take_plan(&mut self.input_plan), cache, transaction); + self.input = build_read(arena, self.input_plan.take(), cache, transaction); arena.push(ExecNode::CreateIndex(self)) } } diff --git a/src/execution/dml/analyze.rs b/src/execution/dml/analyze.rs index b96b78eb..280dc8ba 100644 --- a/src/execution/dml/analyze.rs +++ b/src/execution/dml/analyze.rs @@ -15,9 +15,7 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor, -}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; use crate::expression::ScalarExpression; use crate::optimizer::core::histogram::HistogramBuilder; use crate::optimizer::core::statistics_meta::StatisticsMeta; @@ -73,7 +71,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Analyze { ) -> ExecId { self.input = Some(build_read( arena, - take_plan(&mut 
self.input_plan), + self.input_plan.take(), cache, transaction, )); diff --git a/src/execution/dml/copy_to_file.rs b/src/execution/dml/copy_to_file.rs index a7c6cc48..c1d587ab 100644 --- a/src/execution/dml/copy_to_file.rs +++ b/src/execution/dml/copy_to_file.rs @@ -14,9 +14,7 @@ use crate::binder::copy::FileFormat; use crate::errors::DatabaseError; -use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor, -}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ReadExecutor}; use crate::planner::operator::copy_to_file::CopyToFileOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; @@ -47,7 +45,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for CopyToFile { ) -> ExecId { self.input = Some(build_read( arena, - take_plan(&mut self.input_plan), + self.input_plan.take(), cache, transaction, )); diff --git a/src/execution/dml/delete.rs b/src/execution/dml/delete.rs index 56b4aeb5..c0f31059 100644 --- a/src/execution/dml/delete.rs +++ b/src/execution/dml/delete.rs @@ -15,9 +15,7 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor, -}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; use crate::expression::ScalarExpression; use crate::planner::operator::delete::DeleteOperator; use crate::planner::LogicalPlan; @@ -55,7 +53,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Delete { ) -> ExecId { self.input = Some(build_read( arena, - take_plan(&mut self.input_plan), + self.input_plan.take(), cache, transaction, )); diff --git a/src/execution/dml/insert.rs b/src/execution/dml/insert.rs index a39b8bee..ff4a9036 100644 --- a/src/execution/dml/insert.rs +++ b/src/execution/dml/insert.rs @@ -15,9 +15,7 @@ use 
crate::catalog::{ColumnCatalog, TableName}; use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor, -}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; use crate::planner::operator::insert::InsertOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; @@ -86,7 +84,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Insert { ) -> ExecId { self.input = Some(build_read( arena, - take_plan(&mut self.input_plan), + self.input_plan.take(), cache, transaction, )); diff --git a/src/execution/dml/update.rs b/src/execution/dml/update.rs index c31fa17b..d3d4b4b4 100644 --- a/src/execution/dml/update.rs +++ b/src/execution/dml/update.rs @@ -15,9 +15,7 @@ use crate::catalog::{ColumnRef, TableName}; use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; -use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor, -}; +use crate::execution::{build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, WriteExecutor}; use crate::expression::ScalarExpression; use crate::planner::operator::update::UpdateOperator; use crate::planner::LogicalPlan; @@ -67,7 +65,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Update { ) -> ExecId { self.input = Some(build_read( arena, - take_plan(&mut self.input_plan), + self.input_plan.take(), cache, transaction, )); @@ -125,7 +123,7 @@ impl Update { } for (i, column) in self.input_schema.iter().enumerate() { if let Some(expr) = exprs_map.get(&column.id()) { - let value = expr.eval(Some((&tuple, &self.input_schema)))?; + let value = expr.eval(Some(&tuple))?; tuple.values[i] = value; } } diff --git a/src/execution/dql/aggregate/hash_agg.rs b/src/execution/dql/aggregate/hash_agg.rs index f59ea27b..9df8c310 100644 --- 
a/src/execution/dql/aggregate/hash_agg.rs +++ b/src/execution/dql/aggregate/hash_agg.rs @@ -19,7 +19,6 @@ use crate::expression::ScalarExpression; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; -use crate::types::tuple::SchemaRef; use crate::types::value::DataValue; use ahash::{HashMap, HashMapExt}; use itertools::Itertools; @@ -30,7 +29,6 @@ type HashAggOutput = HashMapIntoIter, Vec>>; pub struct HashAggExecutor { agg_calls: Vec, groupby_exprs: Vec, - input_schema: SchemaRef, input: ExecId, output: Option, } @@ -45,18 +43,16 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for HashAggExecutor { groupby_exprs, .. }, - mut input, + input, ): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - let input_schema = input.output_schema().clone(); let input = build_read(arena, input, cache, transaction); arena.push(ExecNode::HashAgg(HashAggExecutor { agg_calls, groupby_exprs, - input_schema, input, output: None, })) @@ -72,7 +68,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for HashAggExecutor { let group_keys = self .groupby_exprs .iter() - .map(|expr| expr.eval(Some((tuple, &self.input_schema)))) + .map(|expr| expr.eval(Some(tuple))) .try_collect()?; let entry = match group_hash_accs.entry(group_keys) { @@ -90,7 +86,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for HashAggExecutor { .to_string(), )); } - let value = args[0].eval(Some((tuple, &self.input_schema)))?; + let value = args[0].eval(Some(tuple))?; acc.update_value(&value)?; } } diff --git a/src/execution/dql/aggregate/simple_agg.rs b/src/execution/dql/aggregate/simple_agg.rs index 185fa78d..39ac775c 100644 --- a/src/execution/dql/aggregate/simple_agg.rs +++ b/src/execution/dql/aggregate/simple_agg.rs @@ -19,10 +19,8 @@ use crate::expression::ScalarExpression; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; use 
crate::storage::Transaction; -use crate::types::tuple::SchemaRef; pub struct SimpleAggExecutor { agg_calls: Vec, - input_schema: SchemaRef, input: ExecId, returned: bool, } @@ -31,16 +29,14 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for SimpleAggExecutor { type Input = (AggregateOperator, LogicalPlan); fn into_executor( - (AggregateOperator { agg_calls, .. }, mut input): Self::Input, + (AggregateOperator { agg_calls, .. }, input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - let input_schema = input.output_schema().clone(); let input = build_read(arena, input, cache, transaction); arena.push(ExecNode::SimpleAgg(SimpleAggExecutor { agg_calls, - input_schema, input, returned: false, })) @@ -67,7 +63,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for SimpleAggExecutor { )); } - let value = args[0].eval(Some((tuple, &self.input_schema)))?; + let value = args[0].eval(Some(tuple))?; acc.update_value(&value)?; } } diff --git a/src/execution/dql/aggregate/stream_distinct.rs b/src/execution/dql/aggregate/stream_distinct.rs index 133e3826..bd83a635 100644 --- a/src/execution/dql/aggregate/stream_distinct.rs +++ b/src/execution/dql/aggregate/stream_distinct.rs @@ -18,13 +18,12 @@ use crate::expression::ScalarExpression; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; -use crate::types::tuple::{SchemaRef, Tuple}; +use crate::types::tuple::Tuple; use crate::types::value::DataValue; use itertools::Itertools; pub struct StreamDistinctExecutor { groupby_exprs: Vec, - input_schema: SchemaRef, input: ExecId, last_keys: Option>, scratch: Tuple, @@ -34,16 +33,14 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for StreamDistinctExecutor { type Input = (AggregateOperator, LogicalPlan); fn into_executor( - (op, mut input): Self::Input, + (op, input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, 
transaction: *mut T, ) -> ExecId { - let input_schema = input.output_schema().clone(); let input = build_read(arena, input, cache, transaction); arena.push(ExecNode::StreamDistinct(StreamDistinctExecutor { groupby_exprs: op.groupby_exprs, - input_schema, input, last_keys: None, scratch: Tuple::default(), @@ -61,7 +58,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for StreamDistinctExecutor { let group_keys = self .groupby_exprs .iter() - .map(|expr| expr.eval(Some((tuple, &self.input_schema)))) + .map(|expr| expr.eval(Some(tuple))) .try_collect()?; if self.last_keys.as_ref() != Some(&group_keys) { diff --git a/src/execution/dql/except.rs b/src/execution/dql/except.rs index 704fe703..c2dd2e3f 100644 --- a/src/execution/dql/except.rs +++ b/src/execution/dql/except.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, + build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, }; use crate::planner::LogicalPlan; use crate::storage::Transaction; @@ -49,8 +49,8 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Except { cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.left_input = build_read(arena, take_plan(&mut self.left_plan), cache, transaction); - self.right_input = build_read(arena, take_plan(&mut self.right_plan), cache, transaction); + self.left_input = build_read(arena, self.left_plan.take(), cache, transaction); + self.right_input = build_read(arena, self.right_plan.take(), cache, transaction); arena.push(ExecNode::Except(self)) } } diff --git a/src/execution/dql/filter.rs b/src/execution/dql/filter.rs index 2d809a04..c414eff8 100644 --- a/src/execution/dql/filter.rs +++ b/src/execution/dql/filter.rs @@ -18,10 +18,8 @@ use crate::expression::ScalarExpression; use crate::planner::operator::filter::FilterOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; 
-use crate::types::tuple::SchemaRef; pub struct Filter { predicate: ScalarExpression, - input_schema: SchemaRef, input: ExecId, } @@ -29,18 +27,13 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Filter { type Input = (FilterOperator, LogicalPlan); fn into_executor( - (FilterOperator { predicate, .. }, mut input): Self::Input, + (FilterOperator { predicate, .. }, input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - let input_schema = input.output_schema().clone(); let input = build_read(arena, input, cache, transaction); - arena.push(ExecNode::Filter(Filter { - predicate, - input_schema, - input, - })) + arena.push(ExecNode::Filter(Filter { predicate, input })) } fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { @@ -50,11 +43,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Filter { return Ok(()); }; let tuple = arena.result_tuple(); - if self - .predicate - .eval(Some((tuple, &self.input_schema)))? - .is_true()? - { + if self.predicate.eval(Some(tuple))?.is_true()? 
{ arena.resume(); return Ok(()); } diff --git a/src/execution/dql/join/hash/full_join.rs b/src/execution/dql/join/hash/full_join.rs index 93c057e3..561c5883 100644 --- a/src/execution/dql/join/hash/full_join.rs +++ b/src/execution/dql/join/hash/full_join.rs @@ -14,9 +14,10 @@ use crate::errors::DatabaseError; use crate::execution::dql::join::hash::{ - filter, FilterArgs, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, + filter, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, }; use crate::execution::dql::join::hash_join::BuildState; +use crate::expression::ScalarExpression; use crate::types::tuple::{SplitTupleRef, Tuple}; use crate::types::value::DataValue; use fixedbitset::FixedBitSet; @@ -32,7 +33,7 @@ impl JoinProbeState for FullJoinState { &mut self, probe_state: &mut ProbeState, build_state: Option<&mut BuildState>, - filter_args: Option<&FilterArgs>, + filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError> { if probe_state.is_keys_has_null { if probe_state.emitted_unmatched { @@ -64,10 +65,10 @@ impl JoinProbeState for FullJoinState { let (i, Tuple { values, pk }) = &build_state.tuples[probe_state.index]; probe_state.index += 1; - if let Some(filter_args) = filter_args { + if let Some(filter_expr) = filter_expr { let full_values = SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); - if !filter(&full_values, filter_args)? { + if !filter(&full_values, filter_expr)? 
{ probe_state.has_filtered = true; self.bits.set(*i, true); return Ok(Some(Self::full_right_row( @@ -96,7 +97,7 @@ impl JoinProbeState for FullJoinState { fn left_drop_next( &mut self, left_drop_state: &mut LeftDropState, - _filter_args: Option<&FilterArgs>, + _filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError> { let full_schema_len = self.right_schema_len + self.left_schema_len; diff --git a/src/execution/dql/join/hash/inner_join.rs b/src/execution/dql/join/hash/inner_join.rs index e14aadb3..810dc083 100644 --- a/src/execution/dql/join/hash/inner_join.rs +++ b/src/execution/dql/join/hash/inner_join.rs @@ -13,8 +13,9 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::execution::dql::join::hash::{filter, FilterArgs, JoinProbeState, ProbeState}; +use crate::execution::dql::join::hash::{filter, JoinProbeState, ProbeState}; use crate::execution::dql::join::hash_join::BuildState; +use crate::expression::ScalarExpression; use crate::types::tuple::{SplitTupleRef, Tuple}; pub(crate) struct InnerJoinState; @@ -24,7 +25,7 @@ impl JoinProbeState for InnerJoinState { &mut self, probe_state: &mut ProbeState, build_state: Option<&mut BuildState>, - filter_args: Option<&FilterArgs>, + filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError> { if probe_state.is_keys_has_null { probe_state.finished = true; @@ -41,10 +42,10 @@ impl JoinProbeState for InnerJoinState { let (_, Tuple { values, pk }) = &build_state.tuples[probe_state.index]; probe_state.index += 1; - if let Some(filter_args) = filter_args { + if let Some(filter_expr) = filter_expr { let full_values = SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); - if !filter(&full_values, filter_args)? { + if !filter(&full_values, filter_expr)? 
{ continue; } } diff --git a/src/execution/dql/join/hash/left_join.rs b/src/execution/dql/join/hash/left_join.rs index 99594d59..68caff68 100644 --- a/src/execution/dql/join/hash/left_join.rs +++ b/src/execution/dql/join/hash/left_join.rs @@ -14,9 +14,10 @@ use crate::errors::DatabaseError; use crate::execution::dql::join::hash::{ - filter, FilterArgs, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, + filter, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, }; use crate::execution::dql::join::hash_join::BuildState; +use crate::expression::ScalarExpression; use crate::types::tuple::{SplitTupleRef, Tuple}; use crate::types::value::DataValue; use fixedbitset::FixedBitSet; @@ -32,7 +33,7 @@ impl JoinProbeState for LeftJoinState { &mut self, probe_state: &mut ProbeState, build_state: Option<&mut BuildState>, - filter_args: Option<&FilterArgs>, + filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError> { if probe_state.is_keys_has_null { probe_state.finished = true; @@ -48,10 +49,10 @@ impl JoinProbeState for LeftJoinState { let (i, Tuple { values, pk }) = &build_state.tuples[probe_state.index]; probe_state.index += 1; - if let Some(filter_args) = filter_args { + if let Some(filter_expr) = filter_expr { let full_values = SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); - if !filter(&full_values, filter_args)? { + if !filter(&full_values, filter_expr)? 
{ probe_state.has_filtered = true; self.bits.set(*i, true); continue; @@ -76,7 +77,7 @@ impl JoinProbeState for LeftJoinState { fn left_drop_next( &mut self, left_drop_state: &mut LeftDropState, - _filter_args: Option<&FilterArgs>, + _filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError> { let full_schema_len = self.right_schema_len + self.left_schema_len; diff --git a/src/execution/dql/join/hash/mod.rs b/src/execution/dql/join/hash/mod.rs index f139ad64..b5d832fe 100644 --- a/src/execution/dql/join/hash/mod.rs +++ b/src/execution/dql/join/hash/mod.rs @@ -25,15 +25,10 @@ use crate::execution::dql::join::hash::right_join::RightJoinState; use crate::execution::dql::join::hash_join::BuildState; use crate::execution::dql::sort::BumpVec; use crate::expression::ScalarExpression; -use crate::types::tuple::{SchemaRef, Tuple, TupleLike}; +use crate::types::tuple::{Tuple, TupleLike}; use crate::types::value::DataValue; use std::collections::hash_map::IntoIter as HashMapIntoIter; -pub(crate) struct FilterArgs { - pub(crate) full_schema: SchemaRef, - pub(crate) filter_expr: ScalarExpression, -} - pub(crate) struct ProbeState { pub(crate) is_keys_has_null: bool, pub(crate) probe_tuple: Tuple, @@ -59,13 +54,13 @@ pub(crate) trait JoinProbeState { &mut self, probe_state: &mut ProbeState, build_state: Option<&mut BuildState>, - filter_args: Option<&FilterArgs>, + filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError>; fn left_drop_next( &mut self, _left_drop_state: &mut LeftDropState, - _filter_args: Option<&FilterArgs>, + _filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError> { Ok(None) } @@ -83,20 +78,20 @@ impl JoinProbeState for JoinProbeStateImpl { &mut self, probe_state: &mut ProbeState, build_state: Option<&mut BuildState>, - filter_args: Option<&FilterArgs>, + filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError> { match self { JoinProbeStateImpl::Inner(state) => { - state.probe_next(probe_state, build_state, 
filter_args) + state.probe_next(probe_state, build_state, filter_expr) } JoinProbeStateImpl::Left(state) => { - state.probe_next(probe_state, build_state, filter_args) + state.probe_next(probe_state, build_state, filter_expr) } JoinProbeStateImpl::Right(state) => { - state.probe_next(probe_state, build_state, filter_args) + state.probe_next(probe_state, build_state, filter_expr) } JoinProbeStateImpl::Full(state) => { - state.probe_next(probe_state, build_state, filter_args) + state.probe_next(probe_state, build_state, filter_expr) } } } @@ -104,28 +99,22 @@ impl JoinProbeState for JoinProbeStateImpl { fn left_drop_next( &mut self, left_drop_state: &mut LeftDropState, - filter_args: Option<&FilterArgs>, + filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError> { match self { - JoinProbeStateImpl::Inner(state) => state.left_drop_next(left_drop_state, filter_args), - JoinProbeStateImpl::Left(state) => state.left_drop_next(left_drop_state, filter_args), - JoinProbeStateImpl::Right(state) => state.left_drop_next(left_drop_state, filter_args), - JoinProbeStateImpl::Full(state) => state.left_drop_next(left_drop_state, filter_args), + JoinProbeStateImpl::Inner(state) => state.left_drop_next(left_drop_state, filter_expr), + JoinProbeStateImpl::Left(state) => state.left_drop_next(left_drop_state, filter_expr), + JoinProbeStateImpl::Right(state) => state.left_drop_next(left_drop_state, filter_expr), + JoinProbeStateImpl::Full(state) => state.left_drop_next(left_drop_state, filter_expr), } } } pub(crate) fn filter( values: &T, - filter_arg: &FilterArgs, + filter_expr: &ScalarExpression, ) -> Result { - let FilterArgs { - full_schema, - filter_expr, - .. - } = filter_arg; - - match &filter_expr.eval(Some((values as &dyn TupleLike, full_schema)))? { + match &filter_expr.eval(Some(values as &dyn TupleLike))? 
{ DataValue::Boolean(false) | DataValue::Null => Ok(false), DataValue::Boolean(true) => Ok(true), _ => Err(DatabaseError::InvalidType), diff --git a/src/execution/dql/join/hash/right_join.rs b/src/execution/dql/join/hash/right_join.rs index 03a7ec2e..74578603 100644 --- a/src/execution/dql/join/hash/right_join.rs +++ b/src/execution/dql/join/hash/right_join.rs @@ -14,8 +14,9 @@ use crate::errors::DatabaseError; use crate::execution::dql::join::hash::full_join::FullJoinState; -use crate::execution::dql::join::hash::{filter, FilterArgs, JoinProbeState, ProbeState}; +use crate::execution::dql::join::hash::{filter, JoinProbeState, ProbeState}; use crate::execution::dql::join::hash_join::BuildState; +use crate::expression::ScalarExpression; use crate::types::tuple::{SplitTupleRef, Tuple}; pub(crate) struct RightJoinState { @@ -27,7 +28,7 @@ impl JoinProbeState for RightJoinState { &mut self, probe_state: &mut ProbeState, build_state: Option<&mut BuildState>, - filter_args: Option<&FilterArgs>, + filter_expr: Option<&ScalarExpression>, ) -> Result, DatabaseError> { if probe_state.is_keys_has_null { if probe_state.emitted_unmatched { @@ -59,10 +60,10 @@ impl JoinProbeState for RightJoinState { let (_, Tuple { values, pk }) = &build_state.tuples[probe_state.index]; probe_state.index += 1; - if let Some(filter_args) = filter_args { + if let Some(filter_expr) = filter_expr { let full_values = SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); - if !filter(&full_values, filter_args)? { + if !filter(&full_values, filter_expr)? 
{ probe_state.has_filtered = true; continue; } diff --git a/src/execution/dql/join/hash_join.rs b/src/execution/dql/join/hash_join.rs index ab0a43db..00f6608e 100644 --- a/src/execution/dql/join/hash_join.rs +++ b/src/execution/dql/join/hash_join.rs @@ -19,32 +19,30 @@ use crate::execution::dql::join::hash::inner_join::InnerJoinState; use crate::execution::dql::join::hash::left_join::LeftJoinState; use crate::execution::dql::join::hash::right_join::RightJoinState; use crate::execution::dql::join::hash::{ - FilterArgs, JoinProbeState, JoinProbeStateImpl, LeftDropState, ProbeState, + JoinProbeState, JoinProbeStateImpl, LeftDropState, ProbeState, }; use crate::execution::dql::join::joins_nullable; use crate::execution::dql::sort::BumpVec; use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, + build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, }; use crate::expression::ScalarExpression; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; use crate::planner::LogicalPlan; use crate::storage::Transaction; -use crate::types::tuple::{SchemaRef, Tuple}; +use crate::types::tuple::Tuple; use crate::types::value::DataValue; use ahash::{HashMap, HashMapExt}; use bumpalo::Bump; use fixedbitset::FixedBitSet; use std::mem::transmute; -use std::sync::Arc; pub struct HashJoin { state: HashJoinState, ty: JoinType, on_left_keys: Vec, on_right_keys: Vec, - full_schema: SchemaRef, - filter: Option, + filter: Option, left_schema_len: usize, right_schema_len: usize, left_input_plan: LogicalPlan, @@ -113,11 +111,7 @@ impl From<(JoinOperator, LogicalPlan, LogicalPlan)> for HashJoin { ty: join_type, on_left_keys, on_right_keys, - full_schema: Arc::new(full_schema_ref.clone()), - filter: filter_expr.map(|filter_expr| FilterArgs { - full_schema: Arc::new(full_schema_ref), - filter_expr, - }), + filter: filter_expr, left_schema_len, right_schema_len, left_input_plan: 
left_input, @@ -159,12 +153,11 @@ impl HashJoin { fn eval_keys( on_keys: &[ScalarExpression], tuple: &Tuple, - schema: &[ColumnRef], build_buf: &mut BumpVec<'_, DataValue>, ) -> Result<(), DatabaseError> { build_buf.clear(); for expr in on_keys { - build_buf.push(expr.eval(Some((tuple, schema)))?); + build_buf.push(expr.eval(Some(tuple))?); } Ok(()) } @@ -187,12 +180,7 @@ impl HashJoin { while arena.next_tuple(self.left_input)? { let tuple = arena.result_tuple().clone(); - Self::eval_keys( - &self.on_left_keys, - &tuple, - &self.full_schema[0..self.left_schema_len], - &mut build_buf, - )?; + Self::eval_keys(&self.on_left_keys, &tuple, &mut build_buf)?; match build_map.get_mut(&build_buf) { None => { @@ -264,18 +252,8 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashJoin { cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.left_input = build_read( - arena, - take_plan(&mut self.left_input_plan), - cache, - transaction, - ); - self.right_input = build_read( - arena, - take_plan(&mut self.right_input_plan), - cache, - transaction, - ); + self.left_input = build_read(arena, self.left_input_plan.take(), cache, transaction); + self.right_input = build_read(arena, self.right_input_plan.take(), cache, transaction); arena.push(ExecNode::HashJoin(self)) } } @@ -324,12 +302,7 @@ impl HashJoin { break true; } let tuple = arena.result_tuple().clone(); - Self::eval_keys( - &self.on_right_keys, - &tuple, - &self.full_schema[self.left_schema_len..], - &mut probe_buf, - )?; + Self::eval_keys(&self.on_right_keys, &tuple, &mut probe_buf)?; probe_state = Some(ProbeState { is_keys_has_null: probe_buf.iter().any(DataValue::is_null), probe_tuple: tuple, diff --git a/src/execution/dql/join/nested_loop_join.rs b/src/execution/dql/join/nested_loop_join.rs index 6aaa34f0..cab57fa8 100644 --- a/src/execution/dql/join/nested_loop_join.rs +++ b/src/execution/dql/join/nested_loop_join.rs @@ -15,12 +15,10 @@ //! 
Defines the nested loop join executor, it supports [`JoinType::Inner`], [`JoinType::LeftOuter`], //! [`JoinType::RightOuter`], [`JoinType::Cross`], [`JoinType::Full`]. -use super::joins_nullable; -use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::execution::dql::projection::Projection; use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, + build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, }; use crate::expression::ScalarExpression; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; @@ -89,7 +87,6 @@ impl EqualCondition { pub struct NestedLoopJoin { left_input_plan: LogicalPlan, right_input_plan: LogicalPlan, - output_schema_ref: SchemaRef, ty: JoinType, filter: Option, eq_cond: EqualCondition, @@ -137,7 +134,6 @@ impl From<(JoinOperator, LogicalPlan, LogicalPlan)> for NestedLoopJoin { let (mut left_input, mut right_input) = (left_input, right_input); let mut left_schema = left_input.output_schema().clone(); let mut right_schema = right_input.output_schema().clone(); - let output_schema_ref = Self::merge_schema(&left_schema, &right_schema, join_type); if matches!(join_type, JoinType::RightOuter) { std::mem::swap(&mut left_input, &mut right_input); @@ -155,7 +151,6 @@ impl From<(JoinOperator, LogicalPlan, LogicalPlan)> for NestedLoopJoin { NestedLoopJoin { left_input_plan: left_input, right_input_plan: right_input, - output_schema_ref, ty: join_type, filter, eq_cond, @@ -172,12 +167,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.left_input = build_read( - arena, - take_plan(&mut self.left_input_plan), - cache, - transaction, - ); + self.left_input = build_read(arena, self.left_input_plan.take(), cache, transaction); arena.push(ExecNode::NestedLoopJoin(self)) } } @@ -282,7 +272,7 @@ impl NestedLoopJoin { } else { 
SplitTupleRef::new(&active_left.left_tuple, &right_tuple) }; - let value = filter.eval(Some((values, &self.output_schema_ref)))?; + let value = filter.eval(Some(values))?; match &value { DataValue::Boolean(true) => { let tuple = match self.ty { @@ -453,37 +443,12 @@ impl NestedLoopJoin { values, )) } - - fn merge_schema( - left_schema: &[ColumnRef], - right_schema: &[ColumnRef], - ty: JoinType, - ) -> Arc> { - let (left_force_nullable, right_force_nullable) = joins_nullable(&ty); - - let mut join_schema = vec![]; - for column in left_schema.iter() { - join_schema.push( - column - .nullable_for_join(left_force_nullable) - .unwrap_or_else(|| column.clone()), - ); - } - for column in right_schema.iter() { - join_schema.push( - column - .nullable_for_join(right_force_nullable) - .unwrap_or_else(|| column.clone()), - ); - } - Arc::new(join_schema) - } } #[cfg(all(test, not(target_arch = "wasm32")))] mod test { use super::*; - use crate::catalog::{ColumnCatalog, ColumnDesc}; + use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; use crate::db::DataBaseBuilder; use crate::execution::dql::test::build_integers; use crate::execution::try_collect; diff --git a/src/execution/dql/mark_apply.rs b/src/execution/dql/mark_apply.rs index 2e127938..d1dd9c4f 100644 --- a/src/execution/dql/mark_apply.rs +++ b/src/execution/dql/mark_apply.rs @@ -18,10 +18,9 @@ use crate::planner::operator::mark_apply::{MarkApplyKind, MarkApplyOperator}; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::index::RuntimeIndexProbe; -use crate::types::tuple::{Schema, SchemaRef, SplitTupleRef, Tuple}; +use crate::types::tuple::{SplitTupleRef, Tuple}; use crate::types::value::DataValue; use std::mem; -use std::sync::Arc; #[derive(PartialEq, Eq)] enum InPredicateOutcome { @@ -34,8 +33,6 @@ pub struct MarkApply { op: MarkApplyOperator, right_input_plan: LogicalPlan, left_input: ExecId, - left_schema: SchemaRef, - predicate_schema: SchemaRef, left_tuple: Tuple, } @@ -43,26 
+40,16 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for MarkApply { type Input = (MarkApplyOperator, LogicalPlan, LogicalPlan); fn into_executor( - (op, mut left_input, mut right_input): Self::Input, + (op, left_input, right_input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - let left_schema = left_input.output_schema().clone(); - let predicate_schema = Arc::new( - left_schema - .iter() - .chain(right_input.output_schema().iter()) - .cloned() - .collect::(), - ); let left_input = build_read(arena, left_input, cache, transaction); arena.push(ExecNode::MarkApply(Self { op, right_input_plan: right_input, left_input, - left_schema, - predicate_schema, left_tuple: Tuple::default(), })) } @@ -131,7 +118,7 @@ impl MarkApply { fn parameterized_probe_value(&self) -> Result, DatabaseError> { self.op .parameterized_probe() - .map(|probe| probe.eval(Some((&self.left_tuple, self.left_schema.as_ref())))) + .map(|probe| probe.eval(Some(&self.left_tuple))) .transpose() } @@ -229,7 +216,7 @@ impl MarkApply { let values = SplitTupleRef::new(left_tuple, right_tuple); for predicate in self.op.predicates() { - match predicate.eval(Some((values, self.predicate_schema.as_ref())))? { + match predicate.eval(Some(values))? { DataValue::Boolean(true) => {} DataValue::Boolean(false) | DataValue::Null => return Ok(false), _ => return Err(DatabaseError::InvalidType), @@ -266,16 +253,14 @@ impl MarkApply { .ok_or(DatabaseError::InvalidType)?; for predicate in correlated_predicates { - match predicate.eval(Some((values, self.predicate_schema.as_ref())))? { + match predicate.eval(Some(values))? 
{ DataValue::Boolean(true) => {} DataValue::Boolean(false) | DataValue::Null => return Ok(None), _ => return Err(DatabaseError::InvalidType), } } - Ok(Some( - probe_predicate.eval(Some((values, self.predicate_schema.as_ref())))?, - )) + Ok(Some(probe_predicate.eval(Some(values))?)) } } @@ -493,14 +478,6 @@ mod tests { ); op.set_parameterized_probe(Some(ScalarExpression::column_expr(left_value_column, 0))); - let left_schema = left.output_schema().clone(); - let predicate_schema = Arc::new( - left_schema - .iter() - .chain(right.output_schema().iter()) - .cloned() - .collect::(), - ); let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; let mut arena = ExecArena::default(); @@ -510,8 +487,6 @@ mod tests { op, right_input_plan: right, left_input: 0, - left_schema, - predicate_schema, left_tuple: Tuple::new(None, vec![DataValue::Int32(2), DataValue::Int32(1)]), }; @@ -538,14 +513,6 @@ mod tests { let mut op = MarkApplyOperator::new_in(build_marker_column(), vec![predicate]); op.set_parameterized_probe(Some(ScalarExpression::column_expr(left_value_column, 0))); - let left_schema = left.output_schema().clone(); - let predicate_schema = Arc::new( - left_schema - .iter() - .chain(right.output_schema().iter()) - .cloned() - .collect::(), - ); let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; let mut arena = ExecArena::default(); @@ -555,8 +522,6 @@ mod tests { op, right_input_plan: right, left_input: 0, - left_schema, - predicate_schema, left_tuple: Tuple::new(None, vec![DataValue::Int32(2)]), }; @@ -583,14 +548,6 @@ mod tests { let mut op = MarkApplyOperator::new_in(build_marker_column(), vec![predicate]); op.set_parameterized_probe(Some(ScalarExpression::column_expr(left_value_column, 0))); - let left_schema = left.output_schema().clone(); - let predicate_schema = Arc::new( - left_schema - .iter() - 
.chain(right.output_schema().iter()) - .cloned() - .collect::(), - ); let (table_cache, view_cache, meta_cache, _temp_dir, storage) = build_test_storage()?; let mut transaction = storage.transaction()?; let mut arena = ExecArena::default(); @@ -600,8 +557,6 @@ mod tests { op, right_input_plan: right, left_input: 0, - left_schema, - predicate_schema, left_tuple: Tuple::new(None, vec![DataValue::Null]), }; diff --git a/src/execution/dql/projection.rs b/src/execution/dql/projection.rs index 38540538..80dda08d 100644 --- a/src/execution/dql/projection.rs +++ b/src/execution/dql/projection.rs @@ -19,12 +19,10 @@ use crate::expression::ScalarExpression; use crate::planner::operator::project::ProjectOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; -use crate::types::tuple::SchemaRef; use crate::types::tuple::Tuple; use crate::types::value::DataValue; pub struct Projection { exprs: Vec, - input_schema: SchemaRef, input: ExecId, scratch: Tuple, } @@ -33,16 +31,14 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Projection { type Input = (ProjectOperator, LogicalPlan); fn into_executor( - (ProjectOperator { exprs }, mut input): Self::Input, + (ProjectOperator { exprs }, input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - let input_schema = input.output_schema().clone(); let input = build_read(arena, input, cache, transaction); arena.push(ExecNode::Projection(Projection { exprs, - input_schema, input, scratch: Tuple::default(), })) @@ -61,9 +57,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Projection { output.values.clear(); output.values.reserve(self.exprs.len()); for expr in self.exprs.iter() { - output - .values - .push(expr.eval(Some((tuple, &self.input_schema)))?); + output.values.push(expr.eval(Some(tuple))?); } arena.resume(); Ok(()) @@ -74,12 +68,12 @@ impl Projection { pub fn projection( tuple: &Tuple, exprs: &[ScalarExpression], - schema: &[ColumnRef], + 
_schema: &[ColumnRef], ) -> Result, DatabaseError> { let mut values = Vec::with_capacity(exprs.len()); for expr in exprs.iter() { - values.push(expr.eval(Some((tuple, schema)))?); + values.push(expr.eval(Some(tuple))?); } Ok(values) } diff --git a/src/execution/dql/sort.rs b/src/execution/dql/sort.rs index ba2b0811..0c80a6e9 100644 --- a/src/execution/dql/sort.rs +++ b/src/execution/dql/sort.rs @@ -18,7 +18,7 @@ use crate::planner::operator::sort::{SortField, SortOperator}; use crate::planner::LogicalPlan; use crate::storage::table_codec::BumpBytes; use crate::storage::Transaction; -use crate::types::tuple::{Schema, SchemaRef, Tuple}; +use crate::types::tuple::Tuple; use bumpalo::Bump; use std::cmp::Ordering; use std::mem::{transmute, MaybeUninit}; @@ -176,7 +176,6 @@ impl SortBy { pub(crate) fn sorted_tuples<'a>( &self, arena: &'a Bump, - schema: &Schema, sort_fields: &[SortField], mut tuples: NullableVec<'a, (usize, Tuple)>, ) -> Result + 'a>, DatabaseError> { @@ -195,7 +194,7 @@ impl SortBy { { let mut key = BumpBytes::new_in(arena); - expr.eval(Some((tuple, schema)))? + expr.eval(Some(tuple))? .memcomparable_encode_with_null_order(&mut key, *nulls_first)?; if !asc && key.len() > 1 { @@ -228,7 +227,7 @@ impl SortBy { for (x, SortField { expr, .. 
}) in sort_fields.iter().enumerate() { for (_, tuple) in tuples.iter() { - eval_values[x].push(expr.eval(Some((tuple, schema)))?); + eval_values[x].push(expr.eval(Some(tuple))?); } } @@ -279,7 +278,6 @@ pub struct Sort { arena: Box, sort_fields: Vec, limit: Option, - input_schema: SchemaRef, input: ExecId, } @@ -287,19 +285,17 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Sort { type Input = (SortOperator, LogicalPlan); fn into_executor( - (SortOperator { sort_fields, limit }, mut input): Self::Input, + (SortOperator { sort_fields, limit }, input): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - let input_schema = input.output_schema().clone(); let input = build_read(arena, input, cache, transaction); arena.push(ExecNode::Sort(Sort { output: None, arena: Box::::default(), sort_fields, limit, - input_schema, input, })) } @@ -319,12 +315,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Sort { SortBy::Fast }; let limit = self.limit.unwrap_or(tuples.len()); - let rows = sort_by.sorted_tuples( - &self.arena, - &self.input_schema, - &self.sort_fields, - tuples, - )?; + let rows = sort_by.sorted_tuples(&self.arena, &self.sort_fields, tuples)?; let rows: Box + '_> = Box::new(rows.take(limit)); // The arena lives at a stable boxed address, so we can keep the old iterator shape // and resume it across executor polls. 
@@ -394,7 +385,7 @@ mod test { nulls_first, }] }; - let schema = Arc::new(vec![ColumnRef::from(ColumnCatalog::new( + let _schema = Arc::new(vec![ColumnRef::from(ColumnCatalog::new( "c1".to_string(), true, ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(), @@ -481,25 +472,21 @@ mod test { // RadixSort fn_asc_and_nulls_first_eq(SortBy::Radix.sorted_tuples( &arena, - &schema, &fn_sort_fields(true, true), fn_tuples(), )?); fn_asc_and_nulls_last_eq(SortBy::Radix.sorted_tuples( &arena, - &schema, &fn_sort_fields(true, false), fn_tuples(), )?); fn_desc_and_nulls_first_eq(SortBy::Radix.sorted_tuples( &arena, - &schema, &fn_sort_fields(false, true), fn_tuples(), )?); fn_desc_and_nulls_last_eq(SortBy::Radix.sorted_tuples( &arena, - &schema, &fn_sort_fields(false, false), fn_tuples(), )?); @@ -507,25 +494,21 @@ mod test { // FastSort fn_asc_and_nulls_first_eq(SortBy::Fast.sorted_tuples( &arena, - &schema, &fn_sort_fields(true, true), fn_tuples(), )?); fn_asc_and_nulls_last_eq(SortBy::Fast.sorted_tuples( &arena, - &schema, &fn_sort_fields(true, false), fn_tuples(), )?); fn_desc_and_nulls_first_eq(SortBy::Fast.sorted_tuples( &arena, - &schema, &fn_sort_fields(false, true), fn_tuples(), )?); fn_desc_and_nulls_last_eq(SortBy::Fast.sorted_tuples( &arena, - &schema, &fn_sort_fields(false, false), fn_tuples(), )?); @@ -566,7 +549,7 @@ mod test { }, ] }; - let schema = Arc::new(vec![ + let _schema = Arc::new(vec![ ColumnRef::from(ColumnCatalog::new( "c1".to_string(), true, @@ -744,25 +727,21 @@ mod test { // RadixSort fn_asc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq(SortBy::Radix.sorted_tuples( &arena, - &schema, &fn_sort_fields(true, true, true, true), fn_tuples(), )?); fn_asc_1_and_nulls_last_1_and_asc_2_and_nulls_first_2_eq(SortBy::Radix.sorted_tuples( &arena, - &schema, &fn_sort_fields(true, false, true, true), fn_tuples(), )?); fn_desc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq(SortBy::Radix.sorted_tuples( &arena, - &schema, 
&fn_sort_fields(false, true, true, true), fn_tuples(), )?); fn_desc_1_and_nulls_last_1_and_asc_2_and_nulls_first_2_eq(SortBy::Radix.sorted_tuples( &arena, - &schema, &fn_sort_fields(false, false, true, true), fn_tuples(), )?); @@ -770,25 +749,21 @@ mod test { // FastSort fn_asc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq(SortBy::Fast.sorted_tuples( &arena, - &schema, &fn_sort_fields(true, true, true, true), fn_tuples(), )?); fn_asc_1_and_nulls_last_1_and_asc_2_and_nulls_first_2_eq(SortBy::Fast.sorted_tuples( &arena, - &schema, &fn_sort_fields(true, false, true, true), fn_tuples(), )?); fn_desc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq(SortBy::Fast.sorted_tuples( &arena, - &schema, &fn_sort_fields(false, true, true, true), fn_tuples(), )?); fn_desc_1_and_nulls_last_1_and_asc_2_and_nulls_first_2_eq(SortBy::Fast.sorted_tuples( &arena, - &schema, &fn_sort_fields(false, false, true, true), fn_tuples(), )?); diff --git a/src/execution/dql/top_k.rs b/src/execution/dql/top_k.rs index 849571eb..d6d8e47e 100644 --- a/src/execution/dql/top_k.rs +++ b/src/execution/dql/top_k.rs @@ -20,7 +20,7 @@ use crate::planner::operator::top_k::TopKOperator; use crate::planner::LogicalPlan; use crate::storage::table_codec::BumpBytes; use crate::storage::Transaction; -use crate::types::tuple::{Schema, SchemaRef, Tuple}; +use crate::types::tuple::Tuple; use bumpalo::Bump; use std::cmp::Ordering; use std::collections::{btree_set::IntoIter as BTreeSetIntoIter, BTreeSet}; @@ -47,7 +47,6 @@ impl PartialOrd for CmpItem<'_> { #[allow(clippy::mutable_key_type)] fn top_sort<'a>( arena: &'a Bump, - schema: &Schema, sort_fields: &[SortField], heap: &mut BTreeSet>, tuple: Tuple, @@ -61,7 +60,7 @@ fn top_sort<'a>( } in sort_fields { let mut key = BumpBytes::new_in(arena); - expr.eval(Some((&tuple, &**schema)))? + expr.eval(Some(&tuple))? 
.memcomparable_encode_with_null_order(&mut key, *nulls_first)?; if !asc && key.len() > 1 { for byte in key.iter_mut().skip(1) { @@ -94,7 +93,6 @@ pub struct TopK { sort_fields: Vec, limit: usize, offset: Option, - input_schema: SchemaRef, input: ExecId, } @@ -108,13 +106,12 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for TopK { limit, offset, }, - mut input, + input, ): Self::Input, arena: &mut ExecArena<'a, T>, cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - let input_schema = input.output_schema().clone(); let input = build_read(arena, input, cache, transaction); arena.push(ExecNode::TopK(TopK { output: None, @@ -122,7 +119,6 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for TopK { sort_fields, limit, offset, - input_schema, input, })) } @@ -136,7 +132,6 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for TopK { while arena.next_tuple(self.input)? { top_sort( &self.arena, - &self.input_schema, &self.sort_fields, &mut set, arena.result_tuple().clone(), @@ -196,12 +191,6 @@ mod test { nulls_first, }] }; - let schema = Arc::new(vec![ColumnRef::from(ColumnCatalog::new( - "c1".to_string(), - true, - ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(), - ))]); - let arena = Bump::new(); let fn_asc_and_nulls_last_eq = |mut heap: BTreeSet>| { @@ -257,7 +246,6 @@ mod test { top_sort( &arena, - &schema, &fn_sort_fields(true, true), &mut indices, Tuple::new(None, vec![DataValue::Null]), @@ -265,7 +253,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0)]), @@ -273,7 +260,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1)]), @@ -285,7 +271,6 @@ mod test { top_sort( &arena, - &schema, &fn_sort_fields(true, false), &mut indices, Tuple::new(None, vec![DataValue::Null]), @@ -293,7 +278,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, false), 
&mut indices, Tuple::new(None, vec![DataValue::Int32(0)]), @@ -301,7 +285,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, false), &mut indices, Tuple::new(None, vec![DataValue::Int32(1)]), @@ -313,7 +296,6 @@ mod test { top_sort( &arena, - &schema, &fn_sort_fields(false, true), &mut indices, Tuple::new(None, vec![DataValue::Null]), @@ -321,7 +303,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0)]), @@ -329,7 +310,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1)]), @@ -341,7 +321,6 @@ mod test { top_sort( &arena, - &schema, &fn_sort_fields(false, false), &mut indices, Tuple::new(None, vec![DataValue::Null]), @@ -349,7 +328,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, false), &mut indices, Tuple::new(None, vec![DataValue::Int32(0)]), @@ -357,7 +335,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, false), &mut indices, Tuple::new(None, vec![DataValue::Int32(1)]), @@ -401,18 +378,6 @@ mod test { }, ] }; - let schema = Arc::new(vec![ - ColumnRef::from(ColumnCatalog::new( - "c1".to_string(), - true, - ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(), - )), - ColumnRef::from(ColumnCatalog::new( - "c2".to_string(), - true, - ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(), - )), - ]); let arena = Bump::new(); let fn_asc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq = @@ -554,7 +519,6 @@ mod test { top_sort( &arena, - &schema, &fn_sort_fields(true, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Null, DataValue::Null]), @@ -562,7 +526,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0), DataValue::Null]), @@ -570,7 +533,6 @@ mod test { )?; top_sort( &arena, - &schema, 
&fn_sort_fields(true, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1), DataValue::Null]), @@ -578,7 +540,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Null, DataValue::Int32(0)]), @@ -586,7 +547,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0), DataValue::Int32(0)]), @@ -594,7 +554,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1), DataValue::Int32(0)]), @@ -606,7 +565,6 @@ mod test { top_sort( &arena, - &schema, &fn_sort_fields(true, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Null, DataValue::Null]), @@ -614,7 +572,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0), DataValue::Null]), @@ -622,7 +579,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1), DataValue::Null]), @@ -630,7 +586,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Null, DataValue::Int32(0)]), @@ -638,7 +593,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0), DataValue::Int32(0)]), @@ -646,7 +600,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(true, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1), DataValue::Int32(0)]), @@ -658,7 +611,6 @@ mod test { top_sort( &arena, - &schema, &fn_sort_fields(false, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Null, DataValue::Null]), @@ -666,7 +618,6 @@ mod test { )?; top_sort( &arena, - &schema, 
&fn_sort_fields(false, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0), DataValue::Null]), @@ -674,7 +625,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1), DataValue::Null]), @@ -682,7 +632,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Null, DataValue::Int32(0)]), @@ -690,7 +639,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0), DataValue::Int32(0)]), @@ -698,7 +646,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, true, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1), DataValue::Int32(0)]), @@ -710,7 +657,6 @@ mod test { top_sort( &arena, - &schema, &fn_sort_fields(false, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Null, DataValue::Null]), @@ -718,7 +664,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0), DataValue::Null]), @@ -726,7 +671,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1), DataValue::Null]), @@ -734,7 +678,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Null, DataValue::Int32(0)]), @@ -742,7 +685,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(0), DataValue::Int32(0)]), @@ -750,7 +692,6 @@ mod test { )?; top_sort( &arena, - &schema, &fn_sort_fields(false, false, true, true), &mut indices, Tuple::new(None, vec![DataValue::Int32(1), DataValue::Int32(0)]), diff --git a/src/execution/dql/union.rs 
b/src/execution/dql/union.rs index 7e8ba5e0..ac5807df 100644 --- a/src/execution/dql/union.rs +++ b/src/execution/dql/union.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use crate::execution::{ - build_read, take_plan, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, + build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, }; use crate::planner::LogicalPlan; use crate::storage::Transaction; @@ -45,8 +45,8 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Union { cache: ExecutionCaches<'a>, transaction: *mut T, ) -> ExecId { - self.left_input = build_read(arena, take_plan(&mut self.left_plan), cache, transaction); - self.right_input = build_read(arena, take_plan(&mut self.right_plan), cache, transaction); + self.left_input = build_read(arena, self.left_plan.take(), cache, transaction); + self.right_input = build_read(arena, self.right_plan.take(), cache, transaction); arena.push(ExecNode::Union(self)) } } diff --git a/src/execution/mod.rs b/src/execution/mod.rs index ff08ec31..8c0c5caa 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -568,13 +568,6 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for ShowViews { } } -pub(crate) fn take_plan(plan: &mut LogicalPlan) -> LogicalPlan { - std::mem::replace( - plan, - LogicalPlan::new(Operator::Dummy, crate::planner::Childrens::None), - ) -} - pub(crate) fn build_read<'a, T: Transaction + 'a>( arena: &mut ExecArena<'a, T>, plan: LogicalPlan, diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index 5f7b8f71..8f0f8a33 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::scala::ScalarFunction; use crate::expression::{AliasType, BinaryOperator, ScalarExpression}; @@ -36,10 +35,7 @@ macro_rules! eval_to_num { } impl ScalarExpression { - pub fn eval( - &self, - tuple: Option<(T, &[ColumnRef])>, - ) -> Result { + pub fn eval(&self, tuple: Option) -> Result { let check_cast = |value: DataValue, return_type: &LogicalType| { if value.logical_type() != *return_type { return value.cast(return_type); @@ -50,24 +46,22 @@ impl ScalarExpression { match self { ScalarExpression::Constant(val) => Ok(val.clone()), ScalarExpression::ColumnRef { position, .. } => { - let Some((tuple, _)) = tuple else { + let Some(tuple) = tuple else { return Ok(DataValue::Null); }; Ok(tuple.value_at(*position).clone()) } ScalarExpression::Alias { expr, alias } => { - let Some((tuple, schema)) = tuple else { + let Some(tuple) = tuple else { return Ok(DataValue::Null); }; if let AliasType::Expr(inner_expr) = alias { - match inner_expr.eval(Some((tuple, schema))) { - Err(DatabaseError::UnbindExpressionPosition(_)) => { - expr.eval(Some((tuple, schema))) - } + match inner_expr.eval(Some(tuple)) { + Err(DatabaseError::UnbindExpressionPosition(_)) => expr.eval(Some(tuple)), res => res, } } else { - expr.eval(Some((tuple, schema))) + expr.eval(Some(tuple)) } } ScalarExpression::TypeCast { expr, ty, .. } => Ok(expr.eval(tuple)?.cast(ty)?), @@ -269,12 +263,10 @@ impl ScalarExpression { Ok(DataValue::Tuple(values, false)) } ScalarExpression::ScalaFunction(ScalarFunction { inner, args, .. 
}) => { - let value = inner.eval( - args, - tuple - .as_ref() - .map(|(tuple, schema)| (tuple as &dyn TupleLike, *schema)), - )?; + let value = match tuple { + Some(tuple) => inner.eval(args, Some(&tuple as &dyn TupleLike))?, + None => inner.eval(args, None)?, + }; value.cast(inner.return_type()) } ScalarExpression::Empty => unreachable!(), diff --git a/src/expression/function/scala.rs b/src/expression/function/scala.rs index 589c4c40..a61c7344 100644 --- a/src/expression/function/scala.rs +++ b/src/expression/function/scala.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; @@ -68,7 +67,7 @@ pub trait ScalarFunctionImpl: Debug + Send + Sync { fn eval( &self, args: &[ScalarExpression], - tuple: Option<(&dyn TupleLike, &[ColumnRef])>, + tuple: Option<&dyn TupleLike>, ) -> Result; // TODO: Exploiting monotonicity when optimizing `ScalarFunctionImpl::monotonicity()` diff --git a/src/function/char_length.rs b/src/function/char_length.rs index 99bc7c46..d284a549 100644 --- a/src/function/char_length.rs +++ b/src/function/char_length.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; @@ -49,7 +48,7 @@ impl ScalarFunctionImpl for CharLength { fn eval( &self, exprs: &[ScalarExpression], - tuples: Option<(&dyn TupleLike, &[ColumnRef])>, + tuples: Option<&dyn TupleLike>, ) -> Result { let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { diff --git a/src/function/current_date.rs b/src/function/current_date.rs index 8790d09d..52b867c9 100644 --- a/src/function/current_date.rs +++ b/src/function/current_date.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; @@ -51,7 +50,7 @@ impl ScalarFunctionImpl for CurrentDate { fn eval( &self, _: &[ScalarExpression], - _: Option<(&dyn TupleLike, &[ColumnRef])>, + _: Option<&dyn TupleLike>, ) -> Result { Ok(DataValue::Date32(Local::now().num_days_from_ce())) } diff --git a/src/function/current_timestamp.rs b/src/function/current_timestamp.rs index 15ddf6a2..e5c00c76 100644 --- a/src/function/current_timestamp.rs +++ b/src/function/current_timestamp.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; @@ -51,7 +50,7 @@ impl ScalarFunctionImpl for CurrentTimeStamp { fn eval( &self, _: &[ScalarExpression], - _: Option<(&dyn TupleLike, &[ColumnRef])>, + _: Option<&dyn TupleLike>, ) -> Result { Ok(DataValue::Time64(Utc::now().timestamp(), 0, false)) } diff --git a/src/function/lower.rs b/src/function/lower.rs index 72eea7db..2995aae9 100644 --- a/src/function/lower.rs +++ b/src/function/lower.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; @@ -51,7 +50,7 @@ impl ScalarFunctionImpl for Lower { fn eval( &self, exprs: &[ScalarExpression], - tuples: Option<(&dyn TupleLike, &[ColumnRef])>, + tuples: Option<&dyn TupleLike>, ) -> Result { let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { diff --git a/src/function/octet_length.rs b/src/function/octet_length.rs index e317fe44..e970c336 100644 --- a/src/function/octet_length.rs +++ b/src/function/octet_length.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; @@ -50,7 +49,7 @@ impl ScalarFunctionImpl for OctetLength { fn eval( &self, exprs: &[ScalarExpression], - tuples: Option<(&dyn TupleLike, &[ColumnRef])>, + tuples: Option<&dyn TupleLike>, ) -> Result { let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { diff --git a/src/function/upper.rs b/src/function/upper.rs index f308f24d..00bb44ec 100644 --- a/src/function/upper.rs +++ b/src/function/upper.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::scala::FuncMonotonicity; use crate::expression::function::scala::ScalarFunctionImpl; @@ -51,7 +50,7 @@ impl ScalarFunctionImpl for Upper { fn eval( &self, exprs: &[ScalarExpression], - tuples: Option<(&dyn TupleLike, &[ColumnRef])>, + tuples: Option<&dyn TupleLike>, ) -> Result { let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { diff --git a/src/macros/mod.rs b/src/macros/mod.rs index 94a737f8..a83e2d0e 100644 --- a/src/macros/mod.rs +++ b/src/macros/mod.rs @@ -107,7 +107,7 @@ macro_rules! 
scala_function { #[typetag::serde] impl ::kite_sql::expression::function::scala::ScalarFunctionImpl for $struct_name { #[allow(unused_variables, clippy::redundant_closure_call)] - fn eval(&self, args: &[::kite_sql::expression::ScalarExpression], tuple: Option<(&dyn ::kite_sql::types::tuple::TupleLike, &[::kite_sql::catalog::column::ColumnRef])>) -> Result<::kite_sql::types::value::DataValue, ::kite_sql::errors::DatabaseError> { + fn eval(&self, args: &[::kite_sql::expression::ScalarExpression], tuple: Option<&dyn ::kite_sql::types::tuple::TupleLike>) -> Result<::kite_sql::types::value::DataValue, ::kite_sql::errors::DatabaseError> { let mut _index = 0; $closure($({ diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index 14a8d1d9..d29c445a 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -57,10 +57,10 @@ impl<'a> HepOptimizer<'a> { } } - pub fn optimize( - &mut self, + pub fn find_best( + mut self, loader: Option<&StatisticMetaLoader<'_, T>>, - ) -> Result<(), DatabaseError> { + ) -> Result { Self::apply_batches(&mut self.plan, self.before_batches)?; if let Some(loader) = loader { @@ -78,19 +78,6 @@ impl<'a> HepOptimizer<'a> { } Self::apply_batches(&mut self.plan, self.after_batches)?; - Ok(()) - } - - pub fn into_plan(self) -> LogicalPlan { - self.plan - } - - #[allow(dead_code)] - pub fn find_best( - mut self, - loader: Option<&StatisticMetaLoader<'_, T>>, - ) -> Result { - self.optimize(loader)?; Ok(self.plan) } diff --git a/src/planner/mod.rs b/src/planner/mod.rs index e12cdaa9..4c18ccf3 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -124,6 +124,10 @@ impl LogicalPlan { } } + pub(crate) fn take(&mut self) -> Self { + std::mem::replace(self, Self::new(Operator::Dummy, Childrens::None)) + } + pub(crate) fn reset_output_schema_cache(&mut self) { self._output_schema_ref = None; }