Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions datafusion/common/src/alias.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ impl AliasGenerator {
Self::default()
}

/// Advance the counter to at least `min_id`, ensuring future aliases
/// won't collide with already-existing ones.
///
/// For example, if the query already contains an alias `alias_42`, then calling
/// `update_min_id(42)` will ensure that future aliases generated by this
/// [`AliasGenerator`] will start from `alias_43`.
pub fn update_min_id(&self, min_id: usize) {
self.next_id.fetch_max(min_id + 1, Ordering::Relaxed);
}

/// Return a unique alias with the provided prefix
pub fn next(&self, prefix: &str) -> String {
let id = self.next_id.fetch_add(1, Ordering::Relaxed);
Expand Down
41 changes: 36 additions & 5 deletions datafusion/optimizer/src/extract_leaf_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,6 @@ impl OptimizerRule for ExtractLeafExpressions {
"extract_leaf_expressions"
}

fn apply_order(&self) -> Option<ApplyOrder> {
Some(ApplyOrder::TopDown)
}

fn rewrite(
&self,
plan: LogicalPlan,
Expand All @@ -127,10 +123,45 @@ impl OptimizerRule for ExtractLeafExpressions {
return Ok(Transformed::no(plan));
}
let alias_generator = config.alias_generator();
extract_from_plan(plan, alias_generator)

// Advance the alias generator past any user-provided __datafusion_extracted_N
// aliases to prevent collisions when generating new extraction aliases.
advance_generator_past_existing(&plan, alias_generator)?;

plan.transform_down_with_subqueries(|plan| {
extract_from_plan(plan, alias_generator)
})
}
}

/// Scans the current plan node's expressions for pre-existing
/// `__datafusion_extracted_N` aliases and advances the generator
/// counter past them to avoid collisions with user-provided aliases.
fn advance_generator_past_existing(
plan: &LogicalPlan,
alias_generator: &AliasGenerator,
) -> Result<()> {
plan.apply(|plan| {
plan.expressions().iter().try_for_each(|expr| {
expr.apply(|e| {
if let Expr::Alias(alias) = e
&& let Some(id_str) = alias
.name
.strip_prefix(EXTRACTED_EXPR_PREFIX)
.and_then(|s| s.strip_prefix('_'))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't this be .and_then(|id| id_str.parse().ok())

&& let Ok(id) = id_str.parse::<usize>()
{
alias_generator.update_min_id(id);
}
Ok(TreeNodeRecursion::Continue)
})?;
Ok::<(), datafusion_common::error::DataFusionError>(())
})?;
Ok(TreeNodeRecursion::Continue)
})
.map(|_| ())
}

/// Extracts `MoveTowardsLeafNodes` sub-expressions from a plan node.
///
/// Works for any number of inputs (0, 1, 2, …N). For multi-input nodes
Expand Down
24 changes: 24 additions & 0 deletions datafusion/sqllogictest/test_files/projection_pushdown.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1949,3 +1949,27 @@ ORDER BY simple_struct.id;
3 3
4 4
5 5

# =========================================================================
# Regression: user-provided __datafusion_extracted aliases must not
# collide with optimizer-generated ones
# (https://github.com/apache/datafusion/issues/20430)
# =========================================================================

statement ok
COPY ( select {f1: 1, f2: 2} as s
) TO 'test_files/scratch/projection_pushdown/test.parquet'
STORED AS PARQUET;

statement ok
CREATE EXTERNAL TABLE t
STORED AS PARQUET
LOCATION 'test_files/scratch/projection_pushdown/test.parquet';

query I
SELECT
get_field(s, 'f1') AS __datafusion_extracted_2
FROM t
WHERE COALESCE(get_field(s, 'f1'), get_field(s, 'f2')) = 1;
----
1