From 7e16978ee014584cae6fe21296912ba8e7e63e9f Mon Sep 17 00:00:00 2001
From: Zhang Xiaofeng <zhangxffff@gmail.com>
Date: Sat, 14 Feb 2026 08:23:32 +0000
Subject: [PATCH 1/4] perf: Vectorize IN list evaluation with arrow eq kernel

---
 datafusion/physical-expr/benches/in_list.rs   | 163 ++++++++++++++++++
 .../physical-expr/src/expressions/in_list.rs  |  94 ++++++----
 2 files changed, 223 insertions(+), 34 deletions(-)
diff --git a/datafusion/physical-expr/benches/in_list.rs b/datafusion/physical-expr/benches/in_list.rs
index 954715d0e5a9c..fd5a6d0e88dce 100644
--- a/datafusion/physical-expr/benches/in_list.rs
+++ b/datafusion/physical-expr/benches/in_list.rs
@@ -24,6 +24,7 @@ use arrow::record_batch::RecordBatch;
 use criterion::{Criterion, criterion_group, criterion_main};
 use datafusion_common::ScalarValue;
 use datafusion_physical_expr::expressions::{col, in_list, lit};
+use datafusion_physical_expr::PhysicalExpr;
 use rand::distr::Alphanumeric;
 use rand::prelude::*;
 use std::any::TypeId;
@@ -219,6 +220,164 @@ fn bench_realistic_mixed_strings<A>(
     }
 }
 
+/// Benchmarks the dynamic evaluation path (no static filter) by including
+/// a column reference in the IN list, which prevents static filter creation.
+fn do_bench_dynamic(
+    c: &mut Criterion,
+    name: &str,
+    values: ArrayRef,
+    list_cols: Vec<ArrayRef>,
+    _match_percent: f64,
+) {
+    let mut fields = vec![Field::new(
+        "a",
+        values.data_type().clone(),
+        true,
+    )];
+    let mut columns: Vec<ArrayRef> = vec![values];
+
+    // Build list expressions: mix of column refs (forces dynamic path)
+    let schema_fields: Vec<Field> = list_cols
+        .iter()
+        .enumerate()
+        .map(|(i, col_arr)| {
+            let name = format!("b{i}");
+            fields.push(Field::new(
+                &name,
+                col_arr.data_type().clone(),
+                true,
+            ));
+            columns.push(Arc::clone(col_arr));
+            Field::new(&name, col_arr.data_type().clone(), true)
+        })
+        .collect();
+
+    let schema = Schema::new(fields);
+    let list_exprs: Vec<Arc<dyn PhysicalExpr>> = schema_fields
+        .iter()
+        .map(|f| col(f.name(), &schema).unwrap())
+        .collect();
+
+    let expr = in_list(
+        col("a", &schema).unwrap(),
+        list_exprs,
+        &false,
+        &schema,
+    )
+    .unwrap();
+    let batch =
+        RecordBatch::try_new(Arc::new(schema), columns).unwrap();
+
+    c.bench_function(name, |b| {
+        b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap()))
+    });
+}
+
+/// Benchmarks the dynamic IN list path for Int32 arrays with column references.
+fn bench_dynamic_int32(c: &mut Criterion) {
+    let mut rng = StdRng::seed_from_u64(42);
+
+    for list_size in [3, 8, 28] {
+        for match_percent in [0.0, 0.5, 1.0] {
+            for null_percent in [0.0, 0.2] {
+                // Generate the "needle" column
+                let values: Int32Array = (0..ARRAY_LENGTH)
+                    .map(|_| {
+                        rng.random_bool(1.0 - null_percent)
+                            .then(|| rng.random_range(0..1000))
+                    })
+                    .collect();
+
+                // Generate list columns with controlled match rate
+                let list_cols: Vec<ArrayRef> = (0..list_size)
+                    .map(|_| {
+                        let col: Int32Array = (0..ARRAY_LENGTH)
+                            .map(|row| {
+                                if rng.random_bool(1.0 - null_percent) {
+                                    if rng.random_bool(match_percent) {
+                                        // Copy from values to create a match
+                                        if values.is_null(row) {
+                                            Some(rng.random_range(0..1000))
+                                        } else {
+                                            Some(values.value(row))
+                                        }
+                                    } else {
+                                        // Random value (unlikely to match)
+                                        Some(rng.random_range(1000..2000))
+                                    }
+                                } else {
+                                    None
+                                }
+                            })
+                            .collect();
+                        Arc::new(col) as ArrayRef
+                    })
+                    .collect();
+
+                do_bench_dynamic(
+                    c,
+                    &format!(
+                        "in_list_dynamic/Int32/list={}/match={}%/nulls={}%",
+                        list_size,
+                        (match_percent * 100.0) as u32,
+                        (null_percent * 100.0) as u32
+                    ),
+                    Arc::new(values),
+                    list_cols,
+                    match_percent,
+                );
+            }
+        }
+    }
+}
+
+/// Benchmarks the dynamic IN list path for Utf8 arrays with column references.
+fn bench_dynamic_utf8(c: &mut Criterion) {
+    let mut rng = StdRng::seed_from_u64(99);
+
+    for list_size in [3, 8, 28] {
+        for match_percent in [0.0, 0.5, 1.0] {
+            // Generate the "needle" column
+            let value_strings: Vec<Option<String>> = (0..ARRAY_LENGTH)
+                .map(|_| {
+                    rng.random_bool(0.8)
+                        .then(|| random_string(&mut rng, 12))
+                })
+                .collect();
+            let values: StringArray = value_strings.iter().map(|s| s.as_deref()).collect();
+
+            // Generate list columns with controlled match rate
+            let list_cols: Vec<ArrayRef> = (0..list_size)
+                .map(|_| {
+                    let col: StringArray = (0..ARRAY_LENGTH)
+                        .map(|row| {
+                            if rng.random_bool(match_percent) {
+                                // Copy from values to create a match
+                                value_strings[row].as_deref()
+                            } else {
+                                Some("no_match_value_xyz")
+                            }
+                        })
+                        .collect();
+                    Arc::new(col) as ArrayRef
+                })
+                .collect();
+
+            do_bench_dynamic(
+                c,
+                &format!(
+                    "in_list_dynamic/Utf8/list={}/match={}%",
+                    list_size,
+                    (match_percent * 100.0) as u32,
+                ),
+                Arc::new(values),
+                list_cols,
+                match_percent,
+            );
+        }
+    }
+}
+
 /// Entry point: registers in_list benchmarks for string and numeric array types.
 fn criterion_benchmark(c: &mut Criterion) {
     let mut rng = StdRng::seed_from_u64(120320);
@@ -266,6 +425,10 @@ fn criterion_benchmark(c: &mut Criterion) {
         |rng| rng.random(),
         |v| ScalarValue::TimestampNanosecond(Some(v), None),
     );
+
+    // Dynamic path benchmarks (non-constant list expressions)
+    bench_dynamic_int32(c);
+    bench_dynamic_utf8(c);
 }
 
 criterion_group! {
diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs
index 5c2f1adcd0cf3..87de5c4f9cd8a 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -28,6 +28,7 @@ use crate::physical_expr::physical_exprs_bag_equal;
 use arrow::array::*;
 use arrow::buffer::{BooleanBuffer, NullBuffer};
 use arrow::compute::kernels::boolean::{not, or_kleene};
+use arrow::compute::kernels::cmp::eq as arrow_eq;
 use arrow::compute::{SortOptions, take};
 use arrow::datatypes::*;
 use arrow::util::bit_iterator::BitIndexIterator;
@@ -773,11 +774,26 @@ impl PhysicalExpr for InListExpr {
             None => {
                 // No static filter: iterate through each expression, compare, and OR results
                 let value = value.into_array(num_rows)?;
-                let found = self.list.iter().map(|expr| expr.evaluate(batch)).try_fold(
-                    BooleanArray::new(BooleanBuffer::new_unset(num_rows), None),
-                    |result, expr| -> Result<BooleanArray> {
-                        let rhs = match expr? {
-                            ColumnarValue::Array(array) => {
+                let use_arrow_eq =
+                    !value.data_type().is_nested();
+                let mut found = BooleanArray::new(
+                    BooleanBuffer::new_unset(num_rows),
+                    None,
+                );
+
+                for expr in &self.list {
+                    // Short-circuit: if every row is already true, skip remaining list items
+                    if found.true_count() == num_rows {
+                        break;
+                    }
+
+                    let rhs = match expr.evaluate(batch)? {
+                        ColumnarValue::Array(array) => {
+                            if use_arrow_eq {
+                                // Vectorized Arrow eq kernel for primitive/string/binary types
+                                arrow_eq(&value, &array)?
+                            } else {
+                                // Row-by-row comparator for nested types (Struct, List, etc.)
                                 let cmp = make_comparator(
                                     value.as_ref(),
                                     array.as_ref(),
@@ -785,42 +801,52 @@ impl PhysicalExpr for InListExpr {
                                 )?;
                                 (0..num_rows)
                                     .map(|i| {
-                                        if value.is_null(i) || array.is_null(i) {
+                                        if value.is_null(i)
+                                            || array.is_null(i)
+                                        {
                                             return None;
                                         }
                                         Some(cmp(i, i).is_eq())
                                     })
                                     .collect::<BooleanArray>()
                             }
-                            ColumnarValue::Scalar(scalar) => {
-                                // Check if scalar is null once, before the loop
-                                if scalar.is_null() {
-                                    // If scalar is null, all comparisons return null
-                                    BooleanArray::from(vec![None; num_rows])
-                                } else {
-                                    // Convert scalar to 1-element array
-                                    let array = scalar.to_array()?;
-                                    let cmp = make_comparator(
-                                        value.as_ref(),
-                                        array.as_ref(),
-                                        SortOptions::default(),
-                                    )?;
-                                    // Compare each row of value with the single scalar element
-                                    (0..num_rows)
-                                        .map(|i| {
-                                            if value.is_null(i) {
-                                                None
-                                            } else {
-                                                Some(cmp(i, 0).is_eq())
-                                            }
-                                        })
-                                        .collect::<BooleanArray>()
-                                }
+                        }
+                        ColumnarValue::Scalar(scalar) => {
+                            if scalar.is_null() {
+                                // null compared to anything is null
+                                BooleanArray::new(
+                                    BooleanBuffer::new_unset(num_rows),
+                                    Some(NullBuffer::new_null(num_rows)),
+                                )
+                            } else if use_arrow_eq {
+                                // Vectorized scalar comparison
+                                let scalar_datum =
+                                    scalar.to_scalar()?;
+                                arrow_eq(&value, &scalar_datum)?
+                            } else {
+                                // Row-by-row comparator for nested types
+                                let array = scalar.to_array()?;
+                                let cmp = make_comparator(
+                                    value.as_ref(),
+                                    array.as_ref(),
+                                    SortOptions::default(),
+                                )?;
+                                (0..num_rows)
+                                    .map(|i| {
+                                        if value.is_null(i) {
+                                            None
+                                        } else {
+                                            Some(
+                                                cmp(i, 0).is_eq(),
+                                            )
+                                        }
+                                    })
+                                    .collect::<BooleanArray>()
                             }
-                        };
-                        Ok(or_kleene(&result, &rhs)?)
-                    },
-                )?;
+                        }
+                    };
+                    found = or_kleene(&found, &rhs)?;
+                }
 
                 if self.negated { not(&found)? } else { found }
             }

From 93daecc05ac9e21c6bbc36ab96885a99a9ff1a3f Mon Sep 17 00:00:00 2001
From: Zhang Xiaofeng <zhangxffff@gmail.com>
Date: Thu, 19 Feb 2026 09:02:27 +0000
Subject: [PATCH 2/4] add dynamic in list test

---
 .../physical-expr/src/expressions/in_list.rs  | 283 ++++++++++++++++++
 1 file changed, 283 insertions(+)

diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs
index 87de5c4f9cd8a..c544f6eb53960 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -3533,4 +3533,287 @@ mod tests {
 
         Ok(())
     }
+
+    /// Helper: creates an InListExpr with `static_filter = None`
+    /// to force the dynamic evaluation path.
+    fn make_dynamic_in_list(
+        expr: Arc<dyn PhysicalExpr>,
+        list: Vec<Arc<dyn PhysicalExpr>>,
+        negated: bool,
+    ) -> Arc<InListExpr> {
+        Arc::new(InListExpr::new(expr, list, negated, None))
+    }
+
+    #[test]
+    fn test_dynamic_path_int32_scalars() -> Result<()> {
+        // Dynamic path with scalar literals (bypassing static filter)
+        let schema =
+            Schema::new(vec![Field::new("a", DataType::Int32, true)]);
+        let col_a = col("a", &schema)?;
+        let batch = RecordBatch::try_new(
+            Arc::new(schema),
+            vec![Arc::new(Int32Array::from(vec![
+                Some(1),
+                Some(2),
+                Some(3),
+                None,
+            ]))],
+        )?;
+
+        let list = vec![
+            lit(ScalarValue::Int32(Some(1))),
+            lit(ScalarValue::Int32(Some(3))),
+        ];
+        let expr = make_dynamic_in_list(col_a, list, false);
+
+        let result = expr
+            .evaluate(&batch)?
+            .into_array(batch.num_rows())?;
+        let result = as_boolean_array(&result);
+        assert_eq!(
+            result,
+            &BooleanArray::from(vec![
+                Some(true),
+                Some(false),
+                Some(true),
+                None,
+            ])
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_dynamic_path_int32_column_refs() -> Result<()> {
+        // Dynamic path with column references in the list
+        let schema = Schema::new(vec![
+            Field::new("a", DataType::Int32, true),
+            Field::new("b", DataType::Int32, true),
+            Field::new("c", DataType::Int32, true),
+        ]);
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![
+                Arc::new(Int32Array::from(vec![
+                    Some(1),
+                    Some(2),
+                    Some(3),
+                    None,
+                ])),
+                Arc::new(Int32Array::from(vec![
+                    Some(1),
+                    Some(99),
+                    Some(99),
+                    Some(99),
+                ])),
+                Arc::new(Int32Array::from(vec![
+                    Some(99),
+                    Some(99),
+                    Some(3),
+                    None,
+                ])),
+            ],
+        )?;
+
+        let col_a = col("a", &schema)?;
+        let list = vec![
+            col("b", &schema)?,
+            col("c", &schema)?,
+        ];
+        let expr = make_dynamic_in_list(col_a, list, false);
+
+        let result = expr
+            .evaluate(&batch)?
+            .into_array(batch.num_rows())?;
+        let result = as_boolean_array(&result);
+        // row 0: 1 IN (1, 99) → true
+        // row 1: 2 IN (99, 99) → false
+        // row 2: 3 IN (99, 3) → true
+        // row 3: NULL IN (99, NULL) → NULL
+        assert_eq!(
+            result,
+            &BooleanArray::from(vec![
+                Some(true),
+                Some(false),
+                Some(true),
+                None,
+            ])
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_dynamic_path_utf8_column_refs() -> Result<()> {
+        // Dynamic path with Utf8 column references
+        let schema = Schema::new(vec![
+            Field::new("a", DataType::Utf8, false),
+            Field::new("b", DataType::Utf8, false),
+        ]);
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![
+                Arc::new(StringArray::from(vec!["x", "y", "z"])),
+                Arc::new(StringArray::from(vec!["x", "x", "z"])),
+            ],
+        )?;
+
+        let col_a = col("a", &schema)?;
+        let list = vec![col("b", &schema)?];
+        let expr = make_dynamic_in_list(col_a, list, false);
+
+        let result = expr
+            .evaluate(&batch)?
+            .into_array(batch.num_rows())?;
+        let result = as_boolean_array(&result);
+        // row 0: "x" IN ("x") → true
+        // row 1: "y" IN ("x") → false
+        // row 2: "z" IN ("z") → true
+        assert_eq!(
+            result,
+            &BooleanArray::from(vec![true, false, true])
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_dynamic_path_negated() -> Result<()> {
+        // Dynamic path with NOT IN
+        let schema = Schema::new(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Int32, false),
+        ]);
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![
+                Arc::new(Int32Array::from(vec![1, 2, 3])),
+                Arc::new(Int32Array::from(vec![1, 99, 3])),
+            ],
+        )?;
+
+        let col_a = col("a", &schema)?;
+        let list = vec![col("b", &schema)?];
+        let expr = make_dynamic_in_list(col_a, list, true);
+
+        let result = expr
+            .evaluate(&batch)?
+            .into_array(batch.num_rows())?;
+        let result = as_boolean_array(&result);
+        // row 0: 1 NOT IN (1) → false
+        // row 1: 2 NOT IN (99) → true
+        // row 2: 3 NOT IN (3) → false
+        assert_eq!(
+            result,
+            &BooleanArray::from(vec![false, true, false])
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_dynamic_path_null_in_list() -> Result<()> {
+        // Dynamic path: list contains NULL scalar
+        let schema =
+            Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let col_a = col("a", &schema)?;
+        let batch = RecordBatch::try_new(
+            Arc::new(schema),
+            vec![Arc::new(Int32Array::from(vec![1, 2]))],
+        )?;
+
+        let list = vec![
+            lit(ScalarValue::Int32(None)),
+            lit(ScalarValue::Int32(Some(1))),
+        ];
+        let expr = make_dynamic_in_list(col_a, list, false);
+
+        let result = expr
+            .evaluate(&batch)?
+            .into_array(batch.num_rows())?;
+        let result = as_boolean_array(&result);
+        // row 0: 1 IN (NULL, 1) → true (true OR null = true)
+        // row 1: 2 IN (NULL, 1) → NULL (false OR null = null)
+        assert_eq!(
+            result,
+            &BooleanArray::from(vec![Some(true), None])
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_dynamic_path_short_circuit() -> Result<()> {
+        // When all rows match from the first list item,
+        // remaining items should be skipped (short-circuit).
+        // Correctness test: result should be all true.
+        let schema = Schema::new(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Int32, false),
+            Field::new("c", DataType::Int32, false),
+        ]);
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![
+                Arc::new(Int32Array::from(vec![1, 2, 3])),
+                // b == a for every row → all match after first item
+                Arc::new(Int32Array::from(vec![1, 2, 3])),
+                Arc::new(Int32Array::from(vec![99, 99, 99])),
+            ],
+        )?;
+
+        let col_a = col("a", &schema)?;
+        let list = vec![
+            col("b", &schema)?,
+            col("c", &schema)?,
+        ];
+        let expr = make_dynamic_in_list(col_a, list, false);
+
+        let result = expr
+            .evaluate(&batch)?
+            .into_array(batch.num_rows())?;
+        let result = as_boolean_array(&result);
+        assert_eq!(
+            result,
+            &BooleanArray::from(vec![true, true, true])
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_dynamic_path_float_nan() -> Result<()> {
+        // Verify NaN == NaN is true in the dynamic path
+        // (consistent with Arrow's totalOrder semantics)
+        let schema = Schema::new(vec![
+            Field::new("a", DataType::Float64, false),
+            Field::new("b", DataType::Float64, false),
+        ]);
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![
+                Arc::new(Float64Array::from(vec![
+                    f64::NAN,
+                    1.0,
+                    f64::NAN,
+                ])),
+                Arc::new(Float64Array::from(vec![
+                    f64::NAN,
+                    2.0,
+                    0.0,
+                ])),
+            ],
+        )?;
+
+        let col_a = col("a", &schema)?;
+        let list = vec![col("b", &schema)?];
+        let expr = make_dynamic_in_list(col_a, list, false);
+
+        let result = expr
+            .evaluate(&batch)?
+            .into_array(batch.num_rows())?;
+        let result = as_boolean_array(&result);
+        // row 0: NaN IN (NaN) → true
+        // row 1: 1.0 IN (2.0) → false
+        // row 2: NaN IN (0.0) → false
+        assert_eq!(
+            result,
+            &BooleanArray::from(vec![true, false, false])
+        );
+        Ok(())
+    }
 }

From 9c2fe211e7134c941ccb6b2c72ece44a7b278409 Mon Sep 17 00:00:00 2001
From: Zhang Xiaofeng <zhangxffff@gmail.com>
Date: Thu, 19 Feb 2026 09:13:57 +0000
Subject: [PATCH 3/4] fix cargo fmt and clippy

---
 datafusion/physical-expr/benches/in_list.rs   |  39 ++----
 .../physical-expr/src/expressions/in_list.rs  | 129 ++++--------------
 2 files changed, 39 insertions(+), 129 deletions(-)

diff --git a/datafusion/physical-expr/benches/in_list.rs b/datafusion/physical-expr/benches/in_list.rs
index fd5a6d0e88dce..74b6a9538d335 100644
--- a/datafusion/physical-expr/benches/in_list.rs
+++ b/datafusion/physical-expr/benches/in_list.rs
@@ -23,8 +23,8 @@ use arrow::datatypes::{Field, Schema};
 use arrow::record_batch::RecordBatch;
 use criterion::{Criterion, criterion_group, criterion_main};
 use datafusion_common::ScalarValue;
-use datafusion_physical_expr::expressions::{col, in_list, lit};
 use datafusion_physical_expr::PhysicalExpr;
+use datafusion_physical_expr::expressions::{col, in_list, lit};
 use rand::distr::Alphanumeric;
 use rand::prelude::*;
 use std::any::TypeId;
@@ -226,14 +226,10 @@ fn do_bench_dynamic(
     c: &mut Criterion,
     name: &str,
     values: ArrayRef,
-    list_cols: Vec<ArrayRef>,
+    list_cols: &[ArrayRef],
     _match_percent: f64,
 ) {
-    let mut fields = vec![Field::new(
-        "a",
-        values.data_type().clone(),
-        true,
-    )];
+    let mut fields = vec![Field::new("a", values.data_type().clone(), true)];
     let mut columns: Vec<ArrayRef> = vec![values];
 
     // Build list expressions: mix of column refs (forces dynamic path)
@@ -242,11 +238,7 @@ fn do_bench_dynamic(
         .enumerate()
         .map(|(i, col_arr)| {
             let name = format!("b{i}");
-            fields.push(Field::new(
-                &name,
-                col_arr.data_type().clone(),
-                true,
-            ));
+            fields.push(Field::new(&name, col_arr.data_type().clone(), true));
             columns.push(Arc::clone(col_arr));
             Field::new(&name, col_arr.data_type().clone(), true)
         })
@@ -258,15 +250,8 @@ fn do_bench_dynamic(
         .map(|f| col(f.name(), &schema).unwrap())
         .collect();
 
-    let expr = in_list(
-        col("a", &schema).unwrap(),
-        list_exprs,
-        &false,
-        &schema,
-    )
-    .unwrap();
-    let batch =
-        RecordBatch::try_new(Arc::new(schema), columns).unwrap();
+    let expr = in_list(col("a", &schema).unwrap(), list_exprs, &false, &schema).unwrap();
+    let batch = RecordBatch::try_new(Arc::new(schema), columns).unwrap();
 
     c.bench_function(name, |b| {
         b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap()))
@@ -323,7 +308,7 @@ fn bench_dynamic_int32(c: &mut Criterion) {
                         (null_percent * 100.0) as u32
                     ),
                     Arc::new(values),
-                    list_cols,
+                    &list_cols,
                     match_percent,
                 );
             }
@@ -339,12 +324,10 @@ fn bench_dynamic_utf8(c: &mut Criterion) {
         for match_percent in [0.0, 0.5, 1.0] {
             // Generate the "needle" column
             let value_strings: Vec<Option<String>> = (0..ARRAY_LENGTH)
-                .map(|_| {
-                    rng.random_bool(0.8)
-                        .then(|| random_string(&mut rng, 12))
-                })
+                .map(|_| rng.random_bool(0.8).then(|| random_string(&mut rng, 12)))
                 .collect();
-            let values: StringArray = value_strings.iter().map(|s| s.as_deref()).collect();
+            let values: StringArray =
+                value_strings.iter().map(|s| s.as_deref()).collect();
 
             // Generate list columns with controlled match rate
             let list_cols: Vec<ArrayRef> = (0..list_size)
@@ -371,7 +354,7 @@ fn bench_dynamic_utf8(c: &mut Criterion) {
                     (match_percent * 100.0) as u32,
                 ),
                 Arc::new(values),
-                list_cols,
+                &list_cols,
                 match_percent,
             );
         }
diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs
index c544f6eb53960..58ccf69a18786 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -774,12 +774,9 @@ impl PhysicalExpr for InListExpr {
             None => {
                 // No static filter: iterate through each expression, compare, and OR results
                 let value = value.into_array(num_rows)?;
-                let use_arrow_eq =
-                    !value.data_type().is_nested();
-                let mut found = BooleanArray::new(
-                    BooleanBuffer::new_unset(num_rows),
-                    None,
-                );
+                let use_arrow_eq = !value.data_type().is_nested();
+                let mut found =
+                    BooleanArray::new(BooleanBuffer::new_unset(num_rows), None);
 
                 for expr in &self.list {
                     // Short-circuit: if every row is already true, skip remaining list items
@@ -801,9 +798,7 @@ impl PhysicalExpr for InListExpr {
                                 )?;
                                 (0..num_rows)
                                     .map(|i| {
-                                        if value.is_null(i)
-                                            || array.is_null(i)
-                                        {
+                                        if value.is_null(i) || array.is_null(i) {
                                             return None;
                                         }
                                         Some(cmp(i, i).is_eq())
@@ -820,8 +815,7 @@ impl PhysicalExpr for InListExpr {
                                 )
                             } else if use_arrow_eq {
                                 // Vectorized scalar comparison
-                                let scalar_datum =
-                                    scalar.to_scalar()?;
+                                let scalar_datum = scalar.to_scalar()?;
                                 arrow_eq(&value, &scalar_datum)?
                             } else {
                                 // Row-by-row comparator for nested types
@@ -836,9 +830,7 @@ impl PhysicalExpr for InListExpr {
                                         if value.is_null(i) {
                                             None
                                         } else {
-                                            Some(
-                                                cmp(i, 0).is_eq(),
-                                            )
+                                            Some(cmp(i, 0).is_eq())
                                         }
                                     })
                                     .collect::<BooleanArray>()
@@ -3547,8 +3539,7 @@ mod tests {
     #[test]
     fn test_dynamic_path_int32_scalars() -> Result<()> {
         // Dynamic path with scalar literals (bypassing static filter)
-        let schema =
-            Schema::new(vec![Field::new("a", DataType::Int32, true)]);
+        let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
         let col_a = col("a", &schema)?;
         let batch = RecordBatch::try_new(
             Arc::new(schema),
@@ -3566,18 +3557,11 @@ mod tests {
         ];
         let expr = make_dynamic_in_list(col_a, list, false);
 
-        let result = expr
-            .evaluate(&batch)?
-            .into_array(batch.num_rows())?;
+        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
         let result = as_boolean_array(&result);
         assert_eq!(
             result,
-            &BooleanArray::from(vec![
-                Some(true),
-                Some(false),
-                Some(true),
-                None,
-            ])
+            &BooleanArray::from(vec![Some(true), Some(false), Some(true), None,])
         );
         Ok(())
     }
@@ -3593,37 +3577,22 @@ mod tests {
         let batch = RecordBatch::try_new(
             Arc::new(schema.clone()),
             vec![
-                Arc::new(Int32Array::from(vec![
-                    Some(1),
-                    Some(2),
-                    Some(3),
-                    None,
-                ])),
+                Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3), None])),
                 Arc::new(Int32Array::from(vec![
                     Some(1),
                     Some(99),
                     Some(99),
                     Some(99),
                 ])),
-                Arc::new(Int32Array::from(vec![
-                    Some(99),
-                    Some(99),
-                    Some(3),
-                    None,
-                ])),
+                Arc::new(Int32Array::from(vec![Some(99), Some(99), Some(3), None])),
             ],
         )?;
 
         let col_a = col("a", &schema)?;
-        let list = vec![
-            col("b", &schema)?,
-            col("c", &schema)?,
-        ];
+        let list = vec![col("b", &schema)?, col("c", &schema)?];
         let expr = make_dynamic_in_list(col_a, list, false);
 
-        let result = expr
-            .evaluate(&batch)?
-            .into_array(batch.num_rows())?;
+        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
         let result = as_boolean_array(&result);
         // row 0: 1 IN (1, 99) → true
         // row 1: 2 IN (99, 99) → false
@@ -3631,12 +3600,7 @@ mod tests {
         // row 3: NULL IN (99, NULL) → NULL
         assert_eq!(
             result,
-            &BooleanArray::from(vec![
-                Some(true),
-                Some(false),
-                Some(true),
-                None,
-            ])
+            &BooleanArray::from(vec![Some(true), Some(false), Some(true), None,])
         );
         Ok(())
     }
@@ -3660,17 +3624,12 @@ mod tests {
         let list = vec![col("b", &schema)?];
         let expr = make_dynamic_in_list(col_a, list, false);
 
-        let result = expr
-            .evaluate(&batch)?
-            .into_array(batch.num_rows())?;
+        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
         let result = as_boolean_array(&result);
         // row 0: "x" IN ("x") → true
         // row 1: "y" IN ("x") → false
         // row 2: "z" IN ("z") → true
-        assert_eq!(
-            result,
-            &BooleanArray::from(vec![true, false, true])
-        );
+        assert_eq!(result, &BooleanArray::from(vec![true, false, true]));
         Ok(())
     }
 
@@ -3693,25 +3652,19 @@ mod tests {
         let list = vec![col("b", &schema)?];
         let expr = make_dynamic_in_list(col_a, list, true);
 
-        let result = expr
-            .evaluate(&batch)?
-            .into_array(batch.num_rows())?;
+        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
         let result = as_boolean_array(&result);
         // row 0: 1 NOT IN (1) → false
         // row 1: 2 NOT IN (99) → true
         // row 2: 3 NOT IN (3) → false
-        assert_eq!(
-            result,
-            &BooleanArray::from(vec![false, true, false])
-        );
+        assert_eq!(result, &BooleanArray::from(vec![false, true, false]));
         Ok(())
     }
 
     #[test]
     fn test_dynamic_path_null_in_list() -> Result<()> {
         // Dynamic path: list contains NULL scalar
-        let schema =
-            Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
         let col_a = col("a", &schema)?;
         let batch = RecordBatch::try_new(
             Arc::new(schema),
@@ -3724,16 +3677,11 @@ mod tests {
         ];
         let expr = make_dynamic_in_list(col_a, list, false);
 
-        let result = expr
-            .evaluate(&batch)?
-            .into_array(batch.num_rows())?;
+        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
         let result = as_boolean_array(&result);
         // row 0: 1 IN (NULL, 1) → true (true OR null = true)
         // row 1: 2 IN (NULL, 1) → NULL (false OR null = null)
-        assert_eq!(
-            result,
-            &BooleanArray::from(vec![Some(true), None])
-        );
+        assert_eq!(result, &BooleanArray::from(vec![Some(true), None]));
         Ok(())
     }
 
@@ -3758,20 +3706,12 @@ mod tests {
         )?;
 
         let col_a = col("a", &schema)?;
-        let list = vec![
-            col("b", &schema)?,
-            col("c", &schema)?,
-        ];
+        let list = vec![col("b", &schema)?, col("c", &schema)?];
         let expr = make_dynamic_in_list(col_a, list, false);
 
-        let result = expr
-            .evaluate(&batch)?
-            .into_array(batch.num_rows())?;
+        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
         let result = as_boolean_array(&result);
-        assert_eq!(
-            result,
-            &BooleanArray::from(vec![true, true, true])
-        );
+        assert_eq!(result, &BooleanArray::from(vec![true, true, true]));
         Ok(())
     }
 
@@ -3786,16 +3726,8 @@ mod tests {
         let batch = RecordBatch::try_new(
             Arc::new(schema.clone()),
             vec![
-                Arc::new(Float64Array::from(vec![
-                    f64::NAN,
-                    1.0,
-                    f64::NAN,
-                ])),
-                Arc::new(Float64Array::from(vec![
-                    f64::NAN,
-                    2.0,
-                    0.0,
-                ])),
+                Arc::new(Float64Array::from(vec![f64::NAN, 1.0, f64::NAN])),
+                Arc::new(Float64Array::from(vec![f64::NAN, 2.0, 0.0])),
             ],
         )?;
 
@@ -3803,17 +3735,12 @@ mod tests {
         let list = vec![col("b", &schema)?];
         let expr = make_dynamic_in_list(col_a, list, false);
 
-        let result = expr
-            .evaluate(&batch)?
-            .into_array(batch.num_rows())?;
+        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
         let result = as_boolean_array(&result);
         // row 0: NaN IN (NaN) → true
         // row 1: 1.0 IN (2.0) → false
         // row 2: NaN IN (0.0) → false
-        assert_eq!(
-            result,
-            &BooleanArray::from(vec![true, false, false])
-        );
+        assert_eq!(result, &BooleanArray::from(vec![true, false, false]));
         Ok(())
     }
 }

From 61af0e2c917097617bb4de3a80bcaba26dde8d0f Mon Sep 17 00:00:00 2001
From: Zhang Xiaofeng <zhangxffff@gmail.com>
Date: Thu, 19 Feb 2026 10:09:42 +0000
Subject: [PATCH 4/4] remove unused match_percent

---
 datafusion/physical-expr/benches/in_list.rs | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/datafusion/physical-expr/benches/in_list.rs b/datafusion/physical-expr/benches/in_list.rs
index 74b6a9538d335..405088f88fbb2 100644
--- a/datafusion/physical-expr/benches/in_list.rs
+++ b/datafusion/physical-expr/benches/in_list.rs
@@ -51,7 +51,9 @@ fn random_string(rng: &mut StdRng, len: usize) -> String {
 }
 
 const IN_LIST_LENGTHS: [usize; 4] = [3, 8, 28, 100];
+const DYNAMIC_LIST_LENGTHS: [usize; 3] = [3, 8, 28];
 const NULL_PERCENTS: [f64; 2] = [0., 0.2];
+const MATCH_PERCENTS: [f64; 3] = [0.0, 0.5, 1.0];
 const STRING_LENGTHS: [usize; 3] = [3, 12, 100];
 const ARRAY_LENGTH: usize = 8192;
 
@@ -227,7 +229,6 @@ fn do_bench_dynamic(
     name: &str,
     values: ArrayRef,
     list_cols: &[ArrayRef],
-    _match_percent: f64,
 ) {
     let mut fields = vec![Field::new("a", values.data_type().clone(), true)];
     let mut columns: Vec<ArrayRef> = vec![values];
@@ -262,9 +263,9 @@ fn do_bench_dynamic(
 fn bench_dynamic_int32(c: &mut Criterion) {
     let mut rng = StdRng::seed_from_u64(42);
 
-    for list_size in [3, 8, 28] {
-        for match_percent in [0.0, 0.5, 1.0] {
-            for null_percent in [0.0, 0.2] {
+    for list_size in DYNAMIC_LIST_LENGTHS {
+        for match_percent in MATCH_PERCENTS {
+            for null_percent in NULL_PERCENTS {
                 // Generate the "needle" column
                 let values: Int32Array = (0..ARRAY_LENGTH)
                     .map(|_| {
@@ -309,7 +310,6 @@ fn bench_dynamic_int32(c: &mut Criterion) {
                     ),
                     Arc::new(values),
                     &list_cols,
-                    match_percent,
                 );
             }
         }
@@ -320,8 +320,8 @@ fn bench_dynamic_int32(c: &mut Criterion) {
 fn bench_dynamic_utf8(c: &mut Criterion) {
     let mut rng = StdRng::seed_from_u64(99);
 
-    for list_size in [3, 8, 28] {
-        for match_percent in [0.0, 0.5, 1.0] {
+    for list_size in DYNAMIC_LIST_LENGTHS {
+        for match_percent in MATCH_PERCENTS {
             // Generate the "needle" column
             let value_strings: Vec<Option<String>> = (0..ARRAY_LENGTH)
                 .map(|_| rng.random_bool(0.8).then(|| random_string(&mut rng, 12)))
@@ -355,7 +355,6 @@ fn bench_dynamic_utf8(c: &mut Criterion) {
                 ),
                 Arc::new(values),
                 &list_cols,
-                match_percent,
             );
         }
     }